Mirror of https://github.com/ytdl-org/youtube-dl (synced 2025-10-24 00:58:37 +09:00)

Compare commits: 2019.07.14...2019.10.22 (148 commits)
Commits in this range (SHA1):
820215f0e3, b4818e3c7a, 2297c0d7d9, 824fa51165, 34e3885bc9, 59296bae7e, 755541a4c8, 86f63633c8,
0001157594, bc48773ed4, d07866f13e, 2b115b9460, e29e96a9f5, 6d394a66f5, 7815d6b743, 173190f5e3,
974311b5aa, 30eb05cb41, 2af01c0293, 7e05df71b7, a1ee23e98f, 311ee45731, c317b6163b, 2765c47a8c,
07b50f616e, 1907f06e7b, d4bb825b83, 560d3b7d7c, 4bf568d36c, 05446d483d, 3a37f2c3be, 0b87beefe6,
fd4db1ebc2, b64045cd2a, c2915de82e, 4e72d02f39, 76e510b92c, 9679a62a28, ca20b13048, 894b3826f5,
aaf9d904aa, 25e911a968, 74bc299453, 2906631e12, 326ae4ff96, 72fd4d0c6a, f4b865c613, 412f44f4b3,
6483fbd336, 8130ac42e5, cb3e4a2947, 2a88a0c44d, 33c1c7d80f, 21d3c21e62, a373befa25, df63cafe49,
d06daf23da, 8e9fdcbe27, 666d808e70, 7d327fea5b, 4e3f1f0469, 4bc15a68d1, edb2820ca5, 6cf6b357f5,
f455a934e9, d9d3098675, 1cb812d3c2, 6fd26a7d4a, 9cf26b6e1d, 20e11b70ac, e1f692f0b3, 2f851a7d7d,
4878759f3b, 303d3e142c, bd10b229c0, 035c7a59e8, bf1317d257, bff90fc518, 31dbd054c8, 66d04c74e0,
d7da1e37c7, f620d0d860, 79dd8884bb, df228355fd, 8945b10f6e, 7cb51b5daf, d78657fd18, cc73d5ad15,
71f47617c8, 3f46a25a97, 9d058b3206, b500955a58, acc86c9a97, b72305f078, 494d664e67, d1fcf255c5,
183a18c4e7, 393cc31d5e, 0add33abcb, 0326bcb6c1, def849e0e6, 69611a1616, 351f37c022, 3bce4ff7d9,
ffddb11264, 64b6a4e91e, b3d39be239, 1357734978, eb9c9c74a6, 5efbc1366f, 995f319b06, d9d3a5a816,
4f2d735803, 2e9522b061, be306d6a31, 33b529fabd, 07f3a05c87, 535111657b, 826dcff99c, 9a37ff82f1,
766c4f6090, 7279163412, 07ab44c420, 2c8b1a21e8, c2d125d99f, 85c2c4b4ab, 8614a03f9c, 8dbf751aa2,
90634acfcf, eaba9dd6c2, 843ad1796b, 608b8a4300, ab794a553c, 3b446ab351, 13a75688a5, 2e18adec98,
9c1da4a9f9, 5e1c39ac85, 1824bfdcdf, 2f1991ff14, 8b4a0ebf10, f61496863d, 799756a3b3, 7d4dd3e5b4,
f2a213d025, 791d2e8117, 2adedc477e, 898238e9f8
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.07.14
|
||||
[debug] youtube-dl version 2019.10.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.22**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.07.14
|
||||
[debug] youtube-dl version 2019.10.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.10.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.07.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.10.22**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
@@ -339,6 +339,72 @@ Incorrect:
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Inline values
|
||||
|
||||
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
|
||||
### Collapse fallbacks
|
||||
|
||||
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||
|
||||
#### Example
|
||||
|
||||
Good:
|
||||
|
||||
```python
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
Unwieldy:
|
||||
|
||||
```python
|
||||
description = (
|
||||
self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, default=None)
|
||||
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||
```
|
||||
|
||||
Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||
|
||||
### Trailing parentheses
|
||||
|
||||
Always move trailing parentheses after the last argument.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list)
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list,
|
||||
)
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
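For instance, a minimal sketch of the convention (the metadata dict and its field names are invented for illustration; only `int_or_none` and `float_or_none` are the real helpers from `youtube_dl/utils.py`):

```python
from youtube_dl.utils import float_or_none, int_or_none

# Hypothetical metadata as an extractor might receive it.
meta = {'duration': '127.5', 'viewCount': '1042'}

duration = float_or_none(meta.get('duration'))    # 127.5
view_count = int_or_none(meta.get('viewCount'))   # 1042
like_count = int_or_none(meta.get('likeCount'))   # missing key yields None, not an exception
```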
|
||||
|
184
ChangeLog
@@ -1,3 +1,187 @@
|
||||
version 2019.10.22
|
||||
|
||||
Core
|
||||
* [utils] Improve subtitles_filename (#22753)
|
||||
|
||||
Extractors
|
||||
* [facebook] Bypass download rate limits (#21018)
|
||||
+ [contv] Add support for contv.com
|
||||
- [viewster] Remove extractor
|
||||
* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
|
||||
* Update the list of domains
|
||||
+ Add support for aa-encoded video data
|
||||
* Improve jwplayer format extraction
|
||||
+ Add support for Clappr sources
|
||||
* [mangomolo] Fix video format extraction and add support for player URLs
|
||||
* [audioboom] Improve metadata extraction
|
||||
* [twitch] Update VOD URL matching (#22395, #22727)
|
||||
- [mit] Remove support for video.mit.edu (#22403)
|
||||
- [servingsys] Remove extractor (#22639)
|
||||
* [dumpert] Fix extraction (#22428, #22564)
|
||||
* [atresplayer] Fix extraction (#16277, #16716)
|
||||
|
||||
|
||||
version 2019.10.16
|
||||
|
||||
Core
|
||||
* [extractor/common] Make _is_valid_url more relaxed
|
||||
|
||||
Extractors
|
||||
* [vimeo] Improve album videos id extraction (#22599)
|
||||
+ [globo] Extract subtitles (#22713)
|
||||
* [bokecc] Improve player params extraction (#22638)
|
||||
* [nexx] Handle result list (#22666)
|
||||
* [vimeo] Fix VHX embed extraction
|
||||
* [nbc] Switch to graphql API (#18581, #22693, #22701)
|
||||
- [vessel] Remove extractor
|
||||
- [promptfile] Remove extractor (#6239)
|
||||
* [kaltura] Fix service URL extraction (#22658)
|
||||
* [kaltura] Fix embed info strip (#22658)
|
||||
* [globo] Fix format extraction (#20319)
|
||||
* [redtube] Improve metadata extraction (#22492, #22615)
|
||||
* [pornhub:uservideos:upload] Fix extraction (#22619)
|
||||
+ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
|
||||
- [wimp] Remove extractor (#22088, #22091)
|
||||
+ [gfycat] Extend URL regular expression (#22225)
|
||||
+ [chaturbate] Extend URL regular expression (#22309)
|
||||
* [peertube] Update instances (#22414)
|
||||
+ [telequebec] Add support for coucou.telequebec.tv (#22482)
|
||||
+ [xvideos] Extend URL regular expression (#22471)
|
||||
- [youtube] Remove support for invidious.enkirton.net (#22543)
|
||||
+ [openload] Add support for oload.monster (#22592)
|
||||
* [nrktv:seriebase] Fix extraction (#22596)
|
||||
+ [youtube] Add support for yt.lelux.fi (#22597)
|
||||
* [orf:tvthek] Make manifest requests non fatal (#22578)
|
||||
* [teachable] Skip login when already logged in (#22572)
|
||||
* [viewlift] Improve extraction (#22545)
|
||||
* [nonktube] Fix extraction (#22544)
|
||||
|
||||
|
||||
version 2019.09.28
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
|
||||
|
||||
Extractors
|
||||
* [vk] Fix extraction (#22522)
|
||||
* [heise] Fix kaltura embeds extraction (#22514)
|
||||
* [ted] Check for resources validity and extract subtitled downloads (#22513)
|
||||
+ [youtube] Add support for
|
||||
owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
|
||||
+ [nhk] Add support for clips
|
||||
* [nhk] Fix video extraction (#22249, #22353)
|
||||
* [byutv] Fix extraction (#22070)
|
||||
+ [openload] Add support for oload.online (#22304)
|
||||
+ [youtube] Add support for invidious.drycat.fr (#22451)
|
||||
* [jwplatfom] Do not match video URLs (#20596, #22148)
|
||||
* [youtube:playlist] Unescape playlist uploader (#22483)
|
||||
+ [bilibili] Add support audio albums and songs (#21094)
|
||||
+ [instagram] Add support for tv URLs
|
||||
+ [mixcloud] Allow uppercase letters in format URLs (#19280)
|
||||
* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
|
||||
#12842, #13912, #15669, #16303)
|
||||
* [hotstar] Use native HLS downloader by default
|
||||
+ [hotstar] Extract more formats (#22323)
|
||||
* [9now] Fix extraction (#22361)
|
||||
* [zdf] Bypass geo restriction
|
||||
+ [tv4] Extract series metadata
|
||||
* [tv4] Fix extraction (#22443)
|
||||
|
||||
|
||||
version 2019.09.12.1
|
||||
|
||||
Extractors
|
||||
* [youtube] Remove quality and tbr for itag 43 (#22372)
|
||||
|
||||
|
||||
version 2019.09.12
|
||||
|
||||
Extractors
|
||||
* [youtube] Quick extraction tempfix (#22367, #22163)
|
||||
|
||||
|
||||
version 2019.09.01
|
||||
|
||||
Core
|
||||
+ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
|
||||
#21859)
|
||||
+ [downloader/external] Respect mtime option for aria2c (#22242)
|
||||
|
||||
Extractors
|
||||
+ [xhamster:user] Add support for user pages (#16330, #18454)
|
||||
+ [xhamster] Add support for more domains
|
||||
+ [verystream] Add support for woof.tube (#22217)
|
||||
+ [dailymotion] Add support for lequipe.fr (#21328, #22152)
|
||||
+ [openload] Add support for oload.vip (#22205)
|
||||
+ [bbccouk] Extend URL regular expression (#19200)
|
||||
+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
|
||||
* [safari] Fix authentication (#22161, #22184)
|
||||
* [usanetwork] Fix extraction (#22105)
|
||||
+ [einthusan] Add support for einthusan.ca (#22171)
|
||||
* [youtube] Improve unavailable message extraction (#22117)
|
||||
+ [piksel] Extract subtitles (#20506)
|
||||
|
||||
|
||||
version 2019.08.13
|
||||
|
||||
Core
|
||||
* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
|
||||
* [YoutubeDL] Check annotations availability (#18582)
|
||||
|
||||
Extractors
|
||||
* [youtube:playlist] Improve flat extraction (#21927)
|
||||
* [youtube] Fix annotations extraction (#22045)
|
||||
+ [discovery] Extract series meta field (#21808)
|
||||
* [youtube] Improve error detection (#16445)
|
||||
* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
|
||||
+ [roosterteeth] Add support for watch URLs
|
||||
* [discovery] Limit video data by show slug (#21980)
|
||||
|
||||
|
||||
version 2019.08.02
|
||||
|
||||
Extractors
|
||||
+ [tvigle] Add support for HLS and DASH formats (#21967)
|
||||
* [tvigle] Fix extraction (#21967)
|
||||
+ [yandexvideo] Add support for DASH formats (#21971)
|
||||
* [discovery] Use API call for video data extraction (#21808)
|
||||
+ [mgtv] Extract format_note (#21881)
|
||||
* [tvn24] Fix metadata extraction (#21833, #21834)
|
||||
* [dlive] Relax URL regular expression (#21909)
|
||||
+ [openload] Add support for oload.best (#21913)
|
||||
* [youtube] Improve metadata extraction for age gate content (#21943)
|
||||
|
||||
|
||||
version 2019.07.30
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix and improve title and description extraction (#21934)
|
||||
|
||||
|
||||
version 2019.07.27
|
||||
|
||||
Extractors
|
||||
+ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
|
||||
+ [discovery] Add support go.discovery.com URLs
|
||||
* [youtube:playlist] Relax video regular expression (#21844)
|
||||
* [generic] Restrict --default-search schemeless URLs detection pattern
|
||||
(#21842)
|
||||
* [vrv] Fix CMS signing query extraction (#21809)
|
||||
|
||||
|
||||
version 2019.07.16
|
||||
|
||||
Extractors
|
||||
+ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
|
||||
(#21281, #21290)
|
||||
* [kaltura] Check source format URL (#21290)
|
||||
* [ctsnews] Fix YouTube embeds extraction (#21678)
|
||||
+ [einthusan] Add support for einthusan.com (#21748, #21775)
|
||||
+ [youtube] Add support for invidious.mastodon.host (#21777)
|
||||
+ [gfycat] Extend URL regular expression (#21779, #21780)
|
||||
* [youtube] Restrict is_live extraction (#21782)
|
||||
|
||||
|
||||
version 2019.07.14
|
||||
|
||||
Extractors
|
||||
|
66
README.md
@@ -1216,6 +1216,72 @@ Incorrect:
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Inline values
|
||||
|
||||
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
|
||||
### Collapse fallbacks
|
||||
|
||||
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||
|
||||
#### Example
|
||||
|
||||
Good:
|
||||
|
||||
```python
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
Unwieldy:
|
||||
|
||||
```python
|
||||
description = (
|
||||
self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, default=None)
|
||||
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||
```
|
||||
|
||||
Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||
|
||||
### Trailing parentheses
|
||||
|
||||
Always move trailing parentheses after the last argument.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list)
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list,
|
||||
)
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
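As a rough illustration of the string-to-number use case (the payload below is invented; `scale` is an optional argument of these helpers, used here for unit conversion):

```python
from youtube_dl.utils import float_or_none, int_or_none

video = {'durationMs': '183000', 'width': '1920'}  # made-up API payload

duration = float_or_none(video.get('durationMs'), scale=1000)  # 183.0 (seconds)
width = int_or_none(video.get('width'))                        # 1920
tbr = int_or_none(video.get('bitrate'))                        # absent, so None
```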
|
||||
|
@@ -98,6 +98,8 @@
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili**
|
||||
- **BilibiliAudio**
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
@@ -181,6 +183,7 @@
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
@@ -692,7 +695,6 @@
|
||||
- **PornoXO**
|
||||
- **PornTube**
|
||||
- **PressTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
@@ -783,7 +785,6 @@
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
- **Servus**
|
||||
- **Sexu**
|
||||
- **SeznamZpravy**
|
||||
@@ -882,6 +883,7 @@
|
||||
- **TeleQuebec**
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleQuebecSquat**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TennisTV**
|
||||
@@ -989,7 +991,6 @@
|
||||
- **VeeHD**
|
||||
- **Veoh**
|
||||
- **verystream**
|
||||
- **Vessel**
|
||||
- **Vesti**: Вести.Ru
|
||||
- **Vevo**
|
||||
- **VevoPlaylist**
|
||||
@@ -1004,7 +1005,6 @@
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videomore**
|
||||
@@ -1022,7 +1022,6 @@
|
||||
- **vier:videos**
|
||||
- **ViewLift**
|
||||
- **ViewLiftEmbed**
|
||||
- **Viewster**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
- **viki:channel**
|
||||
@@ -1088,7 +1087,6 @@
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **WorldStarHipHop**
|
||||
@@ -1097,9 +1095,10 @@
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
- **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XHamsterUser**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
- **xiami:artist**: 虾米音乐 - 歌手
|
||||
- **xiami:collection**: 虾米音乐 - 精选集
|
||||
@@ -1117,6 +1116,7 @@
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **yahoo:gyao**
|
||||
- **yahoo:gyao:player**
|
||||
- **yahoo:japannews**: Yahoo! Japan News
|
||||
- **YandexDisk**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
|
@@ -74,6 +74,7 @@ from youtube_dl.utils import (
|
||||
str_to_int,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
subtitles_filename,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -261,6 +262,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||
|
||||
def test_subtitles_filename(self):
|
||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
||||
self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
|
||||
|
||||
def test_remove_start(self):
|
||||
self.assertEqual(remove_start(None, 'A - '), None)
|
||||
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||
|
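The three assertions added above pin down the improved `subtitles_filename` behaviour (#22753): the real extension is stripped only when it matches the extension the extractor expects. A rough sketch of that behaviour (the actual helper in `youtube_dl/utils.py` may differ in detail):

```python
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    # Strip the extension only when it matches what the extractor reported;
    # otherwise keep the whole name as the base (third test case above).
    base, sep, real_ext = filename.rpartition('.')
    if not sep or (expected_real_ext and real_ext != expected_real_ext):
        base = filename
    return '%s.%s.%s' % (base, sub_lang, sub_format)
```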
@@ -852,8 +852,9 @@ class YoutubeDL(object):
|
||||
extract_flat = self.params.get('extract_flat', False)
|
||||
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
|
||||
or extract_flat is True):
|
||||
if self.params.get('forcejson', False):
|
||||
self.to_stdout(json.dumps(ie_result))
|
||||
self.__forced_printings(
|
||||
ie_result, self.prepare_filename(ie_result),
|
||||
incomplete=True)
|
||||
return ie_result
|
||||
|
||||
if result_type == 'video':
|
||||
@@ -1693,6 +1694,36 @@ class YoutubeDL(object):
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
def __forced_printings(self, info_dict, filename, incomplete):
|
||||
def print_mandatory(field):
|
||||
if (self.params.get('force%s' % field, False)
|
||||
and (not incomplete or info_dict.get(field) is not None)):
|
||||
self.to_stdout(info_dict[field])
|
||||
|
||||
def print_optional(field):
|
||||
if (self.params.get('force%s' % field, False)
|
||||
and info_dict.get(field) is not None):
|
||||
self.to_stdout(info_dict[field])
|
||||
|
||||
print_mandatory('title')
|
||||
print_mandatory('id')
|
||||
if self.params.get('forceurl', False) and not incomplete:
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
for f in info_dict['requested_formats']:
|
||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||
else:
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
print_optional('thumbnail')
|
||||
print_optional('description')
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
self.to_stdout(filename)
|
||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||
print_mandatory('format')
|
||||
if self.params.get('forcejson', False):
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result."""
|
||||
|
||||
@@ -1703,9 +1734,8 @@ class YoutubeDL(object):
|
||||
if self._num_downloads >= int(max_downloads):
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
# TODO: backward compatibility, to be removed
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
if len(info_dict['title']) > 200:
|
||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
@@ -1720,29 +1750,7 @@ class YoutubeDL(object):
|
||||
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
self.to_stdout(info_dict['fulltitle'])
|
||||
if self.params.get('forceid', False):
|
||||
self.to_stdout(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
for f in info_dict['requested_formats']:
|
||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||
else:
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
self.to_stdout(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
self.to_stdout(info_dict['description'])
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
self.to_stdout(filename)
|
||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||
if self.params.get('forceformat', False):
|
||||
self.to_stdout(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
self.__forced_printings(info_dict, filename, incomplete=False)
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@@ -1783,6 +1791,8 @@ class YoutubeDL(object):
|
||||
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||
self.to_screen('[info] Video annotations are already present')
|
||||
elif not info_dict.get('annotations'):
|
||||
self.report_warning('There are no annotations to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||
@@ -1804,7 +1814,7 @@ class YoutubeDL(object):
|
||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
else:
|
||||
|
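The net effect of the refactoring above is that the flat-playlist path and `process_info` now share one `__forced_printings` helper, so every `--get-*` option is honoured with `--flat-playlist` (#22493). A rough usage sketch through the Python API (the option keys mirror the params read in the hunk; the playlist URL is a placeholder):

```python
from youtube_dl import YoutubeDL

opts = {
    'extract_flat': 'in_playlist',  # do not resolve individual entries
    'forcetitle': True,             # equivalent of --get-title
    'forceid': True,                # equivalent of --get-id
    'simulate': True,               # print only, download nothing
}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/playlist?list=PLACEHOLDER'])
```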
@@ -94,7 +94,7 @@ def _real_main(argv=None):
|
||||
if opts.verbose:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except IOError:
|
||||
sys.exit('ERROR: batch file could not be read')
|
||||
sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
|
||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
@@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attemps
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
|
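The comment above explains why a fragment that fails with an HTTP error is retried with identical request data instead of aborting the whole download. A simplified, illustrative retry loop under that assumption (`download_fragment` and `fragment_retries` are invented names, not the real `DashSegmentsFD` code):

```python
import urllib.error

def fetch_with_retries(download_fragment, fragment, fragment_retries=3):
    # Re-issue the same request; transient 404s on fragments usually clear
    # after one or two attempts.
    for attempt in range(fragment_retries + 1):
        try:
            return download_fragment(fragment)
        except urllib.error.HTTPError:
            if attempt == fragment_retries:
                raise
```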
@@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
@@ -190,12 +190,13 @@ class FragmentFD(FileDownloader):
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
|
||||
'downloaded_bytes': resume_len,
|
||||
'fragment_index': ctx['fragment_index'],
|
||||
'fragment_count': total_frags,
|
||||
'filename': ctx['filename'],
|
||||
@@ -234,8 +235,8 @@ class FragmentFD(FileDownloader):
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
start, time_now, estimated_size - resume_len,
|
||||
state['downloaded_bytes'] - resume_len)
|
||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||
ctx['speed'] = state['speed']
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
|
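The fragment downloader change subtracts `resume_len` (bytes already on disk from a previous run) from both the size estimate and the progress counter, so the ETA of a resumed download reflects only the remaining work (#21992). The arithmetic in an illustrative, standalone form (`calc_eta` itself lives in `youtube_dl/downloader/common.py`):

```python
def eta_seconds(start_time, now, estimated_size, downloaded_bytes, resume_len):
    # Count only bytes fetched in this run, as in the fixed calc_eta call above.
    done = downloaded_bytes - resume_len
    remaining = (estimated_size - resume_len) - done
    elapsed = now - start_time
    if done <= 0 or elapsed <= 0:
        return None  # no rate information yet
    return remaining / (done / elapsed)
```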
@@ -146,7 +146,7 @@ def write_piff_header(stream, params):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||
avcc_payload += u16.pack(len(sps))
|
||||
avcc_payload += sps
|
||||
|
@@ -5,14 +5,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
)
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
@@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor):
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
||||
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
@@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor):
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
||||
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
@@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor):
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
||||
video_title=title)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
||||
_TEST = {
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
@@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
||||
' | AsianCrush')
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
|
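With the shared `_VALID_URL_BASE` above, one pattern now serves asiancrush.com, yuyutv.com, midnightpulp.com and cocoro.tv, and the captured host is reused to build the `embeddedVideoPlayer` API request. A quick standalone check of what the assembled regex captures for one of the new test URLs:

```python
import re

_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE

m = re.match(_VALID_URL, 'https://www.yuyutv.com/video/013886v/the-act-of-killing/')
print(m.group('host'), m.group('id'))  # yuyutv.com 13886
```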
@@ -1,202 +1,118 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import hmac
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||
'md5': 'efd56753cda1bb64df52a3074f62e38a',
|
||||
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-10-especial-solidario-nochebuena',
|
||||
'id': '5d4aa2c57ed1a88fc715a615',
|
||||
'ext': 'mp4',
|
||||
'title': 'Especial Solidario de Nochebuena',
|
||||
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
|
||||
'duration': 5527.6,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'title': 'Capítulo 7: Asuntos pendientes',
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
|
||||
'md5': '6e52cbb513c405e403dbacb7aacf8747',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-112-david-bustamante',
|
||||
'ext': 'flv',
|
||||
'title': 'David Bustamante',
|
||||
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
|
||||
'duration': 1439.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
|
||||
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
|
||||
_TIMESTAMP_SHIFT = 30000
|
||||
|
||||
_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
|
||||
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
|
||||
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
|
||||
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
|
||||
|
||||
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
|
||||
|
||||
_ERRORS = {
|
||||
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
|
||||
'DELETED': 'This video has expired and is no longer available for online streaming.',
|
||||
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
|
||||
# 'PREMIUM': 'PREMIUM',
|
||||
}
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'j_username': username,
|
||||
'j_password': password,
|
||||
}
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in')
|
||||
try:
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
}, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
|
||||
response, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
try:
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
|
||||
episode_id = self._search_regex(
|
||||
r'episode="([^"]+)"', webpage, 'episode id')
|
||||
|
||||
request = sanitized_Request(
|
||||
self._PLAYER_URL_TEMPLATE % episode_id,
|
||||
headers={'User-Agent': self._USER_AGENT})
|
||||
player = self._download_json(request, episode_id, 'Downloading player JSON')
|
||||
|
||||
episode_type = player.get('typeOfEpisode')
|
||||
error_message = self._ERRORS.get(episode_type)
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
title = episode['titulo']
|
||||
|
||||
formats = []
|
||||
video_url = player.get('urlVideo')
|
||||
if video_url:
|
||||
format_info = {
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
}
|
||||
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
|
||||
if mobj:
|
||||
format_info.update({
|
||||
'width': int_or_none(mobj.group('width')),
|
||||
'height': int_or_none(mobj.group('height')),
|
||||
'tbr': int_or_none(mobj.group('bitrate')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
timestamp = int_or_none(self._download_webpage(
|
||||
self._TIME_API_URL,
|
||||
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
|
||||
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
|
||||
token = hmac.new(
|
||||
self._MAGIC.encode('ascii'),
|
||||
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
|
||||
).hexdigest()
|
||||
|
||||
request = sanitized_Request(
|
||||
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
|
||||
headers={'User-Agent': self._USER_AGENT})
|
||||
|
||||
fmt_json = self._download_json(
|
||||
request, video_id, 'Downloading windows video JSON')
|
||||
|
||||
result = fmt_json.get('resultDes')
|
||||
if result.lower() != 'ok':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
||||
|
||||
for format_id, video_url in fmt_json['resultObject'].items():
|
||||
if format_id == 'token' or not video_url.startswith('http'):
|
||||
for source in episode.get('sources', []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
if 'geodeswowsmpra3player' in video_url:
|
||||
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||
# this videos are protected by DRM, the f4m downloader doesn't support them
|
||||
continue
|
||||
video_url_hd = video_url.replace('free_es', 'es')
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
|
||||
fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
|
||||
fatal=False))
|
||||
src_type = source.get('type')
|
||||
if src_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
path_data = player.get('pathData')
|
||||
|
||||
episode = self._download_xml(
|
||||
self._EPISODE_URL_TEMPLATE % path_data, video_id,
|
||||
'Downloading episode XML')
|
||||
|
||||
duration = float_or_none(xpath_text(
|
||||
episode, './media/asset/info/technical/contentDuration', 'duration'))
|
||||
|
||||
art = episode.find('./media/asset/info/art')
|
||||
title = xpath_text(art, './name', 'title')
|
||||
description = xpath_text(art, './description', 'description')
|
||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle_url:
|
||||
subtitles['es'] = [{
|
||||
'ext': 'srt',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'description': episode.get('descripcion'),
|
||||
'thumbnail': episode.get('imgPoster'),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'channel': get_meta('channel'),
|
||||
'season': get_meta('season'),
|
||||
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||
}
|
||||
|
@@ -2,22 +2,25 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import float_or_none
from ..utils import (
clean_html,
float_or_none,
)


class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
'md5': '7b00192e593ff227e6a315486979a42d',
'info_dict': {
'id': '4279833',
'id': '7398103',
'ext': 'mp3',
'title': '3/09/2016 Czaban Hour 3',
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
'duration': 2245.72,
'uploader': 'SB Nation A.M.',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
'title': 'Asim Chaudhry',
'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
'duration': 4000.99,
'uploader': 'Sue Perkins: An hour or so with...',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
}
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
@@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor):
clip = None

clip_store = self._parse_json(
self._search_regex(
r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
self._html_search_regex(
r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
webpage, 'clip store', default='{}', group='json'),
video_id, fatal=False)
if clip_store:
@@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor):

audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
'audio', webpage, 'audio url')
title = from_clip('title') or self._og_search_title(webpage)
description = from_clip('description') or self._og_search_description(webpage)
title = from_clip('title') or self._html_search_meta(
['og:title', 'og:audio:title', 'audio_title'], webpage)
description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)

duration = float_or_none(from_clip('duration') or self._html_search_meta(
'weibo:audio:duration', webpage))

uploader = from_clip('author') or self._og_search_property(
'audio:artist', webpage, 'uploader', fatal=False)
uploader = from_clip('author') or self._html_search_meta(
['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
uploader_url = from_clip('author_url') or self._html_search_meta(
'audioboo:channel', webpage, 'uploader url')
@@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/(?:clips|audiovideo/popular)[/#]|
|
||||
radio/player/|
|
||||
sounds/play/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
@@ -70,7 +71,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
},
|
||||
'params': {
|
||||
@@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'm0007jz9',
|
||||
'ext': 'mp4',
|
||||
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
||||
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
||||
'duration': 9840,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -609,7 +624,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
@@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
entries, bangumi_id,
|
||||
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||
|
||||
|
||||
class BilibiliAudioBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, sid, query=None):
|
||||
if not query:
|
||||
query = {'sid': sid}
|
||||
return self._download_json(
|
||||
'https://www.bilibili.com/audio/music-service-c/web/' + path,
|
||||
sid, query=query)['data']
|
||||
|
||||
|
||||
class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bilibili.com/audio/au1003142',
|
||||
'md5': 'fec4987014ec94ef9e666d4d158ad03b',
|
||||
'info_dict': {
|
||||
'id': '1003142',
|
||||
'ext': 'm4a',
|
||||
'title': '【tsukimi】YELLOW / 神山羊',
|
||||
'artist': 'tsukimi',
|
||||
'comment_count': int,
|
||||
'description': 'YELLOW的mp3版!',
|
||||
'duration': 183,
|
||||
'subtitles': {
|
||||
'origin': [{
|
||||
'ext': 'lrc',
|
||||
}],
|
||||
},
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'timestamp': 1564836614,
|
||||
'upload_date': '20190803',
|
||||
'uploader': 'tsukimi-つきみぐー',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
au_id = self._match_id(url)
|
||||
|
||||
play_data = self._call_api('url', au_id)
|
||||
formats = [{
|
||||
'url': play_data['cdns'][0],
|
||||
'filesize': int_or_none(play_data.get('size')),
|
||||
}]
|
||||
|
||||
song = self._call_api('song/info', au_id)
|
||||
title = song['title']
|
||||
statistic = song.get('statistic') or {}
|
||||
|
||||
subtitles = None
|
||||
lyric = song.get('lyric')
|
||||
if lyric:
|
||||
subtitles = {
|
||||
'origin': [{
|
||||
'url': lyric,
|
||||
}]
|
||||
}
|
||||
|
||||
return {
|
||||
'id': au_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'artist': song.get('author'),
|
||||
'comment_count': int_or_none(statistic.get('comment')),
|
||||
'description': song.get('intro'),
|
||||
'duration': int_or_none(song.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': song.get('cover'),
|
||||
'timestamp': int_or_none(song.get('passtime')),
|
||||
'uploader': song.get('uname'),
|
||||
'view_count': int_or_none(statistic.get('play')),
|
||||
}
|
||||
|
||||
|
||||
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.bilibili.com/audio/am10624',
|
||||
'info_dict': {
|
||||
'id': '10624',
|
||||
'title': '每日新曲推荐(每日11:00更新)',
|
||||
'description': '每天11:00更新,为你推送最新音乐',
|
||||
},
|
||||
'playlist_count': 19,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
am_id = self._match_id(url)
|
||||
|
||||
songs = self._call_api(
|
||||
'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
|
||||
|
||||
entries = []
|
||||
for song in songs:
|
||||
sid = str_or_none(song.get('id'))
|
||||
if not sid:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'https://www.bilibili.com/audio/au' + sid,
|
||||
BilibiliAudioIE.ie_key(), sid))
|
||||
|
||||
if entries:
|
||||
album_data = self._call_api('menu/info', am_id) or {}
|
||||
album_title = album_data.get('title')
|
||||
if album_title:
|
||||
for entry in entries:
|
||||
entry['album'] = album_title
|
||||
return self.playlist_result(
|
||||
entries, am_id, album_title, album_data.get('intro'))
|
||||
|
||||
return self.playlist_result(entries, am_id)
@@ -11,8 +11,8 @@ from ..utils import ExtractorError
|
||||
class BokeCCBaseIE(InfoExtractor):
|
||||
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
|
||||
player_params_str = self._html_search_regex(
|
||||
r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
|
||||
webpage, 'player params')
|
||||
r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
|
||||
webpage, 'player params', group='query')
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
|
||||
@@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE):
|
||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
|
||||
'info_dict': {
|
||||
'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
|
||||
'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
|
||||
'ext': 'flv',
|
||||
'title': 'BokeCC Video',
|
||||
},
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
|
||||
@@ -11,14 +10,12 @@ from .adobepass import AdobePassIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
@@ -27,18 +24,19 @@ from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
UnsupportedError,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:legacy'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'timestamp': 1368213670,
|
||||
'upload_date': '20130510',
|
||||
'uploader_id': '1589608506001',
|
||||
}
|
||||
},
|
||||
'skip': 'The player has been deactivated by the content owner',
|
||||
},
|
||||
{
|
||||
# From http://medianetwork.oracle.com/video/player/1785452137001
|
||||
@@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'upload_date': '20120814',
|
||||
'uploader_id': '1460825906',
|
||||
},
|
||||
'skip': 'video not playable',
|
||||
},
|
||||
{
|
||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||
@@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'This Bracelet Acts as a Personal Thermostat',
|
||||
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||
'uploader': 'Mashable',
|
||||
# 'uploader': 'Mashable',
|
||||
'timestamp': 1382041798,
|
||||
'upload_date': '20131017',
|
||||
'uploader_id': '1130468786001',
|
||||
@@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'id': '3550319591001',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'skip': 'Unsupported URL',
|
||||
},
|
||||
{
|
||||
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
||||
@@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'title': 'Lesson 08',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
'skip': 'Unsupported URL',
|
||||
},
|
||||
{
|
||||
# playerID inferred from bcpid
|
||||
@@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'only_matching': True, # Tested in GenericIE
|
||||
}
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
2: 'ON2',
|
||||
3: 'H264',
|
||||
4: 'VP8',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
@@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
return update_url_query(cls._FEDERATED_URL, params)
|
||||
return update_url_query(
|
||||
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
videoPlayer = query.get('@videoPlayer')
|
||||
if videoPlayer:
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
|
||||
video_id = videoPlayer[0]
|
||||
if 'playerID' not in query:
|
||||
mobj = re.search(r'/bcpid(\d+)', url)
|
||||
if mobj is not None:
|
||||
query['playerID'] = [mobj.group(1)]
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query, referer=referer)
|
||||
elif 'playerKey' in query:
|
||||
player_key = query['playerKey']
|
||||
return self._get_playlist_info(player_key[0])
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
referer = linkBase[0]
|
||||
if referer is not None:
|
||||
headers['Referer'] = referer
|
||||
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
@@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
@@ -349,136 +323,16 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
||||
info = json.loads(info)['data']
|
||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
||||
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
|
||||
|
||||
return self._extract_video_info(video_info)
|
||||
|
||||
def _get_playlist_info(self, player_key):
|
||||
info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
|
||||
playlist_info = self._download_webpage(
|
||||
info_url, player_key, 'Downloading playlist information')
|
||||
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' in json_data:
|
||||
playlist_info = json_data['videoList']
|
||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
||||
elif 'playlistTabs' in json_data:
|
||||
playlist_info = json_data['playlistTabs']
|
||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
||||
else:
|
||||
raise ExtractorError('Empty playlist')
|
||||
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_dto['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
video_id = compat_str(video_info['id'])
|
||||
publisher_id = video_info.get('publisherId')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_info['displayName'].strip(),
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
'uploader': video_info.get('publisherName'),
|
||||
'uploader_id': compat_str(publisher_id) if publisher_id else None,
|
||||
'duration': float_or_none(video_info.get('length'), 1000),
|
||||
'timestamp': int_or_none(video_info.get('creationDate'), 1000),
|
||||
}
|
||||
|
||||
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
|
||||
if renditions:
|
||||
formats = []
|
||||
for rend in renditions:
|
||||
url = rend['defaultURL']
|
||||
if not url:
|
||||
continue
|
||||
ext = None
|
||||
if rend['remote']:
|
||||
url_comp = compat_urllib_parse_urlparse(url)
|
||||
if url_comp.path.endswith('.m3u8'):
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
elif 'akamaihd.net' in url_comp.netloc:
|
||||
# This type of renditions are served through
|
||||
# akamaihd.net, but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
if ext is None:
|
||||
ext = determine_ext(url)
|
||||
tbr = int_or_none(rend.get('encodingRate'), 1000)
|
||||
a_format = {
|
||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'filesize': int_or_none(rend.get('size')) or None,
|
||||
'tbr': tbr,
|
||||
}
|
||||
if rend.get('audioOnly'):
|
||||
a_format.update({
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
a_format.update({
|
||||
'height': int_or_none(rend.get('frameHeight')),
|
||||
'width': int_or_none(rend.get('frameWidth')),
|
||||
'vcodec': rend.get('videoCodec'),
|
||||
})
|
||||
|
||||
# m3u8 manifests with remote == false are media playlists
|
||||
# Not calling _extract_m3u8_formats here to save network traffic
|
||||
if ext == 'm3u8':
|
||||
a_format.update({
|
||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
elif video_info.get('FLVFullLengthURL') is not None:
|
||||
info.update({
|
||||
'url': video_info['FLVFullLengthURL'],
|
||||
'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
|
||||
'filesize': int_or_none(video_info.get('FLVFullSize')),
|
||||
})
|
||||
|
||||
if self._downloader.params.get('include_ads', False):
|
||||
adServerURL = video_info.get('_youtubedl_adServerURL')
|
||||
if adServerURL:
|
||||
ad_info = {
|
||||
'_type': 'url',
|
||||
'url': adServerURL,
|
||||
}
|
||||
if 'url' in info:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': info['title'],
|
||||
'entries': [ad_info, info],
|
||||
}
|
||||
else:
|
||||
return ad_info
|
||||
|
||||
if not info.get('url') and not info.get('formats'):
|
||||
uploader_id = info.get('uploader_id')
|
||||
if uploader_id:
|
||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
if publisher_id:
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
# TODO: figure out if it's possible to extract playlistId from playerKey
|
||||
# elif 'playerKey' in query:
|
||||
# player_key = query['playerKey']
|
||||
# return self._get_playlist_info(player_key[0])
|
||||
raise UnsupportedError(url)
|
||||
|
||||
|
||||
class BrightcoveNewIE(AdobePassIE):
@@ -3,7 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
@@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
info = self._download_json(
|
||||
video = self._download_json(
|
||||
'https://api.byutv.org/api3/catalog/getvideosforcontent',
|
||||
display_id, query={
|
||||
'contentid': video_id,
|
||||
@@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
|
||||
'x-byutv-platformkey': 'xsaaw9c7y5',
|
||||
})
|
||||
|
||||
ep = info.get('ooyalaVOD')
|
||||
ep = video.get('ooyalaVOD')
|
||||
if ep:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
ep = info['dvr']
|
||||
title = ep['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
info = {}
|
||||
formats = []
|
||||
for format_id, ep in video.items():
|
||||
if not isinstance(ep, dict):
|
||||
continue
|
||||
video_url = url_or_none(ep.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
merge_dicts(info, {
|
||||
'title': ep.get('title'),
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
'duration': parse_duration(ep.get('length')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
'duration': parse_duration(ep.get('length')),
|
||||
'title': display_id,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@@ -7,7 +7,7 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ChaturbateIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.chaturbate.com/siswet19/',
|
||||
'info_dict': {
|
||||
@@ -21,6 +21,9 @@ class ChaturbateIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Room is offline',
|
||||
}, {
|
||||
'url': 'https://chaturbate.com/fullvideo/?b=caylin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
@@ -32,7 +35,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers=self.geo_verification_headers())
|
||||
'https://chaturbate.com/%s/' % video_id, video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
m3u8_urls = []
|
||||
|
||||
|
@@ -220,7 +220,7 @@ class InfoExtractor(object):
* "preference" (optional, int) - quality of the image
* "width" (optional, int)
* "height" (optional, int)
* "resolution" (optional, string "{width}x{height"},
* "resolution" (optional, string "{width}x{height}",
deprecated)
* "filesize" (optional, int)
thumbnail: Full URL to a video thumbnail image.
@@ -1424,12 +1424,10 @@ class InfoExtractor(object):
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True
except ExtractorError as e:
if isinstance(e.cause, compat_urllib_error.URLError):
self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, item))
return False
raise
except ExtractorError:
self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, item))
return False

def http_scheme(self):
""" Either "http:" or "https:", depending on the user's preferences """
118
youtube_dl/extractor/contv.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CONtvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
|
||||
'info_dict': {
|
||||
'id': 'CEG10022949',
|
||||
'ext': 'mp4',
|
||||
'title': 'Days Of Thrills & Laughter',
|
||||
'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
|
||||
'upload_date': '20180703',
|
||||
'timestamp': 1530634789.61,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
|
||||
'info_dict': {
|
||||
'id': 'CLIP-show_fotld_bts',
|
||||
'title': 'Fight of the Living Dead: Behind the Scenes Bites',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
details = self._download_json(
|
||||
'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
|
||||
video_id, query={'device': 'web'})
|
||||
|
||||
if details.get('type') == 'episodic':
|
||||
seasons = self._download_json(
|
||||
'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
|
||||
video_id)
|
||||
entries = []
|
||||
for season in seasons:
|
||||
for episode in season.get('episodes', []):
|
||||
episode_id = episode.get('id')
|
||||
if not episode_id:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'https://www.contv.com/details-movie/' + episode_id,
|
||||
CONtvIE.ie_key(), episode_id))
|
||||
return self.playlist_result(entries, video_id, details.get('title'))
|
||||
|
||||
m_details = details['details']
|
||||
title = details['title']
|
||||
|
||||
formats = []
|
||||
|
||||
media_hls_url = m_details.get('media_hls_url')
|
||||
if media_hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_hls_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
media_mp4_url = m_details.get('media_mp4_url')
|
||||
if media_mp4_url:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': media_mp4_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
captions = m_details.get('captions') or {}
|
||||
for caption_url in captions.values():
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': caption_url
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in m_details.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
description = None
|
||||
for p in ('large_', 'medium_', 'small_', ''):
|
||||
d = m_details.get(p + 'description')
|
||||
if d:
|
||||
description = d
|
||||
break
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': description,
|
||||
'timestamp': float_or_none(details.get('metax_added_on'), 1000),
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(m_details.get('duration'), 1000),
|
||||
'view_count': int_or_none(details.get('num_watched')),
|
||||
'like_count': int_or_none(details.get('num_fav')),
|
||||
'categories': details.get('category'),
|
||||
'tags': details.get('tags'),
|
||||
'season_number': int_or_none(details.get('season')),
|
||||
'episode_number': int_or_none(details.get('episode')),
|
||||
'release_year': int_or_none(details.get('pub_year')),
|
||||
}
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
@@ -14,8 +15,8 @@ class CtsNewsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||
'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
@@ -26,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
|
||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
@@ -62,8 +63,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
video_url = mp4_feed['source_url']
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||
youtube_url = YoutubeIE._extract_url(page)
|
||||
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
|
@@ -48,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
)
|
||||
/(?P<id>[^/?_]+)
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
_FORMATS = [
|
||||
@@ -133,6 +140,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lequipe.fr/video/x791mem',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -5,23 +5,17 @@ import re
|
||||
import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:(?:www|go)\.)?discovery|
|
||||
(?:www\.)?
|
||||
(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
@@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
|
||||
'info_dict': {
|
||||
'id': '5a2d9b4d6b66d17a5026e1fd',
|
||||
'id': '5a2f35ce6b66d17a5026e29e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dave Foley',
|
||||
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
|
||||
'duration': 608,
|
||||
'title': 'Riding with Matthew Perry',
|
||||
'description': 'md5:a34333153e79bc4526019a5129e7f878',
|
||||
'duration': 84,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@@ -56,20 +50,20 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# using `show_slug` is important to get the correct video data
|
||||
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_GEO_BYPASS = False
|
||||
_API_BASE_URL = 'https://api.discovery.com/v1/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
react_data = self._parse_json(self._search_regex(
|
||||
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
|
||||
webpage, 'react data'), display_id)
|
||||
content_blocks = react_data['layout'][path]['contentBlocks']
|
||||
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
||||
video_id = video['id']
|
||||
site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
access_token = None
|
||||
cookies = self._get_cookies(url)
|
||||
@@ -79,27 +73,36 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
if auth_storage_cookie and auth_storage_cookie.value:
|
||||
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
||||
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
||||
video_id, fatal=False) or {}
|
||||
display_id, fatal=False) or {}
|
||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://%s.com/anonymous' % site, display_id, query={
|
||||
'https://%s.com/anonymous' % site, display_id,
|
||||
'Downloading token JSON metadata', query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'client_id': '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
try:
|
||||
video = self._download_json(
|
||||
self._API_BASE_URL + 'content/videos',
|
||||
display_id, 'Downloading content JSON metadata',
|
||||
headers=headers, query={
|
||||
'embed': 'show.name',
|
||||
'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
|
||||
'slug': display_id,
|
||||
'show_slug': show_slug,
|
||||
})[0]
|
||||
video_id = video['id']
|
||||
stream = self._download_json(
|
||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||
display_id, headers=headers)
|
||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
|
@@ -9,8 +9,8 @@ from ..utils import int_or_none

class DLiveVODIE(InfoExtractor):
IE_NAME = 'dlive:vod'
_VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[a-zA-Z0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
'info_dict': {
'id': '3mTzOl4WR',
@@ -20,7 +20,10 @@ class DLiveVODIE(InfoExtractor):
'timestamp': 1562011015,
'uploader_id': 'pdp',
}
}
}, {
'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
'only_matching': True,
}]

def _real_extract(self, url):
uploader_id, vod_id = re.match(self._VALID_URL, url).groups()
@@ -1,20 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
|
||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||
'info_dict': {
|
||||
'id': '6646981/951bc60f',
|
||||
@@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
|
||||
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
protocol = mobj.group('protocol')
|
||||
|
||||
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
files_base64 = self._search_regex(
|
||||
r'data-files="([^"]+)"', webpage, 'data files')
|
||||
|
||||
files = self._parse_json(
|
||||
compat_b64decode(files_base64).decode('utf-8'),
|
||||
video_id)
|
||||
video_id = self._match_id(url).replace('_', '/')
|
||||
item = self._download_json(
|
||||
'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
|
||||
video_id)['items'][0]
|
||||
title = item['title']
|
||||
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
} for format_id, video_url in files.items() if format_id != 'still']
|
||||
formats = []
|
||||
for variant in media.get('variants', []):
|
||||
uri = variant.get('uri')
|
||||
if not uri:
|
||||
continue
|
||||
version = variant.get('version')
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': version,
|
||||
'quality': quality(version),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage) or self._og_search_title(webpage)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage) or self._og_search_description(webpage)
|
||||
thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
|
||||
thumbnails = []
|
||||
stills = item.get('stills') or {}
|
||||
for t in ('thumb', 'still'):
|
||||
for s in ('', '-medium', '-large'):
|
||||
still_id = t + s
|
||||
still_url = stills.get(still_id)
|
||||
if not still_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': still_id,
|
||||
'url': still_url,
|
||||
})
|
||||
|
||||
stats = item.get('stats') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
'description': item.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'like_count': int_or_none(stats.get('kudos_total')),
|
||||
'view_count': int_or_none(stats.get('views_total')),
|
||||
}
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -18,7 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
@@ -32,6 +33,12 @@ class EinthusanIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.com/movie/watch/9097/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
@@ -41,7 +48,9 @@ class EinthusanIE(InfoExtractor):
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -53,7 +62,7 @@ class EinthusanIE(InfoExtractor):
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
||||
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
|
@@ -104,6 +104,8 @@ from .bild import BildIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
BilibiliAudioIE,
|
||||
BilibiliAudioAlbumIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
@@ -229,6 +231,7 @@ from .commonprotocols import (
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
from .corus import CorusIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
@@ -642,7 +645,7 @@ from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mit import TechTVMITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import (
|
||||
MixcloudIE,
|
||||
@@ -891,7 +894,6 @@ from .puhutv import (
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
@@ -994,7 +996,6 @@ from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .servus import ServusIE
|
||||
from .sevenplus import SevenPlusIE
|
||||
from .sexu import SexuIE
|
||||
@@ -1127,6 +1128,7 @@ from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import (
|
||||
TeleQuebecIE,
|
||||
TeleQuebecSquatIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
)
|
||||
@@ -1280,7 +1282,6 @@ from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import (
|
||||
VevoIE,
|
||||
@@ -1322,7 +1323,6 @@ from .viewlift import (
|
||||
ViewLiftIE,
|
||||
ViewLiftEmbedIE,
|
||||
)
|
||||
from .viewster import ViewsterIE
|
||||
from .viidea import ViideaIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
@@ -1411,7 +1411,6 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
@@ -1425,6 +1424,7 @@ from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
XHamsterUserIE,
|
||||
)
|
||||
from .xiami import (
|
||||
XiamiSongIE,
|
||||
@@ -1448,6 +1448,7 @@ from .yahoo import (
|
||||
YahooSearchIE,
|
||||
YahooGyaOPlayerIE,
|
||||
YahooGyaOIE,
|
||||
YahooJapanNewsIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
from .yandexmusic import (
|
||||
|
@@ -405,6 +405,11 @@ class FacebookIE(InfoExtractor):
if not formats:
raise ExtractorError('Cannot find video formats')

# Downloads with browser's User-Agent are rate limited. Working around
# with non-browser User-Agent.
for f in formats:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'

self._sort_formats(formats)

video_title = self._html_search_regex(
@@ -77,7 +77,6 @@ from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .vessel import VesselIE
|
||||
from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
@@ -2075,6 +2074,22 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# Squarespace video embed, 2019-08-28
|
||||
'url': 'http://ootboxford.com',
|
||||
'info_dict': {
|
||||
'id': 'Tc7b_JGdZfw',
|
||||
'title': 'Out of the Blue, at Childish Things 10',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
|
||||
'uploader_id': 'helendouglashouse',
|
||||
'uploader': 'Helen & Douglas House',
|
||||
'upload_date': '20140328',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
@@ -2226,7 +2241,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if '/' in url:
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2395,6 +2410,12 @@ class GenericIE(InfoExtractor):
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
webpage = compat_urllib_parse_unquote(webpage)
|
||||
|
||||
# Unescape squarespace embeds to be detected by generic extractor,
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||
webpage = re.sub(
|
||||
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
|
||||
lambda x: unescapeHTML(x.group(0)), webpage)
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
@@ -2469,11 +2490,6 @@ class GenericIE(InfoExtractor):
|
||||
if tp_urls:
|
||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||
|
||||
# Look for Vessel embeds
|
||||
vessel_urls = VesselIE._extract_urls(webpage)
|
||||
if vessel_urls:
|
||||
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2946,10 +2962,14 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for Mangomolo embeds
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
|
||||
(?:
|
||||
admin\.mangomolo\.com/analytics/index\.php/customers/embed|
|
||||
player\.mangomolo\.com/v1
|
||||
)/
|
||||
(?:
|
||||
video\?.*?\bid=(?P<video_id>\d+)|
|
||||
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
(?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
).+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
info = {
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
@@ -44,12 +44,21 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -96,21 +96,31 @@ class GloboIE(InfoExtractor):
|
||||
video = self._download_json(
|
||||
'http://api.globovideos.com/videos/%s/playlist' % video_id,
|
||||
video_id)['videos'][0]
|
||||
if video.get('encrypted') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
title = video['title']
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for resource in video['resources']:
|
||||
resource_id = resource.get('_id')
|
||||
resource_url = resource.get('url')
|
||||
if not resource_id or not resource_url:
|
||||
resource_type = resource.get('type')
|
||||
if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
|
||||
continue
|
||||
|
||||
if resource_type == 'subtitle':
|
||||
subtitles.setdefault(resource.get('language') or 'por', []).append({
|
||||
'url': resource_url,
|
||||
})
|
||||
continue
|
||||
|
||||
security = self._download_json(
|
||||
'http://security.video.globo.com/videos/%s/hash' % video_id,
|
||||
video_id, 'Downloading security hash for %s' % resource_id, query={
|
||||
'player': 'flash',
|
||||
'version': '17.0.0.132',
|
||||
'player': 'desktop',
|
||||
'version': '5.19.1',
|
||||
'resource_id': resource_id,
|
||||
})
|
||||
|
||||
@@ -122,19 +132,18 @@ class GloboIE(InfoExtractor):
|
||||
'%s returned error: %s' % (self.IE_NAME, message), expected=True)
|
||||
continue
|
||||
|
||||
hash_code = security_hash[:2]
|
||||
received_time = security_hash[2:12]
|
||||
received_random = security_hash[12:22]
|
||||
received_md5 = security_hash[22:]
|
||||
assert security_hash[:2] in ('04', '14')
|
||||
received_time = security_hash[3:13]
|
||||
received_md5 = security_hash[24:]
|
||||
|
||||
sign_time = compat_str(int(received_time) + 86400)
|
||||
padding = '%010d' % random.randint(1, 10000000000)
|
||||
|
||||
md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
|
||||
md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode()
|
||||
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||
signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
|
||||
signed_hash = security_hash[:23] + sign_time + padding + signed_md5
|
||||
|
||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||
signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
|
||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -164,7 +173,8 @@ class GloboIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
def _make_kaltura_result(kaltura_url):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(kaltura_url, {'source_url': url}),
|
||||
@@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
|
||||
'description': description,
|
||||
}
|
||||
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return _make_kaltura_result(kaltura_url)
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
|
||||
default=None, group='id')
|
||||
if kaltura_id:
|
||||
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
|
||||
|
||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||
if yt_urls:
|
||||
return self.playlist_from_matches(
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
|
||||
@@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE):
|
||||
format_url = url_or_none(playback_set.get('playbackUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_url = re.sub(
|
||||
r'(?<=//staragvod)(\d)', r'web\1', format_url)
|
||||
tags = str_or_none(playback_set.get('tagsCombination')) or ''
|
||||
if tags and 'encryption:plain' not in tags:
|
||||
continue
|
||||
@@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE):
|
||||
try:
|
||||
if 'package:hls' in tags or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls'))
|
||||
elif 'package:dash' in tags or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash'))
|
||||
|
@@ -22,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor


class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
@@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# unavailable source format
|
||||
'url': 'kaltura:513551:1_66x4rg7o',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -146,14 +151,15 @@ class KalturaIE(InfoExtractor):
|
||||
if mobj:
|
||||
embed_info = mobj.groupdict()
|
||||
for k, v in embed_info.items():
|
||||
embed_info[k] = v.strip()
|
||||
if v:
|
||||
embed_info[k] = v.strip()
|
||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||
escaped_pid = re.escape(embed_info['partner_id'])
|
||||
service_url = re.search(
|
||||
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||
service_mobj = re.search(
|
||||
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||
webpage)
|
||||
if service_url:
|
||||
url = smuggle_url(url, {'service_url': service_url.group(1)})
|
||||
if service_mobj:
|
||||
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
||||
return url
|
||||
|
||||
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
|
||||
@@ -306,12 +312,17 @@ class KalturaIE(InfoExtractor):
|
||||
f['fileExt'] = 'mp4'
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
format_id = '%(fileExt)s-%(bitrate)s' % f
|
||||
# Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
|
||||
if f.get('isOriginal') is True and not self._is_valid_url(
|
||||
video_url, entry_id, format_id):
|
||||
continue
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
# -f mp4-56)
|
||||
vcodec = 'none' if 'videoCodecId' not in f and f.get(
|
||||
'frameRate') == 0 else f.get('videoCodecId')
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'format_id': format_id,
|
||||
'ext': f.get('fileExt'),
|
||||
'tbr': int_or_none(f['bitrate']),
|
||||
'fps': int_or_none(f.get('frameRate')),
|
||||
|
@@ -326,7 +326,7 @@ class LetvCloudIE(InfoExtractor):
elif play_json.get('code'):
raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
else:
raise ExtractorError('Letv cloud returned an unknwon error')
raise ExtractorError('Letv cloud returned an unknown error')

def b64decode(s):
return compat_b64decode(s).decode('utf-8')
@@ -10,18 +10,21 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class MangomoloBaseIE(InfoExtractor):
|
||||
_BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
return page_id
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._get_real_id(self._match_id(url))
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
|
||||
|
||||
format_url = self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
|
||||
r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats = self._extract_wowza_formats(
|
||||
@@ -39,14 +42,16 @@ class MangomoloBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class MangomoloVideoIE(MangomoloBaseIE):
|
||||
IE_NAME = 'mangomolo:video'
|
||||
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
|
||||
_TYPE = 'video'
|
||||
IE_NAME = 'mangomolo:' + _TYPE
|
||||
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
|
||||
_IS_LIVE = False
|
||||
|
||||
|
||||
class MangomoloLiveIE(MangomoloBaseIE):
|
||||
IE_NAME = 'mangomolo:live'
|
||||
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
|
||||
_TYPE = 'live'
|
||||
IE_NAME = 'mangomolo:' + _TYPE
|
||||
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
|
||||
_IS_LIVE = True
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
|
@@ -82,6 +82,7 @@ class MGTVIE(InfoExtractor):
'http_headers': {
'Referer': url,
},
'format_note': stream.get('name'),
})
self._sort_formats(formats)
@@ -65,30 +65,6 @@ class TechTVMITIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class MITIE(TechTVMITIE):
|
||||
IE_NAME = 'video.mit.edu'
|
||||
_VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
|
||||
'md5': '7db01d5ccc1895fc5010e9c9e13648da',
|
||||
'info_dict': {
|
||||
'id': '21783',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Government is Profiling You',
|
||||
'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe .*?src="(.+?)"', webpage, 'embed url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
|
@@ -164,7 +164,7 @@ class MixcloudIE(InfoExtractor):
def decrypt_url(f_url):
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
decrypted_url = self._decrypt_xor_cipher(k, f_url)
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
return decrypted_url

for url_key in ('url', 'hlsUrl', 'dashUrl'):
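For context, the XOR cipher behind `decrypt_url` is symmetric, and the only change above is that the sanity-check regex now accepts capital letters in the host name. A small self-contained sketch (key and URL are made up):

```python
import itertools
import re

def xor_cipher(key, text):
    # XOR every character against the repeating key; applying it twice restores the input.
    return ''.join(chr(ord(c) ^ ord(k)) for c, k in zip(text, itertools.cycle(key)))

plaintext = 'https://Stream17.Example.com/track.m4a'
decrypted = xor_cipher('SOMEKEY', xor_cipher('SOMEKEY', plaintext))

old_ok = re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted) is not None
new_ok = re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted) is not None
print(old_ok, new_ok)  # False True -- mixed-case hosts only pass the widened check
```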
@@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
try_get,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -85,27 +84,41 @@ class NBCIE(AdobePassIE):
|
||||
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
||||
permalink = 'http' + compat_urllib_parse_unquote(permalink)
|
||||
response = self._download_json(
|
||||
'https://api.nbc.com/v3/videos', video_id, query={
|
||||
'filter[permalink]': permalink,
|
||||
'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating',
|
||||
'fields[shows]': 'shortTitle',
|
||||
'include': 'show.shortTitle',
|
||||
'https://friendship.nbc.co/v2/graphql', video_id, query={
|
||||
'query': '''{
|
||||
page(name: "%s", platform: web, type: VIDEO, userId: "0") {
|
||||
data {
|
||||
... on VideoPageData {
|
||||
description
|
||||
episodeNumber
|
||||
keywords
|
||||
locked
|
||||
mpxAccountId
|
||||
mpxGuid
|
||||
rating
|
||||
seasonNumber
|
||||
secondaryTitle
|
||||
seriesShortTitle
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % permalink,
|
||||
})
|
||||
video_data = response['data'][0]['attributes']
|
||||
video_data = response['data']['page']['data']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
video_id = video_data['guid']
|
||||
title = video_data['title']
|
||||
if video_data.get('entitlement') == 'auth':
|
||||
video_id = video_data['mpxGuid']
|
||||
title = video_data['secondaryTitle']
|
||||
if video_data.get('locked'):
|
||||
resource = self._get_mvpd_resource(
|
||||
'nbcentertainment', title, video_id,
|
||||
video_data.get('vChipRating'))
|
||||
video_data.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'nbcentertainment', resource)
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
|
||||
query), {'force_smil_url': True})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -117,7 +130,7 @@ class NBCIE(AdobePassIE):
|
||||
'season_number': int_or_none(video_data.get('seasonNumber')),
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'episode': title,
|
||||
'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']),
|
||||
'series': video_data.get('seriesShortTitle'),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
@@ -295,13 +295,23 @@ class NexxIE(InfoExtractor):
|
||||
|
||||
video = None
|
||||
|
||||
def find_video(result):
|
||||
if isinstance(result, dict):
|
||||
return result
|
||||
elif isinstance(result, list):
|
||||
vid = int(video_id)
|
||||
for v in result:
|
||||
if try_get(v, lambda x: x['general']['ID'], int) == vid:
|
||||
return v
|
||||
return None
|
||||
|
||||
response = self._download_json(
|
||||
'https://arc.nexx.cloud/api/video/%s.json' % video_id,
|
||||
video_id, fatal=False)
|
||||
if response and isinstance(response, dict):
|
||||
result = response.get('result')
|
||||
if result and isinstance(result, dict):
|
||||
video = result
|
||||
if result:
|
||||
video = find_video(result)
|
||||
|
||||
# not all videos work via arc, e.g. nexx:741:1269984
|
||||
if not video:
|
||||
@@ -348,7 +358,7 @@ class NexxIE(InfoExtractor):
|
||||
request_token = hashlib.md5(
|
||||
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._call_api(
|
||||
result = self._call_api(
|
||||
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
|
||||
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
|
||||
'addInteractionOptions': '1',
|
||||
@@ -363,6 +373,7 @@ class NexxIE(InfoExtractor):
|
||||
'X-Request-CID': cid,
|
||||
'X-Request-Token': request_token,
|
||||
})
|
||||
video = find_video(result)
|
||||
|
||||
general = video['general']
|
||||
title = general['title']
|
||||
|
@@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor):
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||
'info_dict': {
|
||||
'id': 'a95j5iza',
|
||||
'ext': 'mp4',
|
||||
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
|
||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||
'timestamp': 1565965194,
|
||||
'upload_date': '20190816',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor):
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
'only_matching': True,
}]
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'

def _real_extract(self, url):
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
@@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor):

is_video = m_type == 'video'
episode = self._download_json(
self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
self._API_URL_TEMPLATE % (
'v' if is_video else 'r',
'clip' if episode_id[:4] == '9999' else 'esd',
episode_id, lang, '/all' if is_video else ''),
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
title = episode.get('sub_title_clean') or episode['sub_title']

@@ -60,8 +75,8 @@ class NhkVodIE(InfoExtractor):
if is_video:
info.update({
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:' + episode['vod_id'],
'ie_key': 'Piksel',
'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
})
else:
audio = episode['audio']
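The reworked template is easiest to see with the substitution written out; this is the expression from the hunk above, wrapped in a tiny helper for illustration:

```python
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'

def build_api_url(episode_id, lang='en', is_video=True):
    # Episode ids starting with 9999 are clips and hit the clip list,
    # everything else goes through the regular esd list.
    return _API_URL_TEMPLATE % (
        'v' if is_video else 'r',
        'clip' if episode_id[:4] == '9999' else 'esd',
        episode_id, lang, '/all' if is_video else '')

print(build_api_url('9999011'))  # .../vodcliplist/... (clip)
print(build_api_url('2015173'))  # .../vodesdlist/...  (regular episode)
```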
@@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor):
|
||||
https?://
|
||||
(?:
|
||||
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
|
||||
(?:www\.)?nickjr\.[a-z]{2}
|
||||
(?:www\.)?nickjr\.[a-z]{2}|
|
||||
(?:www\.)?nickelodeonjunior\.fr
|
||||
)
|
||||
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
|
||||
'''
|
||||
@@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
page_data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.*?});', webpage,
'page data'), display_id)
'page data', default='{}'), display_id, fatal=False)
if not page_data:
page_data = self._parse_json(self._parse_json(self._search_regex(
r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
webpage, 'page data'), display_id), display_id)

for kind in ('episode', 'clip'):
current_key = page_data.get(kind, {}).get(
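The fallback added above handles pages where the state object is wrapped in `JSON.parse("…")`, so the string literal has to be decoded before the object inside it can be parsed. A self-contained sketch with a made-up payload:

```python
import json
import re

# Some pages embed the state object directly, others wrap it in JSON.parse("...").
webpage = 'window.__data = JSON.parse("{\\"episode\\": {\\"id\\": 42}}");'

raw = re.search(
    r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;', webpage).group(1)
page_data = json.loads(json.loads(raw))  # first pass unquotes, second pass parses the object
print(page_data['episode']['id'])  # 42
```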
@@ -25,9 +25,14 @@ class NonkTubeIE(NuevoBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)

info = self._extract_nuevo(
'https://www.nonktube.com/media/nuevo/econfig.php?key=%s'
% video_id, video_id)
webpage = self._download_webpage(url, video_id)

info['age_limit'] = 18
title = self._og_search_title(webpage)
info = self._parse_html5_media_entries(url, webpage, video_id)[0]

info.update({
'id': video_id,
'title': title,
'age_limit': 18,
})
return info
@@ -406,7 +406,7 @@ class NRKTVSerieBaseIE(InfoExtractor):
def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json(
self._search_regex(
(r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False)
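The loosened pattern also tolerates a version suffix on the global that holds the config. A quick check against page snippets whose variable names are invented for the demo:

```python
import re

pattern = r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;'

for snippet in ('window.__EXAMPLE_INITIAL_DATA__ = {"a": 1};',
                'window.__EXAMPLE_INITIAL_DATA_V3__ = {"a": 1};'):
    # Both the plain and the versioned global are now matched.
    print(bool(re.search(pattern, snippet)))  # True, True
```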
@@ -243,7 +243,13 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)'
|
||||
_DOMAINS = r'''
|
||||
(?:
|
||||
openload\.(?:co|io|link|pw)|
|
||||
oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)|
|
||||
oladblock\.(?:services|xyz|me)|openloed\.co
|
||||
)
|
||||
'''
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
@@ -356,6 +362,12 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://oload.services/embed/bs1NWj1dCag/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.online/f/W8o2UfN1vNY/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.monster/f/W8o2UfN1vNY/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.press/embed/drTBl1aOTvk/',
|
||||
'only_matching': True,
|
||||
@@ -368,6 +380,9 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.best/embed/kkz9JgVZeWc/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
|
||||
'only_matching': True,
|
||||
@@ -380,12 +395,15 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://openloed.co/f/b8NWEgkqNLI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.vip/f/kUEfGclsU9o',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
|
||||
r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
|
||||
% (cls._DOMAINS, cls._EMBED_WORD), webpage)
|
||||
|
||||
def _extract_decrypted_page(self, page_url, webpage, video_id):
|
||||
@@ -451,7 +469,7 @@ class OpenloadIE(InfoExtractor):
|
||||
class VerystreamIE(OpenloadIE):
|
||||
IE_NAME = 'verystream'
|
||||
|
||||
_DOMAINS = r'(?:verystream\.com)'
|
||||
_DOMAINS = r'(?:verystream\.com|woof\.tube)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
|
@@ -86,12 +86,13 @@ class ORFTVthekIE(InfoExtractor):
|
||||
if value:
|
||||
format_id_list.append(value)
|
||||
format_id = '-'.join(format_id_list)
|
||||
if determine_ext(fd['src']) == 'm3u8':
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fd['src'], video_id, 'mp4', m3u8_id=format_id))
|
||||
elif determine_ext(fd['src']) == 'f4m':
|
||||
src, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
fd['src'], video_id, f4m_id=format_id))
|
||||
src, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
|
@@ -18,81 +18,385 @@ from ..utils import (
|
||||
class PeerTubeIE(InfoExtractor):
|
||||
_INSTANCES_RE = r'''(?:
|
||||
# Taken from https://instances.joinpeertube.org/instances
|
||||
peertube\.rainbowswingers\.net|
|
||||
tube\.stanisic\.nl|
|
||||
peer\.suiri\.us|
|
||||
medias\.libox\.fr|
|
||||
videomensoif\.ynh\.fr|
|
||||
peertube\.travelpandas\.eu|
|
||||
peertube\.rachetjay\.fr|
|
||||
peertube\.montecsys\.fr|
|
||||
tube\.eskuero\.me|
|
||||
peer\.tube|
|
||||
peertube\.umeahackerspace\.se|
|
||||
tube\.nx-pod\.de|
|
||||
video\.monsieurbidouille\.fr|
|
||||
tube\.openalgeria\.org|
|
||||
peertube\.pointsecu\.fr|
|
||||
vid\.lelux\.fi|
|
||||
video\.anormallostpod\.ovh|
|
||||
tube\.crapaud-fou\.org|
|
||||
peertube\.stemy\.me|
|
||||
lostpod\.space|
|
||||
exode\.me|
|
||||
peertube\.snargol\.com|
|
||||
vis\.ion\.ovh|
|
||||
videosdulib\.re|
|
||||
v\.mbius\.io|
|
||||
videos\.judrey\.eu|
|
||||
peertube\.osureplayviewer\.xyz|
|
||||
peertube\.mathieufamily\.ovh|
|
||||
www\.videos-libr\.es|
|
||||
fightforinfo\.com|
|
||||
peertube\.fediverse\.ru|
|
||||
peertube\.oiseauroch\.fr|
|
||||
video\.nesven\.eu|
|
||||
v\.bearvideo\.win|
|
||||
video\.qoto\.org|
|
||||
justporn\.cc|
|
||||
video\.vny\.fr|
|
||||
peervideo\.club|
|
||||
tube\.taker\.fr|
|
||||
peertube\.chantierlibre\.org|
|
||||
tube\.ipfixe\.info|
|
||||
tube\.kicou\.info|
|
||||
tube\.dodsorf\.as|
|
||||
videobit\.cc|
|
||||
video\.yukari\.moe|
|
||||
videos\.elbinario\.net|
|
||||
hkvideo\.live|
|
||||
pt\.tux\.tf|
|
||||
www\.hkvideo\.live|
|
||||
FIGHTFORINFO\.com|
|
||||
pt\.765racing\.com|
|
||||
peertube\.gnumeria\.eu\.org|
|
||||
nordenmedia\.com|
|
||||
peertube\.co\.uk|
|
||||
tube\.darfweb\.eu|
|
||||
tube\.kalah-france\.org|
|
||||
0ch\.in|
|
||||
vod\.mochi\.academy|
|
||||
film\.node9\.org|
|
||||
peertube\.hatthieves\.es|
|
||||
video\.fitchfamily\.org|
|
||||
peertube\.ddns\.net|
|
||||
video\.ifuncle\.kr|
|
||||
video\.fdlibre\.eu|
|
||||
tube\.22decembre\.eu|
|
||||
peertube\.harmoniescreatives\.com|
|
||||
tube\.fabrigli\.fr|
|
||||
video\.thedwyers\.co|
|
||||
video\.bruitbruit\.com|
|
||||
peertube\.foxfam\.club|
|
||||
peer\.philoxweb\.be|
|
||||
videos\.bugs\.social|
|
||||
peertube\.malbert\.xyz|
|
||||
peertube\.bilange\.ca|
|
||||
libretube\.net|
|
||||
diytelevision\.com|
|
||||
peertube\.fedilab\.app|
|
||||
libre\.video|
|
||||
video\.mstddntfdn\.online|
|
||||
us\.tv|
|
||||
peertube\.sl-network\.fr|
|
||||
peertube\.dynlinux\.io|
|
||||
peertube\.david\.durieux\.family|
|
||||
peertube\.linuxrocks\.online|
|
||||
peerwatch\.xyz|
|
||||
v\.kretschmann\.social|
|
||||
tube\.otter\.sh|
|
||||
yt\.is\.nota\.live|
|
||||
tube\.dragonpsi\.xyz|
|
||||
peertube\.boneheadmedia\.com|
|
||||
videos\.funkwhale\.audio|
|
||||
watch\.44con\.com|
|
||||
peertube\.gcaillaut\.fr|
|
||||
peertube\.icu|
|
||||
pony\.tube|
|
||||
spacepub\.space|
|
||||
tube\.stbr\.io|
|
||||
v\.mom-gay\.faith|
|
||||
tube\.port0\.xyz|
|
||||
peertube\.simounet\.net|
|
||||
play\.jergefelt\.se|
|
||||
peertube\.zeteo\.me|
|
||||
tube\.danq\.me|
|
||||
peertube\.kerenon\.com|
|
||||
tube\.fab-l3\.org|
|
||||
tube\.calculate\.social|
|
||||
peertube\.mckillop\.org|
|
||||
tube\.netzspielplatz\.de|
|
||||
vod\.ksite\.de|
|
||||
peertube\.laas\.fr|
|
||||
tube\.govital\.net|
|
||||
peertube\.stephenson\.cc|
|
||||
bistule\.nohost\.me|
|
||||
peertube\.kajalinifi\.de|
|
||||
video\.ploud\.jp|
|
||||
video\.omniatv\.com|
|
||||
peertube\.ffs2play\.fr|
|
||||
peertube\.leboulaire\.ovh|
|
||||
peertube\.tronic-studio\.com|
|
||||
peertube\.public\.cat|
|
||||
peertube\.metalbanana\.net|
|
||||
video\.1000i100\.fr|
|
||||
peertube\.alter-nativ-voll\.de|
|
||||
tube\.pasa\.tf|
|
||||
tube\.worldofhauru\.xyz|
|
||||
pt\.kamp\.site|
|
||||
peertube\.teleassist\.fr|
|
||||
videos\.mleduc\.xyz|
|
||||
conf\.tube|
|
||||
media\.privacyinternational\.org|
|
||||
pt\.forty-two\.nl|
|
||||
video\.halle-leaks\.de|
|
||||
video\.grosskopfgames\.de|
|
||||
peertube\.schaeferit\.de|
|
||||
peertube\.jackbot\.fr|
|
||||
tube\.extinctionrebellion\.fr|
|
||||
peertube\.f-si\.org|
|
||||
video\.subak\.ovh|
|
||||
videos\.koweb\.fr|
|
||||
peertube\.zergy\.net|
|
||||
peertube\.roflcopter\.fr|
|
||||
peertube\.floss-marketing-school\.com|
|
||||
vloggers\.social|
|
||||
peertube\.iriseden\.eu|
|
||||
videos\.ubuntu-paris\.org|
|
||||
peertube\.mastodon\.host|
|
||||
armstube\.com|
|
||||
peertube\.s2s\.video|
|
||||
peertube\.lol|
|
||||
tube\.open-plug\.eu|
|
||||
open\.tube|
|
||||
peertube\.ch|
|
||||
peertube\.normandie-libre\.fr|
|
||||
peertube\.slat\.org|
|
||||
video\.lacaveatonton\.ovh|
|
||||
peertube\.uno|
|
||||
peertube\.servebeer\.com|
|
||||
peertube\.fedi\.quebec|
|
||||
tube\.h3z\.jp|
|
||||
tube\.plus200\.com|
|
||||
peertube\.eric\.ovh|
|
||||
tube\.metadocs\.cc|
|
||||
tube\.unmondemeilleur\.eu|
|
||||
gouttedeau\.space|
|
||||
video\.antirep\.net|
|
||||
nrop\.cant\.at|
|
||||
tube\.ksl-bmx\.de|
|
||||
tube\.plaf\.fr|
|
||||
tube\.tchncs\.de|
|
||||
video\.devinberg\.com|
|
||||
hitchtube\.fr|
|
||||
peertube\.kosebamse\.com|
|
||||
yunopeertube\.myddns\.me|
|
||||
peertube\.varney\.fr|
|
||||
peertube\.anon-kenkai\.com|
|
||||
tube\.maiti\.info|
|
||||
tubee\.fr|
|
||||
videos\.dinofly\.com|
|
||||
toobnix\.org|
|
||||
videotape\.me|
|
||||
voca\.tube|
|
||||
video\.heromuster\.com|
|
||||
video\.lemediatv\.fr|
|
||||
video\.up\.edu\.ph|
|
||||
balafon\.video|
|
||||
video\.ivel\.fr|
|
||||
thickrips\.cloud|
|
||||
pt\.laurentkruger\.fr|
|
||||
video\.monarch-pass\.net|
|
||||
peertube\.artica\.center|
|
||||
video\.alternanet\.fr|
|
||||
indymotion\.fr|
|
||||
fanvid\.stopthatimp\.net|
|
||||
video\.farci\.org|
|
||||
v\.lesterpig\.com|
|
||||
video\.okaris\.de|
|
||||
tube\.pawelko\.net|
|
||||
peertube\.mablr\.org|
|
||||
tube\.fede\.re|
|
||||
pytu\.be|
|
||||
evertron\.tv|
|
||||
devtube\.dev-wiki\.de|
|
||||
raptube\.antipub\.org|
|
||||
video\.selea\.se|
|
||||
peertube\.mygaia\.org|
|
||||
video\.oh14\.de|
|
||||
peertube\.livingutopia\.org|
|
||||
peertube\.the-penguin\.de|
|
||||
tube\.thechangebook\.org|
|
||||
tube\.anjara\.eu|
|
||||
pt\.pube\.tk|
|
||||
video\.samedi\.pm|
|
||||
mplayer\.demouliere\.eu|
|
||||
widemus\.de|
|
||||
peertube\.me|
|
||||
peertube\.zapashcanon\.fr|
|
||||
video\.latavernedejohnjohn\.fr|
|
||||
peertube\.pcservice46\.fr|
|
||||
peertube\.mazzonetto\.eu|
|
||||
video\.irem\.univ-paris-diderot\.fr|
|
||||
video\.livecchi\.cloud|
|
||||
alttube\.fr|
|
||||
video\.coop\.tools|
|
||||
video\.cabane-libre\.org|
|
||||
peertube\.openstreetmap\.fr|
|
||||
videos\.alolise\.org|
|
||||
irrsinn\.video|
|
||||
video\.antopie\.org|
|
||||
scitech\.video|
|
||||
tube2\.nemsia\.org|
|
||||
video\.amic37\.fr|
|
||||
peertube\.freeforge\.eu|
|
||||
video\.arbitrarion\.com|
|
||||
video\.datsemultimedia\.com|
|
||||
stoptrackingus\.tv|
|
||||
peertube\.ricostrongxxx\.com|
|
||||
docker\.videos\.lecygnenoir\.info|
|
||||
peertube\.togart\.de|
|
||||
tube\.postblue\.info|
|
||||
videos\.domainepublic\.net|
|
||||
peertube\.cyber-tribal\.com|
|
||||
video\.gresille\.org|
|
||||
peertube\.dsmouse\.net|
|
||||
cinema\.yunohost\.support|
|
||||
tube\.theocevaer\.fr|
|
||||
repro\.video|
|
||||
tube\.4aem\.com|
|
||||
quaziinc\.com|
|
||||
peertube\.metawurst\.space|
|
||||
videos\.wakapo\.com|
|
||||
video\.ploud\.fr|
|
||||
video\.freeradical\.zone|
|
||||
tube\.valinor\.fr|
|
||||
refuznik\.video|
|
||||
pt\.kircheneuenburg\.de|
|
||||
peertube\.asrun\.eu|
|
||||
peertube\.lagob\.fr|
|
||||
videos\.side-ways\.net|
|
||||
91video\.online|
|
||||
video\.valme\.io|
|
||||
video\.taboulisme\.com|
|
||||
videos-libr\.es|
|
||||
tv\.mooh\.fr|
|
||||
nuage\.acostey\.fr|
|
||||
video\.monsieur-a\.fr|
|
||||
peertube\.librelois\.fr|
|
||||
videos\.pair2jeux\.tube|
|
||||
videos\.pueseso\.club|
|
||||
peer\.mathdacloud\.ovh|
|
||||
media\.assassinate-you\.net|
|
||||
vidcommons\.org|
|
||||
ptube\.rousset\.nom\.fr|
|
||||
tube\.cyano\.at|
|
||||
videos\.squat\.net|
|
||||
video\.iphodase\.fr|
|
||||
peertube\.makotoworkshop\.org|
|
||||
peertube\.serveur\.slv-valbonne\.fr|
|
||||
vault\.mle\.party|
|
||||
hostyour\.tv|
|
||||
videos\.hack2g2\.fr|
|
||||
libre\.tube|
|
||||
pire\.artisanlogiciel\.net|
|
||||
videos\.numerique-en-commun\.fr|
|
||||
video\.netsyms\.com|
|
||||
video\.die-partei\.social|
|
||||
video\.writeas\.org|
|
||||
peertube\.swarm\.solvingmaz\.es|
|
||||
tube\.pericoloso\.ovh|
|
||||
watching\.cypherpunk\.observer|
|
||||
videos\.adhocmusic\.com|
|
||||
tube\.rfc1149\.net|
|
||||
peertube\.librelabucm\.org|
|
||||
videos\.numericoop\.fr|
|
||||
peertube\.koehn\.com|
|
||||
peertube\.anarchmusicall\.net|
|
||||
tube\.kampftoast\.de|
|
||||
vid\.y-y\.li|
|
||||
peertube\.xtenz\.xyz|
|
||||
diode\.zone|
|
||||
tube\.egf\.mn|
|
||||
peertube\.nomagic\.uk|
|
||||
visionon\.tv|
|
||||
videos\.koumoul\.com|
|
||||
video\.rastapuls\.com|
|
||||
video\.mantlepro\.com|
|
||||
video\.deadsuperhero\.com|
|
||||
peertube\.musicstudio\.pro|
|
||||
peertube\.we-keys\.fr|
|
||||
artitube\.artifaille\.fr|
|
||||
peertube\.ethernia\.net|
|
||||
tube\.midov\.pl|
|
||||
peertube\.fr|
|
||||
watch\.snoot\.tube|
|
||||
peertube\.donnadieu\.fr|
|
||||
argos\.aquilenet\.fr|
|
||||
tube\.nemsia\.org|
|
||||
tube\.bruniau\.net|
|
||||
videos\.darckoune\.moe|
|
||||
tube\.traydent\.info|
|
||||
dev\.videos\.lecygnenoir\.info|
|
||||
peertube\.nayya\.org|
|
||||
peertube\.live|
|
||||
peertube\.mofgao\.space|
|
||||
video\.lequerrec\.eu|
|
||||
peertube\.amicale\.net|
|
||||
aperi\.tube|
|
||||
tube\.ac-lyon\.fr|
|
||||
video\.lw1\.at|
|
||||
www\.yiny\.org|
|
||||
videos\.pofilo\.fr|
|
||||
tube\.lou\.lt|
|
||||
choob\.h\.etbus\.ch|
|
||||
tube\.hoga\.fr|
|
||||
peertube\.heberge\.fr|
|
||||
video\.obermui\.de|
|
||||
videos\.cloudfrancois\.fr|
|
||||
betamax\.video|
|
||||
video\.typica\.us|
|
||||
tube\.piweb\.be|
|
||||
video\.blender\.org|
|
||||
peertube\.cat|
|
||||
tube\.kdy\.ch|
|
||||
pe\.ertu\.be|
|
||||
peertube\.social|
|
||||
videos\.lescommuns\.org|
|
||||
tv\.datamol\.org|
|
||||
videonaute\.fr|
|
||||
dialup\.express|
|
||||
peertube\.nogafa\.org|
|
||||
peertube\.pl|
|
||||
megatube\.lilomoino\.fr|
|
||||
peertube\.tamanoir\.foucry\.net|
|
||||
peertube\.inapurna\.org|
|
||||
peertube\.netzspielplatz\.de|
|
||||
video\.deadsuperhero\.com|
|
||||
peertube\.devosi\.org|
|
||||
peertube\.1312\.media|
|
||||
tube\.worldofhauru\.xyz|
|
||||
tube\.bootlicker\.party|
|
||||
skeptikon\.fr|
|
||||
peertube\.geekshell\.fr|
|
||||
tube\.opportunis\.me|
|
||||
peertube\.peshane\.net|
|
||||
video\.blueline\.mg|
|
||||
tube\.homecomputing\.fr|
|
||||
videos\.cloudfrancois\.fr|
|
||||
peertube\.viviers-fibre\.net|
|
||||
tube\.ouahpiti\.info|
|
||||
video\.tedomum\.net|
|
||||
video\.g3l\.org|
|
||||
fontube\.fr|
|
||||
peertube\.gaialabs\.ch|
|
||||
peertube\.extremely\.online|
|
||||
peertube\.public-infrastructure\.eu|
|
||||
tube\.kher\.nl|
|
||||
peertube\.qtg\.fr|
|
||||
tube\.22decembre\.eu|
|
||||
facegirl\.me|
|
||||
video\.migennes\.net|
|
||||
janny\.moe|
|
||||
tube\.p2p\.legal|
|
||||
video\.atlanti\.se|
|
||||
troll\.tv|
|
||||
peertube\.geekael\.fr|
|
||||
vid\.leotindall\.com|
|
||||
video\.anormallostpod\.ovh|
|
||||
p-tube\.h3z\.jp|
|
||||
tube\.darfweb\.eu|
|
||||
videos\.iut-orsay\.fr|
|
||||
peertube\.solidev\.net|
|
||||
videos\.symphonie-of-code\.fr|
|
||||
testtube\.ortg\.de|
|
||||
videos\.cemea\.org|
|
||||
peertube\.gwendalavir\.eu|
|
||||
video\.passageenseine\.fr|
|
||||
videos\.festivalparminous\.org|
|
||||
peertube\.touhoppai\.moe|
|
||||
peertube\.duckdns\.org|
|
||||
sikke\.fi|
|
||||
peertube\.mastodon\.host|
|
||||
firedragonvideos\.com|
|
||||
vidz\.dou\.bet|
|
||||
peertube\.koehn\.com|
|
||||
peer\.hostux\.social|
|
||||
share\.tube|
|
||||
peertube\.walkingmountains\.fr|
|
||||
medias\.libox\.fr|
|
||||
peertube\.moe|
|
||||
peertube\.xyz|
|
||||
jp\.peertube\.network|
|
||||
videos\.benpro\.fr|
|
||||
tube\.otter\.sh|
|
||||
peertube\.angristan\.xyz|
|
||||
peertube\.parleur\.net|
|
||||
peer\.ecutsa\.fr|
|
||||
peertube\.heraut\.eu|
|
||||
peertube\.tifox\.fr|
|
||||
peertube\.maly\.io|
|
||||
vod\.mochi\.academy|
|
||||
exode\.me|
|
||||
coste\.video|
|
||||
tube\.aquilenet\.fr|
|
||||
peertube\.gegeweb\.eu|
|
||||
framatube\.org|
|
||||
@@ -100,18 +404,11 @@ class PeerTubeIE(InfoExtractor):
|
||||
tube\.conferences-gesticulees\.net|
|
||||
peertube\.datagueule\.tv|
|
||||
video\.lqdn\.fr|
|
||||
meilleurtube\.delire\.party|
|
||||
tube\.mochi\.academy|
|
||||
peertube\.dav\.li|
|
||||
media\.zat\.im|
|
||||
pytu\.be|
|
||||
peertube\.valvin\.fr|
|
||||
peertube\.nsa\.ovh|
|
||||
video\.colibris-outilslibres\.org|
|
||||
video\.hispagatos\.org|
|
||||
tube\.svnet\.fr|
|
||||
peertube\.video|
|
||||
videos\.lecygnenoir\.info|
|
||||
peertube3\.cpy\.re|
|
||||
peertube2\.cpy\.re|
|
||||
videos\.tcit\.fr|
|
||||
@@ -126,7 +423,7 @@ class PeerTubeIE(InfoExtractor):
|
||||
(?P<id>%s)
|
||||
''' % (_INSTANCES_RE, _UUID_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
||||
'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
||||
'md5': '80f24ff364cc9d333529506a263e7feb',
|
||||
'info_dict': {
|
||||
'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
||||
|
@@ -15,18 +15,17 @@ from ..utils import (
|
||||
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'url': 'http://player.piksel.com/v/ums2867l',
|
||||
'md5': '34e34c8d89dc2559976a6079db531e85',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'id': 'ums2867l',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
'title': 'GX-005 with Caption',
|
||||
'timestamp': 1481335659,
|
||||
'upload_date': '20161210'
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -39,8 +38,13 @@ class PikselIE(InfoExtractor):
|
||||
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
|
||||
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
|
||||
'timestamp': 1486171129,
|
||||
'upload_date': '20170204',
|
||||
'upload_date': '20170204'
|
||||
}
|
||||
},
|
||||
{
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
|
||||
'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -53,8 +57,11 @@ class PikselIE(InfoExtractor):
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
|
||||
webpage, 'program uuid', default=display_id)
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
@@ -113,6 +120,13 @@ class PikselIE(InfoExtractor):
})
self._sort_formats(formats)

subtitles = {}
for caption in video_data.get('captions', []):
caption_url = caption.get('url')
if caption_url:
subtitles.setdefault(caption.get('locale', 'en'), []).append({
'url': caption_url})

return {
'id': video_id,
'title': title,
@@ -120,4 +134,5 @@ class PikselIE(InfoExtractor):
'thumbnail': video_data.get('thumbnailUrl'),
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
'subtitles': subtitles,
}
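The captions loop added above just groups URLs by locale into the `subtitles` mapping youtube-dl expects. The same logic, run on made-up data:

```python
video_data = {'captions': [
    {'url': 'https://example.com/en.vtt', 'locale': 'en'},
    {'url': 'https://example.com/ja.vtt', 'locale': 'ja'},
]}

subtitles = {}
for caption in video_data.get('captions', []):
    caption_url = caption.get('url')
    if caption_url:
        # locale defaults to 'en' when the entry carries no locale field
        subtitles.setdefault(caption.get('locale', 'en'), []).append(
            {'url': caption_url})

print(subtitles)  # {'en': [{'url': '...en.vtt'}], 'ja': [{'url': '...ja.vtt'}]}
```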
@@ -18,43 +18,10 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class PlatziIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
platzi\.com/clases| # es version
|
||||
courses\.platzi\.com/classes # en version
|
||||
)/[^/]+/(?P<id>\d+)-[^/?\#&]+
|
||||
'''
|
||||
class PlatziBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://platzi.com/login/'
|
||||
_NETRC_MACHINE = 'platzi'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
|
||||
'md5': '8f56448241005b561c10f11a595b37e3',
|
||||
'info_dict': {
|
||||
'id': '12074',
|
||||
'ext': 'mp4',
|
||||
'title': 'Creando nuestra primera página',
|
||||
'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
|
||||
'duration': 420,
|
||||
},
|
||||
'skip': 'Requires platzi account credentials',
|
||||
}, {
|
||||
'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
|
||||
'info_dict': {
|
||||
'id': '13430',
|
||||
'ext': 'mp4',
|
||||
'title': 'Background',
|
||||
'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
|
||||
'duration': 360,
|
||||
},
|
||||
'skip': 'Requires platzi account credentials',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor):
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class PlatziIE(PlatziBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
platzi\.com/clases| # es version
|
||||
courses\.platzi\.com/classes # en version
|
||||
)/[^/]+/(?P<id>\d+)-[^/?\#&]+
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
|
||||
'md5': '8f56448241005b561c10f11a595b37e3',
|
||||
'info_dict': {
|
||||
'id': '12074',
|
||||
'ext': 'mp4',
|
||||
'title': 'Creando nuestra primera página',
|
||||
'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
|
||||
'duration': 420,
|
||||
},
|
||||
'skip': 'Requires platzi account credentials',
|
||||
}, {
|
||||
'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
|
||||
'info_dict': {
|
||||
'id': '13430',
|
||||
'ext': 'mp4',
|
||||
'title': 'Background',
|
||||
'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
|
||||
'duration': 360,
|
||||
},
|
||||
'skip': 'Requires platzi account credentials',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lecture_id = self._match_id(url)
|
||||
|
||||
@@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor):
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
|
||||
# client_data may contain "};" so that we have to try more
|
||||
# strict regex first
|
||||
(r'client_data\s*=\s*({.+?})\s*;\s*\n',
|
||||
r'client_data\s*=\s*({.+?})\s*;'),
|
||||
webpage, 'client data'),
|
||||
lecture_id)
|
||||
|
||||
material = data['initialState']['material']
|
||||
@@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class PlatziCourseIE(InfoExtractor):
|
||||
class PlatziCourseIE(PlatziBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
|
@@ -403,6 +403,15 @@ class PornHubUserIE(PornHubPlaylistBaseIE):

class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@staticmethod
def _has_more(webpage):
return re.search(
r'''(?x)
<li[^>]+\bclass=["\']page_next|
<link[^>]+\brel=["\']next|
<button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host')
@@ -411,13 +420,11 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page = int_or_none(self._search_regex(
r'\bpage=(\d+)', url, 'page', default=None))

page_url = self._make_page_url(url)

entries = []
for page_num in (page, ) if page is not None else itertools.count(1):
try:
webpage = self._download_webpage(
page_url, item_id, 'Downloading page %d' % page_num,
url, item_id, 'Downloading page %d' % page_num,
query={'page': page_num})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
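The shape of the shared paging loop, with the network call stubbed out so it runs standalone: walk `?page=N` until the next-page marker disappears (the real code additionally stops on HTTP 404):

```python
import itertools

def fake_download(page_num):
    # stand-in for _download_webpage(url, item_id, ..., query={'page': page_num})
    return '<li class="page_next">more</li>' if page_num < 3 else '<div>last page</div>'

def has_more(webpage):
    # same idea as _has_more above: look for a "next" control or a load-more button
    return 'page_next' in webpage or 'moreDataBtn' in webpage

entries = []
for page_num in itertools.count(1):
    webpage = fake_download(page_num)
    entries.append('entries from page %d' % page_num)
    if not has_more(webpage):
        break

print(entries)  # pages 1-3, then the loop stops
```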
@@ -547,18 +554,6 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
|
||||
else super(PornHubPagedVideoListIE, cls).suitable(url))
|
||||
|
||||
def _make_page_url(self, url):
|
||||
return url
|
||||
|
||||
@staticmethod
|
||||
def _has_more(webpage):
|
||||
return re.search(
|
||||
r'''(?x)
|
||||
<li[^>]+\bclass=["\']page_next|
|
||||
<link[^>]+\brel=["\']next|
|
||||
<button[^>]+\bid=["\']moreDataBtn
|
||||
''', webpage) is not None
|
||||
|
||||
|
||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
@@ -572,11 +567,3 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _make_page_url(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
return '%s/ajax' % mobj.group('url')
|
||||
|
||||
@staticmethod
|
||||
def _has_more(webpage):
|
||||
return True
|
||||
|
@@ -1,70 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PromptFileIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
|
||||
'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
|
||||
'info_dict': {
|
||||
'id': '86D1CE8462-576CAAE416',
|
||||
'ext': 'mp4',
|
||||
'title': 'oceans.mp4',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id,
|
||||
expected=True)
|
||||
|
||||
chash = self._search_regex(
|
||||
r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash')
|
||||
fields = self._hidden_inputs(webpage)
|
||||
keys = list(fields.keys())
|
||||
chash_key = keys[0] if len(keys) == 1 else next(
|
||||
key for key in keys if key.startswith('cha'))
|
||||
fields[chash_key] = chash + fields[chash_key]
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading video page',
|
||||
data=urlencode_postdata(fields),
|
||||
headers={'Content-type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
video_url = self._search_regex(
|
||||
(r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File',
|
||||
r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'),
|
||||
webpage, 'video url', group='url')
|
||||
title = self._html_search_regex(
|
||||
r'<span.+title="([^"]+)">', webpage, 'title')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
'ext': determine_ext(title),
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
@@ -45,11 +46,14 @@ class RedTubeIE(InfoExtractor):
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)

title = self._html_search_regex(
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
webpage, 'title', group='title',
default=None) or self._og_search_title(webpage)
info = self._search_json_ld(webpage, video_id, default={})

if not info.get('title'):
info['title'] = self._html_search_regex(
(r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
webpage, 'title', group='title',
default=None) or self._og_search_title(webpage)

formats = []
sources = self._parse_json(
@@ -88,28 +92,28 @@ class RedTubeIE(InfoExtractor):
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<span[^>]+>ADDED ([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
|
||||
webpage, 'upload date', default=None))
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'video:duration', webpage, default=None) or self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
(r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
|
||||
r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
|
||||
webpage, 'view count', fatal=False))
|
||||
r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
|
||||
r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
|
||||
webpage, 'view count', default=None))
|
||||
|
||||
# No self-labeling, but they describe themselves as
|
||||
# "Home of Videos Porno"
|
||||
age_limit = 18
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
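The net effect of the RedTube change is that structured data wins: `_search_json_ld` fills `info` first, the HTML regexes only run when it lacks a title, and `merge_dicts` gives earlier dictionaries precedence. A reduced stand-in for that precedence rule:

```python
def merge_dicts(*dicts):
    # Simplified: the first non-empty value for a key wins, as in youtube-dl's helper.
    merged = {}
    for d in dicts:
        for k, v in d.items():
            if merged.get(k) in (None, '') and v not in (None, ''):
                merged[k] = v
    return merged

info = {'title': 'Title from JSON-LD', 'duration': 123}        # structured data
scraped = {'title': 'Title from <h1> regex', 'view_count': 10}  # regex fallbacks

print(merge_dicts(info, scraped))
# {'title': 'Title from JSON-LD', 'duration': 123, 'view_count': 10}
```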
@@ -17,7 +17,7 @@ from ..utils import (

class RoosterTeethIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_LOGIN_URL = 'https://roosterteeth.com/login'
_NETRC_MACHINE = 'roosterteeth'
_TESTS = [{
@@ -49,6 +49,9 @@ class RoosterTeethIE(InfoExtractor):
# only available for FIRST members
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
'only_matching': True,
}, {
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True,
}]

def _login(self):
@@ -32,7 +32,7 @@ class RtlNlIE(InfoExtractor):
'duration': 1167.96,
},
}, {
# best format avaialble a3t
# best format available a3t
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
'md5': 'dea7474214af1271d91ef332fb8be7ea',
'info_dict': {
@@ -68,9 +68,10 @@ class SafariBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login: %s' % credentials, expected=True)

# oreilly serves two same groot_sessionid cookies in Set-Cookie header
# and expects first one to be actually set
self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
# oreilly serves two same instances of the following cookies
# in Set-Cookie header and expects first one to be actually set
for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
self._apply_first_set_cookie_header(urlh, cookie)

_, urlh = self._download_webpage_handle(
auth.get('redirect_uri') or next_uri, None, 'Completing login',)
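What "apply the first Set-Cookie header" boils down to for this flow: when the same cookie name shows up in several Set-Cookie headers, keep the first value. An illustration with made-up header values:

```python
from http.cookies import SimpleCookie

set_cookie_headers = [
    'groot_sessionid=first-value; Path=/',
    'groot_sessionid=second-value; Path=/',
    'orm-jwt=abc; Path=/',
]

jar = {}
for header in set_cookie_headers:
    cookie = SimpleCookie(header)
    for name, morsel in cookie.items():
        jar.setdefault(name, morsel.value)  # first occurrence wins

print(jar)  # {'groot_sessionid': 'first-value', 'orm-jwt': 'abc'}
```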
@@ -1,72 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ServingSysIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
|
||||
'info_dict': {
|
||||
'id': '5349193',
|
||||
'title': 'AdAPPter_Hyundai_demo',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'baed851342df6846eb8677a60a011a0f',
|
||||
'info_dict': {
|
||||
'id': '29955898',
|
||||
'ext': 'flv',
|
||||
'title': 'AdAPPter_Hyundai_demo (1)',
|
||||
'duration': 74,
|
||||
'tbr': 1378,
|
||||
'width': 640,
|
||||
'height': 400,
|
||||
},
|
||||
}, {
|
||||
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
|
||||
'info_dict': {
|
||||
'id': '29907998',
|
||||
'ext': 'flv',
|
||||
'title': 'AdAPPter_Hyundai_demo (2)',
|
||||
'duration': 34,
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
'tbr': 516,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
},
|
||||
'_skip': 'Blocked in the US [sic]',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
pl_id = self._match_id(url)
|
||||
vast_doc = self._download_xml(url, pl_id)
|
||||
|
||||
title = vast_doc.find('.//AdTitle').text
|
||||
media = vast_doc.find('.//MediaFile').text
|
||||
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
|
||||
|
||||
doc = self._download_xml(info_url, pl_id, 'Downloading video info')
|
||||
entries = [{
|
||||
'_type': 'video',
|
||||
'id': a.attrib['id'],
|
||||
'title': '%s (%s)' % (title, a.attrib['assetID']),
|
||||
'url': a.attrib['URL'],
|
||||
'duration': int_or_none(a.attrib.get('length')),
|
||||
'tbr': int_or_none(a.attrib.get('bitrate')),
|
||||
'height': int_or_none(a.attrib.get('height')),
|
||||
'width': int_or_none(a.attrib.get('width')),
|
||||
} for a in doc.findall('.//AdditionalAssets/asset')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': pl_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
@@ -197,7 +197,7 @@ class SoundcloudIE(InfoExtractor):
'skip_download': True,
},
},
# not avaialble via api.soundcloud.com/i1/tracks/id/streams
# not available via api.soundcloud.com/i1/tracks/id/streams
{
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
@@ -48,6 +48,16 @@ class TeachableBaseIE(InfoExtractor):
'https://%s/sign_in' % site, None,
'Downloading %s login page' % site)

def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'class=["\']user-signout',
r'<a[^>]+\bhref=["\']/sign_out',
r'Log\s+[Oo]ut\s*<'))

if is_logged(login_page):
self._logged_in = True
return

login_url = compat_str(urlh.geturl())

login_form = self._hidden_inputs(login_page)
@@ -78,10 +88,7 @@ class TeachableBaseIE(InfoExtractor):
'Go to https://%s/ and accept.' % (site, site), expected=True)

# Successful login
if any(re.search(p, response) for p in (
r'class=["\']user-signout',
r'<a[^>]+\bhref=["\']/sign_out',
r'>\s*Log out\s*<')):
if is_logged(response):
self._logged_in = True
return
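The refactor above just names the logged-in check so the same markers are used both before and after the form is submitted. The helper is self-contained enough to try directly:

```python
import re

def is_logged(webpage):
    # Same markers the extractor looks for: a sign-out control anywhere
    # on the page means the session is already valid.
    return any(re.search(p, webpage) for p in (
        r'class=["\']user-signout',
        r'<a[^>]+\bhref=["\']/sign_out',
        r'Log\s+[Oo]ut\s*<'))

print(is_logged('<a href="/sign_out">Log Out</a>'))     # True
print(is_logged('<form action="/sign_in">...</form>'))  # False
```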
@@ -182,20 +182,29 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
title = talk_info['title'].strip()
|
||||
|
||||
native_downloads = try_get(
|
||||
talk_info,
|
||||
(lambda x: x['downloads']['nativeDownloads'],
|
||||
lambda x: x['nativeDownloads']),
|
||||
dict) or {}
|
||||
downloads = talk_info.get('downloads') or {}
|
||||
native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
|
||||
|
||||
formats = [{
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'format': format_id,
|
||||
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
|
||||
|
||||
subtitled_downloads = downloads.get('subtitledDownloads') or {}
|
||||
for lang, subtitled_download in subtitled_downloads.items():
|
||||
for q in self._NATIVE_FORMATS:
|
||||
q_url = subtitled_download.get(q)
|
||||
if not q_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': q_url,
|
||||
'format_id': '%s-%s' % (q, lang),
|
||||
'language': lang,
|
||||
})
|
||||
|
||||
if formats:
|
||||
for f in formats:
|
||||
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
||||
finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
|
||||
if finfo:
|
||||
f.update(finfo)
|
||||
|
||||
@@ -215,34 +224,7 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in resources_.items():
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
if not h264_url:
|
||||
continue
|
||||
bitrate = int_or_none(resource.get('bitrate'))
|
||||
formats.append({
|
||||
'url': h264_url,
|
||||
'format_id': '%s-%sk' % (format_id, bitrate),
|
||||
'tbr': bitrate,
|
||||
})
|
||||
if re.search(r'\d+k', h264_url):
|
||||
http_url = h264_url
|
||||
elif format_id == 'rtmp':
|
||||
streamer = talk_info.get('streamer')
|
||||
if not streamer:
|
||||
continue
|
||||
for resource in resources:
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, resource.get('name')),
|
||||
'url': streamer,
|
||||
'play_path': resource['file'],
|
||||
'ext': 'flv',
|
||||
'width': int_or_none(resource.get('width')),
|
||||
'height': int_or_none(resource.get('height')),
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
if format_id == 'hls':
|
||||
if not isinstance(resources, dict):
|
||||
continue
|
||||
stream_url = url_or_none(resources.get('stream'))
|
||||
@@ -251,6 +233,36 @@ class TEDIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_name, 'mp4', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
else:
|
||||
if not isinstance(resources, list):
|
||||
continue
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
if not h264_url:
|
||||
continue
|
||||
bitrate = int_or_none(resource.get('bitrate'))
|
||||
formats.append({
|
||||
'url': h264_url,
|
||||
'format_id': '%s-%sk' % (format_id, bitrate),
|
||||
'tbr': bitrate,
|
||||
})
|
||||
if re.search(r'\d+k', h264_url):
|
||||
http_url = h264_url
|
||||
elif format_id == 'rtmp':
|
||||
streamer = talk_info.get('streamer')
|
||||
if not streamer:
|
||||
continue
|
||||
for resource in resources:
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, resource.get('name')),
|
||||
'url': streamer,
|
||||
'play_path': resource['file'],
|
||||
'ext': 'flv',
|
||||
'width': int_or_none(resource.get('width')),
|
||||
'height': int_or_none(resource.get('height')),
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,7 +23,13 @@ class TeleQuebecBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
zonevideo\.telequebec\.tv/media|
|
||||
coucou\.telequebec\.tv/videos
|
||||
)/(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# available till 01.01.2023
|
||||
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
|
||||
@@ -41,6 +48,9 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
# no description
|
||||
'url': 'http://zonevideo.telequebec.tv/media/30261',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -61,6 +71,52 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
return info
|
||||
|
||||
|
||||
class TeleQuebecSquatIE(InfoExtractor):
|
||||
_VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://squat.telequebec.tv/videos/9314',
|
||||
'info_dict': {
|
||||
'id': 'd59ae78112d542e793d83cc9d3a5b530',
|
||||
'ext': 'mp4',
|
||||
'title': 'Poupeflekta',
|
||||
'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
|
||||
'duration': 1351,
|
||||
'timestamp': 1569057600,
|
||||
'upload_date': '20190921',
|
||||
'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
|
||||
'season': 'Saison 3',
|
||||
'season_number': 3,
|
||||
'episode_number': 57,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
media_id = video['sourceId']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
|
||||
'ie_key': TeleQuebecIE.ie_key(),
|
||||
'id': media_id,
|
||||
'title': video.get('titre'),
|
||||
'description': video.get('description'),
|
||||
'timestamp': unified_timestamp(video.get('datePublication')),
|
||||
'series': video.get('container'),
|
||||
'season': video.get('saison'),
|
||||
'season_number': int_or_none(video.get('noSaison')),
|
||||
'episode_number': int_or_none(video.get('episode')),
|
||||
}
|
||||
|
||||
|
||||
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
|
@@ -72,8 +72,13 @@ class TV4IE(InfoExtractor):
video_id = self._match_id(url)

info = self._download_json(
'http://www.tv4play.se/player/assets/%s.json' % video_id,
video_id, 'Downloading video info JSON')
'https://playback-api.b17g.net/asset/%s' % video_id,
video_id, 'Downloading video info JSON', query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls,dash',
'drm': 'widevine',
})['metadata']

title = info['title']

@@ -111,5 +116,9 @@ class TV4IE(InfoExtractor):
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
'is_live': info.get('is_live') is True,
'is_live': info.get('isLive') is True,
'series': info.get('seriesTitle'),
'season_number': int_or_none(info.get('seasonNumber')),
'episode': info.get('episodeTitle'),
'episode_number': int_or_none(info.get('episodeNumber')),
}
@@ -9,6 +9,8 @@ from ..utils import (
    float_or_none,
    int_or_none,
    parse_age_limit,
    try_get,
    url_or_none,
)


@@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/video/sokrat/',
            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
            'info_dict': {
                'id': '1848932',
                'display_id': 'sokrat',
                'ext': 'flv',
                'ext': 'mp4',
                'title': 'Сократ',
                'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
                'duration': 6586,
@@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor):
        },
        {
            'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '5142516',
                'ext': 'flv',
@@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor):
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(
            (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
             r'var\s+cloudId\s*=\s*["\'](\d+)',
             r'cloudId\s*=\s*["\'](\d+)',
             r'class="video-preview current_playing" id="(\d+)"'),
            webpage, 'video id')

@@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor):
        age_limit = parse_age_limit(item.get('ageRestrictions'))

        formats = []
        for vcodec, fmts in item['videos'].items():
        for vcodec, url_or_fmts in item['videos'].items():
            if vcodec == 'hls':
                continue
            for format_id, video_url in fmts.items():
                if format_id == 'm3u8':
                m3u8_url = url_or_none(url_or_fmts)
                if not m3u8_url:
                    continue
                height = self._search_regex(
                    r'^(\d+)[pP]$', format_id, 'height', default=None)
                formats.append({
                    'url': video_url,
                    'format_id': '%s-%s' % (vcodec, format_id),
                    'vcodec': vcodec,
                    'height': int_or_none(height),
                    'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
                })
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif vcodec == 'dash':
                mpd_url = url_or_none(url_or_fmts)
                if not mpd_url:
                    continue
                formats.extend(self._extract_mpd_formats(
                    mpd_url, video_id, mpd_id='dash', fatal=False))
            else:
                if not isinstance(url_or_fmts, dict):
                    continue
                for format_id, video_url in url_or_fmts.items():
                    if format_id == 'm3u8':
                        continue
                    video_url = url_or_none(video_url)
                    if not video_url:
                        continue
                    height = self._search_regex(
                        r'^(\d+)[pP]$', format_id, 'height', default=None)
                    filesize = int_or_none(try_get(
                        item, lambda x: x['video_files_size'][vcodec][format_id]))
                    formats.append({
                        'url': video_url,
                        'format_id': '%s-%s' % (vcodec, format_id),
                        'vcodec': vcodec,
                        'height': int_or_none(height),
                        'filesize': filesize,
                    })
        self._sort_formats(formats)

        return {
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
    int_or_none,
    NO_DEFAULT,
    unescapeHTML,
)

@@ -17,9 +18,21 @@ class TVN24IE(InfoExtractor):
            'id': '1584444',
            'ext': 'mp4',
            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
            'thumbnail': 're:https?://.*[.]jpeg',
        }
    }, {
        # different layout
        'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
        'info_dict': {
            'id': '1771763',
            'ext': 'mp4',
            'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
            'thumbnail': 're:https?://.*',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
        'only_matching': True,
@@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor):
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)
        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
            webpage, 'title')

        def extract_json(attr, name, fatal=True):
        def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
            return self._parse_json(
                self._search_regex(
                    r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
                    name, group='json', fatal=fatal) or '{}',
                video_id, transform_source=unescapeHTML, fatal=fatal)
                    name, group='json', default=default, fatal=fatal) or '{}',
                display_id, transform_source=unescapeHTML, fatal=fatal)

        quality_data = extract_json('data-quality', 'formats')

@@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor):
            })
        self._sort_formats(formats)

        description = self._og_search_description(webpage)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_regex(
            r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
            'thumbnail', group='url')

        video_id = None

        share_params = extract_json(
            'data-share-params', 'share params', fatal=False)
            'data-share-params', 'share params', default=None)
        if isinstance(share_params, dict):
            video_id = share_params.get('id') or video_id
            video_id = share_params.get('id')

        if not video_id:
            video_id = self._search_regex(
                r'data-vid-id=["\'](\d+)', webpage, 'video id',
                default=None) or self._search_regex(
                r',(\d+)\.html', url, 'video id', default=display_id)

        return {
            'id': video_id,
@@ -248,7 +248,7 @@ class TwitchVodIE(TwitchItemBaseIE):
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v
                            player\.twitch\.tv/\?.*?\bvideo=v?
                        )
                        (?P<id>\d+)
                    '''
@@ -306,6 +306,9 @@ class TwitchVodIE(TwitchItemBaseIE):
    }, {
        'url': 'https://www.twitch.tv/northernlion/video/291940395',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?video=480452374',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -1,11 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
NO_DEFAULT,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -31,22 +29,22 @@ class USANetworkIE(AdobePassIE):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_params = extract_attributes(self._search_regex(
|
||||
r'(<div[^>]+data-usa-tve-player-container[^>]*>)', webpage, 'player params'))
|
||||
video_id = player_params['data-mpx-guid']
|
||||
title = player_params['data-episode-title']
|
||||
def _x(name, default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
||||
webpage, name, default=default, group='value')
|
||||
|
||||
account_pid, path = re.search(
|
||||
r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)',
|
||||
webpage).groups()
|
||||
video_id = _x('mpx-guid')
|
||||
title = _x('episode-title')
|
||||
mpx_account_id = _x('mpx-account-id', '2304992029')
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if player_params.get('data-is-full-episode') == '1':
|
||||
if _x('is-full-episode', None) == '1':
|
||||
query['manifest'] = 'm3u'
|
||||
|
||||
if player_params.get('data-entitlement') == 'auth':
|
||||
if _x('is-entitlement', None) == '1':
|
||||
adobe_pass = {}
|
||||
drupal_settings = self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
@@ -57,7 +55,7 @@ class USANetworkIE(AdobePassIE):
|
||||
adobe_pass = drupal_settings.get('adobePass', {})
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'usa'),
|
||||
title, video_id, player_params.get('data-episode-rating', 'TV-14'))
|
||||
title, video_id, _x('episode-rating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
|
||||
|
||||
@@ -65,11 +63,11 @@ class USANetworkIE(AdobePassIE):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/%s/%s' % (account_pid, path),
|
||||
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
|
||||
query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'series': player_params.get('data-show-title'),
|
||||
'series': _x('show-title', None),
|
||||
'episode': title,
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
|
@@ -1,157 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class VesselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
|
||||
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
|
||||
_LOGIN_URL = 'https://www.vessel.com/api/account/login'
|
||||
_NETRC_MACHINE = 'vessel'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vessel.com/videos/HDN7G5UMs',
|
||||
'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
|
||||
'info_dict': {
|
||||
'id': 'HDN7G5UMs',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150317',
|
||||
'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
|
||||
'timestamp': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/videos/F01_dsLj1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.vessel.com/videos/RRX-sir-J',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
@staticmethod
|
||||
def make_json_request(url, data):
|
||||
payload = json.dumps(data).encode('utf-8')
|
||||
req = sanitized_Request(url, payload)
|
||||
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
||||
return req
|
||||
|
||||
@staticmethod
|
||||
def find_assets(data, asset_type, asset_id=None):
|
||||
for asset in data.get('assets', []):
|
||||
if not asset.get('type') == asset_type:
|
||||
continue
|
||||
elif asset_id is not None and not asset.get('id') == asset_id:
|
||||
continue
|
||||
else:
|
||||
yield asset
|
||||
|
||||
def _check_access_rights(self, data):
|
||||
access_info = data.get('__view', {})
|
||||
if not access_info.get('allow_access', True):
|
||||
err_code = access_info.get('error_code') or ''
|
||||
if err_code == 'ITEM_PAID_ONLY':
|
||||
raise ExtractorError(
|
||||
'This video requires subscription.', expected=True)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
data = {
|
||||
'client_id': 'web',
|
||||
'type': 'password',
|
||||
'user_key': username,
|
||||
'password': password,
|
||||
}
|
||||
login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
|
||||
asset_id = data['model']['data']['id']
|
||||
|
||||
req = VesselIE.make_json_request(
|
||||
self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
|
||||
data = self._download_json(req, video_id)
|
||||
video_asset_id = data.get('main_video_asset')
|
||||
|
||||
self._check_access_rights(data)
|
||||
|
||||
try:
|
||||
video_asset = next(
|
||||
VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
|
||||
except StopIteration:
|
||||
raise ExtractorError('No video assets found')
|
||||
|
||||
formats = []
|
||||
for f in video_asset.get('sources', []):
|
||||
location = f.get('location')
|
||||
if not location:
|
||||
continue
|
||||
name = f.get('name')
|
||||
if name == 'hls-index':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
location, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False))
|
||||
elif name == 'dash-index':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
location, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': name,
|
||||
'tbr': f.get('bitrate'),
|
||||
'height': f.get('height'),
|
||||
'width': f.get('width'),
|
||||
'url': location,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for im_asset in VesselIE.find_assets(data, 'image'):
|
||||
thumbnails.append({
|
||||
'url': im_asset['location'],
|
||||
'width': im_asset.get('width', 0),
|
||||
'height': im_asset.get('height', 0),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': data.get('short_description'),
|
||||
'duration': data.get('duration'),
|
||||
'comment_count': data.get('comment_count'),
|
||||
'like_count': data.get('like_count'),
|
||||
'view_count': data.get('view_count'),
|
||||
'timestamp': parse_iso8601(data.get('released_at')),
|
||||
}
|
@@ -13,11 +13,12 @@ from ..utils import (
|
||||
js_to_json,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ViewLiftBaseIE(InfoExtractor):
|
||||
_DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
|
||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
|
||||
|
||||
|
||||
class ViewLiftEmbedIE(ViewLiftBaseIE):
|
||||
@@ -113,7 +114,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
|
||||
|
||||
|
||||
class ViewLiftIE(ViewLiftBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
|
||||
'md5': '19844f897b35af219773fd63bdec2942',
|
||||
@@ -128,7 +129,7 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
'categories': 'mincount:3',
|
||||
'age_limit': 14,
|
||||
'upload_date': '20150421',
|
||||
'timestamp': 1429656819,
|
||||
'timestamp': 1429656820,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
|
||||
@@ -141,10 +142,26 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 979,
|
||||
'categories': 'mincount:2',
|
||||
'timestamp': 1399478279,
|
||||
'upload_date': '20140507',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
|
||||
'info_dict': {
|
||||
'id': '00000148-7b53-de26-a9fb-fbf306f70020',
|
||||
'display_id': 'augie_alone/s_2_ep_12_love',
|
||||
'ext': 'mp4',
|
||||
'title': 'Augie, Alone:S. 2 Ep. 12 - Love',
|
||||
'description': 'md5:db2a5c72d994f16a780c1eb353a8f403',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 107,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://main.snagfilms.com/films/title/the_freebie',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Film is not playable in your area.
|
||||
'url': 'http://www.snagfilms.com/films/title/inside_mecca',
|
||||
@@ -162,6 +179,10 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
@@ -181,7 +202,21 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
gist = content_data['gist']
|
||||
film_id = gist['id']
|
||||
title = gist['title']
|
||||
video_assets = content_data['streamingInfo']['videoAssets']
|
||||
video_assets = try_get(
|
||||
content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
|
||||
if not video_assets:
|
||||
token = self._download_json(
|
||||
'https://prod-api.viewlift.com/identity/anonymous-token',
|
||||
film_id, 'Downloading authorization token',
|
||||
query={'site': 'snagfilms'})['authorizationToken']
|
||||
video_assets = self._download_json(
|
||||
'https://prod-api.viewlift.com/entitlement/video/status',
|
||||
film_id, headers={
|
||||
'Authorization': token,
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'id': film_id
|
||||
})['video']['streamingInfo']['videoAssets']
|
||||
|
||||
formats = []
|
||||
mpeg_video_assets = video_assets.get('mpeg') or []
|
||||
@@ -241,8 +276,9 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
if category.get('title')]
|
||||
break
|
||||
else:
|
||||
title = self._search_regex(
|
||||
r'itemprop="title">([^<]+)<', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
(r'itemprop="title">([^<]+)<',
|
||||
r'(?s)itemprop="title">(.+?)<div'), webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
|
||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
||||
|
@@ -1,217 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
HEADRequest,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class ViewsterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_TESTS = [{
|
||||
# movie, Type=Movie
|
||||
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
||||
'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000',
|
||||
'ext': 'mp4',
|
||||
'title': 'The listening Project',
|
||||
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
||||
'timestamp': 1214870400,
|
||||
'upload_date': '20080701',
|
||||
'duration': 4680,
|
||||
},
|
||||
}, {
|
||||
# series episode, Type=Episode
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
||||
'md5': '9243079a8531809efe1b089db102c069',
|
||||
'info_dict': {
|
||||
'id': '1284-19427-001',
|
||||
'ext': 'mp4',
|
||||
'title': 'The World and a Wall',
|
||||
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
||||
'timestamp': 1428192000,
|
||||
'upload_date': '20150405',
|
||||
'duration': 1500,
|
||||
},
|
||||
}, {
|
||||
# serie, Type=Serie
|
||||
'url': 'http://www.viewster.com/serie/1303-19426-000/',
|
||||
'info_dict': {
|
||||
'id': '1303-19426-000',
|
||||
'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
|
||||
'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# unfinished serie, no Type
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
|
||||
'info_dict': {
|
||||
'id': '1284-19427-000',
|
||||
'title': 'Baby Steps—Season 2',
|
||||
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
# geo restricted series
|
||||
'url': 'https://www.viewster.com/serie/1280-18794-002/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted video
|
||||
'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
request.add_header('Auth-token', self._AUTH_TOKEN)
|
||||
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# Get 'api_token' cookie
|
||||
self._request_webpage(
|
||||
HEADRequest('http://www.viewster.com/'),
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
cookies = self._get_cookies('http://www.viewster.com/')
|
||||
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
|
||||
|
||||
info = self._download_json(
|
||||
'https://public-api.viewster.com/search/%s' % video_id,
|
||||
video_id, 'Downloading entry JSON')
|
||||
|
||||
entry_id = info.get('Id') or info['id']
|
||||
|
||||
# unfinished serie has no Type
|
||||
if info.get('Type') in ('Serie', None):
|
||||
try:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
self.raise_geo_restricted()
|
||||
else:
|
||||
raise
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
||||
for episode in episodes]
|
||||
title = (info.get('Title') or info['Synopsis']['Title']).strip()
|
||||
description = info.get('Synopsis', {}).get('Detailed')
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
formats = []
|
||||
for language_set in info.get('LanguageSets', []):
|
||||
manifest_url = None
|
||||
m3u8_formats = []
|
||||
audio = language_set.get('Audio') or ''
|
||||
subtitle = language_set.get('Subtitle') or ''
|
||||
base_format_id = audio
|
||||
if subtitle:
|
||||
base_format_id += '-%s' % subtitle
|
||||
|
||||
def concat(suffix, sep='-'):
|
||||
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
|
||||
|
||||
medias = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/videos' % entry_id,
|
||||
video_id, fatal=False, query={
|
||||
'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
|
||||
'language': audio,
|
||||
'subtitle': subtitle,
|
||||
})
|
||||
if not medias:
|
||||
continue
|
||||
for media in medias:
|
||||
video_url = media.get('Uri')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
manifest_url = video_url
|
||||
video_url += '&' if '?' in video_url else '?'
|
||||
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id=concat('hds')))
|
||||
elif ext == 'm3u8':
|
||||
manifest_url = video_url
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id=concat('hls'),
|
||||
fatal=False) # m3u8 sometimes fail
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
qualities_basename = self._search_regex(
|
||||
r'/([^/]+)\.csmil/',
|
||||
manifest_url, 'qualities basename', default=None)
|
||||
if not qualities_basename:
|
||||
continue
|
||||
QUALITIES_RE = r'((,\d+k)+,?)'
|
||||
qualities = self._search_regex(
|
||||
QUALITIES_RE, qualities_basename,
|
||||
'qualities', default=None)
|
||||
if not qualities:
|
||||
continue
|
||||
qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
|
||||
qualities.sort()
|
||||
http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
|
||||
http_url_basename = url_basename(video_url)
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': video_url.replace(http_url_basename, http_template % q),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': video_url.replace(http_url_basename, http_template % q),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http-%d' % q,
|
||||
'tbr': q,
|
||||
})
|
||||
|
||||
if not formats and not info.get('VODSettings'):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
synopsis = info.get('Synopsis') or {}
|
||||
# Prefer title outside synopsis since it's less messy
|
||||
title = (info.get('Title') or synopsis['Title']).strip()
|
||||
description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
|
||||
duration = int_or_none(info.get('Duration'))
|
||||
timestamp = parse_iso8601(info.get('ReleaseDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -2,12 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
@@ -19,8 +21,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
NO_DEFAULT,
|
||||
OnDemandPagedList,
|
||||
parse_filesize,
|
||||
qualities,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
@@ -98,6 +100,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
webpage, 'vuid', group='vuid')
|
||||
return xsrft, vuid
|
||||
|
||||
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
|
||||
vimeo_config = self._search_regex(
|
||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
|
||||
webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
|
||||
if vimeo_config:
|
||||
return self._parse_json(vimeo_config, video_id)
|
||||
|
||||
def _set_vimeo_cookie(self, name, value):
|
||||
self._set_cookie('vimeo.com', name, value)
|
||||
|
||||
@@ -201,6 +210,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
video_uploader_url = owner.get('url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(video_title) if is_live else video_title,
|
||||
'uploader': owner.get('name'),
|
||||
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
|
||||
@@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
\.
|
||||
)?
|
||||
vimeo(?P<pro>pro)?\.com/
|
||||
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)?
|
||||
(?:
|
||||
(?:
|
||||
@@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# and latter we extract those that are Vimeo specific.
|
||||
self.report_extraction(video_id)
|
||||
|
||||
vimeo_config = self._search_regex(
|
||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
|
||||
'vimeo config', default=None)
|
||||
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
|
||||
if vimeo_config:
|
||||
seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
|
||||
seed_status = vimeo_config.get('seed_status', {})
|
||||
if seed_status.get('state') == 'failed':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, seed_status['title']),
|
||||
@@ -722,7 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'timestamp': unified_timestamp(timestamp),
|
||||
'description': video_description,
|
||||
@@ -905,7 +912,7 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
|
||||
class VimeoAlbumIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:album'
|
||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/album/2632481',
|
||||
@@ -925,21 +932,41 @@ class VimeoAlbumIE(VimeoChannelIE):
|
||||
'params': {
|
||||
'videopassword': 'youtube-dl',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: respect page number
|
||||
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
return '%s/page:%d/' % (base_url, pagenum)
|
||||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
||||
api_page = page + 1
|
||||
query = {
|
||||
'fields': 'link,uri',
|
||||
'page': api_page,
|
||||
'per_page': self._PAGE_SIZE,
|
||||
}
|
||||
if hashed_pass:
|
||||
query['_hashed_pass'] = hashed_pass
|
||||
videos = self._download_json(
|
||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||
'Authorization': 'jwt ' + authorizaion,
|
||||
})['data']
|
||||
for video in videos:
|
||||
link = video.get('link')
|
||||
if not link:
|
||||
continue
|
||||
uri = video.get('uri')
|
||||
video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
|
||||
yield self.url_result(link, VimeoIE.ie_key(), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
webpage = self._login_list_password(url, album_id, webpage)
|
||||
api_config = self._extract_vimeo_config(webpage, album_id)['api']
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, album_id, api_config['jwt'],
|
||||
api_config.get('hashed_pass')), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, album_id, self._html_search_regex(
|
||||
r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
|
||||
|
||||
|
||||
class VimeoGroupsIE(VimeoAlbumIE):
|
||||
@@ -1035,7 +1062,6 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
if source_format:
|
||||
info_dict['formats'].append(source_format)
|
||||
self._vimeo_sort_formats(info_dict['formats'])
|
||||
info_dict['id'] = video_id
|
||||
return info_dict
|
||||
|
||||
|
||||
@@ -1089,94 +1115,17 @@ class VimeoLikesIE(VimeoChannelIE):
|
||||
return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
|
||||
|
||||
|
||||
class VHXEmbedIE(InfoExtractor):
|
||||
class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vhx:embed'
|
||||
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
|
||||
|
||||
def _call_api(self, video_id, access_token, path='', query=None):
|
||||
return self._download_json(
|
||||
'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={
|
||||
'Authorization': 'Bearer ' + access_token,
|
||||
}, query=query)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
credentials = self._parse_json(self._search_regex(
|
||||
r'(?s)credentials\s*:\s*({.+?}),', webpage,
|
||||
'config'), video_id, js_to_json)
|
||||
access_token = credentials['access_token']
|
||||
|
||||
query = {}
|
||||
for k, v in credentials.items():
|
||||
if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined':
|
||||
if k == 'authUserToken':
|
||||
query['auth_user_token'] = v
|
||||
else:
|
||||
query[k] = v
|
||||
files = self._call_api(video_id, access_token, '/files', query)
|
||||
|
||||
formats = []
|
||||
for f in files:
|
||||
href = try_get(f, lambda x: x['_links']['source']['href'])
|
||||
if not href:
|
||||
continue
|
||||
method = f.get('method')
|
||||
if method == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif method == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])),
|
||||
'format_id': 'http',
|
||||
'preference': 1,
|
||||
'url': href,
|
||||
'vcodec': f.get('codec'),
|
||||
}
|
||||
quality = f.get('quality')
|
||||
if quality:
|
||||
fmt.update({
|
||||
'format_id': 'http-' + quality,
|
||||
'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)),
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_data = self._call_api(video_id, access_token)
|
||||
title = video_data.get('title') or video_data['name']
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []:
|
||||
lang = subtitle.get('srclang') or subtitle.get('label')
|
||||
for _link in subtitle.get('_links', {}).values():
|
||||
href = _link.get('href')
|
||||
if not href:
|
||||
continue
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': href,
|
||||
})
|
||||
|
||||
q = qualities(['small', 'medium', 'large', 'source'])
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items():
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
'preference': q(thumbnail_id),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': unified_timestamp(video_data.get('created_at')),
|
||||
'view_count': int_or_none(video_data.get('plays_count')),
|
||||
}
|
||||
config_url = self._parse_json(self._search_regex(
|
||||
r'window\.OTTData\s*=\s*({.+})', webpage,
|
||||
'ott data'), video_id, js_to_json)['config_url']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
return info
|
||||
|
@@ -403,8 +403,17 @@ class VKIE(VKBaseIE):
        data = self._parse_json(
            self._search_regex(
                r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
                'player params'),
            video_id)['params'][0]
                'player params', default='{}'),
            video_id)
        if data:
            data = data['params'][0]

        # <!--{...}
        if not data:
            data = self._parse_json(
                self._search_regex(
                    r'<!--\s*({.+})', info_page, 'payload'),
                video_id)['payload'][-1][-1]['player']['params'][0]

        title = unescapeHTML(data['md_title'])
@@ -64,7 +64,15 @@ class VRVBaseIE(InfoExtractor):

    def _call_cms(self, path, video_id, note):
        if not self._CMS_SIGNING:
            self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
            index = self._call_api('index', video_id, 'CMS Signing')
            self._CMS_SIGNING = index.get('cms_signing') or {}
            if not self._CMS_SIGNING:
                for signing_policy in index.get('signing_policies', []):
                    signing_path = signing_policy.get('path')
                    if signing_path and signing_path.startswith('/cms/'):
                        name, value = signing_policy.get('name'), signing_policy.get('value')
                        if name and value:
                            self._CMS_SIGNING[name] = value
        return self._download_json(
            self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
            note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
@@ -1,54 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class WimpIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.wimp.com/maru-is-exhausted/',
|
||||
'md5': 'ee21217ffd66d058e8b16be340b74883',
|
||||
'info_dict': {
|
||||
'id': 'maru-is-exhausted',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maru is exhausted.',
|
||||
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.wimp.com/clowncar/',
|
||||
'md5': '5c31ad862a90dc5b1f023956faec13fe',
|
||||
'info_dict': {
|
||||
'id': 'cG4CEr2aiSg',
|
||||
'ext': 'webm',
|
||||
'title': 'Basset hound clown car...incredible!',
|
||||
'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
|
||||
'upload_date': '20140303',
|
||||
'uploader': 'Gretchen Hoey',
|
||||
'uploader_id': 'gretchenandjeff1',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
youtube_id = self._search_regex(
|
||||
(r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
|
||||
r'data-id=["\']([0-9A-Za-z_-]{11})'),
|
||||
webpage, 'video URL', default=None)
|
||||
if youtube_id:
|
||||
return self.url_result(youtube_id, YoutubeIE.ie_key())
|
||||
|
||||
info_dict = self._extract_jwplayer_data(
|
||||
webpage, video_id, require_title=False)
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
})
|
||||
|
||||
return info_dict
|
@@ -4,37 +4,64 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_chr
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
js_to_json,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
|
||||
def aa_decode(aa_code):
|
||||
symbol_table = [
|
||||
('7', '((゚ー゚) + (o^_^o))'),
|
||||
('6', '((o^_^o) +(o^_^o))'),
|
||||
('5', '((゚ー゚) + (゚Θ゚))'),
|
||||
('2', '((o^_^o) - (゚Θ゚))'),
|
||||
('4', '(゚ー゚)'),
|
||||
('3', '(o^_^o)'),
|
||||
('1', '(゚Θ゚)'),
|
||||
('0', '(c^_^o)'),
|
||||
]
|
||||
delim = '(゚Д゚)[゚ε゚]+'
|
||||
ret = ''
|
||||
for aa_char in aa_code.split(delim):
|
||||
for val, pat in symbol_table:
|
||||
aa_char = aa_char.replace(pat, val)
|
||||
aa_char = aa_char.replace('+ ', '')
|
||||
m = re.match(r'^\d+', aa_char)
|
||||
if m:
|
||||
ret += compat_chr(int(m.group(0), 8))
|
||||
else:
|
||||
m = re.match(r'^u([\da-f]+)', aa_char)
|
||||
if m:
|
||||
ret += compat_chr(int(m.group(1), 16))
|
||||
return ret
|
||||
|
||||
|
||||
class XFileShareIE(InfoExtractor):
|
||||
_SITES = (
|
||||
(r'daclips\.(?:in|com)', 'DaClips'),
|
||||
(r'filehoot\.com', 'FileHoot'),
|
||||
(r'gorillavid\.(?:in|com)', 'GorillaVid'),
|
||||
(r'movpod\.in', 'MovPod'),
|
||||
(r'powerwatch\.pw', 'PowerWatch'),
|
||||
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
|
||||
(r'clipwatching\.com', 'ClipWatching'),
|
||||
(r'gounlimited\.to', 'GoUnlimited'),
|
||||
(r'govid\.me', 'GoVid'),
|
||||
(r'holavid\.com', 'HolaVid'),
|
||||
(r'streamty\.com', 'Streamty'),
|
||||
(r'thevideobee\.to', 'TheVideoBee'),
|
||||
(r'vidto\.(?:me|se)', 'Vidto'),
|
||||
(r'streamin\.to', 'Streamin.To'),
|
||||
(r'xvidstage\.com', 'XVIDSTAGE'),
|
||||
(r'vidabc\.com', 'Vid ABC'),
|
||||
(r'uqload\.com', 'Uqload'),
|
||||
(r'vidbom\.com', 'VidBom'),
|
||||
(r'vidlo\.us', 'vidlo'),
|
||||
(r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
|
||||
(r'fastvideo\.me', 'FastVideo.me'),
|
||||
(r'vidlocker\.xyz', 'VidLocker'),
|
||||
(r'vidshare\.tv', 'VidShare'),
|
||||
(r'vup\.to', 'VUp'),
|
||||
(r'xvideosharing\.com', 'XVideoSharing'),
|
||||
)
|
||||
|
||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||
_VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
% '|'.join(site for site in list(zip(*_SITES))[0]))
|
||||
|
||||
_FILE_NOT_FOUND_REGEXES = (
|
||||
@@ -43,82 +70,14 @@ class XFileShareIE(InfoExtractor):
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://gorillavid.in/06y9juieqpmi',
|
||||
'md5': '5ae4a3580620380619678ee4875893ba',
|
||||
'url': 'http://xvideosharing.com/fq65f94nd2ve',
|
||||
'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
|
||||
'info_dict': {
|
||||
'id': '06y9juieqpmi',
|
||||
'id': 'fq65f94nd2ve',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
|
||||
'title': 'sample',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://daclips.in/3rso4kdn6f9m',
|
||||
'md5': '1ad8fd39bb976eeb66004d3a4895f106',
|
||||
'info_dict': {
|
||||
'id': '3rso4kdn6f9m',
|
||||
'ext': 'mp4',
|
||||
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://filehoot.com/3ivfabn7573c.html',
|
||||
'info_dict': {
|
||||
'id': '3ivfabn7573c',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
},
|
||||
'skip': 'Video removed',
|
||||
}, {
|
||||
'url': 'http://vidto.me/ku5glz52nqe1.html',
|
||||
'info_dict': {
|
||||
'id': 'ku5glz52nqe1',
|
||||
'ext': 'mp4',
|
||||
'title': 'test'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://powerwatch.pw/duecjibvicbu',
|
||||
'info_dict': {
|
||||
'id': 'duecjibvicbu',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny trailer',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://xvidstage.com/e0qcnl03co6z',
|
||||
'info_dict': {
|
||||
'id': 'e0qcnl03co6z',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chucky Prank 2015.mp4',
|
||||
},
|
||||
}, {
|
||||
# removed by administrator
|
||||
'url': 'http://xvidstage.com/amfy7atlkx25',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidabc.com/i8ybqscrphfv',
|
||||
'info_dict': {
|
||||
'id': 'i8ybqscrphfv',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Beauty and the Beast 2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rapidvideo.cool/b667kprndr8w',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidto.se/1tx1pf6t12cg.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -131,10 +90,9 @@ class XFileShareIE(InfoExtractor):
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
url = 'http://%s/%s' % (mobj.group('host'), video_id)
|
||||
url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
|
||||
@@ -142,7 +100,7 @@ class XFileShareIE(InfoExtractor):
|
||||
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
if fields.get('op') == 'download1':
|
||||
countdown = int_or_none(self._search_regex(
|
||||
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
|
||||
webpage, 'countdown', default=None))
|
||||
@@ -160,13 +118,37 @@ class XFileShareIE(InfoExtractor):
|
||||
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
r'<td nowrap>([^<]+)</td>',
|
||||
r'h4-fine[^>]*>([^<]+)<',
|
||||
r'>Watch (.+) ',
|
||||
r'>Watch (.+)[ <]',
|
||||
r'<h2 class="video-page-head">([^<]+)</h2>',
|
||||
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to
|
||||
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
|
||||
r'title\s*:\s*"([^"]+)"'), # govid.me
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or video_id).strip()
|
||||
|
||||
def extract_formats(default=NO_DEFAULT):
|
||||
for regex, func in (
|
||||
(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
|
||||
(r'(゚.+)', aa_decode)):
|
||||
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
|
||||
if obf_code:
|
||||
webpage = webpage.replace(obf_code, func(obf_code))
|
||||
|
||||
formats = []
|
||||
|
||||
jwplayer_data = self._search_regex(
|
||||
[
|
||||
r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
|
||||
r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
|
||||
], webpage,
|
||||
'jwplayer data', default=None)
|
||||
if jwplayer_data:
|
||||
jwplayer_data = self._parse_json(
|
||||
jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
|
||||
if jwplayer_data:
|
||||
formats = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, False,
|
||||
m3u8_id='hls', mpd_id='dash')['formats']
|
||||
|
||||
if not formats:
|
||||
urls = []
|
||||
for regex in (
|
||||
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||
@@ -177,6 +159,12 @@ class XFileShareIE(InfoExtractor):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in urls:
|
||||
urls.append(video_url)
|
||||
|
||||
sources = self._search_regex(
|
||||
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
|
||||
if sources:
|
||||
urls.extend(self._parse_json(sources, video_id))
|
||||
|
||||
formats = []
|
||||
for video_url in urls:
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
@@ -189,21 +177,13 @@ class XFileShareIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': 'sd',
|
||||
})
|
||||
if not formats and default is not NO_DEFAULT:
|
||||
return default
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
formats = extract_formats(default=None)
|
||||
|
||||
if not formats:
|
||||
webpage = decode_packed_codes(self._search_regex(
|
||||
r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
|
||||
webpage, 'packed code'))
|
||||
formats = extract_formats()
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
|
||||
[
|
||||
r'<video[^>]+poster="([^"]+)"',
|
||||
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
|
||||
], webpage, 'thumbnail', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,6 +9,7 @@ from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -18,21 +20,21 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:.+?\.)?xhamster\.(?:com|one)/
|
||||
(?:.+?\.)?%s/
|
||||
(?:
|
||||
movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
|
||||
videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
|
||||
)
|
||||
'''
|
||||
|
||||
''' % _DOMAINS
|
||||
_TESTS = [{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'md5': '8281348b8d3c53d39fffb377d24eac4e',
|
||||
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'md5': '98b4687efb1ffd331c4197854dc09e8f',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'display_id': 'femaleagent_shy_beauty_takes_the_bait',
|
||||
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
|
||||
'ext': 'mp4',
|
||||
'title': 'FemaleAgent Shy beauty takes the bait',
|
||||
'timestamp': 1350194821,
|
||||
@@ -40,13 +42,12 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893,
|
||||
'age_limit': 18,
|
||||
'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
|
||||
'info_dict': {
|
||||
'id': '2221348',
|
||||
'display_id': 'britney_spears_sexy_booty',
|
||||
'display_id': 'britney-spears-sexy-booty',
|
||||
'ext': 'mp4',
|
||||
'title': 'Britney Spears Sexy Booty',
|
||||
'timestamp': 1379123460,
|
||||
@@ -54,13 +55,12 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200,
|
||||
'age_limit': 18,
|
||||
'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# empty seo
|
||||
# empty seo, unavailable via new URL schema
|
||||
'url': 'http://xhamster.com/movies/5667973/.html',
|
||||
'info_dict': {
|
||||
'id': '5667973',
|
||||
@@ -71,7 +71,6 @@ class XHamsterIE(InfoExtractor):
|
||||
'uploader': 'parejafree',
|
||||
'duration': 72,
|
||||
'age_limit': 18,
|
||||
'categories': ['Amateur', 'Blowjobs'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -94,6 +93,18 @@ class XHamsterIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -285,7 +296,7 @@ class XHamsterIE(InfoExtractor):
|
||||
|
||||
|
||||
class XHamsterEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
|
||||
_TEST = {
|
||||
'url': 'http://xhamster.com/xembed.php?video=3328539',
|
||||
'info_dict': {
|
||||
@@ -322,3 +333,49 @@ class XHamsterEmbedIE(InfoExtractor):
|
||||
video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
|
||||
|
||||
return self.url_result(video_url, 'XHamster')
|
||||
|
||||
|
||||
class XHamsterUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
|
||||
_TESTS = [{
|
||||
# Paginated user profile
|
||||
'url': 'https://xhamster.com/users/netvideogirls/videos',
|
||||
'info_dict': {
|
||||
'id': 'netvideogirls',
|
||||
},
|
||||
'playlist_mincount': 267,
|
||||
}, {
|
||||
# Non-paginated user profile
|
||||
'url': 'https://xhamster.com/users/firatkaan/videos',
|
||||
'info_dict': {
|
||||
'id': 'firatkaan',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
|
||||
def _entries(self, user_id):
|
||||
next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
|
||||
for pagenum in itertools.count(1):
|
||||
page = self._download_webpage(
|
||||
next_page_url, user_id, 'Downloading page %s' % pagenum)
|
||||
for video_tag in re.findall(
|
||||
r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
|
||||
page):
|
||||
video = extract_attributes(video_tag)
|
||||
video_url = url_or_none(video.get('href'))
|
||||
if not video_url or not XHamsterIE.suitable(video_url):
|
||||
continue
|
||||
video_id = XHamsterIE._match_id(video_url)
|
||||
yield self.url_result(
|
||||
video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
|
||||
mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
|
||||
if not mobj:
|
||||
break
|
||||
next_page = extract_attributes(mobj.group(0))
|
||||
next_page_url = url_or_none(next_page.get('href'))
|
||||
if not next_page_url:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(user_id), user_id)
|
||||
|
@@ -17,7 +17,8 @@ class XVideosIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?xvideos\.com/video|
                            (?:[^/]+\.)?xvideos2?\.com/video|
                            (?:www\.)?xvideos\.es/video|
                            flashservice\.xvideos\.com/embedframe/|
                            static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
                        )

@@ -39,6 +40,42 @@ class XVideosIE(InfoExtractor):
    }, {
        'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
        'only_matching': True,
    }, {
        'url': 'http://xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'https://xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'https://xvideos.es/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'http://xvideos.es/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }, {
        'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
        'only_matching': True
    }]

    def _real_extract(self, url):
@@ -1,12 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals

import hashlib
import itertools
import json
import re

from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
    compat_str,
    compat_urllib_parse,
    compat_urlparse,
)
@@ -18,7 +20,9 @@ from ..utils import (
    int_or_none,
    mimetype2ext,
    smuggle_url,
    try_get,
    unescapeHTML,
    url_or_none,
)

from .brightcove import (
@@ -556,3 +560,130 @@ class YahooGyaOIE(InfoExtractor):
                'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
                YahooGyaOPlayerIE.ie_key(), video_id))
        return self.playlist_result(entries, program_id)


class YahooJapanNewsIE(InfoExtractor):
    IE_NAME = 'yahoo:japannews'
    IE_DESC = 'Yahoo! Japan News'
    _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
    _GEO_COUNTRIES = ['JP']
    _TESTS = [{
        'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
        'info_dict': {
            'id': '1736242',
            'ext': 'mp4',
            'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
            'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
            'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # geo restricted
        'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
        'only_matching': True,
    }, {
        'url': 'https://headlines.yahoo.co.jp/videonews/',
        'only_matching': True,
    }, {
        'url': 'https://news.yahoo.co.jp',
        'only_matching': True,
    }, {
        'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
        'only_matching': True,
    }, {
        'url': 'https://news.yahoo.co.jp/feature/1356',
        'only_matching': True
    }]

    def _extract_formats(self, json_data, content_id):
        formats = []

        video_data = try_get(
            json_data,
            lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
            list)
        for vid in video_data or []:
            delivery = vid.get('delivery')
            url = url_or_none(vid.get('Url'))
            if not delivery or not url:
                continue
            elif delivery == 'hls':
                formats.extend(
                    self._extract_m3u8_formats(
                        url, content_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': url,
                    'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
                    'height': int_or_none(vid.get('height')),
                    'width': int_or_none(vid.get('width')),
                    'tbr': int_or_none(vid.get('bitrate')),
                })
        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

        return formats

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        display_id = mobj.group('id') or host

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, 'title', default=None
        ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')

        if display_id == host:
            # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
            stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
            entries = [
                self.url_result(
                    smuggle_url(
                        'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
                        {'geo_countries': ['JP']}),
                    ie='BrightcoveNew', video_id=plist_id)
                for plist_id in stream_plists]
            return self.playlist_result(entries, playlist_title=title)

        # Article page
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, 'description', default=None)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image', webpage, 'thumbnail', default=None)
        space_id = self._search_regex([
            r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
            r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
            r'<!--\s+SpaceID=(\d+)'
        ], webpage, 'spaceid')

        content_id = self._search_regex(
            r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
            webpage, 'contentid', group='contentid')

        json_data = self._download_json(
            'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
            content_id,
            query={
                'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
                'output': 'json',
                'space_id': space_id,
                'domain': host,
                'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
                'device_type': '1100',
            })
        formats = self._extract_formats(json_data, content_id)

        return {
            'id': content_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
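For article pages, the checksum sent as the 'ak' query parameter is simply the MD5 hex digest of the space ID and the host joined by an underscore, exactly as in the _download_json call above. A quick illustration (the space ID value here is made up):

    import hashlib

    space_id, host = '1183046360', 'headlines.yahoo.co.jp'  # hypothetical space ID
    ak = hashlib.md5('_'.join((space_id, host)).encode()).hexdigest()
    print(ak)  # 32-character hex string used as the 'ak' parameter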
@@ -3,6 +3,7 @@ from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    url_or_none,
)
@@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
        # episode, sports
        'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
        'only_matching': True,
    }, {
        # DASH with DRM
        'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
                'disable_trackings': 1,
            })['content']

        m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
        content_url = url_or_none(content.get('content_url')) or url_or_none(
            content['streams'][0]['url'])
        title = content.get('title') or content.get('computed_title')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        ext = determine_ext(content_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                content_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mpd':
            formats = self._extract_mpd_formats(
                content_url, video_id, mpd_id='dash')
        else:
            formats = [{'url': content_url}]

        self._sort_formats(formats)

        description = content.get('description')
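The extractor now dispatches on the extension of content_url (HLS for m3u8, DASH for mpd, plain progressive otherwise) instead of always assuming an HLS manifest. A rough, self-contained stand-in for that extension sniffing, shown only to illustrate the idea — youtube-dl's real determine_ext is more thorough:

    import re

    def determine_ext_sketch(url, default_ext='unknown_video'):
        # Take the text after the last dot, if it looks like a plain extension.
        if not url:
            return default_ext
        guess = url.partition('?')[0].rpartition('.')[2]
        return guess if re.match(r'^[A-Za-z0-9]+$', guess) else default_ext

    assert determine_ext_sketch('https://host/video/master.m3u8?sig=abc') == 'm3u8'
    assert determine_ext_sketch('https://host/video/manifest.mpd') == 'mpd'
    assert determine_ext_sketch('https://host/stream') == 'unknown_video'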
@@ -31,6 +31,7 @@ from ..utils import (
    clean_html,
    dict_get,
    error_to_compat_str,
    extract_attributes,
    ExtractorError,
    float_or_none,
    get_element_by_attribute,
@@ -40,7 +41,6 @@ from ..utils import (
    orderedSet,
    parse_codecs,
    parse_duration,
    qualities,
    remove_quotes,
    remove_start,
    smuggle_url,
@@ -324,17 +324,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
            for video_id, video_title in self.extract_videos_from_page(content):
                yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(self._VIDEO_RE, page):
    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        for mobj in re.finditer(video_re, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            video_title = unescapeHTML(
                mobj.group('title')) if 'title' in mobj.groupdict() else None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
@@ -342,6 +343,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        ids_in_page = []
        titles_in_page = []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)
@@ -376,10 +383,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:(?:www|no)\.)?invidiou\.sh/|
                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
                            (?:www\.)?invidious\.kabi\.tk/|
                            (?:www\.)?invidious\.enkirton\.net/|
                            (?:www\.)?invidious\.13ad\.de/|
                            (?:www\.)?invidious\.mastodon\.host/|
                            (?:www\.)?invidious\.nixnet\.xyz/|
                            (?:www\.)?invidious\.drycat\.fr/|
                            (?:www\.)?tube\.poal\.co/|
                            (?:www\.)?vid\.wxzm\.sx/|
                            (?:www\.)?yt\.elukerio\.org/|
                            (?:www\.)?yt\.lelux\.fi/|
                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
                            (?:www\.)?qklhadlycap4cnod\.onion/|
                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
                            youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)? # handle anchor (#/) redirect urls
                         (?: # the various things that can precede the ID:
@@ -1594,17 +1612,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        video_id = mobj.group(2)
        return video_id

    def _extract_annotations(self, video_id):
        return self._download_webpage(
            'https://www.youtube.com/annotations_invideo', video_id,
            note='Downloading annotations',
            errnote='Unable to download video annotations', fatal=False,
            query={
                'features': 1,
                'legacy': 1,
                'video_id': video_id,
            })

    @staticmethod
    def _extract_chapters(description, duration):
        if not description:
@@ -1699,6 +1706,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        def extract_token(v_info):
            return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))

        def extract_player_response(player_response, video_id):
            pl_response = str_or_none(player_response)
            if not pl_response:
                return
            pl_response = self._parse_json(pl_response, video_id, fatal=False)
            if isinstance(pl_response, dict):
                add_dash_mpd_pr(pl_response)
                return pl_response

        player_response = {}

        # Get video info
@@ -1721,7 +1737,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            pl_response = video_info.get('player_response', [None])[0]
            player_response = extract_player_response(pl_response, video_id)
            add_dash_mpd(video_info)
            view_count = extract_view_count(video_info)
        else:
            age_gate = False
            video_info = None
@@ -1744,11 +1763,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        is_live = True
                    sts = ytplayer_config.get('sts')
                    if not player_response:
                        pl_response = str_or_none(args.get('player_response'))
                        if pl_response:
                            pl_response = self._parse_json(pl_response, video_id, fatal=False)
                            if isinstance(pl_response, dict):
                                player_response = pl_response
                        player_response = extract_player_response(args.get('player_response'), video_id)
                if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                    add_dash_mpd_pr(player_response)
                    # We also try looking in get_video_info since it may contain different dashmpd
@@ -1780,9 +1795,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    get_video_info = compat_parse_qs(video_info_webpage)
                    if not player_response:
                        pl_response = get_video_info.get('player_response', [None])[0]
                        if isinstance(pl_response, dict):
                            player_response = pl_response
                            add_dash_mpd_pr(player_response)
                        player_response = extract_player_response(pl_response, video_id)
                    add_dash_mpd(get_video_info)
                    if view_count is None:
                        view_count = extract_view_count(get_video_info)
@@ -1805,9 +1818,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    break

        def extract_unavailable_message():
            return self._html_search_regex(
                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
                video_webpage, 'unavailable message', default=None)
            messages = []
            for tag, kind in (('h1', 'message'), ('div', 'submessage')):
                msg = self._html_search_regex(
                    r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
                    video_webpage, 'unavailable %s' % kind, default=None)
                if msg:
                    messages.append(msg)
            if messages:
                return '\n'.join(messages)

        if not video_info:
            unavailable_message = extract_unavailable_message()
@@ -1819,16 +1838,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        video_details = try_get(
            player_response, lambda x: x['videoDetails'], dict) or {}

        # title
        if 'title' in video_info:
            video_title = video_info['title'][0]
        elif 'title' in player_response:
            video_title = video_details['title']
        else:
        video_title = video_info.get('title', [None])[0] or video_details.get('title')
        if not video_title:
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'

        # description
        description_original = video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:

@@ -1853,11 +1867,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                ''', replace_url, video_description)
            video_description = clean_html(video_description)
        else:
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            if fd_mobj:
                video_description = unescapeHTML(fd_mobj.group(1))
            else:
                video_description = ''
            video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')

        if not smuggled_data.get('force_singlefeed', False):
            if not self._downloader.params.get('noplaylist'):
@@ -1896,9 +1906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            view_count = int_or_none(video_details.get('viewCount'))

        if is_live is None:
            is_live = bool_or_none(dict_get(
                video_details, ('isLive', 'isLiveContent'),
                skip_false_values=False))
            is_live = bool_or_none(video_details.get('isLive'))

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@@ -1908,6 +1916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            return int_or_none(self._search_regex(
                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))

        streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
        streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
@@ -1916,10 +1927,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
        elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
        elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
            formats = []
            formats_spec = {}
            fmt_list = video_info.get('fmt_list', [''])[0]
            if fmt_list:
@@ -1933,91 +1945,104 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        'width': int_or_none(width_height[0]),
                        'height': int_or_none(width_height[1]),
                    }
            q = qualities(['small', 'medium', 'hd720'])
            streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
            if streaming_formats:
                for fmt in streaming_formats:
                    itag = str_or_none(fmt.get('itag'))
                    if not itag:
                        continue
                    quality = fmt.get('quality')
                    quality_label = fmt.get('qualityLabel') or quality
                    formats_spec[itag] = {
                        'asr': int_or_none(fmt.get('audioSampleRate')),
                        'filesize': int_or_none(fmt.get('contentLength')),
                        'format_note': quality_label,
                        'fps': int_or_none(fmt.get('fps')),
                        'height': int_or_none(fmt.get('height')),
                        'quality': q(quality),
                        # bitrate for itag 43 is always 2147483647
                        'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
                        'width': int_or_none(fmt.get('width')),
                    }
            formats = []
            for url_data_str in encoded_url_map.split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
            for fmt in streaming_formats:
                itag = str_or_none(fmt.get('itag'))
                if not itag:
                    continue
                quality = fmt.get('quality')
                quality_label = fmt.get('qualityLabel') or quality
                formats_spec[itag] = {
                    'asr': int_or_none(fmt.get('audioSampleRate')),
                    'filesize': int_or_none(fmt.get('contentLength')),
                    'format_note': quality_label,
                    'fps': int_or_none(fmt.get('fps')),
                    'height': int_or_none(fmt.get('height')),
                    # bitrate for itag 43 is always 2147483647
                    'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
                    'width': int_or_none(fmt.get('width')),
                }

            for fmt in streaming_formats:
                if fmt.get('drm_families'):
                    continue
                url = url_or_none(fmt.get('url'))

                if not url:
                    cipher = fmt.get('cipher')
                    if not cipher:
                        continue
                    url_data = compat_parse_qs(cipher)
                    url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
                    if not url:
                        continue
                else:
                    cipher = None
                    url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)

                stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
                # Unsupported FORMAT_STREAM_TYPE_OTF
                if stream_type == 3:
                    continue
                format_id = url_data['itag'][0]
                url = url_data['url'][0]

                if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE,
                        embed_webpage if age_gate else video_webpage,
                        'JS player URL (1)', default=None)
                    if not jsplayer_url_json and not age_gate:
                        # We need the embed website after all
                        if embed_webpage is None:
                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                            embed_webpage = self._download_webpage(
                                embed_url, video_id, 'Downloading embed webpage')
                format_id = fmt.get('itag') or url_data['itag'][0]
                if not format_id:
                    continue
                format_id = compat_str(format_id)

                if cipher:
                    if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
                        ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
                        jsplayer_url_json = self._search_regex(
                            ASSETS_RE, embed_webpage, 'JS player URL')
                            ASSETS_RE,
                            embed_webpage if age_gate else video_webpage,
                            'JS player URL (1)', default=None)
                        if not jsplayer_url_json and not age_gate:
                            # We need the embed website after all
                            if embed_webpage is None:
                                embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                                embed_webpage = self._download_webpage(
                                    embed_url, video_id, 'Downloading embed webpage')
                            jsplayer_url_json = self._search_regex(
                                ASSETS_RE, embed_webpage, 'JS player URL')

                    player_url = json.loads(jsplayer_url_json)
                    if player_url is None:
                        player_url_json = self._search_regex(
                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                            video_webpage, 'age gate player URL')
                        player_url = json.loads(player_url_json)

                if 'sig' in url_data:
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    encrypted_sig = url_data['s'][0]

                    if self._downloader.params.get('verbose'):
                        player_url = json.loads(jsplayer_url_json)
                        if player_url is None:
                            player_version = 'unknown'
                            player_desc = 'unknown'
                        else:
                            if player_url.endswith('swf'):
                                player_version = self._search_regex(
                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                    'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                        player_url_json = self._search_regex(
                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                            video_webpage, 'age gate player URL')
                        player_url = json.loads(player_url_json)

                    if 'sig' in url_data:
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        encrypted_sig = url_data['s'][0]

                        if self._downloader.params.get('verbose'):
                            if player_url is None:
                                player_version = 'unknown'
                                player_desc = 'unknown'
                            else:
                                player_version = self._search_regex(
                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                     r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
                                    player_url,
                                    'html5 player', fatal=False)
                                player_desc = 'html5 player %s' % player_version
                            if player_url.endswith('swf'):
                                player_version = self._search_regex(
                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                    'flash player', fatal=False)
                                player_desc = 'flash player %s' % player_version
                            else:
                                player_version = self._search_regex(
                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                     r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
                                    player_url,
                                    'html5 player', fatal=False)
                                player_desc = 'html5 player %s' % player_version

                        parts_sizes = self._signature_cache_id(encrypted_sig)
                        self.to_screen('{%s} signature length %s, %s' %
                                       (format_id, parts_sizes, player_desc))
                            parts_sizes = self._signature_cache_id(encrypted_sig)
                            self.to_screen('{%s} signature length %s, %s' %
                                           (format_id, parts_sizes, player_desc))

                        signature = self._decrypt_signature(
                            encrypted_sig, video_id, player_url, age_gate)
                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
                        url += '&%s=%s' % (sp, signature)
                    signature = self._decrypt_signature(
                        encrypted_sig, video_id, player_url, age_gate)
                    sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
                    url += '&%s=%s' % (sp, signature)
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'
@@ -2037,24 +2062,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
                width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

                if width is None:
                    width = int_or_none(fmt.get('width'))
                if height is None:
                    height = int_or_none(fmt.get('height'))

                filesize = int_or_none(url_data.get(
                    'clen', [None])[0]) or _extract_filesize(url)

                quality = url_data.get('quality', [None])[0]
                quality = url_data.get('quality', [None])[0] or fmt.get('quality')
                quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')

                tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
                       or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
                fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))

                more_fields = {
                    'filesize': filesize,
                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                    'tbr': tbr,
                    'width': width,
                    'height': height,
                    'fps': int_or_none(url_data.get('fps', [None])[0]),
                    'format_note': url_data.get('quality_label', [None])[0] or quality,
                    'quality': q(quality),
                    'fps': fps,
                    'format_note': quality_label or quality,
                }
                for key, value in more_fields.items():
                    if value:
                        dct[key] = value
                type_ = url_data.get('type', [None])[0]
                type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
                if type_:
                    type_split = type_.split(';')
                    kind_ext = type_split[0].split('/')
@@ -2102,9 +2136,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
                formats.append(a_format)
        else:
            error_message = clean_html(video_info.get('reason', [None])[0])
            error_message = extract_unavailable_message()
            if not error_message:
                error_message = extract_unavailable_message()
                error_message = clean_html(try_get(
                    player_response, lambda x: x['playabilityStatus']['reason'],
                    compat_str))
            if not error_message:
                error_message = clean_html(
                    try_get(video_info, lambda x: x['reason'][0], compat_str))
            if error_message:
                raise ExtractorError(error_message, expected=True)
            raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
@@ -2275,7 +2314,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
            video_annotations = self._extract_annotations(video_id)
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
            invideo_url = try_get(
                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
            if xsrf_token and invideo_url:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    video_webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
                video_annotations = self._download_webpage(
                    self._proto_relative_url(invideo_url),
                    video_id, note='Downloading annotations',
                    errnote='Unable to download video annotations', fatal=False,
                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))

        chapters = self._extract_chapters(description_original, video_duration)
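Annotations are now fetched by POSTing the page's XSRF token to the invideoUrl taken from player_response; the token itself is pulled out of the watch page with the regex shown above. A small, runnable check of that regex against a made-up page snippet (the token value is fabricated):

    import re

    XSRF_TOKEN_RE = r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2'
    snippet = '"XSRF_TOKEN": "QUFmLVJhbXNlc0FCQ0RFRg==",'  # fabricated value
    print(re.search(XSRF_TOKEN_RE, snippet).group('xsrf_token'))
    # QUFmLVJhbXNlc0FCQ0RFRg==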
@@ -2433,7 +2486,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
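The new _VIDEO_RE_TPL makes the index and title parts of the match optional, so both the classic playlist markup and barer watch links still match. A quick demonstration against fabricated href snippets (not real YouTube markup):

    import re

    VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    VIDEO_RE = VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'

    sample = 'href="/watch?v=dQw4w9WgXcQ&amp;index=2&amp;t=0s" class="yt-uix">Example title</a>'
    m = re.search(VIDEO_RE, sample)
    print(m.group('id'), m.group('index'), m.group('title'))
    # dQw4w9WgXcQ 2 Example title

    bare = 'href="/watch?v=dQw4w9WgXcQ">'
    print(re.search(VIDEO_RE, bare).group('id'))  # dQw4w9WgXcQ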
@@ -2456,6 +2510,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
@@ -2464,6 +2520,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
@@ -2472,6 +2530,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
@@ -2489,13 +2549,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
@@ -2504,13 +2568,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
@@ -2535,6 +2602,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
@@ -2546,7 +2614,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
@@ -2557,6 +2624,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
@@ -2575,6 +2652,34 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    def _real_initialize(self):
        self._login()

    def extract_videos_from_page(self, page):
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs['data-video-id']
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix(self, playlist_id):
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
@@ -2637,7 +2742,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
@@ -2723,6 +2828,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
@@ -2732,6 +2839,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
@@ -2816,6 +2925,8 @@ class YoutubeUserIE(YoutubeChannelIE):
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
@@ -2825,6 +2936,8 @@ class YoutubeUserIE(YoutubeChannelIE):
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
@@ -2918,7 +3031,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
@@ -2935,6 +3048,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
@@ -41,6 +41,7 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE):
    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
    _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
@@ -393,7 +393,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
            sub_ext = sub_info['ext']
            if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
                sub_langs.append(lang)
                sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
                sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
            else:
                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
                    webm_vtt_warn = True
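In the condition above, Python's `and` binds tighter than `or`, so subtitles are embedded for any non-webm container, and for webm only when the subtitle format is vtt. A tiny sanity check of that precedence:

    def should_embed(ext, sub_ext):
        # Same boolean expression as in FFmpegEmbedSubtitlePP above.
        return ext != 'webm' or ext == 'webm' and sub_ext == 'vtt'

    assert should_embed('mp4', 'srt') is True
    assert should_embed('webm', 'vtt') is True
    assert should_embed('webm', 'srt') is False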
@@ -606,9 +606,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                self._downloader.to_screen(
                    '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
                continue
            old_file = subtitles_filename(filename, lang, ext)
            old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
            sub_filenames.append(old_file)
            new_file = subtitles_filename(filename, lang, new_ext)
            new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))

            if ext in ('dfxp', 'ttml', 'tt'):
                self._downloader.report_warning(
@@ -616,7 +616,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                    'which results in style information loss')

                dfxp_file = old_file
                srt_file = subtitles_filename(filename, lang, 'srt')
                srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))

                with open(dfxp_file, 'rb') as f:
                    srt_data = dfxp2srt(f.read())
@@ -2906,8 +2906,8 @@ def determine_ext(url, default_ext='unknown_video'):
    return default_ext


def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
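The old helper blindly chopped everything after the last dot, which mangled filenames whose trailing component is not actually the media extension. Passing the expected real extension lets it strip that suffix only when it matches. A small sketch of the behaviour, with a simplified stand-in for replace_extension (assumption: the real util drops the current extension only when it equals expected_real_ext):

    import os.path

    def replace_extension_sketch(filename, ext, expected_real_ext=None):
        # Strip the current extension only if it is the one we expect.
        name, real_ext = os.path.splitext(filename)
        base = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
        return base + '.' + ext

    def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
        return replace_extension_sketch(filename, sub_lang + '.' + sub_format, expected_real_ext)

    # Ordinary case: behaves like the old helper.
    print(subtitles_filename('clip.mp4', 'en', 'vtt', 'mp4'))      # clip.en.vtt
    # Dotted name without the expected extension: nothing is chopped off any more.
    print(subtitles_filename('clip.part1.2', 'en', 'vtt', 'mp4'))  # clip.part1.2.en.vtt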
def date_from_str(date_str):

@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2019.07.14'
__version__ = '2019.10.22'