Compare commits

..

79 Commits

Author SHA1 Message Date
Sergey M․
f4b865c613 release 2019.09.28 2019-09-28 00:30:30 +07:00
Sergey M․
412f44f4b3 [ChangeLog] Actualize
[ci skip]
2019-09-28 00:23:25 +07:00
Sergey M․
6483fbd336 [vk] Fix extraction (closes #22522) 2019-09-28 00:04:52 +07:00
Sergey M․
8130ac42e5 [openload] PEP 8 2019-09-26 23:15:06 +07:00
Sergey M․
cb3e4a2947 [heise] Fix kaltura embeds extraction (closes #22514) 2019-09-26 23:11:02 +07:00
Remita Amine
2a88a0c44d [ted] check for resources validity and extract subtitled downloads(closes #22513) 2019-09-26 11:44:57 +01:00
sofutru
33c1c7d80f [youtube] Add support for owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292) 2019-09-25 02:43:34 +07:00
Sergey M․
21d3c21e62 [nhk] Add support for clips 2019-09-25 02:39:25 +07:00
Remita Amine
a373befa25 [nhk] fix video extraction(closes #22249)(closes #22353) 2019-09-24 20:24:17 +01:00
Sergey M․
df63cafe49 [byutv] Fix extraction (refs #22070)
Downloading of new videos does not work due to DRM
2019-09-25 02:16:25 +07:00
Sergey M․
d06daf23da [YoutubeDL] Honour all --get-* options with --flat-playlist (closes #22493) 2019-09-25 02:10:37 +07:00
smed79
8e9fdcbe27 [openload] Add support for oload.online (#22304) 2019-09-24 23:56:12 +07:00
sofutru
666d808e70 [youtube] Add support for invidious.drycat.fr (#22451) 2019-09-24 23:16:46 +07:00
ipaha
7d327fea5b [jwplatfom] do not match video URLs(#20596) (#22148) 2019-09-23 19:44:00 +00:00
Sergey M․
4e3f1f0469 [youtube:playlist] Unescape playlist uploader (closes #22483) 2019-09-23 00:20:52 +07:00
Remita Amine
4bc15a68d1 [bilibili] add support audio albums and songs(closes #21094) 2019-09-22 17:14:18 +01:00
Remita Amine
edb2820ca5 [instagram] add support for tv URLs 2019-09-21 21:57:45 +01:00
Remita Amine
6cf6b357f5 [mixcloud] allow uppercase letters in format urls(closes #19280) 2019-09-20 11:14:24 +01:00
Remita Amine
f455a934e9 [brightcove] delegate all supported BrightcoveLegacyIE URLs to BrightcoveNewIE
closes #11523
closes #12842
closes #13912
closes #15669
closes #16303
2019-09-19 18:02:26 +01:00
Sergey M․
d9d3098675 [hotstar] Use native HLS downloader by default 2019-09-19 03:03:07 +07:00
Sergey M․
1cb812d3c2 [hotstar] Extract more formats (closes #22323) 2019-09-19 03:00:19 +07:00
Sergey M․
6fd26a7d4a [9now] Fix extraction (closes #22361) 2019-09-19 02:31:39 +07:00
Sergey M․
9cf26b6e1d [zdf] Bypass geo restriction 2019-09-19 01:11:52 +07:00
Sergey M․
20e11b70ac [tv4] Fix extraction and extract series metadata (closes #22443) 2019-09-18 23:45:26 +07:00
Sergey M․
e1f692f0b3 release 2019.09.12.1 2019-09-12 02:53:52 +07:00
Sergey M․
2f851a7d7d [ChangeLog] Actualize
[ci skip]
2019-09-12 02:48:07 +07:00
Sergey M․
4878759f3b [youtube] Remove quality and tbr for itag 43 (closes #22372) 2019-09-12 02:46:12 +07:00
Sergey M․
303d3e142c [ChangeLog] Actualize
[ci skip]
2019-09-12 02:05:54 +07:00
Sergey M․
bd10b229c0 release 2019.09.12 2019-09-12 01:21:21 +07:00
Sergey M․
035c7a59e8 [ChangeLog] Actualize
[ci skip]
2019-09-12 01:18:25 +07:00
Sergey M․
bf1317d257 [youtube] Quick extraction tempfix (closes #22367, closes #22163) 2019-09-11 22:44:47 +07:00
sofutru
bff90fc518 [youtube] Add support for invidious tor instances (#22268) 2019-09-03 01:35:32 +07:00
Sergey M․
31dbd054c8 [platzi] Improve client data extraction (closes #22290) 2019-09-03 01:24:20 +07:00
Sergey M․
66d04c74e0 [platzi:course] Add support for authentication 2019-09-03 01:23:22 +07:00
Patrick Dessalle
d7da1e37c7 [nickjr] Add support for nickelodeonjunior.fr (#22246) 2019-09-02 00:59:57 +07:00
Sergey M․
f620d0d860 release 2019.09.01 2019-09-01 03:33:02 +07:00
Sergey M․
79dd8884bb [ChangeLog] Actualize
[ci skip]
2019-09-01 03:18:35 +07:00
Sergey M․
df228355fd [xhamster:user] Add extractor (closes #16330, closes #18454) 2019-09-01 03:12:56 +07:00
Sergey M․
8945b10f6e [xhamster] Add support for more domains 2019-09-01 03:09:04 +07:00
Sergey M․
7cb51b5daf [extractor/generic] Improve squarespace detection and fix test (closes #21859, refs #21294, refs #21802) 2019-09-01 01:25:48 +07:00
Barbara Miller
d78657fd18 [extractor/generic] Add support for squarespace embeds (closes #21294) 2019-09-01 01:25:48 +07:00
Sergey M․
cc73d5ad15 [openload] Fix domains regex 2019-09-01 01:25:48 +07:00
telephono
71f47617c8 [downloader/external] Respect mtime option for aria2c (#22242) 2019-09-01 00:24:43 +07:00
Remita Amine
3f46a25a97 [verystream] add support for woof.tube (closes #22217) 2019-08-31 10:02:09 +01:00
Sergey M․
9d058b3206 [dailymotion] Add support for lequipe.fr (closes #21328, closes #22152) 2019-08-29 23:08:19 +07:00
Sergey M․
b500955a58 [openload] Add support for oload.vip (closes #22205) 2019-08-28 01:58:07 +07:00
Jay
acc86c9a97 [bbc] Fix some tests 2019-08-28 01:53:40 +07:00
Jay
b72305f078 [bbccouk] Extend _VALID_URL (closes #19200) 2019-08-28 01:53:40 +07:00
sofutru
494d664e67 [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223) 2019-08-28 01:39:59 +07:00
phan-ctrl
d1fcf255c5 [safari] Fix authentication (closes #22161) (#22184) 2019-08-27 10:16:04 +07:00
Sergey M․
183a18c4e7 [usanetwork] Fix extraction (closes #22105) 2019-08-26 03:38:54 +07:00
supritkumar
393cc31d5e [einthusan] Add support for einthusan.ca (#22171) 2019-08-21 09:52:59 +07:00
Sergey M․
0add33abcb [youtube] Improve unavailable message extraction (refs #22117) 2019-08-16 23:44:11 +07:00
Chuck Cho
0326bcb6c1 [piksel] add subtitle capability (#20506) 2019-08-15 22:14:47 +00:00
Sergey M․
def849e0e6 release 2019.08.13 2019-08-13 23:18:38 +07:00
Sergey M․
69611a1616 [ChangeLog] Actualize
[ci skip]
2019-08-13 23:10:05 +07:00
Sergey M․
351f37c022 [youtube:playlist] Improve flat extraction (closes #21927) 2019-08-13 05:02:52 +07:00
lightmare
3bce4ff7d9 [downloader/fragment] Fix ETA calculation of resumed download (#21992) 2019-08-11 06:57:43 +07:00
Remita Amine
ffddb11264 [YoutubeDL] check annotations availabilty(closes #18582) 2019-08-09 08:19:41 +01:00
Remita Amine
64b6a4e91e [youtube] fix annotations extraction(closes #22045) 2019-08-09 08:16:53 +01:00
Remita Amine
b3d39be239 [discovery] extract series meta field(#21808) 2019-08-08 23:23:58 +01:00
Sergey M․
1357734978 [youtube] Improve error detection (#16445) 2019-08-06 02:32:44 +07:00
Remita Amine
eb9c9c74a6 [vimeo] fix album extraction
closes #1933
closes #15704
closes #15855
closes #18967
closes #21986
2019-08-03 10:29:20 +01:00
Remita Amine
5efbc1366f [roosterteeth] add support for watch URLs 2019-08-02 19:38:35 +01:00
Remita Amine
995f319b06 [discovery] limit video data by show slug(closes #21980) 2019-08-02 18:08:26 +01:00
Sergey M
d9d3a5a816 [README.md] Move code from #21939 to the right place 2019-08-02 05:54:56 +07:00
Sergey M․
4f2d735803 release 2019.08.02 2019-08-02 05:37:54 +07:00
Sergey M․
2e9522b061 [ChangeLog] Actualize
[ci skip]
2019-08-02 05:36:32 +07:00
Sergey M․
be306d6a31 [tvigle] Fix extraction and add support for HLS and DASH formats (closes #21967) 2019-08-02 05:25:01 +07:00
Sergey M․
33b529fabd [yandexvideo] Add support for DASH formats (#21971) 2019-08-02 05:03:25 +07:00
Kyle
07f3a05c87 [CONTRIBUTING.md] Add some more coding conventions (#21939) 2019-08-02 04:49:01 +07:00
Remita Amine
535111657b [discovery] use API call for video data extraction(#21808) 2019-08-01 22:45:10 +01:00
cantandwont
826dcff99c Output batch filename when it could not be read (#21915) 2019-08-01 03:54:39 +07:00
Sen Jiang
9a37ff82f1 [mgtv] Extract format_note (#21881)
format_note should now show 标清, 高清, 超清, 蓝光, etc.
2019-08-01 03:45:02 +07:00
Sergey M․
766c4f6090 [tvn24] Fix test 2019-07-31 02:32:45 +07:00
Sergey M․
7279163412 [tvn24] Fix metadata extraction (closes #21833, closes #21834) 2019-07-31 02:32:45 +07:00
CeruleanSky
07ab44c420 [dlive] Relax _VALID_URL (#21909) 2019-07-31 01:43:49 +07:00
smed79
2c8b1a21e8 [openload] Add support for oload.best (#21913) 2019-07-31 01:40:50 +07:00
Sergey M․
c2d125d99f [youtube] Improve metadata extraction for age gate content (closes #21943) 2019-07-31 00:14:33 +07:00
49 changed files with 1198 additions and 563 deletions

View File

@@ -18,7 +18,7 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2019.07.30**
- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.07.30
[debug] youtube-dl version 2019.09.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -19,7 +19,7 @@ labels: 'site-support-request'
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2019.07.30**
- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@@ -18,13 +18,13 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->
- [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2019.07.30**
- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@@ -18,7 +18,7 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2019.07.30**
- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2019.07.30
[debug] youtube-dl version 2019.09.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -19,13 +19,13 @@ labels: 'request'
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.07.30. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->
- [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2019.07.30**
- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
- [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@@ -339,6 +339,72 @@ Incorrect:
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
### Inline values
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
#### Example
Correct:
```python
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
```
Incorrect:
```python
TITLE_RE = r'<title>([^<]+)</title>'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
```
### Collapse fallbacks
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
#### Example
Good:
```python
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
```
Unwieldy:
```python
description = (
self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, default=None)
or self._html_search_meta('twitter:description', webpage, default=None))
```
Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
### Trailing parentheses
Always move trailing parentheses after the last argument.
#### Example
Correct:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list)
```
Incorrect:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list,
)
```
### Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

View File

@@ -1,3 +1,98 @@
version 2019.09.28
Core
* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
Extractors
* [vk] Fix extraction (#22522)
* [heise] Fix kaltura embeds extraction (#22514)
* [ted] Check for resources validity and extract subtitled downloads (#22513)
+ [youtube] Add support for
owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
+ [nhk] Add support for clips
* [nhk] Fix video extraction (#22249, #22353)
* [byutv] Fix extraction (#22070)
+ [openload] Add support for oload.online (#22304)
+ [youtube] Add support for invidious.drycat.fr (#22451)
* [jwplatfom] Do not match video URLs (#20596, #22148)
* [youtube:playlist] Unescape playlist uploader (#22483)
+ [bilibili] Add support audio albums and songs (#21094)
+ [instagram] Add support for tv URLs
+ [mixcloud] Allow uppercase letters in format URLs (#19280)
* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
#12842, #13912, #15669, #16303)
* [hotstar] Use native HLS downloader by default
+ [hotstar] Extract more formats (#22323)
* [9now] Fix extraction (#22361)
* [zdf] Bypass geo restriction
+ [tv4] Extract series metadata
* [tv4] Fix extraction (#22443)
version 2019.09.12.1
Extractors
* [youtube] Remove quality and tbr for itag 43 (#22372)
version 2019.09.12
Extractors
* [youtube] Quick extraction tempfix (#22367, #22163)
version 2019.09.01
Core
+ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
#21859)
+ [downloader/external] Respect mtime option for aria2c (#22242)
Extractors
+ [xhamster:user] Add support for user pages (#16330, #18454)
+ [xhamster] Add support for more domains
+ [verystream] Add support for woof.tube (#22217)
+ [dailymotion] Add support for lequipe.fr (#21328, #22152)
+ [openload] Add support for oload.vip (#22205)
+ [bbccouk] Extend URL regular expression (#19200)
+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
* [safari] Fix authentication (#22161, #22184)
* [usanetwork] Fix extraction (#22105)
+ [einthusan] Add support for einthusan.ca (#22171)
* [youtube] Improve unavailable message extraction (#22117)
+ [piksel] Extract subtitles (#20506)
version 2019.08.13
Core
* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
* [YoutubeDL] Check annotations availability (#18582)
Extractors
* [youtube:playlist] Improve flat extraction (#21927)
* [youtube] Fix annotations extraction (#22045)
+ [discovery] Extract series meta field (#21808)
* [youtube] Improve error detection (#16445)
* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
+ [roosterteeth] Add support for watch URLs
* [discovery] Limit video data by show slug (#21980)
version 2019.08.02
Extractors
+ [tvigle] Add support for HLS and DASH formats (#21967)
* [tvigle] Fix extraction (#21967)
+ [yandexvideo] Add support for DASH formats (#21971)
* [discovery] Use API call for video data extraction (#21808)
+ [mgtv] Extract format_note (#21881)
* [tvn24] Fix metadata extraction (#21833, #21834)
* [dlive] Relax URL regular expression (#21909)
+ [openload] Add support for oload.best (#21913)
* [youtube] Improve metadata extraction for age gate content (#21943)
version 2019.07.30
Extractors

View File

@@ -1216,6 +1216,72 @@ Incorrect:
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
### Inline values
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
#### Example
Correct:
```python
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
```
Incorrect:
```python
TITLE_RE = r'<title>([^<]+)</title>'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
```
### Collapse fallbacks
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
#### Example
Good:
```python
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
```
Unwieldy:
```python
description = (
self._og_search_description(webpage, default=None)
or self._html_search_meta('description', webpage, default=None)
or self._html_search_meta('twitter:description', webpage, default=None))
```
Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
### Trailing parentheses
Always move trailing parentheses after the last argument.
#### Example
Correct:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list)
```
Incorrect:
```python
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
list,
)
```
### Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

View File

@@ -98,6 +98,8 @@
- **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
- **BilibiliAudio**
- **BilibiliAudioAlbum**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
@@ -1100,6 +1102,7 @@
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster**
- **XHamsterEmbed**
- **XHamsterUser**
- **xiami:album**: 虾米音乐 - 专辑
- **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集

View File

@@ -852,8 +852,9 @@ class YoutubeDL(object):
extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(ie_result))
self.__forced_printings(
ie_result, self.prepare_filename(ie_result),
incomplete=True)
return ie_result
if result_type == 'video':
@@ -1693,6 +1694,36 @@ class YoutubeDL(object):
subs[lang] = f
return subs
def __forced_printings(self, info_dict, filename, incomplete):
def print_mandatory(field):
if (self.params.get('force%s' % field, False)
and (not incomplete or info_dict.get(field) is not None)):
self.to_stdout(info_dict[field])
def print_optional(field):
if (self.params.get('force%s' % field, False)
and info_dict.get(field) is not None):
self.to_stdout(info_dict[field])
print_mandatory('title')
print_mandatory('id')
if self.params.get('forceurl', False) and not incomplete:
if info_dict.get('requested_formats') is not None:
for f in info_dict['requested_formats']:
self.to_stdout(f['url'] + f.get('play_path', ''))
else:
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
print_optional('thumbnail')
print_optional('description')
if self.params.get('forcefilename', False) and filename is not None:
self.to_stdout(filename)
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(info_dict))
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -1703,9 +1734,8 @@ class YoutubeDL(object):
if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()
# TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
@@ -1720,29 +1750,7 @@ class YoutubeDL(object):
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings
if self.params.get('forcetitle', False):
self.to_stdout(info_dict['fulltitle'])
if self.params.get('forceid', False):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
if info_dict.get('requested_formats') is not None:
for f in info_dict['requested_formats']:
self.to_stdout(f['url'] + f.get('play_path', ''))
else:
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
self.to_stdout(info_dict['description'])
if self.params.get('forcefilename', False) and filename is not None:
self.to_stdout(filename)
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
if self.params.get('forceformat', False):
self.to_stdout(info_dict['format'])
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(info_dict))
self.__forced_printings(info_dict, filename, incomplete=False)
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -1783,6 +1791,8 @@ class YoutubeDL(object):
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.')
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)

View File

@@ -94,7 +94,7 @@ def _real_main(argv=None):
if opts.verbose:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except IOError:
sys.exit('ERROR: batch file could not be read')
sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

View File

@@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD):
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += ['--', info_dict['url']]
return cmd

View File

@@ -190,12 +190,13 @@ class FragmentFD(FileDownloader):
})
def _start_frag_download(self, ctx):
resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags']
# This dict stores the download progress, it's updated by the progress
# hook
state = {
'status': 'downloading',
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
'downloaded_bytes': resume_len,
'fragment_index': ctx['fragment_index'],
'fragment_count': total_frags,
'filename': ctx['filename'],
@@ -234,8 +235,8 @@ class FragmentFD(FileDownloader):
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
if not ctx['live']:
state['eta'] = self.calc_eta(
start, time_now, estimated_size,
state['downloaded_bytes'])
start, time_now, estimated_size - resume_len,
state['downloaded_bytes'] - resume_len)
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes

View File

@@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor):
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/|
sounds/play/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@@ -70,7 +71,7 @@ class BBCCoUkIE(InfoExtractor):
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
'title': 'Kaleidoscope, Leonard Cohen',
'description': 'The Canadian poet and songwriter reflects on his musical career.',
},
'params': {
@@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor):
# rtmp download
'skip_download': True,
},
}, {
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
'note': 'Audio',
'info_dict': {
'id': 'm0007jz9',
'ext': 'mp4',
'title': 'BBC Proms, 2019, Prom 34: WestEastern Divan Orchestra',
'description': "Live BBC Proms. WestEastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
'duration': 9840,
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
@@ -609,7 +624,7 @@ class BBCIE(BBCCoUkIE):
'url': 'http://www.bbc.com/news/world-europe-32668511',
'info_dict': {
'id': 'world-europe-32668511',
'title': 'Russia stages massive WW2 parade despite Western boycott',
'title': 'Russia stages massive WW2 parade',
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
},
'playlist_count': 2,

View File

@@ -15,6 +15,7 @@ from ..utils import (
float_or_none,
parse_iso8601,
smuggle_url,
str_or_none,
strip_jsonp,
unified_timestamp,
unsmuggle_url,
@@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor):
return self.playlist_result(
entries, bangumi_id,
season_info.get('bangumi_title'), season_info.get('evaluate'))
class BilibiliAudioBaseIE(InfoExtractor):
def _call_api(self, path, sid, query=None):
if not query:
query = {'sid': sid}
return self._download_json(
'https://www.bilibili.com/audio/music-service-c/web/' + path,
sid, query=query)['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
_TEST = {
'url': 'https://www.bilibili.com/audio/au1003142',
'md5': 'fec4987014ec94ef9e666d4d158ad03b',
'info_dict': {
'id': '1003142',
'ext': 'm4a',
'title': '【tsukimi】YELLOW / 神山羊',
'artist': 'tsukimi',
'comment_count': int,
'description': 'YELLOW的mp3版',
'duration': 183,
'subtitles': {
'origin': [{
'ext': 'lrc',
}],
},
'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1564836614,
'upload_date': '20190803',
'uploader': 'tsukimi-つきみぐー',
'view_count': int,
},
}
def _real_extract(self, url):
au_id = self._match_id(url)
play_data = self._call_api('url', au_id)
formats = [{
'url': play_data['cdns'][0],
'filesize': int_or_none(play_data.get('size')),
}]
song = self._call_api('song/info', au_id)
title = song['title']
statistic = song.get('statistic') or {}
subtitles = None
lyric = song.get('lyric')
if lyric:
subtitles = {
'origin': [{
'url': lyric,
}]
}
return {
'id': au_id,
'title': title,
'formats': formats,
'artist': song.get('author'),
'comment_count': int_or_none(statistic.get('comment')),
'description': song.get('intro'),
'duration': int_or_none(song.get('duration')),
'subtitles': subtitles,
'thumbnail': song.get('cover'),
'timestamp': int_or_none(song.get('passtime')),
'uploader': song.get('uname'),
'view_count': int_or_none(statistic.get('play')),
}
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
_TEST = {
'url': 'https://www.bilibili.com/audio/am10624',
'info_dict': {
'id': '10624',
'title': '每日新曲推荐每日11:00更新',
'description': '每天11:00更新为你推送最新音乐',
},
'playlist_count': 19,
}
def _real_extract(self, url):
am_id = self._match_id(url)
songs = self._call_api(
'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
entries = []
for song in songs:
sid = str_or_none(song.get('id'))
if not sid:
continue
entries.append(self.url_result(
'https://www.bilibili.com/audio/au' + sid,
BilibiliAudioIE.ie_key(), sid))
if entries:
album_data = self._call_api('menu/info', am_id) or {}
album_title = album_data.get('title')
if album_title:
for entry in entries:
entry['album'] = album_title
return self.playlist_result(
entries, am_id, album_title, album_data.get('intro'))
return self.playlist_result(entries, am_id)

View File

@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import base64
import json
import re
import struct
@@ -11,14 +10,12 @@ from .adobepass import AdobePassIE
from ..compat import (
compat_etree_fromstring,
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_xml_parse_error,
compat_HTTPError,
)
from ..utils import (
determine_ext,
ExtractorError,
extract_attributes,
find_xpath_attr,
@@ -27,18 +24,19 @@ from ..utils import (
js_to_json,
int_or_none,
parse_iso8601,
smuggle_url,
unescapeHTML,
unsmuggle_url,
update_url_query,
clean_html,
mimetype2ext,
UnsupportedError,
)
class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
_TESTS = [
{
@@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor):
'timestamp': 1368213670,
'upload_date': '20130510',
'uploader_id': '1589608506001',
}
},
'skip': 'The player has been deactivated by the content owner',
},
{
# From http://medianetwork.oracle.com/video/player/1785452137001
@@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'upload_date': '20120814',
'uploader_id': '1460825906',
},
'skip': 'video not playable',
},
{
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
@@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'ext': 'mp4',
'title': 'This Bracelet Acts as a Personal Thermostat',
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
'uploader': 'Mashable',
# 'uploader': 'Mashable',
'timestamp': 1382041798,
'upload_date': '20131017',
'uploader_id': '1130468786001',
@@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'id': '3550319591001',
},
'playlist_mincount': 7,
'skip': 'Unsupported URL',
},
{
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
@@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'title': 'Lesson 08',
},
'playlist_mincount': 10,
'skip': 'Unsupported URL',
},
{
# playerID inferred from bcpid
@@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor):
'only_matching': True, # Tested in GenericIE
}
]
FLV_VCODECS = {
1: 'SORENSON',
2: 'ON2',
3: 'H264',
4: 'VP8',
}
@classmethod
def _build_brighcove_url(cls, object_str):
@@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod
def _make_brightcove_url(cls, params):
return update_url_query(cls._FEDERATED_URL, params)
return update_url_query(
'http://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod
def _extract_brightcove_url(cls, webpage):
@@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor):
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
# We set the original url as the default 'Referer' header
referer = smuggled_data.get('Referer', url)
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
video_id = videoPlayer[0]
if 'playerID' not in query:
mobj = re.search(r'/bcpid(\d+)', url)
if mobj is not None:
query['playerID'] = [mobj.group(1)]
return self._get_video_info(
videoPlayer[0], query, referer=referer)
elif 'playerKey' in query:
player_key = query['playerKey']
return self._get_playlist_info(player_key[0])
else:
raise ExtractorError(
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
expected=True)
def _brightcove_new_url_result(self, publisher_id, video_id):
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
def _get_video_info(self, video_id, query, referer=None):
headers = {}
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]
if referer is not None:
headers['Referer'] = referer
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
error_msg = self._html_search_regex(
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
'error message', default=None)
if error_msg is not None:
publisher_id = query.get('publisherId')
if publisher_id and publisher_id[0].isdigit():
publisher_id = publisher_id[0]
@@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor):
else:
player_id = query.get('playerID')
if player_id and player_id[0].isdigit():
headers = {}
if referer:
headers['Referer'] = referer
player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False)
@@ -349,136 +323,16 @@ class BrightcoveLegacyIE(InfoExtractor):
if player_key:
enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id:
return self._brightcove_new_url_result(publisher_id, video_id)
raise ExtractorError(
'brightcove said: %s' % error_msg, expected=True)
self.report_extraction(video_id)
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
info = json.loads(info)['data']
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
playlist_info = self._download_webpage(
info_url, player_key, 'Downloading playlist information')
json_data = json.loads(playlist_info)
if 'videoList' in json_data:
playlist_info = json_data['videoList']
playlist_dto = playlist_info['mediaCollectionDTO']
elif 'playlistTabs' in json_data:
playlist_info = json_data['playlistTabs']
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
else:
raise ExtractorError('Empty playlist')
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
playlist_title=playlist_dto['displayName'])
def _extract_video_info(self, video_info):
video_id = compat_str(video_info['id'])
publisher_id = video_info.get('publisherId')
info = {
'id': video_id,
'title': video_info['displayName'].strip(),
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
'uploader': video_info.get('publisherName'),
'uploader_id': compat_str(publisher_id) if publisher_id else None,
'duration': float_or_none(video_info.get('length'), 1000),
'timestamp': int_or_none(video_info.get('creationDate'), 1000),
}
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
if renditions:
formats = []
for rend in renditions:
url = rend['defaultURL']
if not url:
continue
ext = None
if rend['remote']:
url_comp = compat_urllib_parse_urlparse(url)
if url_comp.path.endswith('.m3u8'):
formats.extend(
self._extract_m3u8_formats(
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
continue
elif 'akamaihd.net' in url_comp.netloc:
# This type of renditions are served through
# akamaihd.net, but they don't use f4m manifests
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
ext = 'flv'
if ext is None:
ext = determine_ext(url)
tbr = int_or_none(rend.get('encodingRate'), 1000)
a_format = {
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
'url': url,
'ext': ext,
'filesize': int_or_none(rend.get('size')) or None,
'tbr': tbr,
}
if rend.get('audioOnly'):
a_format.update({
'vcodec': 'none',
})
else:
a_format.update({
'height': int_or_none(rend.get('frameHeight')),
'width': int_or_none(rend.get('frameWidth')),
'vcodec': rend.get('videoCodec'),
})
# m3u8 manifests with remote == false are media playlists
# Not calling _extract_m3u8_formats here to save network traffic
if ext == 'm3u8':
a_format.update({
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
'ext': 'mp4',
'protocol': 'm3u8_native',
})
formats.append(a_format)
self._sort_formats(formats)
info['formats'] = formats
elif video_info.get('FLVFullLengthURL') is not None:
info.update({
'url': video_info['FLVFullLengthURL'],
'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
'filesize': int_or_none(video_info.get('FLVFullSize')),
})
if self._downloader.params.get('include_ads', False):
adServerURL = video_info.get('_youtubedl_adServerURL')
if adServerURL:
ad_info = {
'_type': 'url',
'url': adServerURL,
}
if 'url' in info:
return {
'_type': 'playlist',
'title': info['title'],
'entries': [ad_info, info],
}
else:
return ad_info
if not info.get('url') and not info.get('formats'):
uploader_id = info.get('uploader_id')
if uploader_id:
info.update(self._brightcove_new_url_result(uploader_id, video_id))
else:
raise ExtractorError('Unable to extract video url for %s' % video_id)
return info
if publisher_id:
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
if referer:
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
# TODO: figure out if it's possible to extract playlistId from playerKey
# elif 'playerKey' in query:
# player_key = query['playerKey']
# return self._get_playlist_info(player_key[0])
raise UnsupportedError(url)
class BrightcoveNewIE(AdobePassIE):

View File

@@ -3,7 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import parse_duration
from ..utils import (
determine_ext,
merge_dicts,
parse_duration,
url_or_none,
)
class BYUtvIE(InfoExtractor):
@@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
info = self._download_json(
video = self._download_json(
'https://api.byutv.org/api3/catalog/getvideosforcontent',
display_id, query={
'contentid': video_id,
@@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
'x-byutv-platformkey': 'xsaaw9c7y5',
})
ep = info.get('ooyalaVOD')
ep = video.get('ooyalaVOD')
if ep:
return {
'_type': 'url_transparent',
@@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
'thumbnail': ep.get('imageThumbnail'),
}
ep = info['dvr']
title = ep['title']
formats = self._extract_m3u8_formats(
ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
info = {}
formats = []
for format_id, ep in video.items():
if not isinstance(ep, dict):
continue
video_url = url_or_none(ep.get('videoUrl'))
if not video_url:
continue
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': video_url,
'format_id': format_id,
})
merge_dicts(info, {
'title': ep.get('title'),
'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'),
'duration': parse_duration(ep.get('length')),
})
self._sort_formats(formats)
return {
return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
'title': title,
'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'),
'duration': parse_duration(ep.get('length')),
'title': display_id,
'formats': formats,
}
})

View File

@@ -48,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
_VALID_URL = r'''(?ix)
https?://
(?:
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
(?:www\.)?lequipe\.fr/video
)
/(?P<id>[^/?_]+)
'''
IE_NAME = 'dailymotion'
_FORMATS = [
@@ -133,6 +140,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, {
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
'only_matching': True,
}, {
'url': 'https://www.lequipe.fr/video/x791mem',
'only_matching': True,
}, {
'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
'only_matching': True,
}]
@staticmethod

View File

@@ -5,14 +5,8 @@ import re
import string
from .discoverygo import DiscoveryGoBaseIE
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
ExtractorError,
try_get,
)
from ..compat import compat_urllib_parse_unquote
from ..utils import ExtractorError
from ..compat import compat_HTTPError
@@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
cookingchanneltv|
motortrend
)
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
_TESTS = [{
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
'info_dict': {
'id': '5a2d9b4d6b66d17a5026e1fd',
'id': '5a2f35ce6b66d17a5026e29e',
'ext': 'mp4',
'title': 'Dave Foley',
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
'duration': 608,
'title': 'Riding with Matthew Perry',
'description': 'md5:a34333153e79bc4526019a5129e7f878',
'duration': 84,
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -59,20 +53,17 @@ class DiscoveryIE(DiscoveryGoBaseIE):
}, {
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
'only_matching': True,
}, {
# using `show_slug` is important to get the correct video data
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
'only_matching': True,
}]
_GEO_COUNTRIES = ['US']
_GEO_BYPASS = False
_API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url):
site, path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
react_data = self._parse_json(self._search_regex(
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
webpage, 'react data'), display_id)
content_blocks = react_data['layout'][path]['contentBlocks']
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
video_id = video['id']
site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
access_token = None
cookies = self._get_cookies(url)
@@ -82,27 +73,36 @@ class DiscoveryIE(DiscoveryGoBaseIE):
if auth_storage_cookie and auth_storage_cookie.value:
auth_storage = self._parse_json(compat_urllib_parse_unquote(
compat_urllib_parse_unquote(auth_storage_cookie.value)),
video_id, fatal=False) or {}
display_id, fatal=False) or {}
access_token = auth_storage.get('a') or auth_storage.get('access_token')
if not access_token:
access_token = self._download_json(
'https://%s.com/anonymous' % site, display_id, query={
'https://%s.com/anonymous' % site, display_id,
'Downloading token JSON metadata', query={
'authRel': 'authorization',
'client_id': try_get(
react_data, lambda x: x['application']['apiClientId'],
compat_str) or '3020a40c2356a645b4b4',
'client_id': '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
})['access_token']
try:
headers = self.geo_verification_headers()
headers['Authorization'] = 'Bearer ' + access_token
headers = self.geo_verification_headers()
headers['Authorization'] = 'Bearer ' + access_token
try:
video = self._download_json(
self._API_BASE_URL + 'content/videos',
display_id, 'Downloading content JSON metadata',
headers=headers, query={
'embed': 'show.name',
'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
'slug': display_id,
'show_slug': show_slug,
})[0]
video_id = video['id']
stream = self._download_json(
'https://api.discovery.com/v1/streaming/video/' + video_id,
display_id, headers=headers)
self._API_BASE_URL + 'streaming/video/' + video_id,
display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json(

View File

@@ -9,8 +9,8 @@ from ..utils import int_or_none
class DLiveVODIE(InfoExtractor):
IE_NAME = 'dlive:vod'
_VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[a-zA-Z0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
'info_dict': {
'id': '3mTzOl4WR',
@@ -20,7 +20,10 @@ class DLiveVODIE(InfoExtractor):
'timestamp': 1562011015,
'uploader_id': 'pdp',
}
}
}, {
'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
'only_matching': True,
}]
def _real_extract(self, url):
uploader_id, vod_id = re.match(self._VALID_URL, url).groups()

View File

@@ -19,7 +19,7 @@ from ..utils import (
class EinthusanIE(InfoExtractor):
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com))/movie/watch/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://einthusan.tv/movie/watch/9097/',
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
@@ -36,6 +36,9 @@ class EinthusanIE(InfoExtractor):
}, {
'url': 'https://einthusan.com/movie/watch/9097/',
'only_matching': True,
}, {
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
'only_matching': True,
}]
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js

View File

@@ -104,6 +104,8 @@ from .bild import BildIE
from .bilibili import (
BiliBiliIE,
BiliBiliBangumiIE,
BilibiliAudioIE,
BilibiliAudioAlbumIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
@@ -1425,6 +1427,7 @@ from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
XHamsterUserIE,
)
from .xiami import (
XiamiSongIE,

View File

@@ -2075,6 +2075,22 @@ class GenericIE(InfoExtractor):
},
'playlist_count': 6,
},
{
# Squarespace video embed, 2019-08-28
'url': 'http://ootboxford.com',
'info_dict': {
'id': 'Tc7b_JGdZfw',
'title': 'Out of the Blue, at Childish Things 10',
'ext': 'mp4',
'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
'uploader_id': 'helendouglashouse',
'uploader': 'Helen & Douglas House',
'upload_date': '20140328',
},
'params': {
'skip_download': True,
},
},
{
# Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
@@ -2395,6 +2411,12 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
webpage = re.sub(
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
lambda x: unescapeHTML(x.group(0)), webpage)
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name

View File

@@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
webpage, default=None) or self._html_search_meta(
'description', webpage)
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
def _make_kaltura_result(kaltura_url):
return {
'_type': 'url_transparent',
'url': smuggle_url(kaltura_url, {'source_url': url}),
@@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
'description': description,
}
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
return _make_kaltura_result(kaltura_url)
kaltura_id = self._search_regex(
r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
default=None, group='id')
if kaltura_id:
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
yt_urls = YoutubeIE._extract_urls(webpage)
if yt_urls:
return self.playlist_from_matches(

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import hashlib
import hmac
import re
import time
import uuid
@@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE):
format_url = url_or_none(playback_set.get('playbackUrl'))
if not format_url:
continue
format_url = re.sub(
r'(?<=//staragvod)(\d)', r'web\1', format_url)
tags = str_or_none(playback_set.get('tagsCombination')) or ''
if tags and 'encryption:plain' not in tags:
continue
@@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE):
try:
if 'package:hls' in tags or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls'))
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls'))
elif 'package:dash' in tags or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash'))

View File

@@ -22,7 +22,7 @@ from ..utils import (
class InstagramIE(InfoExtractor):
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
@@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor):
}, {
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
'only_matching': True,
}, {
'url': 'https://www.instagram.com/tv/aye83DjauH/',
'only_matching': True,
}]
@staticmethod

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',

View File

@@ -82,6 +82,7 @@ class MGTVIE(InfoExtractor):
'http_headers': {
'Referer': url,
},
'format_note': stream.get('name'),
})
self._sort_formats(formats)

View File

@@ -164,7 +164,7 @@ class MixcloudIE(InfoExtractor):
def decrypt_url(f_url):
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
decrypted_url = self._decrypt_xor_cipher(k, f_url)
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
return decrypted_url
for url_key in ('url', 'hlsUrl', 'dashUrl'):

View File

@@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor):
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
# clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
'md5': '256a1be14f48d960a7e61e2532d95ec3',
'info_dict': {
'id': 'a95j5iza',
'ext': 'mp4',
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
'timestamp': 1565965194,
'upload_date': '20190816',
},
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
'only_matching': True,
}, {
@@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor):
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
'only_matching': True,
}]
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
def _real_extract(self, url):
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
@@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor):
is_video = m_type == 'video'
episode = self._download_json(
self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
self._API_URL_TEMPLATE % (
'v' if is_video else 'r',
'clip' if episode_id[:4] == '9999' else 'esd',
episode_id, lang, '/all' if is_video else ''),
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
title = episode.get('sub_title_clean') or episode['sub_title']
@@ -60,8 +75,8 @@ class NhkVodIE(InfoExtractor):
if is_video:
info.update({
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:' + episode['vod_id'],
'ie_key': 'Piksel',
'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
})
else:
audio = episode['audio']

View File

@@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor):
https?://
(?:
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
(?:www\.)?nickjr\.[a-z]{2}
(?:www\.)?nickjr\.[a-z]{2}|
(?:www\.)?nickelodeonjunior\.fr
)
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
'''
@@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
'only_matching': True,
}, {
'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
page_data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.*?});', webpage,
'page data'), display_id)
'page data', default='{}'), display_id, fatal=False)
if not page_data:
page_data = self._parse_json(self._parse_json(self._search_regex(
r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
webpage, 'page data'), display_id), display_id)
for kind in ('episode', 'clip'):
current_key = page_data.get(kind, {}).get(

View File

@@ -243,7 +243,13 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor):
_DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|life|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)'
_DOMAINS = r'''
(?:
openload\.(?:co|io|link|pw)|
oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|press|pw|life|live|space|services|website|vip)|
oladblock\.(?:services|xyz|me)|openloed\.co
)
'''
_VALID_URL = r'''(?x)
https?://
(?P<host>
@@ -356,6 +362,9 @@ class OpenloadIE(InfoExtractor):
}, {
'url': 'https://oload.services/embed/bs1NWj1dCag/',
'only_matching': True,
}, {
'url': 'https://oload.online/f/W8o2UfN1vNY/',
'only_matching': True,
}, {
'url': 'https://oload.press/embed/drTBl1aOTvk/',
'only_matching': True,
@@ -368,6 +377,9 @@ class OpenloadIE(InfoExtractor):
}, {
'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
'only_matching': True,
}, {
'url': 'https://oload.best/embed/kkz9JgVZeWc/',
'only_matching': True,
}, {
'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
'only_matching': True,
@@ -380,12 +392,15 @@ class OpenloadIE(InfoExtractor):
}, {
'url': 'https://openloed.co/f/b8NWEgkqNLI/',
'only_matching': True,
}, {
'url': 'https://oload.vip/f/kUEfGclsU9o',
'only_matching': True,
}]
@classmethod
def _extract_urls(cls, webpage):
return re.findall(
r'<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
% (cls._DOMAINS, cls._EMBED_WORD), webpage)
def _extract_decrypted_page(self, page_url, webpage, video_id):
@@ -451,7 +466,7 @@ class OpenloadIE(InfoExtractor):
class VerystreamIE(OpenloadIE):
IE_NAME = 'verystream'
_DOMAINS = r'(?:verystream\.com)'
_DOMAINS = r'(?:verystream\.com|woof\.tube)'
_VALID_URL = r'''(?x)
https?://
(?P<host>

View File

@@ -15,18 +15,17 @@ from ..utils import (
class PikselIE(InfoExtractor):
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
_TESTS = [
{
'url': 'http://player.piksel.com/v/nv60p12f',
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
'url': 'http://player.piksel.com/v/ums2867l',
'md5': '34e34c8d89dc2559976a6079db531e85',
'info_dict': {
'id': 'nv60p12f',
'id': 'ums2867l',
'ext': 'mp4',
'title': 'فن الحياة - الحلقة 1',
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
'timestamp': 1465231790,
'upload_date': '20160606',
'title': 'GX-005 with Caption',
'timestamp': 1481335659,
'upload_date': '20161210'
}
},
{
@@ -39,8 +38,13 @@ class PikselIE(InfoExtractor):
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
'timestamp': 1486171129,
'upload_date': '20170204',
'upload_date': '20170204'
}
},
{
# https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
'only_matching': True,
}
]
@@ -53,8 +57,11 @@ class PikselIE(InfoExtractor):
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
webpage, 'program uuid', default=display_id)
app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"',
r'data-de-api-key\s*=\s*"([^"]+)"'
@@ -113,6 +120,13 @@ class PikselIE(InfoExtractor):
})
self._sort_formats(formats)
subtitles = {}
for caption in video_data.get('captions', []):
caption_url = caption.get('url')
if caption_url:
subtitles.setdefault(caption.get('locale', 'en'), []).append({
'url': caption_url})
return {
'id': video_id,
'title': title,
@@ -120,4 +134,5 @@ class PikselIE(InfoExtractor):
'thumbnail': video_data.get('thumbnailUrl'),
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
'subtitles': subtitles,
}

View File

@@ -18,43 +18,10 @@ from ..utils import (
)
class PlatziIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
platzi\.com/clases| # es version
courses\.platzi\.com/classes # en version
)/[^/]+/(?P<id>\d+)-[^/?\#&]+
'''
class PlatziBaseIE(InfoExtractor):
_LOGIN_URL = 'https://platzi.com/login/'
_NETRC_MACHINE = 'platzi'
_TESTS = [{
'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
'md5': '8f56448241005b561c10f11a595b37e3',
'info_dict': {
'id': '12074',
'ext': 'mp4',
'title': 'Creando nuestra primera página',
'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
'duration': 420,
},
'skip': 'Requires platzi account credentials',
}, {
'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
'info_dict': {
'id': '13430',
'ext': 'mp4',
'title': 'Background',
'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
'duration': 360,
},
'skip': 'Requires platzi account credentials',
'params': {
'skip_download': True,
},
}]
def _real_initialize(self):
self._login()
@@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor):
'Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
class PlatziIE(PlatziBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
platzi\.com/clases| # es version
courses\.platzi\.com/classes # en version
)/[^/]+/(?P<id>\d+)-[^/?\#&]+
'''
_TESTS = [{
'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
'md5': '8f56448241005b561c10f11a595b37e3',
'info_dict': {
'id': '12074',
'ext': 'mp4',
'title': 'Creando nuestra primera página',
'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
'duration': 420,
},
'skip': 'Requires platzi account credentials',
}, {
'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
'info_dict': {
'id': '13430',
'ext': 'mp4',
'title': 'Background',
'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
'duration': 360,
},
'skip': 'Requires platzi account credentials',
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
lecture_id = self._match_id(url)
@@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor):
data = self._parse_json(
self._search_regex(
r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
# client_data may contain "};" so that we have to try more
# strict regex first
(r'client_data\s*=\s*({.+?})\s*;\s*\n',
r'client_data\s*=\s*({.+?})\s*;'),
webpage, 'client data'),
lecture_id)
material = data['initialState']['material']
@@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor):
}
class PlatziCourseIE(InfoExtractor):
class PlatziCourseIE(PlatziBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:

View File

@@ -17,7 +17,7 @@ from ..utils import (
class RoosterTeethIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_LOGIN_URL = 'https://roosterteeth.com/login'
_NETRC_MACHINE = 'roosterteeth'
_TESTS = [{
@@ -49,6 +49,9 @@ class RoosterTeethIE(InfoExtractor):
# only available for FIRST members
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
'only_matching': True,
}, {
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True,
}]
def _login(self):

View File

@@ -68,9 +68,10 @@ class SafariBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login: %s' % credentials, expected=True)
# oreilly serves two same groot_sessionid cookies in Set-Cookie header
# and expects first one to be actually set
self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
# oreilly serves two same instances of the following cookies
# in Set-Cookie header and expects first one to be actually set
for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
self._apply_first_set_cookie_header(urlh, cookie)
_, urlh = self._download_webpage_handle(
auth.get('redirect_uri') or next_uri, None, 'Completing login',)

View File

@@ -182,20 +182,29 @@ class TEDIE(InfoExtractor):
title = talk_info['title'].strip()
native_downloads = try_get(
talk_info,
(lambda x: x['downloads']['nativeDownloads'],
lambda x: x['nativeDownloads']),
dict) or {}
downloads = talk_info.get('downloads') or {}
native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
formats = [{
'url': format_url,
'format_id': format_id,
'format': format_id,
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
subtitled_downloads = downloads.get('subtitledDownloads') or {}
for lang, subtitled_download in subtitled_downloads.items():
for q in self._NATIVE_FORMATS:
q_url = subtitled_download.get(q)
if not q_url:
continue
formats.append({
'url': q_url,
'format_id': '%s-%s' % (q, lang),
'language': lang,
})
if formats:
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
if finfo:
f.update(finfo)
@@ -215,34 +224,7 @@ class TEDIE(InfoExtractor):
http_url = None
for format_id, resources in resources_.items():
if format_id == 'h264':
for resource in resources:
h264_url = resource.get('file')
if not h264_url:
continue
bitrate = int_or_none(resource.get('bitrate'))
formats.append({
'url': h264_url,
'format_id': '%s-%sk' % (format_id, bitrate),
'tbr': bitrate,
})
if re.search(r'\d+k', h264_url):
http_url = h264_url
elif format_id == 'rtmp':
streamer = talk_info.get('streamer')
if not streamer:
continue
for resource in resources:
formats.append({
'format_id': '%s-%s' % (format_id, resource.get('name')),
'url': streamer,
'play_path': resource['file'],
'ext': 'flv',
'width': int_or_none(resource.get('width')),
'height': int_or_none(resource.get('height')),
'tbr': int_or_none(resource.get('bitrate')),
})
elif format_id == 'hls':
if format_id == 'hls':
if not isinstance(resources, dict):
continue
stream_url = url_or_none(resources.get('stream'))
@@ -251,6 +233,36 @@ class TEDIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
else:
if not isinstance(resources, list):
continue
if format_id == 'h264':
for resource in resources:
h264_url = resource.get('file')
if not h264_url:
continue
bitrate = int_or_none(resource.get('bitrate'))
formats.append({
'url': h264_url,
'format_id': '%s-%sk' % (format_id, bitrate),
'tbr': bitrate,
})
if re.search(r'\d+k', h264_url):
http_url = h264_url
elif format_id == 'rtmp':
streamer = talk_info.get('streamer')
if not streamer:
continue
for resource in resources:
formats.append({
'format_id': '%s-%s' % (format_id, resource.get('name')),
'url': streamer,
'play_path': resource['file'],
'ext': 'flv',
'width': int_or_none(resource.get('width')),
'height': int_or_none(resource.get('height')),
'tbr': int_or_none(resource.get('bitrate')),
})
m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',

View File

@@ -72,8 +72,13 @@ class TV4IE(InfoExtractor):
video_id = self._match_id(url)
info = self._download_json(
'http://www.tv4play.se/player/assets/%s.json' % video_id,
video_id, 'Downloading video info JSON')
'https://playback-api.b17g.net/asset/%s' % video_id,
video_id, 'Downloading video info JSON', query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls,dash',
'drm': 'widevine',
})['metadata']
title = info['title']
@@ -111,5 +116,9 @@ class TV4IE(InfoExtractor):
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
'is_live': info.get('is_live') is True,
'is_live': info.get('isLive') is True,
'series': info.get('seriesTitle'),
'season_number': int_or_none(info.get('seasonNumber')),
'episode': info.get('episodeTitle'),
'episode_number': int_or_none(info.get('episodeNumber')),
}

View File

@@ -9,6 +9,8 @@ from ..utils import (
float_or_none,
int_or_none,
parse_age_limit,
try_get,
url_or_none,
)
@@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.tvigle.ru/video/sokrat/',
'md5': '36514aed3657d4f70b4b2cef8eb520cd',
'info_dict': {
'id': '1848932',
'display_id': 'sokrat',
'ext': 'flv',
'ext': 'mp4',
'title': 'Сократ',
'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
'duration': 6586,
@@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor):
},
{
'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
'info_dict': {
'id': '5142516',
'ext': 'flv',
@@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
(r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
r'var\s+cloudId\s*=\s*["\'](\d+)',
r'cloudId\s*=\s*["\'](\d+)',
r'class="video-preview current_playing" id="(\d+)"'),
webpage, 'video id')
@@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor):
age_limit = parse_age_limit(item.get('ageRestrictions'))
formats = []
for vcodec, fmts in item['videos'].items():
for vcodec, url_or_fmts in item['videos'].items():
if vcodec == 'hls':
continue
for format_id, video_url in fmts.items():
if format_id == 'm3u8':
m3u8_url = url_or_none(url_or_fmts)
if not m3u8_url:
continue
height = self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)
formats.append({
'url': video_url,
'format_id': '%s-%s' % (vcodec, format_id),
'vcodec': vcodec,
'height': int_or_none(height),
'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
})
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif vcodec == 'dash':
mpd_url = url_or_none(url_or_fmts)
if not mpd_url:
continue
formats.extend(self._extract_mpd_formats(
mpd_url, video_id, mpd_id='dash', fatal=False))
else:
if not isinstance(url_or_fmts, dict):
continue
for format_id, video_url in url_or_fmts.items():
if format_id == 'm3u8':
continue
video_url = url_or_none(video_url)
if not video_url:
continue
height = self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)
filesize = int_or_none(try_get(
item, lambda x: x['video_files_size'][vcodec][format_id]))
formats.append({
'url': video_url,
'format_id': '%s-%s' % (vcodec, format_id),
'vcodec': vcodec,
'height': int_or_none(height),
'filesize': filesize,
})
self._sort_formats(formats)
return {

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
NO_DEFAULT,
unescapeHTML,
)
@@ -17,9 +18,21 @@ class TVN24IE(InfoExtractor):
'id': '1584444',
'ext': 'mp4',
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
'thumbnail': 're:https?://.*[.]jpeg',
}
}, {
# different layout
'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
'info_dict': {
'id': '1771763',
'ext': 'mp4',
'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
'thumbnail': 're:https?://.*',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
'only_matching': True,
@@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor):
}]
def _real_extract(self, url):
video_id = self._match_id(url)
display_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
webpage, 'title')
def extract_json(attr, name, fatal=True):
def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
return self._parse_json(
self._search_regex(
r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
name, group='json', fatal=fatal) or '{}',
video_id, transform_source=unescapeHTML, fatal=fatal)
name, group='json', default=default, fatal=fatal) or '{}',
display_id, transform_source=unescapeHTML, fatal=fatal)
quality_data = extract_json('data-quality', 'formats')
@@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor):
})
self._sort_formats(formats)
description = self._og_search_description(webpage)
description = self._og_search_description(webpage, default=None)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_regex(
r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
'thumbnail', group='url')
video_id = None
share_params = extract_json(
'data-share-params', 'share params', fatal=False)
'data-share-params', 'share params', default=None)
if isinstance(share_params, dict):
video_id = share_params.get('id') or video_id
video_id = share_params.get('id')
if not video_id:
video_id = self._search_regex(
r'data-vid-id=["\'](\d+)', webpage, 'video id',
default=None) or self._search_regex(
r',(\d+)\.html', url, 'video id', default=display_id)
return {
'id': video_id,

View File

@@ -1,11 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .adobepass import AdobePassIE
from ..utils import (
extract_attributes,
NO_DEFAULT,
smuggle_url,
update_url_query,
)
@@ -31,22 +29,22 @@ class USANetworkIE(AdobePassIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
player_params = extract_attributes(self._search_regex(
r'(<div[^>]+data-usa-tve-player-container[^>]*>)', webpage, 'player params'))
video_id = player_params['data-mpx-guid']
title = player_params['data-episode-title']
def _x(name, default=NO_DEFAULT):
return self._search_regex(
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
webpage, name, default=default, group='value')
account_pid, path = re.search(
r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)',
webpage).groups()
video_id = _x('mpx-guid')
title = _x('episode-title')
mpx_account_id = _x('mpx-account-id', '2304992029')
query = {
'mbr': 'true',
}
if player_params.get('data-is-full-episode') == '1':
if _x('is-full-episode', None) == '1':
query['manifest'] = 'm3u'
if player_params.get('data-entitlement') == 'auth':
if _x('is-entitlement', None) == '1':
adobe_pass = {}
drupal_settings = self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
@@ -57,7 +55,7 @@ class USANetworkIE(AdobePassIE):
adobe_pass = drupal_settings.get('adobePass', {})
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'usa'),
title, video_id, player_params.get('data-episode-rating', 'TV-14'))
title, video_id, _x('episode-rating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
@@ -65,11 +63,11 @@ class USANetworkIE(AdobePassIE):
info.update({
'_type': 'url_transparent',
'url': smuggle_url(update_url_query(
'http://link.theplatform.com/s/%s/%s' % (account_pid, path),
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
query), {'force_smil_url': True}),
'id': video_id,
'title': title,
'series': player_params.get('data-show-title'),
'series': _x('show-title', None),
'episode': title,
'ie_key': 'ThePlatform',
})

View File

@@ -2,12 +2,14 @@
from __future__ import unicode_literals
import base64
import functools
import json
import re
import itertools
from .common import InfoExtractor
from ..compat import (
compat_kwargs,
compat_HTTPError,
compat_str,
compat_urlparse,
@@ -19,6 +21,7 @@ from ..utils import (
int_or_none,
merge_dicts,
NO_DEFAULT,
OnDemandPagedList,
parse_filesize,
qualities,
RegexNotFoundError,
@@ -98,6 +101,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
webpage, 'vuid', group='vuid')
return xsrft, vuid
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
vimeo_config = self._search_regex(
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
if vimeo_config:
return self._parse_json(vimeo_config, video_id)
def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)
@@ -253,7 +263,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
\.
)?
vimeo(?P<pro>pro)?\.com/
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)?
(?:
(?:
@@ -580,11 +590,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
vimeo_config = self._search_regex(
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
'vimeo config', default=None)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
seed_status = vimeo_config.get('seed_status', {})
if seed_status.get('state') == 'failed':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, seed_status['title']),
@@ -905,7 +913,7 @@ class VimeoUserIE(VimeoChannelIE):
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'https://vimeo.com/album/2632481',
@@ -925,21 +933,39 @@ class VimeoAlbumIE(VimeoChannelIE):
'params': {
'videopassword': 'youtube-dl',
}
}, {
'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
'only_matching': True,
}, {
# TODO: respect page number
'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
'only_matching': True,
}]
_PAGE_SIZE = 100
def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
api_page = page + 1
query = {
'fields': 'link',
'page': api_page,
'per_page': self._PAGE_SIZE,
}
if hashed_pass:
query['_hashed_pass'] = hashed_pass
videos = self._download_json(
'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorizaion,
})['data']
for video in videos:
link = video.get('link')
if not link:
continue
yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link))
def _real_extract(self, url):
album_id = self._match_id(url)
return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
webpage = self._download_webpage(url, album_id)
webpage = self._login_list_password(url, album_id, webpage)
api_config = self._extract_vimeo_config(webpage, album_id)['api']
entries = OnDemandPagedList(functools.partial(
self._fetch_page, album_id, api_config['jwt'],
api_config.get('hashed_pass')), self._PAGE_SIZE)
return self.playlist_result(entries, album_id, self._html_search_regex(
r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
class VimeoGroupsIE(VimeoAlbumIE):

View File

@@ -403,8 +403,17 @@ class VKIE(VKBaseIE):
data = self._parse_json(
self._search_regex(
r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
'player params'),
video_id)['params'][0]
'player params', default='{}'),
video_id)
if data:
data = data['params'][0]
# <!--{...}
if not data:
data = self._parse_json(
self._search_regex(
r'<!--\s*({.+})', info_page, 'payload'),
video_id)['payload'][-1][-1]['player']['params'][0]
title = unescapeHTML(data['md_title'])

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
@@ -8,6 +9,7 @@ from ..utils import (
clean_html,
determine_ext,
dict_get,
extract_attributes,
ExtractorError,
int_or_none,
parse_duration,
@@ -18,21 +20,21 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?xhamster\.(?:com|one)/
(?:.+?\.)?%s/
(?:
movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
)
'''
''' % _DOMAINS
_TESTS = [{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'md5': '8281348b8d3c53d39fffb377d24eac4e',
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '98b4687efb1ffd331c4197854dc09e8f',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent_shy_beauty_takes_the_bait',
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'timestamp': 1350194821,
@@ -40,13 +42,12 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
},
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
'info_dict': {
'id': '2221348',
'display_id': 'britney_spears_sexy_booty',
'display_id': 'britney-spears-sexy-booty',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'timestamp': 1379123460,
@@ -54,13 +55,12 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
},
'params': {
'skip_download': True,
},
}, {
# empty seo
# empty seo, unavailable via new URL schema
'url': 'http://xhamster.com/movies/5667973/.html',
'info_dict': {
'id': '5667973',
@@ -71,7 +71,6 @@ class XHamsterIE(InfoExtractor):
'uploader': 'parejafree',
'duration': 72,
'age_limit': 18,
'categories': ['Amateur', 'Blowjobs'],
},
'params': {
'skip_download': True,
@@ -94,6 +93,18 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'only_matching': True,
}, {
'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'only_matching': True,
}, {
'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'only_matching': True,
}, {
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'only_matching': True,
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -285,7 +296,7 @@ class XHamsterIE(InfoExtractor):
class XHamsterEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
_TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': {
@@ -322,3 +333,49 @@ class XHamsterEmbedIE(InfoExtractor):
video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
return self.url_result(video_url, 'XHamster')
class XHamsterUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
_TESTS = [{
# Paginated user profile
'url': 'https://xhamster.com/users/netvideogirls/videos',
'info_dict': {
'id': 'netvideogirls',
},
'playlist_mincount': 267,
}, {
# Non-paginated user profile
'url': 'https://xhamster.com/users/firatkaan/videos',
'info_dict': {
'id': 'firatkaan',
},
'playlist_mincount': 1,
}]
def _entries(self, user_id):
next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
for pagenum in itertools.count(1):
page = self._download_webpage(
next_page_url, user_id, 'Downloading page %s' % pagenum)
for video_tag in re.findall(
r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
page):
video = extract_attributes(video_tag)
video_url = url_or_none(video.get('href'))
if not video_url or not XHamsterIE.suitable(video_url):
continue
video_id = XHamsterIE._match_id(video_url)
yield self.url_result(
video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
if not mobj:
break
next_page = extract_attributes(mobj.group(0))
next_page_url = url_or_none(next_page.get('href'))
if not next_page_url:
break
def _real_extract(self, url):
user_id = self._match_id(url)
return self.playlist_result(self._entries(user_id), user_id)

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
url_or_none,
)
@@ -47,6 +48,10 @@ class YandexVideoIE(InfoExtractor):
# episode, sports
'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
'only_matching': True,
}, {
# DASH with DRM
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -59,13 +64,22 @@ class YandexVideoIE(InfoExtractor):
'disable_trackings': 1,
})['content']
m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
content_url = url_or_none(content.get('content_url')) or url_or_none(
content['streams'][0]['url'])
title = content.get('title') or content.get('computed_title')
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
ext = determine_ext(content_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(
content_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
elif ext == 'mpd':
formats = self._extract_mpd_formats(
content_url, video_id, mpd_id='dash')
else:
formats = [{'url': content_url}]
self._sort_formats(formats)
description = content.get('description')

View File

@@ -31,6 +31,7 @@ from ..utils import (
clean_html,
dict_get,
error_to_compat_str,
extract_attributes,
ExtractorError,
float_or_none,
get_element_by_attribute,
@@ -40,7 +41,6 @@ from ..utils import (
orderedSet,
parse_codecs,
parse_duration,
qualities,
remove_quotes,
remove_start,
smuggle_url,
@@ -324,17 +324,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
for video_id, video_title in self.extract_videos_from_page(content):
yield self.url_result(video_id, 'Youtube', video_id, video_title)
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
for mobj in re.finditer(self._VIDEO_RE, page):
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
for mobj in re.finditer(video_re, page):
# The link with index 0 is not the first video of the playlist (not sure if still actual)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
continue
video_id = mobj.group('id')
video_title = unescapeHTML(mobj.group('title'))
video_title = unescapeHTML(
mobj.group('title')) if 'title' in mobj.groupdict() else None
if video_title:
video_title = video_title.strip()
if video_title == '► Play all':
video_title = None
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
@@ -342,6 +343,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
self.extract_videos_from_page_impl(
self._VIDEO_RE, page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
@@ -379,8 +386,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:www\.)?invidious\.enkirton\.net/|
(?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.mastodon\.host/|
(?:www\.)?invidious\.nixnet\.xyz/|
(?:www\.)?invidious\.drycat\.fr/|
(?:www\.)?tube\.poal\.co/|
(?:www\.)?vid\.wxzm\.sx/|
(?:www\.)?yt\.elukerio\.org/|
(?:www\.)?kgg2m7yk5aybusll\.onion/|
(?:www\.)?qklhadlycap4cnod\.onion/|
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
@@ -1595,17 +1612,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2)
return video_id
def _extract_annotations(self, video_id):
return self._download_webpage(
'https://www.youtube.com/annotations_invideo', video_id,
note='Downloading annotations',
errnote='Unable to download video annotations', fatal=False,
query={
'features': 1,
'legacy': 1,
'video_id': video_id,
})
@staticmethod
def _extract_chapters(description, duration):
if not description:
@@ -1700,6 +1706,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_token(v_info):
return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
def extract_player_response(player_response, video_id):
pl_response = str_or_none(player_response)
if not pl_response:
return
pl_response = self._parse_json(pl_response, video_id, fatal=False)
if isinstance(pl_response, dict):
add_dash_mpd_pr(pl_response)
return pl_response
player_response = {}
# Get video info
@@ -1722,7 +1737,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
note='Refetching age-gated info webpage',
errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
pl_response = video_info.get('player_response', [None])[0]
player_response = extract_player_response(pl_response, video_id)
add_dash_mpd(video_info)
view_count = extract_view_count(video_info)
else:
age_gate = False
video_info = None
@@ -1745,11 +1763,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_live = True
sts = ytplayer_config.get('sts')
if not player_response:
pl_response = str_or_none(args.get('player_response'))
if pl_response:
pl_response = self._parse_json(pl_response, video_id, fatal=False)
if isinstance(pl_response, dict):
player_response = pl_response
player_response = extract_player_response(args.get('player_response'), video_id)
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
# We also try looking in get_video_info since it may contain different dashmpd
@@ -1781,9 +1795,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
get_video_info = compat_parse_qs(video_info_webpage)
if not player_response:
pl_response = get_video_info.get('player_response', [None])[0]
if isinstance(pl_response, dict):
player_response = pl_response
add_dash_mpd_pr(player_response)
player_response = extract_player_response(pl_response, video_id)
add_dash_mpd(get_video_info)
if view_count is None:
view_count = extract_view_count(get_video_info)
@@ -1806,9 +1818,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
def extract_unavailable_message():
return self._html_search_regex(
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
video_webpage, 'unavailable message', default=None)
messages = []
for tag, kind in (('h1', 'message'), ('div', 'submessage')):
msg = self._html_search_regex(
r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
video_webpage, 'unavailable %s' % kind, default=None)
if msg:
messages.append(msg)
if messages:
return '\n'.join(messages)
if not video_info:
unavailable_message = extract_unavailable_message()
@@ -1898,6 +1916,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return int_or_none(self._search_regex(
r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@@ -1906,10 +1927,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': video_info['conn'][0],
'player_url': player_url,
}]
elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
formats = []
formats_spec = {}
fmt_list = video_info.get('fmt_list', [''])[0]
if fmt_list:
@@ -1923,91 +1945,104 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
}
q = qualities(['small', 'medium', 'hd720'])
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
if streaming_formats:
for fmt in streaming_formats:
itag = str_or_none(fmt.get('itag'))
if not itag:
continue
quality = fmt.get('quality')
quality_label = fmt.get('qualityLabel') or quality
formats_spec[itag] = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_note': quality_label,
'fps': int_or_none(fmt.get('fps')),
'height': int_or_none(fmt.get('height')),
'quality': q(quality),
# bitrate for itag 43 is always 2147483647
'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
'width': int_or_none(fmt.get('width')),
}
formats = []
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
for fmt in streaming_formats:
itag = str_or_none(fmt.get('itag'))
if not itag:
continue
quality = fmt.get('quality')
quality_label = fmt.get('qualityLabel') or quality
formats_spec[itag] = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_note': quality_label,
'fps': int_or_none(fmt.get('fps')),
'height': int_or_none(fmt.get('height')),
# bitrate for itag 43 is always 2147483647
'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
'width': int_or_none(fmt.get('width')),
}
for fmt in streaming_formats:
if fmt.get('drm_families'):
continue
url = url_or_none(fmt.get('url'))
if not url:
cipher = fmt.get('cipher')
if not cipher:
continue
url_data = compat_parse_qs(cipher)
url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
if not url:
continue
else:
cipher = None
url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
# Unsupported FORMAT_STREAM_TYPE_OTF
if stream_type == 3:
continue
format_id = url_data['itag'][0]
url = url_data['url'][0]
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
jsplayer_url_json = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
'JS player URL (1)', default=None)
if not jsplayer_url_json and not age_gate:
# We need the embed website after all
if embed_webpage is None:
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(
embed_url, video_id, 'Downloading embed webpage')
format_id = fmt.get('itag') or url_data['itag'][0]
if not format_id:
continue
format_id = compat_str(format_id)
if cipher:
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
jsplayer_url_json = self._search_regex(
ASSETS_RE, embed_webpage, 'JS player URL')
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
'JS player URL (1)', default=None)
if not jsplayer_url_json and not age_gate:
# We need the embed website after all
if embed_webpage is None:
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(
embed_url, video_id, 'Downloading embed webpage')
jsplayer_url_json = self._search_regex(
ASSETS_RE, embed_webpage, 'JS player URL')
player_url = json.loads(jsplayer_url_json)
if player_url is None:
player_url_json = self._search_regex(
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
video_webpage, 'age gate player URL')
player_url = json.loads(player_url_json)
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'):
player_url = json.loads(jsplayer_url_json)
if player_url is None:
player_version = 'unknown'
player_desc = 'unknown'
else:
if player_url.endswith('swf'):
player_version = self._search_regex(
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
player_url_json = self._search_regex(
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
video_webpage, 'age gate player URL')
player_url = json.loads(player_url_json)
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'):
if player_url is None:
player_version = 'unknown'
player_desc = 'unknown'
else:
player_version = self._search_regex(
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
if player_url.endswith('swf'):
player_version = self._search_regex(
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen('{%s} signature length %s, %s' %
(format_id, parts_sizes, player_desc))
parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen('{%s} signature length %s, %s' %
(format_id, parts_sizes, player_desc))
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
url += '&%s=%s' % (sp, signature)
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
url += '&%s=%s' % (sp, signature)
if 'ratebypass' not in url:
url += '&ratebypass=yes'
@@ -2027,24 +2062,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
if width is None:
width = int_or_none(fmt.get('width'))
if height is None:
height = int_or_none(fmt.get('height'))
filesize = int_or_none(url_data.get(
'clen', [None])[0]) or _extract_filesize(url)
quality = url_data.get('quality', [None])[0]
quality = url_data.get('quality', [None])[0] or fmt.get('quality')
quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
more_fields = {
'filesize': filesize,
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'tbr': tbr,
'width': width,
'height': height,
'fps': int_or_none(url_data.get('fps', [None])[0]),
'format_note': url_data.get('quality_label', [None])[0] or quality,
'quality': q(quality),
'fps': fps,
'format_note': quality_label or quality,
}
for key, value in more_fields.items():
if value:
dct[key] = value
type_ = url_data.get('type', [None])[0]
type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
if type_:
type_split = type_.split(';')
kind_ext = type_split[0].split('/')
@@ -2092,9 +2136,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
error_message = clean_html(video_info.get('reason', [None])[0])
error_message = extract_unavailable_message()
if not error_message:
error_message = extract_unavailable_message()
error_message = clean_html(try_get(
player_response, lambda x: x['playabilityStatus']['reason'],
compat_str))
if not error_message:
error_message = clean_html(
try_get(video_info, lambda x: x['reason'][0], compat_str))
if error_message:
raise ExtractorError(error_message, expected=True)
raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
@@ -2265,7 +2314,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# annotations
video_annotations = None
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
xsrf_token = self._search_regex(
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
invideo_url = try_get(
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
if xsrf_token and invideo_url:
xsrf_field_name = self._search_regex(
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
video_webpage, 'xsrf field name',
group='xsrf_field_name', default='session_token')
video_annotations = self._download_webpage(
self._proto_relative_url(invideo_url),
video_id, note='Downloading annotations',
errnote='Unable to download video annotations', fatal=False,
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
chapters = self._extract_chapters(description_original, video_duration)
@@ -2423,7 +2486,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(%(playlist_id)s)
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
_VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist'
_TESTS = [{
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -2588,6 +2652,34 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
def _real_initialize(self):
self._login()
def extract_videos_from_page(self, page):
ids_in_page = []
titles_in_page = []
for item in re.findall(
r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
attrs = extract_attributes(item)
video_id = attrs['data-video-id']
video_title = unescapeHTML(attrs.get('data-title'))
if video_title:
video_title = video_title.strip()
ids_in_page.append(video_id)
titles_in_page.append(video_title)
# Fallback with old _VIDEO_RE
self.extract_videos_from_page_impl(
self._VIDEO_RE, page, ids_in_page, titles_in_page)
# Relaxed fallbacks
self.extract_videos_from_page_impl(
r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
ids_in_page, titles_in_page)
self.extract_videos_from_page_impl(
r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
@@ -2650,7 +2742,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
page, 'title', default=None)
_UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
uploader = self._search_regex(
uploader = self._html_search_regex(
r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
page, 'uploader', default=None)
mobj = re.search(

View File

@@ -41,6 +41,7 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
_GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2019.07.30'
__version__ = '2019.09.28'