Merge remote-tracking branch 'upstream/master'

This commit is contained in:
vallovic 2021-08-28 17:33:40 +01:00
commit 89a75524e6
40 changed files with 463 additions and 455 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.04.26. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2021.04.26** - [ ] I've verified that I'm running youtube-dl version **2021.06.06**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2021.04.26 [debug] youtube-dl version 2021.06.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.04.26. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2021.04.26** - [ ] I've verified that I'm running youtube-dl version **2021.06.06**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of the provided URLs violate any copyrights - [ ] I've checked that none of the provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.04.26. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2021.04.26** - [ ] I've verified that I'm running youtube-dl version **2021.06.06**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.04.26. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2021.04.26** - [ ] I've verified that I'm running youtube-dl version **2021.06.06**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2021.04.26 [debug] youtube-dl version 2021.06.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.04.26. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2021.04.26** - [ ] I've verified that I'm running youtube-dl version **2021.06.06**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,3 +1,52 @@
version 2021.06.06
Extractors
* [facebook] Improve login required detection
* [youporn] Fix formats and view count extraction (#29216)
* [orf:tvthek] Fix thumbnails extraction (#29217)
* [formula1] Fix extraction (#29206)
* [ard] Relax URL regular expression and fix video ids (#22724, #29091)
+ [ustream] Detect https embeds (#29133)
* [ted] Prefer own formats over external sources (#29142)
* [twitch:clips] Improve extraction (#29149)
+ [twitch:clips] Add access token query to download URLs (#29136)
* [youtube] Fix get_video_info request (#29086, #29165)
* [vimeo] Fix vimeo pro embed extraction (#29126)
* [redbulltv] Fix embed data extraction (#28770)
* [shahid] Relax URL regular expression (#28772, #28930)
version 2021.05.16
Core
* [options] Fix thumbnail option group name (#29042)
* [YoutubeDL] Improve extract_info doc (#28946)
Extractors
+ [playstuff] Add support for play.stuff.co.nz (#28901, #28931)
* [eroprofile] Fix extraction (#23200, #23626, #29008)
+ [vivo] Add support for vivo.st (#29009)
+ [generic] Add support for og:audio (#28311, #29015)
* [phoenix] Fix extraction (#29057)
+ [generic] Add support for sibnet embeds
+ [vk] Add support for sibnet embeds (#9500)
+ [generic] Add Referer header for direct videojs download URLs (#2879,
#20217, #29053)
* [orf:radio] Switch download URLs to HTTPS (#29012, #29046)
- [blinkx] Remove extractor (#28941)
* [medaltv] Relax URL regular expression (#28884)
+ [funimation] Add support for optional lang code in URLs (#28950)
+ [gdcvault] Add support for HTML5 videos
* [dispeak] Improve FLV extraction (#13513, #28970)
* [kaltura] Improve iframe extraction (#28969)
* [kaltura] Make embed code alternatives actually work
* [cda] Improve extraction (#28709, #28937)
* [twitter] Improve formats extraction from vmap URL (#28909)
* [xtube] Fix formats extraction (#28870)
* [svtplay] Improve extraction (#28507, #28876)
* [tv2dk] Fix extraction (#28888)
version 2021.04.26 version 2021.04.26
Extractors Extractors

View File

@ -287,7 +287,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--no-cache-dir Disable filesystem caching --no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files --rm-cache-dir Delete all filesystem cache files
## Thumbnail images: ## Thumbnail Options:
--write-thumbnail Write thumbnail image to disk --write-thumbnail Write thumbnail image to disk
--write-all-thumbnails Write all thumbnail image formats to --write-all-thumbnails Write all thumbnail image formats to
disk disk
@ -893,7 +893,7 @@ Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) yout
### The exe throws an error due to missing `MSVCR100.dll` ### The exe throws an error due to missing `MSVCR100.dll`
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555). To run the exe you need to install first the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe).
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?

View File

@ -119,7 +119,6 @@
- **BitChuteChannel** - **BitChuteChannel**
- **BleacherReport** - **BleacherReport**
- **BleacherReportCMS** - **BleacherReportCMS**
- **blinkx**
- **Bloomberg** - **Bloomberg**
- **BokeCC** - **BokeCC**
- **BongaCams** - **BongaCams**
@ -713,6 +712,7 @@
- **play.fm** - **play.fm**
- **player.sky.it** - **player.sky.it**
- **PlayPlusTV** - **PlayPlusTV**
- **PlayStuff**
- **PlaysTV** - **PlaysTV**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid** - **Playvid**

View File

@ -9,10 +9,10 @@ from ..utils import (
class AppleConnectIE(InfoExtractor): class AppleConnectIE(InfoExtractor):
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)' _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
_TEST = { _TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'md5': 'e7c38568a01ea45402570e6029206723', 'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': { 'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v', 'ext': 'm4v',
@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
'upload_date': '20150710', 'upload_date': '20150710',
'timestamp': 1436545535, 'timestamp': 1436545535,
}, },
} }, {
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
video_data = self._parse_json(video_json, video_id) video_data = self._parse_json(video_json, video_id)
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count')) like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
return { return {
'id': video_id, 'id': video_id,

View File

@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE):
class ARDIE(InfoExtractor): class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html' _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
_TESTS = [{ _TESTS = [{
# available till 7.01.2022 # available till 7.01.2022
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html', 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1', 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': { 'info_dict': {
'display_id': 'maischberger-die-woche', 'id': 'maischberger-die-woche-video100',
'id': '100', 'display_id': 'maischberger-die-woche-video100',
'ext': 'mp4', 'ext': 'mp4',
'duration': 3687.0, 'duration': 3687.0,
'title': 'maischberger. die woche vom 7. Januar 2021', 'title': 'maischberger. die woche vom 7. Januar 2021',
@ -264,16 +264,25 @@ class ARDIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
}, },
}, { }, {
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html', 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
'only_matching': True,
}, {
'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
'only_matching': True,
}, {
'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id') display_id = mobj.group('id')
player_url = mobj.group('mainurl') + '~playerXml.xml' player_url = mobj.group('mainurl') + '~playerXml.xml'
doc = self._download_xml(player_url, display_id) doc = self._download_xml(player_url, display_id)
@ -324,7 +333,7 @@ class ARDIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': mobj.group('id'), 'id': xpath_text(video_node, './videoId', default=display_id),
'formats': formats, 'formats': formats,
'display_id': display_id, 'display_id': display_id,
'title': video_node.find('./title').text, 'title': video_node.find('./title').text,

View File

@ -233,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
webpage) webpage)
if uploader_mobj: if uploader_mobj:
info.update({ info.update({
'uploader': uploader_mobj.group('name'), 'uploader': uploader_mobj.group('name').strip(),
'uploader_id': uploader_mobj.group('id'), 'uploader_id': uploader_mobj.group('id'),
}) })
if not info.get('uploader'): if not info.get('uploader'):

View File

@ -145,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collection' IE_NAME = 'curiositystream:collection'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)' _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://app.curiositystream.com/collection/2', 'url': 'https://app.curiositystream.com/collection/2',
'info_dict': { 'info_dict': {
@ -157,6 +157,9 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
}, { }, {
'url': 'https://curiositystream.com/series/2', 'url': 'https://curiositystream.com/series/2',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://curiositystream.com/collections/36',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
class EggheadCourseIE(EggheadBaseIE): class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course' IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course' IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29, 'playlist_count': 29,
'info_dict': { 'info_dict': {
'id': '72', 'id': '432655',
'title': 'Professor Frisby Introduces Composable Functional JavaScript', 'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$', 'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
}, },
} }, {
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
class EggheadLessonIE(EggheadBaseIE): class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson' IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson' IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)' _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': { 'info_dict': {
@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
}, { }, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application', 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode from ..compat import compat_urllib_parse_urlencode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unescapeHTML merge_dicts,
) )
@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
'title': 'sexy babe softcore', 'title': 'sexy babe softcore',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18, 'age_limit': 18,
} },
'skip': 'Video not found',
}, { }, {
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
'md5': '1baa9602ede46ce904c431f5418d8916', 'md5': '1baa9602ede46ce904c431f5418d8916',
@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None) webpage, 'video id', default=None)
video_url = unescapeHTML(self._search_regex(
r'<source src="([^"]+)', webpage, 'video url'))
title = self._html_search_regex( title = self._html_search_regex(
r'Title:</th><td>([^<]+)</td>', webpage, 'title') (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
thumbnail = self._search_regex( webpage, 'title')
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
webpage, 'thumbnail', fatal=False)
return { info = self._parse_html5_media_entries(url, webpage, video_id)[0]
return merge_dicts(info, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url,
'title': title, 'title': title,
'thumbnail': thumbnail,
'age_limit': 18, 'age_limit': 18,
} })

View File

@ -610,10 +610,6 @@ from .linkedin import (
from .linuxacademy import LinuxAcademyIE from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE from .litv import LiTVIE
from .livejournal import LiveJournalIE from .livejournal import LiveJournalIE
from .liveleak import (
LiveLeakIE,
LiveLeakEmbedIE,
)
from .livestream import ( from .livestream import (
LivestreamIE, LivestreamIE,
LivestreamOriginalIE, LivestreamOriginalIE,
@ -925,6 +921,7 @@ from .platzi import (
from .playfm import PlayFMIE from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE from .plays import PlaysTVIE
from .playstuff import PlayStuffIE
from .playtvak import PlaytvakIE from .playtvak import PlaytvakIE
from .playvid import PlayvidIE from .playvid import PlayvidIE
from .playwire import PlaywireIE from .playwire import PlaywireIE

View File

@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
raise ExtractorError( raise ExtractorError(
'The video is not available, Facebook said: "%s"' % m_msg.group(1), 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True) expected=True)
elif '>You must log in to continue' in webpage: elif any(p in webpage for p in (
'>You must log in to continue',
'id="login_form"',
'id="loginbutton"')):
self.raise_login_required() self.raise_login_required()
if not video_data and '/watchparty/' in url: if not video_data and '/watchparty/' in url:

View File

@ -5,29 +5,23 @@ from .common import InfoExtractor
class Formula1IE(InfoExtractor): class Formula1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html' _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
_TESTS = [{ _TEST = {
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', 'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
'md5': '8c79e54be72078b26b89e0e111c0502b', 'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
'info_dict': { 'info_dict': {
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 'id': '6060988138001',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Race highlights - Spain 2016', 'title': 'Race highlights - Spain 2016',
'timestamp': 1463332814,
'upload_date': '20160515',
'uploader_id': '6057949432001',
}, },
'params': { 'add_ie': ['BrightcoveNew'],
# m3u8 download }
'skip_download': True, BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) bc_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_embed_code = self._search_regex(
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
return self.url_result( return self.url_result(
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code) self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)

View File

@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .arkena import ArkenaIE from .arkena import ArkenaIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
@ -126,6 +125,7 @@ from .viqeo import ViqeoIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .vk import VKIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
@ -1628,31 +1628,6 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409', 'upload_date': '20160409',
}, },
}, },
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Another LiveLeak embed pattern (#13336)
{
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
'info_dict': {
'id': '2eb_1496309988',
'ext': 'mp4',
'title': 'Thief robs place where everyone was armed',
'description': 'md5:694d73ee79e535953cf2488562288eee',
'uploader': 'brazilwtf',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs # Duplicated embedded video URLs
{ {
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@ -2248,6 +2223,11 @@ class GenericIE(InfoExtractor):
}, },
'playlist_mincount': 52, 'playlist_mincount': 52,
}, },
{
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'only_matching': True,
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2777,6 +2757,11 @@ class GenericIE(InfoExtractor):
if odnoklassniki_url: if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for sibnet embedded player
sibnet_urls = VKIE._extract_sibnet_urls(webpage)
if sibnet_urls:
return self.playlist_from_matches(sibnet_urls, video_id, video_title)
# Look for embedded ivi player # Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
if mobj is not None: if mobj is not None:
@ -3168,11 +3153,6 @@ class GenericIE(InfoExtractor):
return self.url_result( return self.url_result(
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
# Look for LiveLeak embeds
liveleak_urls = LiveLeakIE._extract_urls(webpage)
if liveleak_urls:
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
# Look for 3Q SDN embeds # Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage) threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url: if threeqsdn_url:
@ -3400,6 +3380,9 @@ class GenericIE(InfoExtractor):
'url': src, 'url': src,
'ext': (mimetype2ext(src_type) 'ext': (mimetype2ext(src_type)
or ext if ext in KNOWN_EXTENSIONS else 'mp4'), or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
'http_headers': {
'Referer': full_response.geturl(),
},
}) })
if formats: if formats:
self._sort_formats(formats) self._sort_formats(formats)
@ -3468,7 +3451,7 @@ class GenericIE(InfoExtractor):
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player: # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None: if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)) found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
if not found: if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search( found = re.search(

View File

@ -1,191 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
    # Matches liveleak.com "view" pages whose query string carries the
    # video ID in an ``i=`` or ``t=`` parameter.
    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'md5': '0813c2430bea7a46bf13acf3406992f4',
        'info_dict': {
            'id': '757_1364311680',
            'ext': 'mp4',
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
        'info_dict': {
            'id': 'f93_1390833151',
            'ext': 'mp4',
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Prochan embed
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
        'md5': '42c6d97d54f1db107958760788c5f48f',
        'info_dict': {
            'id': '4f7_1392687779',
            'ext': 'mp4',
            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
            'uploader': 'CapObveus',
            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
            'age_limit': 18,
        },
        'skip': 'Video is dead',
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
        # Multiple resolutions
        'url': 'http://www.liveleak.com/view?i=801_1409392012',
        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
        'info_dict': {
            'id': '801_1409392012',
            'ext': 'mp4',
            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
            'uploader': 'bony333',
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
        'url': 'http://m.liveleak.com/view?i=763_1473349649',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '763_1473349649',
            'ext': 'mp4',
            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
            'uploader': 'Ziz',
            'upload_date': '20160908',
            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.liveleak.com/view?i=677_1439397581',
        'info_dict': {
            'id': '677_1439397581',
            'title': 'Fuel Depot in China Explosion caught on video',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
        'only_matching': True,
    }, {
        # No original video
        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every LiveLeak ``ll_embed`` iframe URL found in webpage."""
        embed_re = r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"'
        return re.findall(embed_re, webpage)

    def _real_extract(self, url):
        """Extract a LiveLeak view page.

        Returns a playlist of the HTML5 media entries found on the page, or
        a ``url_transparent`` result pointing at an embedded prochan/youtube
        player when the page carries no HTML5 media of its own.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page-level metadata, shared by every entry found on the page.
        title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
        description = self._og_search_description(webpage)
        uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
        age_limit_text = self._search_regex(
            r'you confirm that you are ([0-9]+) years and over.',
            webpage, 'age limit', default=None)
        age_limit = int_or_none(age_limit_text)
        thumbnail = self._og_search_thumbnail(webpage)

        entries = self._parse_html5_media_entries(url, webpage, video_id)

        if not entries:
            # Maybe an embed?
            return {
                '_type': 'url_transparent',
                'url': self._search_regex(
                    r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
                    webpage, 'embed URL'),
                'id': video_id,
                'title': title,
                'description': description,
                'uploader': uploader,
                'age_limit': age_limit,
            }

        # Don't append entry ID for one-video pages to keep backward compatibility
        several_entries = len(entries) > 1

        for position, entry in enumerate(entries, 1):
            formats = []
            for fmt in entry['formats']:
                fmt_url = fmt['url']
                if not fmt.get('height'):
                    # Fall back to the "<height>p.mp4" label in the URL.
                    fmt['height'] = int_or_none(self._search_regex(
                        r'([0-9]+)p\.mp4', fmt_url, 'height label',
                        default=None))
                formats.append(fmt)

                # Removing '.*.mp4' gives the raw video, which is essentially
                # the same video without the LiveLeak logo at the top (see
                # https://github.com/ytdl-org/youtube-dl/pull/4768)
                raw_url = re.sub(r'\.mp4\.[^.]+', '', fmt_url)
                if raw_url == fmt_url:
                    continue
                base_format_id = fmt.get('format_id')
                if base_format_id:
                    raw_format_id = 'original' + '-' + base_format_id
                else:
                    raw_format_id = 'original'
                if not self._is_valid_url(raw_url, video_id, raw_format_id):
                    continue
                formats.append({
                    'format_id': raw_format_id,
                    'url': raw_url,
                    'preference': 1,
                })

            self._sort_formats(formats)
            entry['formats'] = formats

            if several_entries:
                entry['id'] = '%s_%s' % (video_id, position)
            else:
                entry['id'] = video_id

            entry.update({
                'title': title,
                'description': description,
                'uploader': uploader,
                'age_limit': age_limit,
                'thumbnail': thumbnail,
            })

        return self.playlist_result(entries, video_id, title)
class LiveLeakEmbedIE(InfoExtractor):
    # ``kind`` is the name of the query parameter (i, f or t) that carries
    # the identifier captured as ``id``.
    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'

    # See generic.py for actual test cases
    _TESTS = [{
        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
        'only_matching': True,
    }, {
        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve an ``ll_embed`` URL to its LiveLeak view page.

        For ``i``/``t`` embeds the view URL is built directly from the
        matched parameter; for ``f`` embeds it is scraped from the embed
        page. The result is delegated to LiveLeakIE.
        """
        match = re.match(self._VALID_URL, url)
        kind = match.group('kind')
        video_id = match.group('id')

        if kind != 'f':
            view_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
        else:
            embed_page = self._download_webpage(url, video_id)
            view_url_re = (
                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)'
                % LiveLeakIE._VALID_URL)
            view_url = self._search_regex(
                view_url_re, embed_page, 'LiveLeak URL', group='url')

        return self.url_result(view_url, ie=LiveLeakIE.ie_key())

View File

@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None): def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
return self._download_json( return self._download_json(
urljoin('http://psapi.nrk.no/', path), urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item, video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query, fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'}) headers={'Accept-Encoding': 'gzip, deflate, br'})

View File

@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
elif ext == 'f4m': elif ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False)) src, video_id, f4m_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id=format_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
}) })
upload_date = unified_strdate(sd.get('created_date')) upload_date = unified_strdate(sd.get('created_date'))
thumbnails = []
preview = sd.get('preview_image_url')
if preview:
thumbnails.append({
'id': 'preview',
'url': preview,
'preference': 0,
})
image = sd.get('image_full_url')
if not image and len(data_jsb) == 1:
image = self._og_search_thumbnail(webpage)
if image:
thumbnails.append({
'id': 'full',
'url': image,
'preference': 1,
})
entries.append({ entries.append({
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
'description': sd.get('description'), 'description': sd.get('description'),
'duration': int_or_none(sd.get('duration_in_seconds')), 'duration': int_or_none(sd.get('duration_in_seconds')),
'upload_date': upload_date, 'upload_date': upload_date,
'thumbnail': sd.get('image_full_url'), 'thumbnails': thumbnails,
}) })
return { return {
@ -182,7 +204,7 @@ class ORFRadioIE(InfoExtractor):
duration = end - start if end and start else None duration = end - start if end and start else None
entries.append({ entries.append({
'id': loop_stream_id.replace('.mp3', ''), 'id': loop_stream_id.replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'title': title, 'title': title,
'description': clean_html(data.get('subtitle')), 'description': clean_html(data.get('subtitle')),
'duration': duration, 'duration': duration,

View File

@ -569,15 +569,15 @@ class PeerTubeIE(InfoExtractor):
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
full_description = self._call_api( description = video.get('description')
host, video_id, 'description', note='Downloading description JSON', if len(description) >= 250:
fatal=False) # description is shortened
full_description = self._call_api(
host, video_id, 'description', note='Downloading description JSON',
fatal=False)
description = None if isinstance(full_description, dict):
if isinstance(full_description, dict): description = str_or_none(full_description.get('description')) or description
description = str_or_none(full_description.get('description'))
if not description:
description = video.get('description')
subtitles = self.extract_subtitles(host, video_id) subtitles = self.extract_subtitles(host, video_id)

View File

@ -12,6 +12,10 @@ from ..utils import (
class PeriscopeBaseIE(InfoExtractor): class PeriscopeBaseIE(InfoExtractor):
_M3U8_HEADERS = {
'Referer': 'https://www.periscope.tv/'
}
def _call_api(self, method, query, item_id): def _call_api(self, method, query, item_id):
return self._download_json( return self._download_json(
'https://api.periscope.tv/api/v2/%s' % method, 'https://api.periscope.tv/api/v2/%s' % method,
@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor):
m3u8_url, video_id, 'mp4', m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native' entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8', if state in ('ended', 'timed_out') else 'm3u8',
m3u8_id=format_id, fatal=fatal) m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
if len(m3u8_formats) == 1: if len(m3u8_formats) == 1:
self._add_width_and_height(m3u8_formats[0], width, height) self._add_width_and_height(m3u8_formats[0], width, height)
for f in m3u8_formats:
f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
return m3u8_formats return m3u8_formats

View File

@ -9,8 +9,9 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
merge_dicts, merge_dicts,
try_get,
unified_timestamp, unified_timestamp,
xpath_text, urljoin,
) )
@ -27,10 +28,11 @@ class PhoenixIE(ZDFBaseIE):
'title': 'Wohin führt der Protest in der Pandemie?', 'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691, 'duration': 1691,
'timestamp': 1613906100, 'timestamp': 1613902500,
'upload_date': '20210221', 'upload_date': '20210221',
'uploader': 'Phoenix', 'uploader': 'Phoenix',
'channel': 'corona nachgehakt', 'series': 'corona nachgehakt',
'episode': 'Wohin führt der Protest in der Pandemie?',
}, },
}, { }, {
# Youtube embed # Youtube embed
@ -79,50 +81,53 @@ class PhoenixIE(ZDFBaseIE):
video_id = compat_str(video.get('basename') or video.get('content')) video_id = compat_str(video.get('basename') or video.get('content'))
details = self._download_xml( details = self._download_json(
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
video_id, 'Downloading details XML', query={ video_id, 'Downloading details JSON', query={
'ak': 'web', 'ak': 'web',
'ptmd': 'true', 'ptmd': 'true',
'id': video_id, 'id': video_id,
'profile': 'player2', 'profile': 'player2',
}) })
title = title or xpath_text( title = title or details['title']
details, './/information/title', 'title', fatal=True) content_id = details['tracking']['nielsen']['content']['assetid']
content_id = xpath_text(
details, './/video/details/basename', 'content id', fatal=True)
info = self._extract_ptmd( info = self._extract_ptmd(
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
content_id, None, url) content_id, None, url)
timestamp = unified_timestamp(xpath_text(details, './/details/airtime')) duration = int_or_none(try_get(
details, lambda x: x['tracking']['nielsen']['content']['length']))
timestamp = unified_timestamp(details.get('editorialDate'))
series = try_get(
details, lambda x: x['tracking']['nielsen']['content']['program'],
compat_str)
episode = title if details.get('contentType') == 'episode' else None
thumbnails = [] thumbnails = []
for node in details.findall('.//teaserimages/teaserimage'): teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
thumbnail_url = node.text for thumbnail_key, thumbnail_url in teaser_images.items():
thumbnail_url = urljoin(url, thumbnail_url)
if not thumbnail_url: if not thumbnail_url:
continue continue
thumbnail = { thumbnail = {
'url': thumbnail_url, 'url': thumbnail_url,
} }
thumbnail_key = node.get('key') m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if thumbnail_key: if m:
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) thumbnail['width'] = int(m.group(1))
if m: thumbnail['height'] = int(m.group(2))
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail) thumbnails.append(thumbnail)
return merge_dicts(info, { return merge_dicts(info, {
'id': content_id, 'id': content_id,
'title': title, 'title': title,
'description': xpath_text(details, './/information/detail'), 'description': details.get('leadParagraph'),
'duration': int_or_none(xpath_text(details, './/details/lengthSec')), 'duration': duration,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': timestamp, 'timestamp': timestamp,
'uploader': xpath_text(details, './/details/channel'), 'uploader': details.get('tvService'),
'uploader_id': xpath_text(details, './/details/originChannelId'), 'series': series,
'channel': xpath_text(details, './/details/originChannelTitle'), 'episode': episode,
}) })

View File

@ -0,0 +1,65 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
smuggle_url,
try_get,
)
class PlayStuffIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
        'md5': 'c82d3669e5247c64bc382577843e5bd0',
        'info_dict': {
            'id': '6250584958001',
            'ext': 'mp4',
            'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
            'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
            'uploader_id': '6005208634001',
            'timestamp': 1619491027,
            'upload_date': '20210427',
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        # geo restricted, bypassable
        'url': 'https://play.stuff.co.nz/details/_6155660351001',
        'only_matching': True,
    }]

    # Filled with (account id, player id, video id).
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a playlist of Brightcove videos from the page's initial state.

        The page embeds a JSON object assigned to ``__INITIAL_STATE__``; every
        dict under its ``items`` mapping that carries a string
        ``content.attributes.assetId`` becomes one BrightcoveNew entry.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        state_json = self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state')
        state = self._parse_json(state_json, video_id)

        def configuration(key, fallback):
            # String setting from state['configurations'], or the fallback
            # when it is missing, empty or not a string.
            return try_get(
                state, lambda x: x['configurations'][key],
                compat_str) or fallback

        account_id = configuration('accountId', '6005208634001')
        player_id = configuration('playerId', 'default')

        entries = []
        for video in state['items'].values():
            if not isinstance(video, dict):
                continue
            asset_id = try_get(
                video, lambda x: x['content']['attributes']['assetId'],
                compat_str)
            if not asset_id:
                continue
            brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
                account_id, player_id, asset_id)
            # Smuggle the NZ geo hint along with the URL for BrightcoveNew.
            entries.append(self.url_result(
                smuggle_url(brightcove_url, {'geo_countries': ['NZ']}),
                'BrightcoveNew', video_id))

        return self.playlist_result(entries, video_id)

View File

@ -30,6 +30,7 @@ from ..utils import (
class PornHubBaseIE(InfoExtractor): class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub' _NETRC_MACHINE = 'pornhub'
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs): def dl(*args, **kwargs):
@ -122,11 +123,13 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:[^/]+\.)?
%s
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/ (?:www\.)?thumbzilla\.com/video/
) )
(?P<id>[\da-z]+) (?P<id>[\da-z]+)
''' ''' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9', 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@ -236,6 +239,13 @@ class PornHubIE(PornHubBaseIE):
}, { }, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True, 'only_matching': True,
}, {
# geo restricted
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -275,6 +285,11 @@ class PornHubIE(PornHubBaseIE):
'PornHub said: %s' % error_msg, 'PornHub said: %s' % error_msg,
expected=True, video_id=video_id) expected=True, video_id=video_id)
if any(re.search(p, webpage) for p in (
r'class=["\']geoBlocked["\']',
r'>\s*This content is unavailable in your country')):
self.raise_geo_restricted()
# video_title from flashvars contains whitespace instead of non-ASCII (see # video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore. # on that anymore.
@ -408,17 +423,14 @@ class PornHubIE(PornHubBaseIE):
format_url, video_id, 'mp4', entry_protocol='m3u8_native', format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
return return
tbr = None if not height:
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url) height = int_or_none(self._search_regex(
if mobj: r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
if not height: default=None))
height = int(mobj.group('height'))
tbr = int(mobj.group('tbr'))
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format_id': '%dp' % height if height else None, 'format_id': '%dp' % height if height else None,
'height': height, 'height': height,
'tbr': tbr,
}) })
for video_url, height in video_urls: for video_url, height in video_urls:
@ -440,7 +452,8 @@ class PornHubIE(PornHubBaseIE):
add_format(video_url, height) add_format(video_url, height)
continue continue
add_format(video_url) add_format(video_url)
self._sort_formats(formats) self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@ -513,7 +526,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph', 'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118, 'playlist_mincount': 118,
@ -542,6 +555,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
# Same as before, multi page # Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -617,7 +633,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)' _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True, 'only_matching': True,
@ -722,6 +738,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
}, { }, {
'url': 'https://de.pornhub.com/playlist/4667351', 'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
'only_matching': True,
}] }]
@classmethod @classmethod
@ -732,7 +751,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': { 'info_dict': {
@ -742,4 +761,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
}, { }, {
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
}] }]

View File

@ -133,8 +133,10 @@ class RedBullEmbedIE(RedBullTVIE):
rrn_id = self._match_id(url) rrn_id = self._match_id(url)
asset_id = self._download_json( asset_id = self._download_json(
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'}, rrn_id, headers={
query={ 'Accept': 'application/json',
'API-KEY': 'e90a1ff11335423998b100c929ecc866',
}, query={
'query': '''{ 'query': '''{
resource(id: "%s", enforceGeoBlocking: false) { resource(id: "%s", enforceGeoBlocking: false) {
%s %s

View File

@ -21,6 +21,7 @@ from ..utils import (
class ShahidBaseIE(AWSIE): class ShahidBaseIE(AWSIE):
_AWS_PROXY_HOST = 'api2.shahid.net' _AWS_PROXY_HOST = 'api2.shahid.net'
_AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
_VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
def _handle_error(self, e): def _handle_error(self, e):
fail_data = self._parse_json( fail_data = self._parse_json(
@ -49,7 +50,7 @@ class ShahidBaseIE(AWSIE):
class ShahidIE(ShahidBaseIE): class ShahidIE(ShahidBaseIE):
_NETRC_MACHINE = 'shahid' _NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924', 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
'info_dict': { 'info_dict': {
@ -73,6 +74,9 @@ class ShahidIE(ShahidBaseIE):
# shahid plus subscriber only # shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511', 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
'only_matching': True 'only_matching': True
}, {
'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
'only_matching': True
}] }]
def _real_initialize(self): def _real_initialize(self):
@ -168,7 +172,7 @@ class ShahidIE(ShahidBaseIE):
class ShahidShowIE(ShahidBaseIE): class ShahidShowIE(ShahidBaseIE):
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
'info_dict': { 'info_dict': {

View File

@ -86,10 +86,10 @@ class SharedIE(SharedBaseIE):
class VivoIE(SharedBaseIE): class VivoIE(SharedBaseIE):
IE_DESC = 'vivo.sx' IE_DESC = 'vivo.sx'
_VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})' _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
_FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
_TEST = { _TESTS = [{
'url': 'http://vivo.sx/d7ddda0e78', 'url': 'http://vivo.sx/d7ddda0e78',
'md5': '15b3af41be0b4fe01f4df075c2678b2c', 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
'info_dict': { 'info_dict': {
@ -98,7 +98,10 @@ class VivoIE(SharedBaseIE):
'title': 'Chicken', 'title': 'Chicken',
'filesize': 515659, 'filesize': 515659,
}, },
} }, {
'url': 'http://vivo.st/d7ddda0e78',
'only_matching': True,
}]
def _extract_title(self, webpage): def _extract_title(self, webpage):
title = self._html_search_regex( title = self._html_search_regex(

View File

@ -123,6 +123,10 @@ class TEDIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# with own formats and private Youtube external
'url': 'https://www.ted.com/talks/spencer_wells_a_family_tree_for_humanity',
'only_matching': True,
}] }]
_NATIVE_FORMATS = { _NATIVE_FORMATS = {
@ -210,16 +214,6 @@ class TEDIE(InfoExtractor):
player_talk = talk_info['player_talks'][0] player_talk = talk_info['player_talks'][0]
external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return self.url_result(ext_url or external['uri'])
resources_ = player_talk.get('resources') or talk_info.get('resources') resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None http_url = None
@ -294,6 +288,16 @@ class TEDIE(InfoExtractor):
'vcodec': 'none', 'vcodec': 'none',
}) })
if not formats:
external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return self.url_result(ext_url or external['uri'])
self._sort_formats(formats) self._sort_formats(formats)
video_id = compat_str(talk_info['id']) video_id = compat_str(talk_info['id'])

View File

@ -49,6 +49,7 @@ class TwitchBaseIE(InfoExtractor):
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
} }
@ -893,7 +894,25 @@ class TwitchClipsIE(TwitchBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
clip = self._download_base_gql( clip = self._download_gql(
video_id, [{
'operationName': 'VideoAccessToken_Clip',
'variables': {
'slug': video_id,
},
}],
'Downloading clip access token GraphQL')[0]['data']['clip']
if not clip:
raise ExtractorError(
'This clip is no longer available', expected=True)
access_query = {
'sig': clip['playbackAccessToken']['signature'],
'token': clip['playbackAccessToken']['value'],
}
data = self._download_base_gql(
video_id, { video_id, {
'query': '''{ 'query': '''{
clip(slug: "%s") { clip(slug: "%s") {
@ -918,11 +937,10 @@ class TwitchClipsIE(TwitchBaseIE):
} }
viewCount viewCount
} }
}''' % video_id}, 'Downloading clip GraphQL')['data']['clip'] }''' % video_id}, 'Downloading clip GraphQL', fatal=False)
if not clip: if data:
raise ExtractorError( clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
'This clip is no longer available', expected=True)
formats = [] formats = []
for option in clip.get('videoQualities', []): for option in clip.get('videoQualities', []):
@ -932,7 +950,7 @@ class TwitchClipsIE(TwitchBaseIE):
if not source: if not source:
continue continue
formats.append({ formats.append({
'url': source, 'url': update_url_query(source, access_query),
'format_id': option.get('quality'), 'format_id': option.get('quality'),
'height': int_or_none(option.get('quality')), 'height': int_or_none(option.get('quality')),
'fps': int_or_none(option.get('frameRate')), 'fps': int_or_none(option.get('frameRate')),

View File

@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
'https://api.universal-music.de/graphql', 'https://graphql.universal-music.de/',
video_id, query={ video_id, query={
'query': '''{ 'query': '''{
universalMusic(channel:16) { universalMusic(channel:16) {
@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
formats = [] formats = []
def add_m3u8_format(format_id): def add_m3u8_format(format_id):
m3u8_formats = self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4', hls_url_template % format_id, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal='False') 'm3u8_native', m3u8_id='hls', fatal=False))
if m3u8_formats and m3u8_formats[0].get('height'):
formats.extend(m3u8_formats)
for f in video_data.get('formats', []): for f in video_data.get('formats', []):
f_url = f.get('url') f_url = f.get('url')

View File

@ -75,7 +75,7 @@ class UstreamIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
if mobj is not None: if mobj is not None:
return mobj.group('url') return mobj.group('url')

View File

@ -647,7 +647,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
expected=True) expected=True)
raise raise
if '://player.vimeo.com/video/' in url: if '//player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex( config = self._parse_json(self._search_regex(
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4: if config.get('view') == 4:

View File

@ -300,6 +300,13 @@ class VKIE(VKBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_sibnet_urls(webpage):
# https://help.sibnet.ru/?sibnet_video_embed
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
@ -408,6 +415,10 @@ class VKIE(VKBaseIE):
if odnoklassniki_url: if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
sibnet_urls = self._extract_sibnet_urls(info_page)
if sibnet_urls:
return self.url_result(sibnet_urls[0])
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts: if m_opts:
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))

View File

@ -4,13 +4,12 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
extract_attributes,
int_or_none, int_or_none,
str_to_int, str_to_int,
unescapeHTML,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
) )
from ..aes import aes_decrypt_text
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
@ -34,6 +33,7 @@ class YouPornIE(InfoExtractor):
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
}, },
'skip': 'This video has been disabled',
}, { }, {
# Unknown uploader # Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@ -78,6 +78,40 @@ class YouPornIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id
definitions = self._download_json(
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
display_id)
formats = []
for definition in definitions:
if not isinstance(definition, dict):
continue
video_url = url_or_none(definition.get('videoUrl'))
if not video_url:
continue
f = {
'url': video_url,
'filesize': int_or_none(definition.get('videoSize')),
}
height = int_or_none(definition.get('quality'))
# Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj:
if not height:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
f.update({
'format_id': '%dp-%dk' % (height, bitrate),
'tbr': bitrate,
})
f['height'] = height
formats.append(f)
self._sort_formats(formats)
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id, 'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'}) headers={'Cookie': 'age_verified=1'})
@ -88,65 +122,6 @@ class YouPornIE(InfoExtractor):
webpage, default=None) or self._html_search_meta( webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True) 'title', webpage, fatal=True)
links = []
# Main source
definitions = self._parse_json(
self._search_regex(
r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
'media definitions', default='[]'),
video_id, fatal=False)
if definitions:
for definition in definitions:
if not isinstance(definition, dict):
continue
video_url = url_or_none(definition.get('videoUrl'))
if video_url:
links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format
for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
links.append(link)
# Fallback #2 (unavailable as at 22.06.2017)
sources = self._search_regex(
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
if sources:
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
links.append(link)
# Fallback #3 (unavailable as at 22.06.2017)
for _, link in re.findall(
r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
links.append(link)
# Fallback #4, encrypted links (unavailable as at 22.06.2017)
for _, encrypted_link in re.findall(
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
formats = []
for video_url in set(unescapeHTML(link) for link in links):
f = {
'url': video_url,
}
# Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
f.update({
'format_id': '%dp-%dk' % (height, bitrate),
'height': height,
'tbr': bitrate,
})
formats.append(f)
self._sort_formats(formats)
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
webpage, 'description', webpage, 'description',
@ -169,13 +144,12 @@ class YouPornIE(InfoExtractor):
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
average_rating = int_or_none(self._search_regex( view_count = None
r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', views = self._search_regex(
webpage, 'average rating', fatal=False)) r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
'views', default=None)
view_count = str_to_int(self._search_regex( if views:
r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<', view_count = str_to_int(extract_attributes(views).get('data-value'))
webpage, 'view count', fatal=False, group='count'))
comment_count = str_to_int(self._search_regex( comment_count = str_to_int(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)', r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None)) webpage, 'comment count', default=None))
@ -201,7 +175,6 @@ class YouPornIE(InfoExtractor):
'duration': duration, 'duration': duration,
'uploader': uploader, 'uploader': uploader,
'upload_date': upload_date, 'upload_date': upload_date,
'average_rating': average_rating,
'view_count': view_count, 'view_count': view_count,
'comment_count': comment_count, 'comment_count': comment_count,
'categories': categories, 'categories': categories,

View File

@ -353,7 +353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.13ad\.de', r'(?:www\.)?invidious\.13ad\.de',
r'(?:www\.)?invidious\.mastodon\.host', r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr', r'(?:www\.)?invidious\.zapashcanon\.fr',
r'(?:www\.)?invidious\.kavin\.rocks', r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net', r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud', r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech', r'(?:www\.)?invidious\.reallyancient\.tech',
@ -380,6 +380,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.toot\.koeln', r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr', r'(?:www\.)?invidious\.fdn\.fr',
r'(?:www\.)?watch\.nettohikari\.com', r'(?:www\.)?watch\.nettohikari\.com',
r'(?:www\.)?invidious\.namazso\.eu',
r'(?:www\.)?invidious\.silkky\.cloud',
r'(?:www\.)?invidious\.exonip\.de',
r'(?:www\.)?invidious\.riverside\.rocks',
r'(?:www\.)?invidious\.blamefran\.net',
r'(?:www\.)?invidious\.moomoo\.de',
r'(?:www\.)?ytb\.trom\.tf',
r'(?:www\.)?yt\.cyberhost\.uk',
r'(?:www\.)?kgg2m7yk5aybusll\.onion', r'(?:www\.)?kgg2m7yk5aybusll\.onion',
r'(?:www\.)?qklhadlycap4cnod\.onion', r'(?:www\.)?qklhadlycap4cnod\.onion',
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion', r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
@ -388,6 +396,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion', r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p', r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion', r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
) )
_VALID_URL = r"""(?x)^ _VALID_URL = r"""(?x)^
( (
@ -1492,18 +1504,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
playability_status = player_response.get('playabilityStatus') or {} playability_status = player_response.get('playabilityStatus') or {}
if playability_status.get('reason') == 'Sign in to confirm your age': if playability_status.get('reason') == 'Sign in to confirm your age':
pr = self._parse_json(try_get(compat_parse_qs( video_info = self._download_webpage(
self._download_webpage( base_url + 'get_video_info', video_id,
base_url + 'get_video_info', video_id, 'Refetching age-gated info webpage',
'Refetching age-gated info webpage', 'unable to download video info webpage', query={
'unable to download video info webpage', query={ 'video_id': video_id,
'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'html5': 1,
}, fatal=False)), # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
lambda x: x['player_response'][0], 'c': 'TVHTML5',
compat_str) or '{}', video_id) 'cver': '6.20180913',
if pr: }, fatal=False)
player_response = pr if video_info:
pr = self._parse_json(
try_get(
compat_parse_qs(video_info),
lambda x: x['player_response'][0], compat_str) or '{}',
video_id, fatal=False)
if pr and isinstance(pr, dict):
player_response = pr
trailer_video_id = try_get( trailer_video_id = try_get(
playability_status, playability_status,

View File

@ -768,7 +768,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='rm_cachedir', action='store_true', dest='rm_cachedir',
help='Delete all filesystem cache files') help='Delete all filesystem cache files')
thumbnail = optparse.OptionGroup(parser, 'Thumbnail images') thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
thumbnail.add_option( thumbnail.add_option(
'--write-thumbnail', '--write-thumbnail',
action='store_true', dest='writethumbnail', default=False, action='store_true', dest='writethumbnail', default=False,

View File

@ -231,7 +231,10 @@ class FFmpegPostProcessor(PostProcessor):
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
if p.returncode != 0: if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace') stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1] msgs = stderr.strip().split('\n')
msg = msgs[-1]
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ' + '\n'.join(msgs[:-1]))
raise FFmpegPostProcessorError(msg) raise FFmpegPostProcessorError(msg)
self.try_utime(out_path, oldest_mtime, oldest_mtime) self.try_utime(out_path, oldest_mtime, oldest_mtime)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2021.04.26' __version__ = '2021.06.06'