Merge branch 'master' of https://github.com/ytdl-org/youtube-dl into mkvthumbnail

This commit is contained in:
MrDoritos 2020-05-15 10:30:26 -06:00
commit 332d74ae9c
51 changed files with 1302 additions and 668 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've verified that I'm running youtube-dl version **2020.05.08**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.03.08 [debug] youtube-dl version 2020.05.08
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've verified that I'm running youtube-dl version **2020.05.08**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've verified that I'm running youtube-dl version **2020.05.08**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've verified that I'm running youtube-dl version **2020.05.08**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.03.08 [debug] youtube-dl version 2020.05.08
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.05.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2020.03.08** - [ ] I've verified that I'm running youtube-dl version **2020.05.08**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,7 +1,87 @@
version 2020.05.08
Core
* [downloader/http] Request last data block of exact remaining size
* [downloader/http] Finish downloading once received data length matches
expected
* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
+ [compat] Introduce compat_cookiejar_Cookie
* [utils] Improve cookie files support
+ Add support for UTF-8 in cookie files
* Skip malformed cookie file entries instead of crashing (invalid entry
length, invalid expires at)
Extractors
* [youtube] Improve signature cipher extraction (#25187, #25188)
* [iprima] Improve extraction (#25138)
* [uol] Fix extraction (#22007)
+ [orf] Add support for more radio stations (#24938, #24968)
* [dailymotion] Fix typo
- [puhutv] Remove no longer available HTTP formats (#25124)
version 2020.05.03
Core
+ [extractor/common] Extract multiple JSON-LD entries
* [options] Clarify doc on --exec command (#19087, #24883)
* [extractor/common] Skip malformed ISM manifest XMLs while extracting
ISM formats (#24667)
Extractors
* [crunchyroll] Fix and improve extraction (#25096, #25060)
* [youtube] Improve player id extraction
* [youtube] Use redirected video id if any (#25063)
* [yahoo] Fix GYAO Player extraction and relax URL regular expression
(#24178, #24778)
* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
* [tenplay] Relax URL regular expression (#25001)
+ [prosiebensat1] Extract series metadata
* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
- [prosiebensat1] Remove 7tv.de support (#24948)
* [youtube] Fix DRM videos detection (#24736)
* [thisoldhouse] Fix video id extraction (#24548, #24549)
+ [soundcloud] Extract AAC format (#19173, #24708)
* [youtube] Skip broken multifeed videos (#24711)
* [nova:embed] Fix extraction (#24700)
* [motherless] Fix extraction (#24699)
* [twitch:clips] Extend URL regular expression (#24290, #24642)
* [tv4] Fix ISM formats extraction (#24667)
* [tele5] Fix extraction (#24553)
+ [mofosex] Add support for generic embeds (#24633)
+ [youporn] Add support for generic embeds
+ [spankwire] Add support for generic embeds (#24633)
* [spankwire] Fix extraction (#18924, #20648)
version 2020.03.24
Core
- [utils] Revert support for cookie files with spaces used instead of tabs
Extractors
* [teachable] Update upskillcourses and gns3 domains
* [generic] Look for teachable embeds before wistia
+ [teachable] Extract chapter metadata (#24421)
+ [bilibili] Add support for player.bilibili.com (#24402)
+ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
* [limelight] Remove disabled API requests (#24255)
* [soundcloud] Fix download URL extraction (#24394)
+ [cbc:watch] Add support for authentication (#19160)
* [hellporno] Fix extraction (#24399)
* [xtube] Fix formats extraction (#24348)
* [ndr] Fix extraction (#24326)
* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
- [nhk] Remove obsolete rtmp formats (#24329)
* [nhk] Relax URL regular expression (#24329)
- [vimeo] Revert fix showcase password protected video extraction (#24224)
version 2020.03.08 version 2020.03.08
Core Core
+ [utils] Add support for cookie files with spaces + [utils] Add support for cookie files with spaces used instead of tabs
Extractors Extractors
+ [pornhub] Add support for pornhubpremium.com (#24288) + [pornhub] Add support for pornhubpremium.com (#24288)

View File

@ -434,9 +434,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
either the path to the binary or its either the path to the binary or its
containing directory. containing directory.
--exec CMD Execute a command on the file after --exec CMD Execute a command on the file after
downloading, similar to find's -exec downloading and post-processing, similar to
syntax. Example: --exec 'adb push {} find's -exec syntax. Example: --exec 'adb
/sdcard/Music/ && rm {}' push {} /sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format --convert-subs FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt|lrc) (currently supported: srt|ass|vtt|lrc)
@ -1032,7 +1032,7 @@ After you have ensured this site is distributing its content legally, you can fo
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py $ flake8 youtube_dl/extractor/yourextractor.py

View File

@ -98,6 +98,7 @@
- **BiliBili** - **BiliBili**
- **BilibiliAudio** - **BilibiliAudio**
- **BilibiliAudioAlbum** - **BilibiliAudioAlbum**
- **BiliBiliPlayer**
- **BioBioChileTV** - **BioBioChileTV**
- **BIQLE** - **BIQLE**
- **BitChute** - **BitChute**
@ -496,6 +497,7 @@
- **MNetTV** - **MNetTV**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex** - **Mofosex**
- **MofosexEmbed**
- **Mojvideo** - **Mojvideo**
- **Morningstar**: morningstar.com - **Morningstar**: morningstar.com
- **Motherless** - **Motherless**
@ -618,11 +620,21 @@
- **Ooyala** - **Ooyala**
- **OoyalaExternal** - **OoyalaExternal**
- **OraTV** - **OraTV**
- **orf:burgenland**: Radio Burgenland
- **orf:fm4**: radio FM4 - **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories - **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at - **orf:iptv**: iptv.ORF.at
- **orf:kaernten**: Radio Kärnten
- **orf:noe**: Radio Niederösterreich
- **orf:oberoesterreich**: Radio Oberösterreich
- **orf:oe1**: Radio Österreich 1 - **orf:oe1**: Radio Österreich 1
- **orf:oe3**: Radio Österreich 3
- **orf:salzburg**: Radio Salzburg
- **orf:steiermark**: Radio Steiermark
- **orf:tirol**: Radio Tirol
- **orf:tvthek**: ORF TVthek - **orf:tvthek**: ORF TVthek
- **orf:vorarlberg**: Radio Vorarlberg
- **orf:wien**: Radio Wien
- **OsnatelTV** - **OsnatelTV**
- **OutsideTV** - **OutsideTV**
- **PacktPub** - **PacktPub**

View File

@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
assert_cookie_has_value('HTTPONLY_COOKIE') assert_cookie_has_value('HTTPONLY_COOKIE')
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
def test_malformed_cookies(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True)
# Cookies should be empty since all malformed cookie file entries
# will be ignored
self.assertFalse(cookiejar._cookies)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -74,6 +74,28 @@ _TESTS = [
] ]
class TestPlayerInfo(unittest.TestCase):
def test_youtube_extract_player_info(self):
PLAYER_URLS = (
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
# obsolete
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
)
for player_url, expected_player_id in PLAYER_URLS:
expected_player_type = player_url.split('.')[-1]
player_type, player_id = YoutubeIE._extract_player_info(player_url)
self.assertEqual(player_type, expected_player_type)
self.assertEqual(player_id, expected_player_id)
class TestSignature(unittest.TestCase): class TestSignature(unittest.TestCase):
def setUp(self): def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))

View File

@ -0,0 +1,9 @@
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
# Cookie file entry with invalid number of fields - 6 instead of 7
www.foobar.foobar FALSE / FALSE 0 COOKIE
# Cookie file entry with invalid expires at
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE

View File

@ -57,6 +57,17 @@ try:
except ImportError: # Python 2 except ImportError: # Python 2
import cookielib as compat_cookiejar import cookielib as compat_cookiejar
if sys.version_info[0] == 2:
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
def __init__(self, version, name, value, *args, **kwargs):
if isinstance(name, compat_str):
name = name.encode()
if isinstance(value, compat_str):
value = value.encode()
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
else:
compat_cookiejar_Cookie = compat_cookiejar.Cookie
try: try:
import http.cookies as compat_cookies import http.cookies as compat_cookies
except ImportError: # Python 2 except ImportError: # Python 2
@ -2987,6 +2998,7 @@ __all__ = [
'compat_basestring', 'compat_basestring',
'compat_chr', 'compat_chr',
'compat_cookiejar', 'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies', 'compat_cookies',
'compat_ctypes_WINFUNCTYPE', 'compat_ctypes_WINFUNCTYPE',
'compat_etree_Element', 'compat_etree_Element',

View File

@ -227,7 +227,7 @@ class HttpFD(FileDownloader):
while True: while True:
try: try:
# Download and write # Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have # socket.timeout is a subclass of socket.error but may not have
# errno set # errno set
except socket.timeout as e: except socket.timeout as e:
@ -299,7 +299,7 @@ class HttpFD(FileDownloader):
'elapsed': now - ctx.start_time, 'elapsed': now - ctx.start_time,
}) })
if is_test and byte_counter == data_len: if data_len is not None and byte_counter == data_len:
break break
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:

View File

@ -528,7 +528,7 @@ class BBCCoUkIE(InfoExtractor):
def get_programme_id(item): def get_programme_id(item):
def get_from_attributes(item): def get_from_attributes(item):
for p in('identifier', 'group'): for p in ('identifier', 'group'):
value = item.get(p) value = item.get(p)
if value and re.match(r'^[pb][\da-z]{7}$', value): if value and re.match(r'^[pb][\da-z]{7}$', value):
return value return value

View File

@ -24,7 +24,18 @@ from ..utils import (
class BiliBiliIE(InfoExtractor): class BiliBiliIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:(?:www|bangumi)\.)?
bilibili\.(?:tv|com)/
(?:
(?:
video/[aA][vV]|
anime/(?P<anime_id>\d+)/play\#
)(?P<id_bv>\d+)|
video/[bB][vV](?P<id>[^/?#&]+)
)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/', 'url': 'http://www.bilibili.tv/video/av1074402/',
@ -92,6 +103,10 @@ class BiliBiliIE(InfoExtractor):
'skip_download': True, # Test metadata only 'skip_download': True, # Test metadata only
}, },
}] }]
}, {
# new BV video id format
'url': 'https://www.bilibili.com/video/BV1JE411F741',
'only_matching': True,
}] }]
_APP_KEY = 'iVGUTjsxvpLeuDCf' _APP_KEY = 'iVGUTjsxvpLeuDCf'
@ -109,7 +124,7 @@ class BiliBiliIE(InfoExtractor):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id') or mobj.group('id_bv')
anime_id = mobj.group('anime_id') anime_id = mobj.group('anime_id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
@ -419,3 +434,17 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
entries, am_id, album_title, album_data.get('intro')) entries, am_id, album_title, album_data.get('intro'))
return self.playlist_result(entries, am_id) return self.playlist_result(entries, am_id)
class BiliBiliPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
_TEST = {
'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
'only_matching': True,
}
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'http://www.bilibili.tv/video/av%s/' % video_id,
ie=BiliBiliIE.ie_key(), video_id=video_id)

View File

@ -1,8 +1,10 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import hashlib
import json import json
import re import re
from xml.sax.saxutils import escape
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -216,6 +218,29 @@ class CBCWatchBaseIE(InfoExtractor):
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
} }
_GEO_COUNTRIES = ['CA'] _GEO_COUNTRIES = ['CA']
_LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
_TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
_NETRC_MACHINE = 'cbcwatch'
def _signature(self, email, password):
data = json.dumps({
'email': email,
'password': password,
}).encode()
headers = {'content-type': 'application/json'}
query = {'apikey': self._API_KEY}
resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
access_token = resp['access_token']
# token
query = {
'access_token': access_token,
'apikey': self._API_KEY,
'jwtapp': 'jwt',
}
resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
return resp['signature']
def _call_api(self, path, video_id): def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path url = path if path.startswith('http') else self._API_BASE_URL + path
@ -239,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
if self._valid_device_token(): if self._valid_device_token():
return return
device = self._downloader.cache.load('cbcwatch', 'device') or {} device = self._downloader.cache.load(
'cbcwatch', self._cache_device_key()) or {}
self._device_id, self._device_token = device.get('id'), device.get('token') self._device_id, self._device_token = device.get('id'), device.get('token')
if self._valid_device_token(): if self._valid_device_token():
return return
@ -248,16 +274,30 @@ class CBCWatchBaseIE(InfoExtractor):
def _valid_device_token(self): def _valid_device_token(self):
return self._device_id and self._device_token return self._device_id and self._device_token
def _cache_device_key(self):
email, _ = self._get_login_info()
return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
def _register_device(self): def _register_device(self):
self._device_id = self._device_token = None
result = self._download_xml( result = self._download_xml(
self._API_BASE_URL + 'device/register', self._API_BASE_URL + 'device/register',
None, 'Acquiring device token', None, 'Acquiring device token',
data=b'<device><type>web</type></device>') data=b'<device><type>web</type></device>')
self._device_id = xpath_text(result, 'deviceId', fatal=True) self._device_id = xpath_text(result, 'deviceId', fatal=True)
self._device_token = xpath_text(result, 'deviceToken', fatal=True) email, password = self._get_login_info()
if email and password:
signature = self._signature(email, password)
data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
escape(signature), escape(self._device_id)).encode()
url = self._API_BASE_URL + 'device/login'
result = self._download_xml(
url, None, data=data,
headers={'content-type': 'application/xml'})
self._device_token = xpath_text(result, 'token', fatal=True)
else:
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
self._downloader.cache.store( self._downloader.cache.store(
'cbcwatch', 'device', { 'cbcwatch', self._cache_device_key(), {
'id': self._device_id, 'id': self._device_id,
'token': self._device_token, 'token': self._device_token,
}) })

View File

@ -15,7 +15,7 @@ import time
import math import math
from ..compat import ( from ..compat import (
compat_cookiejar, compat_cookiejar_Cookie,
compat_cookies, compat_cookies,
compat_etree_Element, compat_etree_Element,
compat_etree_fromstring, compat_etree_fromstring,
@ -1182,16 +1182,33 @@ class InfoExtractor(object):
'twitter card player') 'twitter card player')
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs): def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
json_ld = self._search_regex( json_ld_list = list(re.finditer(JSON_LD_RE, html))
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
default = kwargs.get('default', NO_DEFAULT) default = kwargs.get('default', NO_DEFAULT)
if not json_ld:
return default if default is not NO_DEFAULT else {}
# JSON-LD may be malformed and thus `fatal` should be respected. # JSON-LD may be malformed and thus `fatal` should be respected.
# At the same time `default` may be passed that assumes `fatal=False` # At the same time `default` may be passed that assumes `fatal=False`
# for _search_regex. Let's simulate the same behavior here as well. # for _search_regex. Let's simulate the same behavior here as well.
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type) json_ld = []
for mobj in json_ld_list:
json_ld_item = self._parse_json(
mobj.group('json_ld'), video_id, fatal=fatal)
if not json_ld_item:
continue
if isinstance(json_ld_item, dict):
json_ld.append(json_ld_item)
elif isinstance(json_ld_item, (list, tuple)):
json_ld.extend(json_ld_item)
if json_ld:
json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
if json_ld:
return json_ld
if default is not NO_DEFAULT:
return default
elif fatal:
raise RegexNotFoundError('Unable to extract JSON-LD')
else:
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
if isinstance(json_ld, compat_str): if isinstance(json_ld, compat_str):
@ -1256,10 +1273,10 @@ class InfoExtractor(object):
extract_interaction_statistic(e) extract_interaction_statistic(e)
for e in json_ld: for e in json_ld:
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')): if '@context' in e:
item_type = e.get('@type') item_type = e.get('@type')
if expected_type is not None and expected_type != item_type: if expected_type is not None and expected_type != item_type:
return info continue
if item_type in ('TVEpisode', 'Episode'): if item_type in ('TVEpisode', 'Episode'):
episode_name = unescapeHTML(e.get('name')) episode_name = unescapeHTML(e.get('name'))
info.update({ info.update({
@ -1293,11 +1310,17 @@ class InfoExtractor(object):
}) })
elif item_type == 'VideoObject': elif item_type == 'VideoObject':
extract_video_object(e) extract_video_object(e)
continue if expected_type is None:
continue
else:
break
video = e.get('video') video = e.get('video')
if isinstance(video, dict) and video.get('@type') == 'VideoObject': if isinstance(video, dict) and video.get('@type') == 'VideoObject':
extract_video_object(video) extract_video_object(video)
break if expected_type is None:
continue
else:
break
return dict((k, v) for k, v in info.items() if v is not None) return dict((k, v) for k, v in info.items() if v is not None)
@staticmethod @staticmethod
@ -2340,6 +2363,8 @@ class InfoExtractor(object):
if res is False: if res is False:
return [] return []
ism_doc, urlh = res ism_doc, urlh = res
if ism_doc is None:
return []
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
@ -2818,7 +2843,7 @@ class InfoExtractor(object):
def _set_cookie(self, domain, name, value, expire_time=None, port=None, def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs): path='/', secure=False, discard=False, rest={}, **kwargs):
cookie = compat_cookiejar.Cookie( cookie = compat_cookiejar_Cookie(
0, name, value, port, port is not None, domain, True, 0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time, domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest) discard, None, None, rest)

View File

@ -13,6 +13,7 @@ from ..compat import (
compat_b64decode, compat_b64decode,
compat_etree_Element, compat_etree_Element,
compat_etree_fromstring, compat_etree_fromstring,
compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urllib_request, compat_urllib_request,
compat_urlparse, compat_urlparse,
@ -25,9 +26,9 @@ from ..utils import (
intlist_to_bytes, intlist_to_bytes,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
merge_dicts,
remove_end, remove_end,
sanitized_Request, sanitized_Request,
unified_strdate,
urlencode_postdata, urlencode_postdata,
xpath_text, xpath_text,
) )
@ -136,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
# rtmp # rtmp
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Video gone',
}, { }, {
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1', 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
'info_dict': { 'info_dict': {
@ -157,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'info_dict': { 'info_dict': {
'id': '702409', 'id': '702409',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 The Morning of Our Promise Is Still Distant', 'title': compat_str,
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9', 'description': compat_str,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'TV TOKYO', 'uploader': 'Re:Zero Partners',
'upload_date': '20160508', 'timestamp': 1462098900,
'upload_date': '20160501',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -172,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'info_dict': { 'info_dict': {
'id': '727589', 'id': '727589',
'ext': 'mp4', 'ext': 'mp4',
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 Give Me Deliverance From This Judicial Injustice!", 'title': compat_str,
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d', 'description': compat_str,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Kadokawa Pictures Inc.', 'uploader': 'Kadokawa Pictures Inc.',
'upload_date': '20170118', 'timestamp': 1484130900,
'series': "KONOSUBA -God's blessing on this wonderful world!", 'upload_date': '20170111',
'series': compat_str,
'season': "KONOSUBA -God's blessing on this wonderful world! 2", 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
'season_number': 2, 'season_number': 2,
'episode': 'Give Me Deliverance From This Judicial Injustice!', 'episode': 'Give Me Deliverance From This Judicial Injustice!',
@ -200,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'info_dict': { 'info_dict': {
'id': '535080', 'id': '535080',
'ext': 'mp4', 'ext': 'mp4',
'title': '11eyes Episode 1 Red Night ~ Piros éjszaka', 'title': compat_str,
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".', 'description': compat_str,
'uploader': 'Marvelous AQL Inc.', 'uploader': 'Marvelous AQL Inc.',
'upload_date': '20091021', 'timestamp': 1255512600,
'upload_date': '20091014',
}, },
'params': { 'params': {
# Just test metadata extraction # Just test metadata extraction
@ -224,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
# just test metadata extraction # just test metadata extraction
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Video gone',
}, { }, {
# A video with a vastly different season name compared to the series name # A video with a vastly different season name compared to the series name
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532', 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
'info_dict': { 'info_dict': {
'id': '590532', 'id': '590532',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Haiyoru! Nyaruani (ONA) Episode 1 Test', 'title': compat_str,
'description': 'Mahiro and Nyaruko talk about official certification.', 'description': compat_str,
'uploader': 'TV TOKYO', 'uploader': 'TV TOKYO',
'timestamp': 1330956000,
'upload_date': '20120305', 'upload_date': '20120305',
'series': 'Nyarko-san: Another Crawling Chaos', 'series': 'Nyarko-san: Another Crawling Chaos',
'season': 'Haiyoru! Nyaruani (ONA)', 'season': 'Haiyoru! Nyaruani (ONA)',
@ -442,23 +449,21 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
webpage, 'language', default=None, group='lang') webpage, 'language', default=None, group='lang')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>', (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
webpage, 'video_title') r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
webpage, 'video_title', default=None)
if not video_title:
video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
video_title = re.sub(r' {2,}', ' ', video_title) video_title = re.sub(r' {2,}', ' ', video_title)
video_description = (self._parse_json(self._html_search_regex( video_description = (self._parse_json(self._html_search_regex(
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
webpage, 'description', default='{}'), video_id) or media_metadata).get('description') webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
if video_description: if video_description:
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
video_upload_date = self._html_search_regex(
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
# try looking for both an uploader that's a link and one that's not # try looking for both an uploader that's a link and one that's not
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
webpage, 'video_uploader', fatal=False) webpage, 'video_uploader', default=False)
formats = [] formats = []
for stream in media.get('streams', []): for stream in media.get('streams', []):
@ -611,14 +616,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
webpage, 'season number', default=None)) webpage, 'season number', default=None))
return { info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts({
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'description': video_description, 'description': video_description,
'duration': duration, 'duration': duration,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': video_upload_date,
'series': series, 'series': series,
'season': season, 'season': season,
'season_number': season_number, 'season_number': season_number,
@ -626,7 +632,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'episode_number': episode_number, 'episode_number': episode_number,
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
} }, info)
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):

View File

@ -32,7 +32,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _get_cookie_value(cookies, name): def _get_cookie_value(cookies, name):
cookie = cookies.get('name') cookie = cookies.get(name)
if cookie: if cookie:
return cookie.value return cookie.value

View File

@ -105,6 +105,7 @@ from .bilibili import (
BiliBiliBangumiIE, BiliBiliBangumiIE,
BilibiliAudioIE, BilibiliAudioIE,
BilibiliAudioAlbumIE, BilibiliAudioAlbumIE,
BiliBiliPlayerIE,
) )
from .biobiochiletv import BioBioChileTVIE from .biobiochiletv import BioBioChileTVIE
from .bitchute import ( from .bitchute import (
@ -635,7 +636,10 @@ from .mixcloud import (
from .mlb import MLBIE from .mlb import MLBIE
from .mnet import MnetIE from .mnet import MnetIE
from .moevideo import MoeVideoIE from .moevideo import MoeVideoIE
from .mofosex import MofosexIE from .mofosex import (
MofosexIE,
MofosexEmbedIE,
)
from .mojvideo import MojvideoIE from .mojvideo import MojvideoIE
from .morningstar import MorningstarIE from .morningstar import MorningstarIE
from .motherless import ( from .motherless import (
@ -800,6 +804,16 @@ from .orf import (
ORFFM4IE, ORFFM4IE,
ORFFM4StoryIE, ORFFM4StoryIE,
ORFOE1IE, ORFOE1IE,
ORFOE3IE,
ORFNOEIE,
ORFWIEIE,
ORFBGLIE,
ORFOOEIE,
ORFSTMIE,
ORFKTNIE,
ORFSBGIE,
ORFTIRIE,
ORFVBGIE,
ORFIPTVIE, ORFIPTVIE,
) )
from .outsidetv import OutsideTVIE from .outsidetv import OutsideTVIE

View File

@ -60,6 +60,9 @@ from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE from .drtuber import DrTuberIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .mofosex import MofosexEmbedIE
from .spankwire import SpankwireIE
from .youporn import YouPornIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE from .dailymail import DailyMailIE
@ -2536,6 +2539,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
# Look for Teachable embeds, must be before Wistia
teachable_url = TeachableIE._extract_url(webpage, url)
if teachable_url:
return self.url_result(teachable_url)
# Look for embedded Wistia player # Look for embedded Wistia player
wistia_urls = WistiaIE._extract_urls(webpage) wistia_urls = WistiaIE._extract_urls(webpage)
if wistia_urls: if wistia_urls:
@ -2710,6 +2718,21 @@ class GenericIE(InfoExtractor):
if tube8_urls: if tube8_urls:
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key()) return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
# Look for embedded Mofosex player
mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
if mofosex_urls:
return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
# Look for embedded Spankwire player
spankwire_urls = SpankwireIE._extract_urls(webpage)
if spankwire_urls:
return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
# Look for embedded YouPorn player
youporn_urls = YouPornIE._extract_urls(webpage)
if youporn_urls:
return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
# Look for embedded Tvigle player # Look for embedded Tvigle player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@ -3141,10 +3164,6 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key()) peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
teachable_url = TeachableIE._extract_url(webpage, url)
if teachable_url:
return self.url_result(teachable_url)
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage) indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls: if indavideo_urls:
return self.playlist_from_matches( return self.playlist_from_matches(

View File

@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
_GEO_BYPASS = False _GEO_BYPASS = False
_TESTS = [{ _TESTS = [{
'url': 'http://play.iprima.cz/gondici-s-r-o-33', 'url': 'https://prima.iprima.cz/particka/92-epizoda',
'info_dict': { 'info_dict': {
'id': 'p136534', 'id': 'p51388',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Gondíci s. r. o. (34)', 'title': 'Partička (92)',
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd', 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'https://cnn.iprima.cz/videa/70-epizoda',
'info_dict': {
'id': 'p681554',
'ext': 'mp4',
'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
}, },
'params': { 'params': {
'skip_download': True, # m3u8 download 'skip_download': True, # m3u8 download
@ -68,9 +78,15 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
r'<h1>([^<]+)', webpage, 'title')
video_id = self._search_regex( video_id = self._search_regex(
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
r'data-product="([^"]+)">'), r'data-product="([^"]+)">',
r'id=["\']player-(p\d+)"',
r'playerId\s*:\s*["\']player-(p\d+)'),
webpage, 'real id') webpage, 'real id')
playerpage = self._download_webpage( playerpage = self._download_webpage(
@ -125,8 +141,8 @@ class IPrimaIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': title,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'formats': formats, 'formats': formats,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage, default=None),
} }

View File

@ -18,7 +18,6 @@ from ..utils import (
class LimelightBaseIE(InfoExtractor): class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
@classmethod @classmethod
def _extract_urls(cls, webpage, source_url): def _extract_urls(cls, webpage, source_url):
@ -70,7 +69,8 @@ class LimelightBaseIE(InfoExtractor):
try: try:
return self._download_json( return self._download_json(
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) item_id, 'Downloading PlaylistService %s JSON' % method,
fatal=fatal, headers=headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission'] error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
@ -79,22 +79,22 @@ class LimelightBaseIE(InfoExtractor):
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise raise
def _call_api(self, organization_id, item_id, method): def _extract(self, item_id, pc_method, mobile_method, referer=None):
return self._download_json(
self._API_URL % (organization_id, self._API_PATH, item_id, method),
item_id, 'Downloading API %s JSON' % method)
def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
pc = self._call_playlist_service(item_id, pc_method, referer=referer) pc = self._call_playlist_service(item_id, pc_method, referer=referer)
metadata = self._call_api(pc['orgId'], item_id, meta_method) mobile = self._call_playlist_service(
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer) item_id, mobile_method, fatal=False, referer=referer)
return pc, mobile, metadata return pc, mobile
def _extract_info(self, pc, mobile, i, referer):
get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
pc_item = get_item(pc, 'playlistItems')
mobile_item = get_item(mobile, 'mediaList')
video_id = pc_item.get('mediaId') or mobile_item['mediaId']
title = pc_item.get('title') or mobile_item['title']
def _extract_info(self, streams, mobile_urls, properties):
video_id = properties['media_id']
formats = [] formats = []
urls = [] urls = []
for stream in streams: for stream in pc_item.get('streams', []):
stream_url = stream.get('url') stream_url = stream.get('url')
if not stream_url or stream.get('drmProtected') or stream_url in urls: if not stream_url or stream.get('drmProtected') or stream_url in urls:
continue continue
@ -155,7 +155,7 @@ class LimelightBaseIE(InfoExtractor):
}) })
formats.append(fmt) formats.append(fmt)
for mobile_url in mobile_urls: for mobile_url in mobile_item.get('mobileUrls', []):
media_url = mobile_url.get('mobileUrl') media_url = mobile_url.get('mobileUrl')
format_id = mobile_url.get('targetMediaPlatform') format_id = mobile_url.get('targetMediaPlatform')
if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls: if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
@ -179,54 +179,34 @@ class LimelightBaseIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
title = properties['title']
description = properties.get('description')
timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
filesize = int_or_none(properties.get('total_storage_in_bytes'))
categories = [properties.get('category')]
tags = properties.get('tags', [])
thumbnails = [{
'url': thumbnail['url'],
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
subtitles = {} subtitles = {}
for caption in properties.get('captions', []): for flag in mobile_item.get('flags'):
lang = caption.get('language_code') if flag == 'ClosedCaptions':
subtitles_url = caption.get('url') closed_captions = self._call_playlist_service(
if lang and subtitles_url: video_id, 'getClosedCaptionsDetailsByMediaId',
subtitles.setdefault(lang, []).append({ False, referer) or []
'url': subtitles_url, for cc in closed_captions:
}) cc_url = cc.get('webvttFileUrl')
closed_captions_url = properties.get('closed_captions_url') if not cc_url:
if closed_captions_url: continue
subtitles.setdefault('en', []).append({ lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en')
'url': closed_captions_url, subtitles.setdefault(lang, []).append({
'ext': 'ttml', 'url': cc_url,
}) })
break
get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': get_meta('description'),
'formats': formats, 'formats': formats,
'timestamp': timestamp, 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
'duration': duration, 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
'filesize': filesize,
'categories': categories,
'tags': tags,
'thumbnails': thumbnails,
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _extract_info_helper(self, pc, mobile, i, metadata):
return self._extract_info(
try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
metadata)
class LimelightMediaIE(LimelightBaseIE): class LimelightMediaIE(LimelightBaseIE):
IE_NAME = 'limelight' IE_NAME = 'limelight'
@ -251,8 +231,6 @@ class LimelightMediaIE(LimelightBaseIE):
'description': 'md5:8005b944181778e313d95c1237ddb640', 'description': 'md5:8005b944181778e313d95c1237ddb640',
'thumbnail': r're:^https?://.*\.jpeg$', 'thumbnail': r're:^https?://.*\.jpeg$',
'duration': 144.23, 'duration': 144.23,
'timestamp': 1244136834,
'upload_date': '20090604',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -268,30 +246,29 @@ class LimelightMediaIE(LimelightBaseIE):
'title': '3Play Media Overview Video', 'title': '3Play Media Overview Video',
'thumbnail': r're:^https?://.*\.jpeg$', 'thumbnail': r're:^https?://.*\.jpeg$',
'duration': 78.101, 'duration': 78.101,
'timestamp': 1338929955, # TODO: extract all languages that were accessible via API
'upload_date': '20120605', # 'subtitles': 'mincount:9',
'subtitles': 'mincount:9', 'subtitles': 'mincount:1',
}, },
}, { }, {
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
'only_matching': True, 'only_matching': True,
}] }]
_PLAYLIST_SERVICE_PATH = 'media' _PLAYLIST_SERVICE_PATH = 'media'
_API_PATH = 'media'
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url) video_id = self._match_id(url)
source_url = smuggled_data.get('source_url')
self._initialize_geo_bypass({ self._initialize_geo_bypass({
'countries': smuggled_data.get('geo_countries'), 'countries': smuggled_data.get('geo_countries'),
}) })
pc, mobile, metadata = self._extract( pc, mobile = self._extract(
video_id, 'getPlaylistByMediaId', video_id, 'getPlaylistByMediaId',
'getMobilePlaylistByMediaId', 'properties', 'getMobilePlaylistByMediaId', source_url)
smuggled_data.get('source_url'))
return self._extract_info_helper(pc, mobile, 0, metadata) return self._extract_info(pc, mobile, 0, source_url)
class LimelightChannelIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE):
@ -313,6 +290,7 @@ class LimelightChannelIE(LimelightBaseIE):
'info_dict': { 'info_dict': {
'id': 'ab6a524c379342f9b23642917020c082', 'id': 'ab6a524c379342f9b23642917020c082',
'title': 'Javascript Sample Code', 'title': 'Javascript Sample Code',
'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
}, { }, {
@ -320,22 +298,23 @@ class LimelightChannelIE(LimelightBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_PLAYLIST_SERVICE_PATH = 'channel' _PLAYLIST_SERVICE_PATH = 'channel'
_API_PATH = 'channels'
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
channel_id = self._match_id(url) channel_id = self._match_id(url)
source_url = smuggled_data.get('source_url')
pc, mobile, medias = self._extract( pc, mobile = self._extract(
channel_id, 'getPlaylistByChannelId', channel_id, 'getPlaylistByChannelId',
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
'media', smuggled_data.get('source_url')) source_url)
entries = [ entries = [
self._extract_info_helper(pc, mobile, i, medias['media_list'][i]) self._extract_info(pc, mobile, i, source_url)
for i in range(len(medias['media_list']))] for i in range(len(pc['playlistItems']))]
return self.playlist_result(entries, channel_id, pc['title']) return self.playlist_result(
entries, channel_id, pc.get('title'), mobile.get('description'))
class LimelightChannelListIE(LimelightBaseIE): class LimelightChannelListIE(LimelightBaseIE):
@ -368,10 +347,12 @@ class LimelightChannelListIE(LimelightBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
channel_list_id = self._match_id(url) channel_list_id = self._match_id(url)
channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById') channel_list = self._call_playlist_service(
channel_list_id, 'getMobileChannelListById')
entries = [ entries = [
self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel') self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
for channel in channel_list['channelList']] for channel in channel_list['channelList']]
return self.playlist_result(entries, channel_list_id, channel_list['title']) return self.playlist_result(
entries, channel_list_id, channel_list['title'])

View File

@ -128,6 +128,12 @@ class MailRuIE(InfoExtractor):
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
video_id, 'Downloading video JSON') video_id, 'Downloading video JSON')
headers = {}
video_key = self._get_cookies('https://my.mail.ru').get('video_key')
if video_key:
headers['Cookie'] = 'video_key=%s' % video_key.value
formats = [] formats = []
for f in video_data['videos']: for f in video_data['videos']:
video_url = f.get('url') video_url = f.get('url')
@ -140,6 +146,7 @@ class MailRuIE(InfoExtractor):
'url': video_url, 'url': video_url,
'format_id': format_id, 'format_id': format_id,
'height': height, 'height': height,
'http_headers': headers,
}) })
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -1,5 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
str_to_int, str_to_int,
@ -54,3 +57,23 @@ class MofosexIE(KeezMoviesIE):
}) })
return info return info
class MofosexEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
ie=MofosexIE.ie_key(), video_id=video_id)

View File

@ -26,7 +26,7 @@ class MotherlessIE(InfoExtractor):
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913', 'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup', 'uploader_id': 'famouslyfuckedup',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18, 'age_limit': 18,
} }
}, { }, {
@ -40,7 +40,7 @@ class MotherlessIE(InfoExtractor):
'game', 'hairy'], 'game', 'hairy'],
'upload_date': '20140622', 'upload_date': '20140622',
'uploader_id': 'Sulivana7x', 'uploader_id': 'Sulivana7x',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18, 'age_limit': 18,
}, },
'skip': '404', 'skip': '404',
@ -54,7 +54,7 @@ class MotherlessIE(InfoExtractor):
'categories': ['superheroine heroine superher'], 'categories': ['superheroine heroine superher'],
'upload_date': '20140827', 'upload_date': '20140827',
'uploader_id': 'shade0230', 'uploader_id': 'shade0230',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18, 'age_limit': 18,
} }
}, { }, {
@ -76,7 +76,8 @@ class MotherlessIE(InfoExtractor):
raise ExtractorError('Video %s is for friends only' % video_id, expected=True) raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
title = self._html_search_regex( title = self._html_search_regex(
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
video_url = (self._html_search_regex( video_url = (self._html_search_regex(
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'), r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
@ -84,14 +85,15 @@ class MotherlessIE(InfoExtractor):
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id) or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
view_count = str_to_int(self._html_search_regex( view_count = str_to_int(self._html_search_regex(
r'<strong>Views</strong>\s+([^<]+)<', (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex( like_count = str_to_int(self._html_search_regex(
r'<strong>Favorited</strong>\s+([^<]+)<', (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
webpage, 'like count', fatal=False)) webpage, 'like count', fatal=False))
upload_date = self._html_search_regex( upload_date = self._html_search_regex(
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date') (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
if 'Ago' in upload_date: if 'Ago' in upload_date:
days = int(re.search(r'([0-9]+)', upload_date).group(1)) days = int(re.search(r'([0-9]+)', upload_date).group(1))
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')

View File

@ -6,6 +6,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext,
int_or_none, int_or_none,
js_to_json, js_to_json,
qualities, qualities,
@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
bitrates = self._parse_json( duration = None
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
formats = [] formats = []
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list): player = self._parse_json(
format_list = [format_list] self._search_regex(
for format_url in format_list: r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
format_url = url_or_none(format_url) webpage, 'player', default='{}'), video_id, fatal=False)
if not format_url: if player:
continue for format_id, format_list in player['tracks'].items():
if format_id == 'hls': if not isinstance(format_list, list):
formats.extend(self._extract_m3u8_formats( format_list = [format_list]
format_url, video_id, ext='mp4', for format_dict in format_list:
entry_protocol='m3u8_native', m3u8_id='hls', if not isinstance(format_dict, dict):
fatal=False)) continue
continue format_url = url_or_none(format_dict.get('src'))
f = { format_type = format_dict.get('type')
'url': format_url, ext = determine_ext(format_url)
} if (format_type == 'application/x-mpegURL'
f_id = format_id or format_id == 'HLS' or ext == 'm3u8'):
for quality in QUALITIES: formats.extend(self._extract_m3u8_formats(
if '%s.mp4' % quality in format_url: format_url, video_id, 'mp4',
f_id += '-%s' % quality entry_protocol='m3u8_native', m3u8_id='hls',
f.update({ fatal=False))
'quality': quality_key(quality), elif (format_type == 'application/dash+xml'
'format_note': quality.upper(), or format_id == 'DASH' or ext == 'mpd'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
}) })
break duration = int_or_none(player.get('duration'))
f['format_id'] = f_id else:
formats.append(f) # Old path, not actual as of 08.04.2020
bitrates = self._parse_json(
self._search_regex(
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
format_list = [format_list]
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
continue
f = {
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
title = self._og_search_title( title = self._og_search_title(
@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='value') 'thumbnail', fatal=False, group='value')
duration = int_or_none(self._search_regex( duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
default=duration))
return { return {
'id': video_id, 'id': video_id,

View File

@ -162,13 +162,12 @@ class ORFTVthekIE(InfoExtractor):
class ORFRadioIE(InfoExtractor): class ORFRadioIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
station = mobj.group('station')
show_date = mobj.group('date') show_date = mobj.group('date')
show_id = mobj.group('show') show_id = mobj.group('show')
data = self._download_json( data = self._download_json(
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
% (station, show_id, show_date), show_id) % (self._API_STATION, show_id, show_date), show_id)
entries = [] entries = []
for info in data['streams']: for info in data['streams']:
@ -183,7 +182,7 @@ class ORFRadioIE(InfoExtractor):
duration = end - start if end and start else None duration = end - start if end and start else None
entries.append({ entries.append({
'id': loop_stream_id.replace('.mp3', ''), 'id': loop_stream_id.replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id), 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'title': title, 'title': title,
'description': clean_html(data.get('subtitle')), 'description': clean_html(data.get('subtitle')),
'duration': duration, 'duration': duration,
@ -205,6 +204,8 @@ class ORFFM4IE(ORFRadioIE):
IE_NAME = 'orf:fm4' IE_NAME = 'orf:fm4'
IE_DESC = 'radio FM4' IE_DESC = 'radio FM4'
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)' _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
_API_STATION = 'fm4'
_LOOP_STATION = 'fm4'
_TEST = { _TEST = {
'url': 'http://fm4.orf.at/player/20170107/4CC', 'url': 'http://fm4.orf.at/player/20170107/4CC',
@ -223,10 +224,142 @@ class ORFFM4IE(ORFRadioIE):
} }
class ORFNOEIE(ORFRadioIE):
IE_NAME = 'orf:noe'
IE_DESC = 'Radio Niederösterreich'
_VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'noe'
_LOOP_STATION = 'oe2n'
_TEST = {
'url': 'https://noe.orf.at/player/20200423/NGM',
'only_matching': True,
}
class ORFWIEIE(ORFRadioIE):
IE_NAME = 'orf:wien'
IE_DESC = 'Radio Wien'
_VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'wie'
_LOOP_STATION = 'oe2w'
_TEST = {
'url': 'https://wien.orf.at/player/20200423/WGUM',
'only_matching': True,
}
class ORFBGLIE(ORFRadioIE):
IE_NAME = 'orf:burgenland'
IE_DESC = 'Radio Burgenland'
_VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'bgl'
_LOOP_STATION = 'oe2b'
_TEST = {
'url': 'https://burgenland.orf.at/player/20200423/BGM',
'only_matching': True,
}
class ORFOOEIE(ORFRadioIE):
IE_NAME = 'orf:oberoesterreich'
IE_DESC = 'Radio Oberösterreich'
_VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'ooe'
_LOOP_STATION = 'oe2o'
_TEST = {
'url': 'https://ooe.orf.at/player/20200423/OGMO',
'only_matching': True,
}
class ORFSTMIE(ORFRadioIE):
IE_NAME = 'orf:steiermark'
IE_DESC = 'Radio Steiermark'
_VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'stm'
_LOOP_STATION = 'oe2st'
_TEST = {
'url': 'https://steiermark.orf.at/player/20200423/STGMS',
'only_matching': True,
}
class ORFKTNIE(ORFRadioIE):
IE_NAME = 'orf:kaernten'
IE_DESC = 'Radio Kärnten'
_VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'ktn'
_LOOP_STATION = 'oe2k'
_TEST = {
'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
'only_matching': True,
}
class ORFSBGIE(ORFRadioIE):
IE_NAME = 'orf:salzburg'
IE_DESC = 'Radio Salzburg'
_VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'sbg'
_LOOP_STATION = 'oe2s'
_TEST = {
'url': 'https://salzburg.orf.at/player/20200423/SGUM',
'only_matching': True,
}
class ORFTIRIE(ORFRadioIE):
IE_NAME = 'orf:tirol'
IE_DESC = 'Radio Tirol'
_VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'tir'
_LOOP_STATION = 'oe2t'
_TEST = {
'url': 'https://tirol.orf.at/player/20200423/TGUMO',
'only_matching': True,
}
class ORFVBGIE(ORFRadioIE):
IE_NAME = 'orf:vorarlberg'
IE_DESC = 'Radio Vorarlberg'
_VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'vbg'
_LOOP_STATION = 'oe2v'
_TEST = {
'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
'only_matching': True,
}
class ORFOE3IE(ORFRadioIE):
IE_NAME = 'orf:oe3'
IE_DESC = 'Radio Österreich 3'
_VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'oe3'
_LOOP_STATION = 'oe3'
_TEST = {
'url': 'https://oe3.orf.at/player/20200424/3WEK',
'only_matching': True,
}
class ORFOE1IE(ORFRadioIE): class ORFOE1IE(ORFRadioIE):
IE_NAME = 'orf:oe1' IE_NAME = 'orf:oe1'
IE_DESC = 'Radio Österreich 1' IE_DESC = 'Radio Österreich 1'
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'oe1'
_LOOP_STATION = 'oe1'
_TEST = { _TEST = {
'url': 'http://oe1.orf.at/player/20170108/456544', 'url': 'http://oe1.orf.at/player/20170108/456544',

View File

@ -20,20 +20,16 @@ class PokemonIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Ol Raise and Switch!', 'title': 'The Ol Raise and Switch!',
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
'timestamp': 1511824728,
'upload_date': '20171127',
}, },
'add_id': ['LimelightMedia'], 'add_id': ['LimelightMedia'],
}, { }, {
# no data-video-title # no data-video-title
'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008', 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
'info_dict': { 'info_dict': {
'id': '99f3bae270bf4e5097274817239ce9c8', 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Pokémon: The Rise of Darkrai', 'title': "Pokémon : L'ascension de Darkrai",
'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d', 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
'timestamp': 1417778347,
'upload_date': '20141205',
}, },
'add_id': ['LimelightMedia'], 'add_id': ['LimelightMedia'],
'params': { 'params': {

View File

@ -11,6 +11,7 @@ from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts,
unified_strdate, unified_strdate,
) )
@ -175,7 +176,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
(?: (?:
(?:beta\.)? (?:beta\.)?
(?: (?:
prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
)\.(?:de|at|ch)| )\.(?:de|at|ch)|
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
) )
@ -193,10 +194,14 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'info_dict': { 'info_dict': {
'id': '2104602', 'id': '2104602',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Episode 18 - Staffel 2', 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231', 'upload_date': '20131231',
'duration': 5845.04, 'duration': 5845.04,
'series': 'CIRCUS HALLIGALLI',
'season_number': 2,
'episode': 'Episode 18 - Staffel 2',
'episode_number': 18,
}, },
}, },
{ {
@ -300,8 +305,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'info_dict': { 'info_dict': {
'id': '2572814', 'id': '2572814',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Andreas Kümmert: Rocket Man', 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38', 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
'timestamp': 1382041620,
'upload_date': '20131017', 'upload_date': '20131017',
'duration': 469.88, 'duration': 469.88,
}, },
@ -310,7 +316,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
}, },
}, },
{ {
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
'info_dict': { 'info_dict': {
'id': '2156342', 'id': '2156342',
'ext': 'mp4', 'ext': 'mp4',
@ -332,19 +338,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'playlist_count': 2, 'playlist_count': 2,
'skip': 'This video is unavailable', 'skip': 'This video is unavailable',
}, },
{
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
'info_dict': {
'id': '4187506',
'ext': 'mp4',
'title': 'Best of Circus HalliGalli',
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
'upload_date': '20151229',
},
'params': {
'skip_download': True,
},
},
{ {
# title in <h2 class="subtitle"> # title in <h2 class="subtitle">
'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
@ -421,7 +414,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>', r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
] ]
_UPLOAD_DATE_REGEXES = [ _UPLOAD_DATE_REGEXES = [
r'<meta property="og:published_time" content="(.+?)">',
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
@ -451,17 +443,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
if description is None: if description is None:
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex( upload_date = unified_strdate(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) self._html_search_meta('og:published_time', webpage,
'upload date', default=None)
or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
webpage, 'upload date', default=None))
info.update({ json_ld = self._search_json_ld(webpage, clip_id, default={})
return merge_dicts(info, {
'id': clip_id, 'id': clip_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
}) }, json_ld)
return info
def _extract_playlist(self, url, webpage): def _extract_playlist(self, url, webpage):
playlist_id = self._html_search_regex( playlist_id = self._html_search_regex(

View File

@ -82,17 +82,6 @@ class PuhuTVIE(InfoExtractor):
urls = [] urls = []
formats = [] formats = []
def add_http_from_hls(m3u8_f):
http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
if http_url != m3u8_f['url']:
f = m3u8_f.copy()
f.update({
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
'url': http_url,
})
formats.append(f)
for video in videos['data']['videos']: for video in videos['data']['videos']:
media_url = url_or_none(video.get('url')) media_url = url_or_none(video.get('url'))
if not media_url or media_url in urls: if not media_url or media_url in urls:
@ -101,12 +90,9 @@ class PuhuTVIE(InfoExtractor):
playlist = video.get('is_playlist') playlist = video.get('is_playlist')
if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url: if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
m3u8_formats = self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native', media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False))
for m3u8_f in m3u8_formats:
formats.append(m3u8_f)
add_http_from_hls(m3u8_f)
continue continue
quality = int_or_none(video.get('quality')) quality = int_or_none(video.get('quality'))
@ -128,8 +114,6 @@ class PuhuTVIE(InfoExtractor):
format_id += '-%sp' % quality format_id += '-%sp' % quality
f['format_id'] = format_id f['format_id'] = format_id
formats.append(f) formats.append(f)
if is_hls:
add_http_from_hls(f)
self._sort_formats(formats) self._sort_formats(formats)
creator = try_get( creator = try_get(

View File

@ -27,6 +27,7 @@ from ..utils import (
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
url_or_none, url_or_none,
urlhandle_detect_ext,
) )
@ -96,7 +97,7 @@ class SoundcloudIE(InfoExtractor):
'repost_count': int, 'repost_count': int,
} }
}, },
# not streamable song, preview # geo-restricted
{ {
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': { 'info_dict': {
@ -108,17 +109,13 @@ class SoundcloudIE(InfoExtractor):
'uploader_id': '9615865', 'uploader_id': '9615865',
'timestamp': 1337635207, 'timestamp': 1337635207,
'upload_date': '20120521', 'upload_date': '20120521',
'duration': 30, 'duration': 227.155,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
}, },
'params': {
# rtmp
'skip_download': True,
},
}, },
# private link # private link
{ {
@ -229,7 +226,6 @@ class SoundcloudIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
# not available via api.soundcloud.com/i1/tracks/id/streams
{ {
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
@ -250,11 +246,14 @@ class SoundcloudIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
}, },
'expected_warnings': ['Unable to download JSON metadata'], },
} {
# with AAC HQ format available via OAuth token
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
'only_matching': True,
},
] ]
_API_BASE = 'https://api.soundcloud.com/'
_API_V2_BASE = 'https://api-v2.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/' _BASE_URL = 'https://soundcloud.com/'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
@ -316,10 +315,9 @@ class SoundcloudIE(InfoExtractor):
def _resolv_url(cls, url): def _resolv_url(cls, url):
return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): def _extract_info_dict(self, info, full_title=None, secret_token=None):
track_id = compat_str(info['id']) track_id = compat_str(info['id'])
title = info['title'] title = info['title']
track_base_url = self._API_BASE + 'tracks/%s' % track_id
format_urls = set() format_urls = set()
formats = [] formats = []
@ -328,21 +326,22 @@ class SoundcloudIE(InfoExtractor):
query['secret_token'] = secret_token query['secret_token'] = secret_token
if info.get('downloadable') and info.get('has_downloads_left'): if info.get('downloadable') and info.get('has_downloads_left'):
format_url = update_url_query( download_url = update_url_query(
info.get('download_url') or track_base_url + '/download', query) self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
format_urls.add(format_url) redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
if version == 2: if redirect_url:
v1_info = self._download_json( urlh = self._request_webpage(
track_base_url, track_id, query=query, fatal=False) or {} HEADRequest(redirect_url), track_id, fatal=False)
else: if urlh:
v1_info = info format_url = urlh.geturl()
formats.append({ format_urls.add(format_url)
'format_id': 'download', formats.append({
'ext': v1_info.get('original_format') or 'mp3', 'format_id': 'download',
'filesize': int_or_none(v1_info.get('original_content_size')), 'ext': urlhandle_detect_ext(urlh) or 'mp3',
'url': format_url, 'filesize': int_or_none(urlh.headers.get('Content-Length')),
'preference': 10, 'url': format_url,
}) 'preference': 10,
})
def invalid_url(url): def invalid_url(url):
return not url or url in format_urls return not url or url in format_urls
@ -356,6 +355,9 @@ class SoundcloudIE(InfoExtractor):
format_id_list = [] format_id_list = []
if protocol: if protocol:
format_id_list.append(protocol) format_id_list.append(protocol)
ext = f.get('ext')
if ext == 'aac':
f['abr'] = '256'
for k in ('ext', 'abr'): for k in ('ext', 'abr'):
v = f.get(k) v = f.get(k)
if v: if v:
@ -366,9 +368,13 @@ class SoundcloudIE(InfoExtractor):
abr = f.get('abr') abr = f.get('abr')
if abr: if abr:
f['abr'] = int(abr) f['abr'] = int(abr)
if protocol == 'hls':
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
else:
protocol = 'http'
f.update({ f.update({
'format_id': '_'.join(format_id_list), 'format_id': '_'.join(format_id_list),
'protocol': 'm3u8_native' if protocol == 'hls' else 'http', 'protocol': protocol,
'preference': -10 if preview else None, 'preference': -10 if preview else None,
}) })
formats.append(f) formats.append(f)
@ -406,42 +412,11 @@ class SoundcloudIE(InfoExtractor):
}, 'http' if protocol == 'progressive' else protocol, }, 'http' if protocol == 'progressive' else protocol,
t.get('snipped') or '/preview/' in format_url) t.get('snipped') or '/preview/' in format_url)
if not formats:
# Old API, does not work for some tracks (e.g.
# https://soundcloud.com/giovannisarani/mezzo-valzer)
# and might serve preview URLs (e.g.
# http://www.soundcloud.com/snbrn/ele)
format_dict = self._download_json(
track_base_url + '/streams', track_id,
'Downloading track url', query=query, fatal=False) or {}
for key, stream_url in format_dict.items():
if invalid_url(stream_url):
continue
format_urls.add(stream_url)
mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
if mobj:
protocol, ext, abr = mobj.groups()
add_format({
'abr': abr,
'ext': ext,
'url': stream_url,
}, protocol)
if not formats:
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
urlh = self._request_webpage(
HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
track_id, query=query, fatal=False)
if urlh:
stream_url = urlh.geturl()
if not invalid_url(stream_url):
add_format({'url': stream_url}, 'http')
for f in formats: for f in formats:
f['vcodec'] = 'none' f['vcodec'] = 'none'
if not formats and info.get('policy') == 'BLOCK':
self.raise_geo_restricted()
self._sort_formats(formats) self._sort_formats(formats)
user = info.get('user') or {} user = info.get('user') or {}
@ -511,16 +486,10 @@ class SoundcloudIE(InfoExtractor):
resolve_title += '/%s' % token resolve_title += '/%s' % token
info_json_url = self._resolv_url(self._BASE_URL + resolve_title) info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
version = 2
info = self._download_json( info = self._download_json(
info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False) info_json_url, full_title, 'Downloading info JSON', query=query)
if not info:
info = self._download_json(
info_json_url.replace(self._API_V2_BASE, self._API_BASE),
full_title, 'Downloading info JSON', query=query)
version = 1
return self._extract_info_dict(info, full_title, token, version) return self._extract_info_dict(info, full_title, token)
class SoundcloudPlaylistBaseIE(SoundcloudIE): class SoundcloudPlaylistBaseIE(SoundcloudIE):
@ -590,7 +559,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = { COMMON_QUERY = {
'limit': 2000000000, 'limit': 80000,
'linked_partitioning': '1', 'linked_partitioning': '1',
} }

View File

@ -3,34 +3,47 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
sanitized_Request, float_or_none,
int_or_none,
merge_dicts,
str_or_none,
str_to_int, str_to_int,
unified_strdate, url_or_none,
) )
from ..aes import aes_decrypt_text
class SpankwireIE(InfoExtractor): class SpankwireIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)' _VALID_URL = r'''(?x)
https?://
(?:www\.)?spankwire\.com/
(?:
[^/]+/video|
EmbedPlayer\.aspx/?\?.*?\bArticleId=
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4 # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
'md5': '8bbfde12b101204b39e4b9fe7eb67095', 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
'info_dict': { 'info_dict': {
'id': '103545', 'id': '103545',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
'description': 'Crazy Bitch X rated music video.', 'description': 'Crazy Bitch X rated music video.',
'duration': 222,
'uploader': 'oreusz', 'uploader': 'oreusz',
'uploader_id': '124697', 'uploader_id': '124697',
'upload_date': '20070507', 'timestamp': 1178587885,
'upload_date': '20070508',
'average_rating': float,
'view_count': int,
'comment_count': int,
'age_limit': 18, 'age_limit': 18,
} 'categories': list,
'tags': list,
},
}, { }, {
# download URL pattern: */mp4_<format_id>_<video_id>.mp4 # download URL pattern: */mp4_<format_id>_<video_id>.mp4
'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
@ -45,83 +58,125 @@ class SpankwireIE(InfoExtractor):
'upload_date': '20150822', 'upload_date': '20150822',
'age_limit': 18, 'age_limit': 18,
}, },
'params': {
'proxy': '127.0.0.1:8118'
},
'skip': 'removed',
}, {
'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
webpage)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
req = sanitized_Request('http://www.' + mobj.group('url')) video = self._download_json(
req.add_header('Cookie', 'age_verified=1') 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)
webpage = self._download_webpage(req, video_id)
title = self._html_search_regex( title = video['title']
r'<h1>([^<]+)', webpage, 'title')
description = self._html_search_regex(
r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'by:\s*<a [^>]*>(.+?)</a>',
webpage, 'uploader', fatal=False)
uploader_id = self._html_search_regex(
r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
webpage, 'uploader id', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
r'</a> on (.+?) at \d+:\d+',
webpage, 'upload date', fatal=False))
view_count = str_to_int(self._html_search_regex(
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
webpage, 'comment count', fatal=False))
videos = re.findall(
r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
heights = [int(video[0]) for video in videos]
video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
if webpage.find(r'flashvars\.encrypted = "true"') != -1:
password = self._search_regex(
r'flashvars\.video_title = "([^"]+)',
webpage, 'password').replace('+', ' ')
video_urls = list(map(
lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'),
video_urls))
formats = [] formats = []
for height, video_url in zip(heights, video_urls): videos = video.get('videos')
path = compat_urllib_parse_urlparse(video_url).path if isinstance(videos, dict):
m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path) for format_id, format_url in videos.items():
if m: video_url = url_or_none(format_url)
tbr = int(m.group('tbr')) if not format_url:
height = int(m.group('height')) continue
else: height = int_or_none(self._search_regex(
tbr = None r'(\d+)[pP]', format_id, 'height', default=None))
formats.append({ m = re.search(
'url': video_url, r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
'format_id': '%dp' % height, if m:
'height': height, tbr = int(m.group('tbr'))
'tbr': tbr, height = height or int(m.group('height'))
else:
tbr = None
formats.append({
'url': video_url,
'format_id': '%dp' % height if height else format_id,
'height': height,
'tbr': tbr,
})
m3u8_url = url_or_none(video.get('HLS'))
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id'))
view_count = str_to_int(video.get('viewed'))
thumbnails = []
for preference, t in enumerate(('', '2x'), start=0):
thumbnail_url = url_or_none(video.get('poster%s' % t))
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'preference': preference,
}) })
self._sort_formats(formats)
age_limit = self._rta_search(webpage) def extract_names(key):
entries_list = video.get(key)
if not isinstance(entries_list, list):
return
entries = []
for entry in entries_list:
name = str_or_none(entry.get('name'))
if name:
entries.append(name)
return entries
return { categories = extract_names('categories')
tags = extract_names('tags')
uploader = None
info = {}
webpage = self._download_webpage(
'https://www.spankwire.com/_/video%s/' % video_id, video_id,
fatal=False)
if webpage:
info = self._search_json_ld(webpage, video_id, default={})
thumbnail_url = None
if 'thumbnail' in info:
thumbnail_url = url_or_none(info['thumbnail'])
del info['thumbnail']
if not thumbnail_url:
thumbnail_url = self._og_search_thumbnail(webpage)
if thumbnail_url:
thumbnails.append({
'url': thumbnail_url,
'preference': 10,
})
uploader = self._html_search_regex(
r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>',
webpage, 'uploader', fatal=False)
if not view_count:
view_count = str_to_int(self._search_regex(
r'data-views=["\']([\d,.]+)', webpage, 'view count',
fatal=False))
return merge_dicts({
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': video.get('description'),
'thumbnail': thumbnail, 'duration': int_or_none(video.get('duration')),
'thumbnails': thumbnails,
'uploader': uploader, 'uploader': uploader,
'uploader_id': uploader_id, 'uploader_id': str_or_none(video.get('userId')),
'upload_date': upload_date, 'timestamp': int_or_none(video.get('time_approved_on')),
'average_rating': float_or_none(video.get('rating')),
'view_count': view_count, 'view_count': view_count,
'comment_count': comment_count, 'comment_count': int_or_none(video.get('comments')),
'age_limit': 18,
'categories': categories,
'tags': tags,
'formats': formats, 'formats': formats,
'age_limit': age_limit, }, info)
}

View File

@ -8,15 +8,10 @@ class BellatorIE(MTVServicesInfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
'info_dict': { 'info_dict': {
'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4', 'title': 'Michael Page vs. Evangelista Cyborg',
'ext': 'mp4', 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05',
'title': 'Douglas Lima vs. Paul Daley - Round 1',
'description': 'md5:805a8dd29310fd611d32baba2f767885',
},
'params': {
# m3u8 download
'skip_download': True,
}, },
'playlist_count': 3,
}, { }, {
'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
'only_matching': True, 'only_matching': True,
@ -25,6 +20,9 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/' _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US'] _GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage):
return self._extract_triforce_mgid(webpage)
class ParamountNetworkIE(MTVServicesInfoExtractor): class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'

View File

@ -7,7 +7,9 @@ from .wistia import WistiaIE
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
int_or_none,
get_element_by_class, get_element_by_class,
strip_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
@ -19,8 +21,8 @@ class TeachableBaseIE(InfoExtractor):
_SITES = { _SITES = {
# Only notable ones here # Only notable ones here
'upskillcourses.com': 'upskill', 'v1.upskillcourses.com': 'upskill',
'academy.gns3.com': 'gns3', 'gns3.teachable.com': 'gns3',
'academyhacker.com': 'academyhacker', 'academyhacker.com': 'academyhacker',
'stackskills.com': 'stackskills', 'stackskills.com': 'stackskills',
'market.saleshacker.com': 'saleshacker', 'market.saleshacker.com': 'saleshacker',
@ -109,27 +111,29 @@ class TeachableIE(TeachableBaseIE):
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
_TESTS = [{ _TESTS = [{
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
'info_dict': { 'info_dict': {
'id': 'uzw6zw58or', 'id': 'untlgzk1v7',
'ext': 'mp4', 'ext': 'bin',
'title': 'Welcome to the Course!', 'title': 'Overview',
'description': 'md5:65edb0affa582974de4625b9cdea1107', 'description': 'md5:071463ff08b86c208811130ea1c2464c',
'duration': 138.763, 'duration': 736.4,
'timestamp': 1479846621, 'timestamp': 1542315762,
'upload_date': '20161122', 'upload_date': '20181115',
'chapter': 'Welcome',
'chapter_number': 1,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'http://upskillcourses.com/courses/119763/lectures/1747100', 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://academy.gns3.com/courses/423415/lectures/6885939', 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
'only_matching': True, 'only_matching': True,
}] }]
@ -173,11 +177,34 @@ class TeachableIE(TeachableBaseIE):
title = self._og_search_title(webpage, default=None) title = self._og_search_title(webpage, default=None)
chapter = None
chapter_number = None
section_item = self._search_regex(
r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
webpage, 'section item', default=None, group='li')
if section_item:
chapter_number = int_or_none(self._search_regex(
r'data-ss-position=["\'](\d+)', section_item, 'section id',
default=None))
if chapter_number is not None:
sections = []
for s in re.findall(
r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
section = strip_or_none(clean_html(s))
if not section:
sections = []
break
sections.append(section)
if chapter_number <= len(sections):
chapter = sections[chapter_number - 1]
entries = [{ entries = [{
'_type': 'url_transparent', '_type': 'url_transparent',
'url': wistia_url, 'url': wistia_url,
'ie_key': WistiaIE.ie_key(), 'ie_key': WistiaIE.ie_key(),
'title': title, 'title': title,
'chapter': chapter,
'chapter_number': chapter_number,
} for wistia_url in wistia_urls] } for wistia_url in wistia_urls]
return self.playlist_result(entries, video_id, title) return self.playlist_result(entries, video_id, title)
@ -192,20 +219,20 @@ class TeachableCourseIE(TeachableBaseIE):
/(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+) /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
_TESTS = [{ _TESTS = [{
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/', 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
'info_dict': { 'info_dict': {
'id': 'essential-web-developer-course', 'id': 'essential-web-developer-course',
'title': 'The Essential Web Developer Course (Free)', 'title': 'The Essential Web Developer Course (Free)',
}, },
'playlist_count': 192, 'playlist_count': 192,
}, { }, {
'url': 'http://upskillcourses.com/courses/119763/', 'url': 'http://v1.upskillcourses.com/courses/119763/',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://upskillcourses.com/courses/enrolled/119763', 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://academy.gns3.com/courses/enrolled/423415', 'url': 'https://gns3.teachable.com/courses/enrolled/423415',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini', 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',

View File

@ -1,9 +1,19 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from .nexx import NexxIE from .nexx import NexxIE
from ..compat import compat_urlparse from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
NO_DEFAULT,
try_get,
)
class Tele5IE(InfoExtractor): class Tele5IE(InfoExtractor):
@ -44,14 +54,49 @@ class Tele5IE(InfoExtractor):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
if not video_id: NEXX_ID_RE = r'\d{6,}'
JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
def nexx_result(nexx_id):
return self.url_result(
'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
ie=NexxIE.ie_key(), video_id=nexx_id)
nexx_id = jwplatform_id = None
if video_id:
if re.match(NEXX_ID_RE, video_id):
return nexx_result(video_id)
elif re.match(JWPLATFORM_ID_RE, video_id):
jwplatform_id = video_id
if not nexx_id:
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)', def extract_id(pattern, name, default=NO_DEFAULT):
r'\s+id\s*=\s*["\']player_(\d{6,})', return self._html_search_regex(
r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id') (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
r'\s+id\s*=\s*["\']player_(%s)' % pattern,
r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
default=default)
nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
if nexx_id:
return nexx_result(nexx_id)
if not jwplatform_id:
jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
media = self._download_json(
'https://cdn.jwplayer.com/v2/media/' + jwplatform_id,
display_id)
nexx_id = try_get(
media, lambda x: x['playlist'][0]['nexx_id'], compat_str)
if nexx_id:
return nexx_result(nexx_id)
return self.url_result( return self.url_result(
'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id, 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
ie=NexxIE.ie_key(), video_id=video_id) video_id=jwplatform_id)

View File

@ -38,8 +38,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Un petit choc et puis repart!', 'title': 'Un petit choc et puis repart!',
'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374', 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
'upload_date': '20180222',
'timestamp': 1519326631,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View File

@ -10,8 +10,8 @@ from ..utils import (
class TenPlayIE(InfoExtractor): class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/[^/]+/episodes/[^/]+/[^/]+/(?P<id>tpv\d{6}[a-z]{5})' _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
_TEST = { _TESTS = [{
'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga', 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
'info_dict': { 'info_dict': {
'id': '6060533435001', 'id': '6060533435001',
@ -27,7 +27,10 @@ class TenPlayIE(InfoExtractor):
'format': 'bestvideo', 'format': 'bestvideo',
'skip_download': True, 'skip_download': True,
} }
} }, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -17,14 +17,12 @@ class TFOIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
'md5': '47c987d0515561114cf03d1226a9d4c7', 'md5': 'cafbe4f47a8dae0ca0159937878100d6',
'info_dict': { 'info_dict': {
'id': '100463871', 'id': '7da3d50e495c406b8fc0b997659cc075',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Video Game Hackathon', 'title': 'Video Game Hackathon',
'description': 'md5:558afeba217c6c8d96c60e5421795c07', 'description': 'md5:558afeba217c6c8d96c60e5421795c07',
'upload_date': '20160212',
'timestamp': 1455310233,
} }
} }

View File

@ -31,6 +31,10 @@ class ThisOldHouseIE(InfoExtractor):
}, { }, {
'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost', 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
'only_matching': True, 'only_matching': True,
}, {
# iframe www.thisoldhouse.com
'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
'only_matching': True,
}] }]
_ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe' _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
@ -38,6 +42,6 @@ class ThisOldHouseIE(InfoExtractor):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._search_regex( video_id = self._search_regex(
r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})', r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
webpage, 'video id') webpage, 'video id')
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id) return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)

View File

@ -99,7 +99,7 @@ class TV4IE(InfoExtractor):
manifest_url.replace('.m3u8', '.f4m'), manifest_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False)) video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url), re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
video_id, ism_id='mss', fatal=False)) video_id, ism_id='mss', fatal=False))
if not formats and info.get('is_geo_restricted'): if not formats and info.get('is_geo_restricted'):

View File

@ -6,7 +6,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
@ -15,9 +14,7 @@ from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
qualities, qualities,
smuggle_url,
try_get, try_get,
unsmuggle_url,
update_url_query, update_url_query,
url_or_none, url_or_none,
) )
@ -235,11 +232,6 @@ class TVPlayIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass({
'countries': smuggled_data.get('geo_countries'),
})
video_id = self._match_id(url) video_id = self._match_id(url)
geo_country = self._search_regex( geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url, r'https?://[^/]+\.([a-z]{2})', url,
@ -285,8 +277,6 @@ class TVPlayIE(InfoExtractor):
'ext': ext, 'ext': ext,
} }
if video_url.startswith('rtmp'): if video_url.startswith('rtmp'):
if smuggled_data.get('skip_rtmp'):
continue
m = re.search( m = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url) r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
if not m: if not m:
@ -347,115 +337,80 @@ class ViafreeIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.)? (?:www\.)?
viafree\. viafree\.(?P<country>dk|no|se)
(?: /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+)
(?:dk|no)/programmer|
se/program
)
/(?:[^/]+/)+(?P<id>[^/?#&]+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2', 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
'info_dict': { 'info_dict': {
'id': '395375', 'id': '757786',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Husräddarna S02E02', 'title': 'Det beste vorspielet - Sesong 2 - Episode 1',
'description': 'md5:4db5c933e37db629b5a2f75dfb34829e', 'description': 'md5:b632cb848331404ccacd8cd03e83b4c3',
'series': 'Husräddarna', 'series': 'Det beste vorspielet',
'season': 'Säsong 2',
'season_number': 2, 'season_number': 2,
'duration': 2576, 'duration': 1116,
'timestamp': 1400596321, 'timestamp': 1471200600,
'upload_date': '20140520', 'upload_date': '20160814',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [TVPlayIE.ie_key()],
}, { }, {
# with relatedClips # with relatedClips
'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1', 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
'info_dict': { 'only_matching': True,
'id': '758770',
'ext': 'mp4',
'title': 'Sommaren med YouTube-stjärnorna S01E01',
'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f',
'series': 'Sommaren med YouTube-stjärnorna',
'season': 'Säsong 1',
'season_number': 1,
'duration': 1326,
'timestamp': 1470905572,
'upload_date': '20160811',
},
'params': {
'skip_download': True,
},
'add_ie': [TVPlayIE.ie_key()],
}, { }, {
# Different og:image URL schema # Different og:image URL schema
'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5', 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
'only_matching': True, 'only_matching': True,
}] }]
_GEO_BYPASS = False
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url) return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) country, path = re.match(self._VALID_URL, url).groups()
content = self._download_json(
'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path)
program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program']
guid = program['guid']
meta = content['meta']
title = meta['title']
webpage = self._download_webpage(url, video_id) try:
stream_href = self._download_json(
program['_links']['streamLink']['href'], guid,
headers=self.geo_verification_headers())['embedded']['prioritizedStreams'][0]['links']['stream']['href']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_geo_restricted(countries=[country])
raise
data = self._parse_json( formats = self._extract_m3u8_formats(stream_href, guid, 'mp4')
self._search_regex( self._sort_formats(formats)
r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script', episode = program.get('episode') or {}
webpage, 'data', default='{}'),
video_id, transform_source=lambda x: re.sub(
r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*',
'null', x), fatal=False)
video_id = None return {
'id': guid,
if data: 'title': title,
video_id = try_get( 'thumbnail': meta.get('image'),
data, lambda x: x['context']['dispatcher']['stores'][ 'description': meta.get('description'),
'ContentPageProgramStore']['currentVideo']['id'], 'series': episode.get('seriesTitle'),
compat_str) 'episode_number': int_or_none(episode.get('episodeNumber')),
'season_number': int_or_none(episode.get('seasonNumber')),
# Fallback #1 (extract from og:image URL schema) 'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000),
if not video_id: 'timestamp': parse_iso8601(try_get(program, lambda x: x['availability']['start'])),
thumbnail = self._og_search_thumbnail(webpage, default=None) 'formats': formats,
if thumbnail: }
video_id = self._search_regex(
# Patterns seen:
# http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg
# http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg
r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/',
thumbnail, 'video id', default=None)
# Fallback #2. Extract from raw JSON string.
# May extract wrong video id if relatedClips is present.
if not video_id:
video_id = self._search_regex(
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
webpage, 'video id')
return self.url_result(
smuggle_url(
'mtg:%s' % video_id,
{
'geo_countries': [
compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]],
# rtmp host mtgfs.fplive.net for viafree is unresolvable
'skip_rtmp': True,
}),
ie=TVPlayIE.ie_key(), video_id=video_id)
class TVPlayHomeIE(InfoExtractor): class TVPlayHomeIE(InfoExtractor):

View File

@ -643,7 +643,14 @@ class TwitchStreamIE(TwitchBaseIE):
class TwitchClipsIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE):
IE_NAME = 'twitch:clips' IE_NAME = 'twitch:clips'
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)' _VALID_URL = r'''(?x)
https?://
(?:
clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
(?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
)
(?P<id>[^/?#&]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
@ -669,6 +676,12 @@ class TwitchClipsIE(TwitchBaseIE):
}, { }, {
'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited', 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
'only_matching': True,
}, {
'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -2,12 +2,17 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601,
qualities,
update_url_query, update_url_query,
str_or_none,
) )
@ -16,21 +21,25 @@ class UOLIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)' _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931', 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
'md5': '25291da27dc45e0afb5718a8603d3816', 'md5': '4f1e26683979715ff64e4e29099cf020',
'info_dict': { 'info_dict': {
'id': '15951931', 'id': '15951931',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Miss simpatia é encontrada morta', 'title': 'Miss simpatia é encontrada morta',
'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2', 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
'timestamp': 1470421860,
'upload_date': '20160805',
} }
}, { }, {
'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326', 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9', 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
'info_dict': { 'info_dict': {
'id': '15954259', 'id': '15954259',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Incêndio destrói uma das maiores casas noturnas de Londres', 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.', 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
'timestamp': 1470674520,
'upload_date': '20160808',
} }
}, { }, {
'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931', 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
@ -55,91 +64,55 @@ class UOLIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_FORMATS = {
'2': {
'width': 640,
'height': 360,
},
'5': {
'width': 1280,
'height': 720,
},
'6': {
'width': 426,
'height': 240,
},
'7': {
'width': 1920,
'height': 1080,
},
'8': {
'width': 192,
'height': 144,
},
'9': {
'width': 568,
'height': 320,
},
'11': {
'width': 640,
'height': 360,
}
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
media_id = None
if video_id.isdigit():
media_id = video_id
if not media_id:
embed_page = self._download_webpage(
'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
video_id, 'Downloading embed page', fatal=False)
if embed_page:
media_id = self._search_regex(
(r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
embed_page, 'media id', default=None)
if not media_id:
webpage = self._download_webpage(url, video_id)
media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
video_data = self._download_json( video_data = self._download_json(
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
media_id)['item'] 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
video_id)['item']
media_id = compat_str(video_data['mediaId'])
title = video_data['title'] title = video_data['title']
ver = video_data.get('revision', 2)
query = { uol_formats = self._download_json(
'ver': video_data.get('numRevision', 2), 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
'r': 'http://mais.uol.com.br', media_id)
} quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
for k in ('token', 'sign'):
v = video_data.get(k)
if v:
query[k] = v
formats = [] formats = []
for f in video_data.get('formats', []): for format_id, f in uol_formats.items():
if not isinstance(f, dict):
continue
f_url = f.get('url') or f.get('secureUrl') f_url = f.get('url') or f.get('secureUrl')
if not f_url: if not f_url:
continue continue
query = {
'ver': ver,
'r': 'http://mais.uol.com.br',
}
for k in ('token', 'sign'):
v = f.get(k)
if v:
query[k] = v
f_url = update_url_query(f_url, query) f_url = update_url_query(f_url, query)
format_id = str_or_none(f.get('id')) format_id = format_id
if format_id == '10': if format_id == 'HLS':
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
f_url, video_id, 'mp4', 'm3u8_native', f_url, media_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False)
encoded_query = compat_urllib_parse_urlencode(query)
for m3u8_f in m3u8_formats:
m3u8_f['extra_param_to_segment_url'] = encoded_query
m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
formats.extend(m3u8_formats)
continue continue
fmt = { formats.append({
'format_id': format_id, 'format_id': format_id,
'url': f_url, 'url': f_url,
'source_preference': 1, 'quality': quality(format_id),
} 'preference': -1,
fmt.update(self._FORMATS.get(format_id, {})) })
formats.append(fmt) self._sort_formats(formats)
self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
tags = [] tags = []
for tag in video_data.get('tags', []): for tag in video_data.get('tags', []):
@ -148,12 +121,24 @@ class UOLIE(InfoExtractor):
continue continue
tags.append(tag_description) tags.append(tag_description)
thumbnails = []
for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
q_url = video_data.get('thumb' + q)
if not q_url:
continue
thumbnails.append({
'id': q,
'url': q_url,
})
return { return {
'id': media_id, 'id': media_id,
'title': title, 'title': title,
'description': clean_html(video_data.get('desMedia')), 'description': clean_html(video_data.get('description')),
'thumbnail': video_data.get('thumbnail'), 'thumbnails': thumbnails,
'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')), 'duration': parse_duration(video_data.get('duration')),
'tags': tags, 'tags': tags,
'formats': formats, 'formats': formats,
'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
'view_count': int_or_none(video_data.get('viewsQtty')),
} }

View File

@ -140,28 +140,28 @@ class VimeoBaseInfoExtractor(InfoExtractor):
}) })
# TODO: fix handling of 308 status code returned for live archive manifest requests # TODO: fix handling of 308 status code returned for live archive manifest requests
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'): for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items(): for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url') manifest_url = cdn_data.get('url')
if not manifest_url: if not manifest_url:
continue continue
format_id = '%s-%s' % (files_type, cdn_name) format_id = '%s-%s' % (files_type, cdn_name)
if files_type == 'hls': sep_manifest_urls = []
formats.extend(self._extract_m3u8_formats( if re.search(sep_pattern, manifest_url):
manifest_url, video_id, 'mp4', for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id, sep_manifest_urls.append((format_id + suffix, re.sub(
note='Downloading %s m3u8 information' % cdn_name, sep_pattern, '/%s/' % repl, manifest_url)))
fatal=False)) else:
elif files_type == 'dash': sep_manifest_urls = [(format_id, manifest_url)]
mpd_pattern = r'/%s/(?:sep/)?video/' % video_id for f_id, m_url in sep_manifest_urls:
mpd_manifest_urls = [] if files_type == 'hls':
if re.search(mpd_pattern, manifest_url): formats.extend(self._extract_m3u8_formats(
for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): m_url, video_id, 'mp4',
mpd_manifest_urls.append((format_id + suffix, re.sub( 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url))) note='Downloading %s m3u8 information' % cdn_name,
else: fatal=False))
mpd_manifest_urls = [(format_id, manifest_url)] elif files_type == 'dash':
for f_id, m_url in mpd_manifest_urls:
if 'json=1' in m_url: if 'json=1' in m_url:
real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url') real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
if real_m_url: if real_m_url:
@ -170,11 +170,6 @@ class VimeoBaseInfoExtractor(InfoExtractor):
m_url.replace('/master.json', '/master.mpd'), video_id, f_id, m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name, 'Downloading %s MPD information' % cdn_name,
fatal=False) fatal=False)
for f in mpd_formats:
if f.get('vcodec') == 'none':
f['preference'] = -50
elif f.get('acodec') == 'none':
f['preference'] = -40
formats.extend(mpd_formats) formats.extend(mpd_formats)
live_archive = live_event.get('archive') or {} live_archive = live_event.get('archive') or {}
@ -186,6 +181,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'preference': 1, 'preference': 1,
}) })
for f in formats:
if f.get('vcodec') == 'none':
f['preference'] = -50
elif f.get('acodec') == 'none':
f['preference'] = -40
subtitles = {} subtitles = {}
text_tracks = config['request'].get('text_tracks') text_tracks = config['request'].get('text_tracks')
if text_tracks: if text_tracks:

View File

@ -12,6 +12,7 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError,
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
@ -368,31 +369,47 @@ class YahooGyaOPlayerIE(InfoExtractor):
'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682', 'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
'only_matching': True, 'only_matching': True,
}] }]
_GEO_BYPASS = False
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).replace('/', ':') video_id = self._match_id(url).replace('/', ':')
video = self._download_json( headers = self.geo_verification_headers()
'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id, headers['Accept'] = 'application/json'
video_id, query={ resp = self._download_json(
'fields': 'longDescription,title,videoId', 'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={
}, headers={ 'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web', 'query': '''{
}) content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
video {
delivery {
id
}
title
}
}
}''' % video_id,
}, headers=headers)
content = resp['data']['content']
if not content:
msg = resp['errors'][0]['message']
if msg == 'not in japan':
self.raise_geo_restricted(countries=['JP'])
raise ExtractorError(msg)
video = content['video']
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,
'title': video['title'], 'title': video['title'],
'url': smuggle_url( 'url': smuggle_url(
'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['videoId'], 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
{'geo_countries': ['JP']}), {'geo_countries': ['JP']}),
'description': video.get('longDescription'),
'ie_key': BrightcoveNewIE.ie_key(), 'ie_key': BrightcoveNewIE.ie_key(),
} }
class YahooGyaOIE(InfoExtractor): class YahooGyaOIE(InfoExtractor):
IE_NAME = 'yahoo:gyao' IE_NAME = 'yahoo:gyao'
_VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title/[^/]+)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{ _TESTS = [{
'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/', 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
'info_dict': { 'info_dict': {
@ -405,6 +422,9 @@ class YahooGyaOIE(InfoExtractor):
}, { }, {
'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf', 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -5,7 +5,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
sanitized_Request,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -15,7 +14,7 @@ from ..aes import aes_decrypt_text
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@ -57,16 +56,28 @@ class YouPornIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'only_matching': True,
}, {
'url': 'http://www.youporn.com/watch/505835',
'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
webpage)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id') or video_id
request = sanitized_Request(url) webpage = self._download_webpage(
request.add_header('Cookie', 'age_verified=1') 'http://www.youporn.com/watch/%s' % video_id, display_id,
webpage = self._download_webpage(request, display_id) headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',

View File

@ -388,6 +388,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:www\.)?invidious\.drycat\.fr/| (?:www\.)?invidious\.drycat\.fr/|
(?:www\.)?tube\.poal\.co/| (?:www\.)?tube\.poal\.co/|
(?:www\.)?vid\.wxzm\.sx/| (?:www\.)?vid\.wxzm\.sx/|
(?:www\.)?yewtu\.be/|
(?:www\.)?yt\.elukerio\.org/| (?:www\.)?yt\.elukerio\.org/|
(?:www\.)?yt\.lelux\.fi/| (?:www\.)?yt\.lelux\.fi/|
(?:www\.)?kgg2m7yk5aybusll\.onion/| (?:www\.)?kgg2m7yk5aybusll\.onion/|
@ -426,6 +427,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?(1).+)? # if we found the ID, everything can follow (?(1).+)? # if we found the ID, everything can follow
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_PLAYER_INFO_RE = (
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
)
_formats = { _formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@ -1227,6 +1232,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
'only_matching': True, 'only_matching': True,
}, },
{
# invalid -> valid video id redirection
'url': 'DJztXj2GPfl',
'info_dict': {
'id': 'DJztXj2GPfk',
'ext': 'mp4',
'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
'description': 'md5:bf577a41da97918e94fa9798d9228825',
'upload_date': '20090125',
'uploader': 'Prochorowka',
'uploader_id': 'Prochorowka',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
'artist': 'Panjabi MC',
'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
'album': 'Beware of the Boys (Mundian To Bach Ke)',
},
'params': {
'skip_download': True,
},
}
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -1253,14 +1278,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
""" Return a string representation of a signature """ """ Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
def _extract_signature_function(self, video_id, player_url, example_sig): @classmethod
id_m = re.match( def _extract_player_info(cls, player_url):
r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', for player_re in cls._PLAYER_INFO_RE:
player_url) id_m = re.search(player_re, player_url)
if not id_m: if id_m:
break
else:
raise ExtractorError('Cannot identify player %r' % player_url) raise ExtractorError('Cannot identify player %r' % player_url)
player_type = id_m.group('ext') return id_m.group('ext'), id_m.group('id')
player_id = id_m.group('id')
def _extract_signature_function(self, video_id, player_url, example_sig):
player_type, player_id = self._extract_player_info(player_url)
# Read from filesystem cache # Read from filesystem cache
func_id = '%s_%s_%s' % ( func_id = '%s_%s_%s' % (
@ -1678,7 +1707,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video webpage # Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
video_webpage = self._download_webpage(url, video_id) video_webpage, urlh = self._download_webpage_handle(url, video_id)
qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
video_id = qs.get('v', [None])[0] or video_id
# Attempt to extract SWF player URL # Attempt to extract SWF player URL
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@ -1840,15 +1872,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# fields may contain comma as well (see # fields may contain comma as well (see
# https://github.com/ytdl-org/youtube-dl/issues/8536) # https://github.com/ytdl-org/youtube-dl/issues/8536)
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
def feed_entry(name):
return try_get(feed_data, lambda x: x[name][0], compat_str)
feed_id = feed_entry('id')
if not feed_id:
continue
feed_title = feed_entry('title')
title = video_title
if feed_title:
title += ' (%s)' % feed_title
entries.append({ entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'Youtube', 'ie_key': 'Youtube',
'url': smuggle_url( 'url': smuggle_url(
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]), '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
{'force_singlefeed': True}), {'force_singlefeed': True}),
'title': '%s (%s)' % (video_title, feed_data['title'][0]), 'title': title,
}) })
feed_ids.append(feed_data['id'][0]) feed_ids.append(feed_id)
self.to_screen( self.to_screen(
'Downloading multifeed video (%s) - add --no-playlist to just download video %s' 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
% (', '.join(feed_ids), video_id)) % (', '.join(feed_ids), video_id))
@ -1919,12 +1962,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
for fmt in streaming_formats: for fmt in streaming_formats:
if fmt.get('drm_families'): if fmt.get('drmFamilies') or fmt.get('drm_families'):
continue continue
url = url_or_none(fmt.get('url')) url = url_or_none(fmt.get('url'))
if not url: if not url:
cipher = fmt.get('cipher') cipher = fmt.get('cipher') or fmt.get('signatureCipher')
if not cipher: if not cipher:
continue continue
url_data = compat_parse_qs(cipher) url_data = compat_parse_qs(cipher)
@ -1975,22 +2018,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if self._downloader.params.get('verbose'): if self._downloader.params.get('verbose'):
if player_url is None: if player_url is None:
player_version = 'unknown'
player_desc = 'unknown' player_desc = 'unknown'
else: else:
if player_url.endswith('swf'): player_type, player_version = self._extract_player_info(player_url)
player_version = self._search_regex( player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
parts_sizes = self._signature_cache_id(encrypted_sig) parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen('{%s} signature length %s, %s' % self.to_screen('{%s} signature length %s, %s' %
(format_id, parts_sizes, player_desc)) (format_id, parts_sizes, player_desc))

View File

@ -853,7 +853,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option( postproc.add_option(
'--exec', '--exec',
metavar='CMD', dest='exec_cmd', metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'') help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
postproc.add_option( postproc.add_option(
'--convert-subs', '--convert-subtitles', '--convert-subs', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None, metavar='FORMAT', dest='convertsubtitles', default=None,

View File

@ -7,6 +7,7 @@ import base64
import binascii import binascii
import calendar import calendar
import codecs import codecs
import collections
import contextlib import contextlib
import ctypes import ctypes
import datetime import datetime
@ -30,6 +31,7 @@ import ssl
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
import time
import traceback import traceback
import xml.etree.ElementTree import xml.etree.ElementTree
import zlib import zlib
@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
1. https://curl.haxx.se/docs/http-cookies.html 1. https://curl.haxx.se/docs/http-cookies.html
""" """
_HTTPONLY_PREFIX = '#HttpOnly_' _HTTPONLY_PREFIX = '#HttpOnly_'
_ENTRY_LEN = 7
_HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.
'''
_CookieFileEntry = collections.namedtuple(
'CookieFileEntry',
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
def save(self, filename=None, ignore_discard=False, ignore_expires=False): def save(self, filename=None, ignore_discard=False, ignore_expires=False):
"""
Save cookies to a file.
Most of the code is taken from CPython 3.8 and slightly adapted
to support cookie files with UTF-8 in both python 2 and 3.
"""
if filename is None:
if self.filename is not None:
filename = self.filename
else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
# Store session cookies with `expires` set to 0 instead of an empty # Store session cookies with `expires` set to 0 instead of an empty
# string # string
for cookie in self: for cookie in self:
if cookie.expires is None: if cookie.expires is None:
cookie.expires = 0 cookie.expires = 0
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
with io.open(filename, 'w', encoding='utf-8') as f:
f.write(self._HEADER)
now = time.time()
for cookie in self:
if not ignore_discard and cookie.discard:
continue
if not ignore_expires and cookie.is_expired(now):
continue
if cookie.secure:
secure = 'TRUE'
else:
secure = 'FALSE'
if cookie.domain.startswith('.'):
initial_dot = 'TRUE'
else:
initial_dot = 'FALSE'
if cookie.expires is not None:
expires = compat_str(cookie.expires)
else:
expires = ''
if cookie.value is None:
# cookies.txt regards 'Set-Cookie: foo' as a cookie
# with no name, whereas http.cookiejar regards it as a
# cookie with no value.
name = ''
value = cookie.name
else:
name = cookie.name
value = cookie.value
f.write(
'\t'.join([cookie.domain, initial_dot, cookie.path,
secure, expires, name, value]) + '\n')
def load(self, filename=None, ignore_discard=False, ignore_expires=False): def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file.""" """Load cookies from a file."""
@ -2752,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
else: else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
line = line[len(self._HTTPONLY_PREFIX):]
# comments and empty lines are fine
if line.startswith('#') or not line.strip():
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO() cf = io.StringIO()
with open(filename) as f: with io.open(filename, encoding='utf-8') as f:
for line in f: for line in f:
if line.startswith(self._HTTPONLY_PREFIX): try:
line = line[len(self._HTTPONLY_PREFIX):] cf.write(prepare_line(line))
cf.write(compat_str(line)) except compat_cookiejar.LoadError as e:
write_string(
'WARNING: skipping cookie file entry due to %s: %r\n'
% (e, line), sys.stderr)
continue
cf.seek(0) cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires) self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to # Session cookies are denoted by either `expires` field set to

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2020.03.08' __version__ = '2020.05.08'