mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-31 16:40:09 +09:00
Merge branch 'ytdl-org:master' into master
This commit is contained in:
commit
c8168d9914
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support
|
- [ ] I'm reporting a broken site support
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
|
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2021.06.06
|
[debug] youtube-dl version 2021.12.17
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a new site support request
|
- [ ] I'm reporting a new site support request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
|
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||||
|
@@ -18,13 +18,13 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a site feature request
|
- [ ] I'm reporting a site feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
|
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support issue
|
- [ ] I'm reporting a broken site support issue
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
|
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2021.06.06
|
[debug] youtube-dl version 2021.12.17
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.06.06. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a feature request
|
- [ ] I'm reporting a feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2021.06.06**
|
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
blank_issues_enabled: false
|
25
ChangeLog
25
ChangeLog
@@ -1,3 +1,28 @@
|
|||||||
|
version 2021.12.17
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Update signature function patterns (#30363, #30366)
|
||||||
|
* [peertube] Only call description endpoint if necessary (#29383)
|
||||||
|
* [periscope] Pass referer to HLS requests (#29419)
|
||||||
|
- [liveleak] Remove extractor (#17625, #24222, #29331)
|
||||||
|
+ [pornhub] Add support for pornhubthbh7ap3u.onion
|
||||||
|
* [pornhub] Detect geo restriction
|
||||||
|
* [pornhub] Dismiss tbr extracted from download URLs (#28927)
|
||||||
|
* [curiositystream:collection] Extend _VALID_URL (#26326, #29117)
|
||||||
|
* [youtube] Make get_video_info processing more robust (#29333)
|
||||||
|
* [youtube] Workaround for get_video_info request (#29333)
|
||||||
|
* [bilibili] Strip uploader name (#29202)
|
||||||
|
* [youtube] Update invidious instance list (#29281)
|
||||||
|
* [umg:de] Update GraphQL API URL (#29304)
|
||||||
|
* [nrk] Switch psapi URL to https (#29344)
|
||||||
|
+ [egghead] Add support for app.egghead.io (#28404, #29303)
|
||||||
|
* [appleconnect] Fix extraction (#29208)
|
||||||
|
+ [orf:tvthek] Add support for MPD formats (#28672, #29236)
|
||||||
|
|
||||||
|
|
||||||
version 2021.06.06
|
version 2021.06.06
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@@ -1072,9 +1072,11 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
* the test names use the extractor class name **without the trailing `IE`**
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
* tests with `only_matching` key in test's dict are not counted.
|
||||||
|
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
|
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
@@ -472,8 +472,6 @@
|
|||||||
- **LinuxAcademy**
|
- **LinuxAcademy**
|
||||||
- **LiTV**
|
- **LiTV**
|
||||||
- **LiveJournal**
|
- **LiveJournal**
|
||||||
- **LiveLeak**
|
|
||||||
- **LiveLeakEmbed**
|
|
||||||
- **livestream**
|
- **livestream**
|
||||||
- **livestream:original**
|
- **livestream:original**
|
||||||
- **LnkGo**
|
- **LnkGo**
|
||||||
|
@@ -18,7 +18,6 @@
|
|||||||
"noprogress": false,
|
"noprogress": false,
|
||||||
"outtmpl": "%(id)s.%(ext)s",
|
"outtmpl": "%(id)s.%(ext)s",
|
||||||
"password": null,
|
"password": null,
|
||||||
"playlistend": -1,
|
|
||||||
"playliststart": 1,
|
"playliststart": 1,
|
||||||
"prefer_free_formats": false,
|
"prefer_free_formats": false,
|
||||||
"quiet": false,
|
"quiet": false,
|
||||||
|
@@ -997,6 +997,25 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(downloaded['extractor'], 'Video')
|
self.assertEqual(downloaded['extractor'], 'Video')
|
||||||
self.assertEqual(downloaded['extractor_key'], 'Video')
|
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||||
|
|
||||||
|
def test_default_times(self):
|
||||||
|
"""Test addition of missing upload/release/_date from /release_/timestamp"""
|
||||||
|
info = {
|
||||||
|
'id': '1234',
|
||||||
|
'url': TEST_URL,
|
||||||
|
'title': 'Title',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1631352900,
|
||||||
|
'release_timestamp': 1632995931,
|
||||||
|
}
|
||||||
|
|
||||||
|
params = {'simulate': True, }
|
||||||
|
ydl = FakeYDL(params)
|
||||||
|
out_info = ydl.process_ie_result(info)
|
||||||
|
self.assertTrue(isinstance(out_info['upload_date'], compat_str))
|
||||||
|
self.assertEqual(out_info['upload_date'], '20210911')
|
||||||
|
self.assertTrue(isinstance(out_info['release_date'], compat_str))
|
||||||
|
self.assertEqual(out_info['release_date'], '20210930')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -66,9 +66,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
||||||
|
|
||||||
# def test_youtube_search_matching(self):
|
def test_youtube_search_matching(self):
|
||||||
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
@@ -121,6 +121,7 @@ def generator(test_case, tname):
|
|||||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||||
if is_playlist and 'playlist' not in test_case:
|
if is_playlist and 'playlist' not in test_case:
|
||||||
params.setdefault('extract_flat', 'in_playlist')
|
params.setdefault('extract_flat', 'in_playlist')
|
||||||
|
params.setdefault('playlistend', test_case.get('playlist_mincount'))
|
||||||
params.setdefault('skip_download', True)
|
params.setdefault('skip_download', True)
|
||||||
|
|
||||||
ydl = YoutubeDL(params, auto_init=False)
|
ydl = YoutubeDL(params, auto_init=False)
|
||||||
|
@@ -112,6 +112,72 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('z'), 5)
|
self.assertEqual(jsi.call_function('z'), 5)
|
||||||
|
|
||||||
|
def test_for_loop(self):
|
||||||
|
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { a=0; for (i=0; i-10; i = i + 1) {a++} a }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
def test_switch(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x(f) { switch(f){
|
||||||
|
case 1:f+=1;
|
||||||
|
case 2:f+=2;
|
||||||
|
case 3:f+=3;break;
|
||||||
|
case 4:f+=4;
|
||||||
|
default:f=0;
|
||||||
|
} return f }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x', 1), 7)
|
||||||
|
self.assertEqual(jsi.call_function('x', 3), 6)
|
||||||
|
self.assertEqual(jsi.call_function('x', 5), 0)
|
||||||
|
|
||||||
|
def test_switch_default(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x(f) { switch(f){
|
||||||
|
case 2: f+=2;
|
||||||
|
default: f-=1;
|
||||||
|
case 5:
|
||||||
|
case 6: f+=6;
|
||||||
|
case 0: break;
|
||||||
|
case 1: f+=1;
|
||||||
|
} return f }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x', 1), 2)
|
||||||
|
self.assertEqual(jsi.call_function('x', 5), 11)
|
||||||
|
self.assertEqual(jsi.call_function('x', 9), 14)
|
||||||
|
|
||||||
|
def test_try(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { try{return 10} catch(e){return 5} }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
def test_for_loop_continue(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 0)
|
||||||
|
|
||||||
|
def test_for_loop_break(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { a=0; for (i=0; i-10; i++) { break; a++ } a }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 0)
|
||||||
|
|
||||||
|
def test_literal_list(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { [1, 2, "asdf", [5, 6, 7]][3] }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
|
||||||
|
|
||||||
|
def test_comma(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { a=5; a -= 1, a+=3; return a }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 7)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
@@ -9,11 +10,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
YoutubeIE,
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubeTabIE,
|
YoutubeTabIE,
|
||||||
YoutubeIE,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -25,38 +25,23 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_youtube_playlist_noplaylist(self):
|
def test_youtube_playlist_noplaylist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
dl.params['noplaylist'] = True
|
dl.params['noplaylist'] = True
|
||||||
|
dl.params['format'] = 'best'
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertEqual(result['_type'], 'url')
|
self.assertEqual(result['_type'], 'url')
|
||||||
|
result = dl.extract_info(result['url'], download=False, ie_key=result.get('ie_key'), process=False)
|
||||||
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||||
|
|
||||||
def test_youtube_course(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubePlaylistIE(dl)
|
|
||||||
# TODO find a > 100 (paginating?) videos course
|
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
|
||||||
entries = list(result['entries'])
|
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
|
||||||
self.assertEqual(len(entries), 25)
|
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
|
||||||
|
|
||||||
def test_youtube_mix(self):
|
def test_youtube_mix(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
dl.params['format'] = 'best'
|
||||||
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
ie = YoutubeTabIE(dl)
|
||||||
entries = result['entries']
|
result = dl.extract_info('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8',
|
||||||
self.assertTrue(len(entries) >= 50)
|
download=False, ie_key=ie.ie_key(), process=True)
|
||||||
|
entries = (result or {}).get('entries', [{'id': 'not_found', }])
|
||||||
|
self.assertTrue(len(entries) >= 25)
|
||||||
original_video = entries[0]
|
original_video = entries[0]
|
||||||
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
self.assertEqual(original_video['id'], 'tyITL_exICo')
|
||||||
|
|
||||||
def test_youtube_toptracks(self):
|
|
||||||
print('Skipping: The playlist page gives error 500')
|
|
||||||
return
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubePlaylistIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertEqual(len(entries), 100)
|
|
||||||
|
|
||||||
def test_youtube_flat_playlist_extraction(self):
|
def test_youtube_flat_playlist_extraction(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@@ -67,7 +52,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
entries = list(result['entries'])
|
entries = list(result['entries'])
|
||||||
self.assertTrue(len(entries) == 1)
|
self.assertTrue(len(entries) == 1)
|
||||||
video = entries[0]
|
video = entries[0]
|
||||||
self.assertEqual(video['_type'], 'url_transparent')
|
self.assertEqual(video['_type'], 'url')
|
||||||
self.assertEqual(video['ie_key'], 'Youtube')
|
self.assertEqual(video['ie_key'], 'Youtube')
|
||||||
self.assertEqual(video['id'], 'BaW_jenozKc')
|
self.assertEqual(video['id'], 'BaW_jenozKc')
|
||||||
self.assertEqual(video['url'], 'BaW_jenozKc')
|
self.assertEqual(video['url'], 'BaW_jenozKc')
|
||||||
|
@@ -14,9 +14,10 @@ import string
|
|||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
from youtube_dl.jsinterp import JSInterpreter
|
||||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||||
|
|
||||||
_TESTS = [
|
_SIG_TESTS = [
|
||||||
(
|
(
|
||||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||||
86,
|
86,
|
||||||
@@ -64,6 +65,33 @@ _TESTS = [
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_NSIG_TESTS = [
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
|
||||||
|
'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
|
||||||
|
'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
|
||||||
|
'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
|
||||||
|
'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
|
||||||
|
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
|
||||||
|
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class TestPlayerInfo(unittest.TestCase):
|
class TestPlayerInfo(unittest.TestCase):
|
||||||
def test_youtube_extract_player_info(self):
|
def test_youtube_extract_player_info(self):
|
||||||
@@ -90,40 +118,61 @@ class TestSignature(unittest.TestCase):
|
|||||||
class TestSignature(unittest.TestCase):
|
class TestSignature(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
|
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
|
||||||
if not os.path.exists(self.TESTDATA_DIR):
|
if not os.path.exists(self.TESTDATA_DIR):
|
||||||
os.mkdir(self.TESTDATA_DIR)
|
os.mkdir(self.TESTDATA_DIR)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
def make_tfunc(url, sig_input, expected_sig):
|
try:
|
||||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
|
for f in os.listdir(self.TESTDATA_DIR):
|
||||||
assert m, '%r should follow URL format' % url
|
os.remove(f)
|
||||||
test_id = m.group(1)
|
except OSError:
|
||||||
|
pass
|
||||||
def test_func(self):
|
|
||||||
basename = 'player-%s.js' % test_id
|
|
||||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
|
||||||
|
|
||||||
if not os.path.exists(fn):
|
|
||||||
compat_urlretrieve(url, fn)
|
|
||||||
|
|
||||||
ydl = FakeYDL()
|
|
||||||
ie = YoutubeIE(ydl)
|
|
||||||
with io.open(fn, encoding='utf-8') as testf:
|
|
||||||
jscode = testf.read()
|
|
||||||
func = ie._parse_sig_js(jscode)
|
|
||||||
src_sig = (
|
|
||||||
compat_str(string.printable[:sig_input])
|
|
||||||
if isinstance(sig_input, int) else sig_input)
|
|
||||||
got_sig = func(src_sig)
|
|
||||||
self.assertEqual(got_sig, expected_sig)
|
|
||||||
|
|
||||||
test_func.__name__ = str('test_signature_js_' + test_id)
|
|
||||||
setattr(TestSignature, test_func.__name__, test_func)
|
|
||||||
|
|
||||||
|
|
||||||
for test_spec in _TESTS:
|
def t_factory(name, sig_func, url_pattern):
|
||||||
make_tfunc(*test_spec)
|
def make_tfunc(url, sig_input, expected_sig):
|
||||||
|
m = url_pattern.match(url)
|
||||||
|
assert m, '%r should follow URL format' % url
|
||||||
|
test_id = m.group('id')
|
||||||
|
|
||||||
|
def test_func(self):
|
||||||
|
basename = 'player-{0}-{1}.js'.format(name, test_id)
|
||||||
|
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||||
|
|
||||||
|
if not os.path.exists(fn):
|
||||||
|
compat_urlretrieve(url, fn)
|
||||||
|
with io.open(fn, encoding='utf-8') as testf:
|
||||||
|
jscode = testf.read()
|
||||||
|
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
|
||||||
|
|
||||||
|
test_func.__name__ = str('test_{0}_js_{1}'.format(name, test_id))
|
||||||
|
setattr(TestSignature, test_func.__name__, test_func)
|
||||||
|
return make_tfunc
|
||||||
|
|
||||||
|
|
||||||
|
def signature(jscode, sig_input):
|
||||||
|
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
|
||||||
|
src_sig = (
|
||||||
|
compat_str(string.printable[:sig_input])
|
||||||
|
if isinstance(sig_input, int) else sig_input)
|
||||||
|
return func(src_sig)
|
||||||
|
|
||||||
|
|
||||||
|
def n_sig(jscode, sig_input):
|
||||||
|
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
|
||||||
|
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||||
|
|
||||||
|
|
||||||
|
make_sig_test = t_factory(
|
||||||
|
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
|
||||||
|
for test_spec in _SIG_TESTS:
|
||||||
|
make_sig_test(*test_spec)
|
||||||
|
|
||||||
|
make_nsig_test = t_factory(
|
||||||
|
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
|
||||||
|
for test_spec in _NSIG_TESTS:
|
||||||
|
make_nsig_test(*test_spec)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1529,7 +1529,7 @@ class YoutubeDL(object):
|
|||||||
# see http://bugs.python.org/issue1646728)
|
# see http://bugs.python.org/issue1646728)
|
||||||
try:
|
try:
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
||||||
except (ValueError, OverflowError, OSError):
|
except (ValueError, OverflowError, OSError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -1906,8 +1906,17 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
try:
|
try:
|
||||||
|
def checked_get_suitable_downloader(info_dict, params):
|
||||||
|
ed_args = params.get('external_downloader_args')
|
||||||
|
dler = get_suitable_downloader(info_dict, params)
|
||||||
|
if ed_args and not params.get('external_downloader_args'):
|
||||||
|
# external_downloader_args was cleared because external_downloader was rejected
|
||||||
|
self.report_warning('Requested external downloader cannot be used: '
|
||||||
|
'ignoring --external-downloader-args.')
|
||||||
|
return dler
|
||||||
|
|
||||||
def dl(name, info):
|
def dl(name, info):
|
||||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
|
||||||
for ph in self._progress_hooks:
|
for ph in self._progress_hooks:
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
|
@ -21,6 +21,10 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
try:
|
||||||
|
import collections.abc as compat_collections_abc
|
||||||
|
except ImportError:
|
||||||
|
import collections as compat_collections_abc
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.request as compat_urllib_request
|
import urllib.request as compat_urllib_request
|
||||||
@ -2962,6 +2966,25 @@ else:
|
|||||||
compat_Struct = struct.Struct
|
compat_Struct = struct.Struct
|
||||||
|
|
||||||
|
|
||||||
|
# compat_map/filter() returning an iterator, supposedly the
|
||||||
|
# same versioning as for zip below
|
||||||
|
try:
|
||||||
|
from future_builtins import map as compat_map
|
||||||
|
except ImportError:
|
||||||
|
try:
|
||||||
|
from itertools import imap as compat_map
|
||||||
|
except ImportError:
|
||||||
|
compat_map = map
|
||||||
|
|
||||||
|
try:
|
||||||
|
from future_builtins import filter as compat_filter
|
||||||
|
except ImportError:
|
||||||
|
try:
|
||||||
|
from itertools import ifilter as compat_filter
|
||||||
|
except ImportError:
|
||||||
|
compat_filter = filter
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from future_builtins import zip as compat_zip
|
from future_builtins import zip as compat_zip
|
||||||
except ImportError: # not 2.6+ or is 3.x
|
except ImportError: # not 2.6+ or is 3.x
|
||||||
@ -3006,6 +3029,7 @@ __all__ = [
|
|||||||
'compat_b64decode',
|
'compat_b64decode',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
|
'compat_collections_abc',
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
'compat_cookiejar_Cookie',
|
'compat_cookiejar_Cookie',
|
||||||
'compat_cookies',
|
'compat_cookies',
|
||||||
@ -3015,6 +3039,7 @@ __all__ = [
|
|||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
'compat_etree_register_namespace',
|
'compat_etree_register_namespace',
|
||||||
'compat_expanduser',
|
'compat_expanduser',
|
||||||
|
'compat_filter',
|
||||||
'compat_get_terminal_size',
|
'compat_get_terminal_size',
|
||||||
'compat_getenv',
|
'compat_getenv',
|
||||||
'compat_getpass',
|
'compat_getpass',
|
||||||
@ -3026,6 +3051,7 @@ __all__ = [
|
|||||||
'compat_integer_types',
|
'compat_integer_types',
|
||||||
'compat_itertools_count',
|
'compat_itertools_count',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
|
'compat_map',
|
||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
|
@ -1,22 +1,31 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
determine_protocol,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_suitable_downloader(info_dict, params={}):
|
||||||
|
info_dict['protocol'] = determine_protocol(info_dict)
|
||||||
|
info_copy = info_dict.copy()
|
||||||
|
return _get_suitable_downloader(info_copy, params)
|
||||||
|
|
||||||
|
|
||||||
|
# Some of these require get_suitable_downloader
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
|
from .dash import DashSegmentsFD
|
||||||
from .f4m import F4mFD
|
from .f4m import F4mFD
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
from .dash import DashSegmentsFD
|
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .ism import IsmFD
|
from .ism import IsmFD
|
||||||
|
from .niconico import NiconicoDmcFD
|
||||||
from .external import (
|
from .external import (
|
||||||
get_external_downloader,
|
get_external_downloader,
|
||||||
FFmpegFD,
|
FFmpegFD,
|
||||||
)
|
)
|
||||||
|
|
||||||
from ..utils import (
|
|
||||||
determine_protocol,
|
|
||||||
)
|
|
||||||
|
|
||||||
PROTOCOL_MAP = {
|
PROTOCOL_MAP = {
|
||||||
'rtmp': RtmpFD,
|
'rtmp': RtmpFD,
|
||||||
'm3u8_native': HlsFD,
|
'm3u8_native': HlsFD,
|
||||||
@ -26,13 +35,12 @@ PROTOCOL_MAP = {
|
|||||||
'f4m': F4mFD,
|
'f4m': F4mFD,
|
||||||
'http_dash_segments': DashSegmentsFD,
|
'http_dash_segments': DashSegmentsFD,
|
||||||
'ism': IsmFD,
|
'ism': IsmFD,
|
||||||
|
'niconico_dmc': NiconicoDmcFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_suitable_downloader(info_dict, params={}):
|
def _get_suitable_downloader(info_dict, params={}):
|
||||||
"""Get the downloader class that can handle the info dict."""
|
"""Get the downloader class that can handle the info dict."""
|
||||||
protocol = determine_protocol(info_dict)
|
|
||||||
info_dict['protocol'] = protocol
|
|
||||||
|
|
||||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||||
# return FFmpegFD
|
# return FFmpegFD
|
||||||
@ -42,7 +50,11 @@ def get_suitable_downloader(info_dict, params={}):
|
|||||||
ed = get_external_downloader(external_downloader)
|
ed = get_external_downloader(external_downloader)
|
||||||
if ed.can_download(info_dict):
|
if ed.can_download(info_dict):
|
||||||
return ed
|
return ed
|
||||||
|
# Avoid using unwanted args since external_downloader was rejected
|
||||||
|
if params.get('external_downloader_args'):
|
||||||
|
params['external_downloader_args'] = None
|
||||||
|
|
||||||
|
protocol = info_dict['protocol']
|
||||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||||
return FFmpegFD
|
return FFmpegFD
|
||||||
|
|
||||||
|
66
youtube_dl/downloader/niconico.py
Normal file
66
youtube_dl/downloader/niconico.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
try:
|
||||||
|
import threading
|
||||||
|
except ImportError:
|
||||||
|
threading = None
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from ..downloader import get_suitable_downloader
|
||||||
|
from ..extractor.niconico import NiconicoIE
|
||||||
|
from ..utils import sanitized_Request
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoDmcFD(FileDownloader):
|
||||||
|
""" Downloading niconico douga from DMC with heartbeat """
|
||||||
|
|
||||||
|
FD_NAME = 'niconico_dmc'
|
||||||
|
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
||||||
|
|
||||||
|
ie = NiconicoIE(self.ydl)
|
||||||
|
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||||
|
|
||||||
|
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
||||||
|
for ph in self._progress_hooks:
|
||||||
|
fd.add_progress_hook(ph)
|
||||||
|
|
||||||
|
if not threading:
|
||||||
|
self.to_screen('[%s] Threading for Heartbeat not available' % self.FD_NAME)
|
||||||
|
return fd.real_download(filename, info_dict)
|
||||||
|
|
||||||
|
success = download_complete = False
|
||||||
|
timer = [None]
|
||||||
|
heartbeat_lock = threading.Lock()
|
||||||
|
heartbeat_url = heartbeat_info_dict['url']
|
||||||
|
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||||
|
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||||
|
|
||||||
|
request = sanitized_Request(heartbeat_url, heartbeat_data)
|
||||||
|
|
||||||
|
def heartbeat():
|
||||||
|
try:
|
||||||
|
self.ydl.urlopen(request).read()
|
||||||
|
except Exception:
|
||||||
|
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
||||||
|
|
||||||
|
with heartbeat_lock:
|
||||||
|
if not download_complete:
|
||||||
|
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
||||||
|
timer[0].start()
|
||||||
|
|
||||||
|
heartbeat_info_dict['ping']()
|
||||||
|
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
||||||
|
try:
|
||||||
|
heartbeat()
|
||||||
|
if type(fd).__name__ == 'HlsFD':
|
||||||
|
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
||||||
|
success = fd.real_download(filename, info_dict)
|
||||||
|
finally:
|
||||||
|
if heartbeat_lock:
|
||||||
|
with heartbeat_lock:
|
||||||
|
timer[0].cancel()
|
||||||
|
download_complete = True
|
||||||
|
return success
|
@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
|
|||||||
'id': '2800002704436634',
|
'id': '2800002704436634',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CASIMA7.22',
|
'title': 'CASIMA7.22',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'uploader': 'CASIMA Official Store',
|
'uploader': 'CASIMA Official Store',
|
||||||
'timestamp': 1500717600,
|
'timestamp': 1500717600,
|
||||||
'upload_date': '20170722',
|
'upload_date': '20170722',
|
||||||
|
89
youtube_dl/extractor/alsace20tv.py
Normal file
89
youtube_dl/extractor/alsace20tv.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
get_element_by_class,
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
|
||||||
|
# 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lyNHCXpYJh',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
|
||||||
|
'title': 'Votre JT du jeudi 3 février',
|
||||||
|
'upload_date': '20220203',
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'duration': 1073,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_video(self, video_id, url=None):
|
||||||
|
info = self._download_json(
|
||||||
|
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
|
||||||
|
video_id) or {}
|
||||||
|
title = info['titre']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for res, fmt_url in (info.get('files') or {}).items():
|
||||||
|
formats.extend(
|
||||||
|
self._extract_smil_formats(fmt_url, video_id, fatal=False)
|
||||||
|
if '/smil:_' in fmt_url
|
||||||
|
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
|
||||||
|
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
|
||||||
|
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
|
||||||
|
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': clean_html(get_element_by_class('wysiwyg', webpage)),
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': int_or_none(self._og_search_property('video:duration', webpage) if webpage else None),
|
||||||
|
'view_count': int_or_none(info.get('nb_vues')),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self._extract_video(video_id, url)
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVEmbedIE(Alsace20TVIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
|
||||||
|
# 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lyNHCXpYJh',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Votre JT du jeudi 3 février',
|
||||||
|
'upload_date': '20220203',
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self._extract_video(video_id)
|
@ -3,8 +3,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
clean_podcast_url,
|
clean_podcast_url,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_codecs,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
@ -14,16 +17,17 @@ class ApplePodcastsIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000482637777',
|
'id': '1000482637777',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '207 - Whitney Webb Returns',
|
'title': '207 - Whitney Webb Returns',
|
||||||
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||||
'upload_date': '20200705',
|
'upload_date': '20200705',
|
||||||
'timestamp': 1593921600,
|
'timestamp': 1593932400,
|
||||||
'duration': 6425,
|
'duration': 6454,
|
||||||
'series': 'The Tim Dillon Show',
|
'series': 'The Tim Dillon Show',
|
||||||
|
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
@ -39,19 +43,40 @@ class ApplePodcastsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode_id = self._match_id(url)
|
episode_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, episode_id)
|
webpage = self._download_webpage(url, episode_id)
|
||||||
ember_data = self._parse_json(self._search_regex(
|
episode_data = {}
|
||||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
ember_data = {}
|
||||||
webpage, 'ember data'), episode_id)
|
# new page type 2021-11
|
||||||
ember_data = ember_data.get(episode_id) or ember_data
|
amp_data = self._parse_json(self._search_regex(
|
||||||
episode = ember_data['data']['attributes']
|
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||||
|
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||||
|
amp_data = try_get(amp_data,
|
||||||
|
lambda a: self._parse_json(
|
||||||
|
next(a[x] for x in iter(a) if episode_id in x),
|
||||||
|
episode_id),
|
||||||
|
dict) or {}
|
||||||
|
amp_data = amp_data.get('d') or []
|
||||||
|
episode_data = try_get(
|
||||||
|
amp_data,
|
||||||
|
lambda a: next(x for x in a
|
||||||
|
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||||
|
dict)
|
||||||
|
if not episode_data:
|
||||||
|
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||||
|
ember_data = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||||
|
webpage, 'ember data'), episode_id) or {}
|
||||||
|
ember_data = ember_data.get(episode_id) or ember_data
|
||||||
|
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||||
|
episode = episode_data['attributes']
|
||||||
description = episode.get('description') or {}
|
description = episode.get('description') or {}
|
||||||
|
|
||||||
series = None
|
series = None
|
||||||
for inc in (ember_data.get('included') or []):
|
for inc in (amp_data or ember_data.get('included') or []):
|
||||||
if inc.get('type') == 'media/podcast':
|
if inc.get('type') == 'media/podcast':
|
||||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||||
|
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||||
|
|
||||||
return {
|
info = [{
|
||||||
'id': episode_id,
|
'id': episode_id,
|
||||||
'title': episode['name'],
|
'title': episode['name'],
|
||||||
'url': clean_podcast_url(episode['assetUrl']),
|
'url': clean_podcast_url(episode['assetUrl']),
|
||||||
@ -59,4 +84,10 @@ class ApplePodcastsIE(InfoExtractor):
|
|||||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||||
'series': series,
|
'series': series,
|
||||||
}
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
}]
|
||||||
|
self._sort_formats(info)
|
||||||
|
info = info[0]
|
||||||
|
codecs = parse_codecs(info.get('ext', 'mp3'))
|
||||||
|
info.update(codecs)
|
||||||
|
return info
|
||||||
|
@ -332,9 +332,24 @@ class ARDIE(InfoExtractor):
|
|||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
_SUB_FORMATS = (
|
||||||
|
('./dataTimedText', 'ttml'),
|
||||||
|
('./dataTimedTextNoOffset', 'ttml'),
|
||||||
|
('./dataTimedTextVtt', 'vtt'),
|
||||||
|
)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for subsel, subext in _SUB_FORMATS:
|
||||||
|
for node in video_node.findall(subsel):
|
||||||
|
subtitles.setdefault('de', []).append({
|
||||||
|
'url': node.attrib['url'],
|
||||||
|
'ext': subext,
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': xpath_text(video_node, './videoId', default=display_id),
|
'id': xpath_text(video_node, './videoId', default=display_id),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video_node.find('./title').text,
|
'title': video_node.find('./title').text,
|
||||||
'duration': parse_duration(video_node.find('./duration').text),
|
'duration': parse_duration(video_node.find('./duration').text),
|
||||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
|||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVCategoryIE(ArteTVBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'politics-and-society',
|
||||||
|
'title': 'Politics and society',
|
||||||
|
'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 13,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return (
|
||||||
|
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
|
||||||
|
and super(ArteTVCategoryIE, cls).suitable(url))
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
items = []
|
||||||
|
for video in re.finditer(
|
||||||
|
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
|
||||||
|
webpage):
|
||||||
|
video = video.group('url')
|
||||||
|
if video == url:
|
||||||
|
continue
|
||||||
|
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
|
||||||
|
items.append(video)
|
||||||
|
|
||||||
|
if items:
|
||||||
|
title = (self._og_search_title(webpage, default=None)
|
||||||
|
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
|
||||||
|
title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
|
||||||
|
|
||||||
|
result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
|
||||||
|
if result:
|
||||||
|
description = self._og_search_description(webpage, default=None)
|
||||||
|
if description:
|
||||||
|
result['description'] = description
|
||||||
|
return result
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AudiomackIE(InfoExtractor):
|
class AudiomackIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
|
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)'
|
||||||
IE_NAME = 'audiomack'
|
IE_NAME = 'audiomack'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# hosted on audiomack
|
# hosted on audiomack
|
||||||
@ -29,25 +29,27 @@ class AudiomackIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
# audiomack wrapper around soundcloud song
|
# audiomack wrapper around soundcloud song
|
||||||
|
# Needs new test URL.
|
||||||
{
|
{
|
||||||
'add_ie': ['Soundcloud'],
|
'add_ie': ['Soundcloud'],
|
||||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '258901379',
|
# 'info_dict': {
|
||||||
'ext': 'mp3',
|
# 'id': '258901379',
|
||||||
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
# 'ext': 'mp3',
|
||||||
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
# 'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||||
'uploader': 'ILOVEMAKONNEN',
|
# 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||||
'upload_date': '20160414',
|
# 'uploader': 'ILOVEMAKONNEN',
|
||||||
}
|
# 'upload_date': '20160414',
|
||||||
|
# }
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# URLs end with [uploader name]/[uploader title]
|
# URLs end with [uploader name]/song/[uploader title]
|
||||||
# this title is whatever the user types in, and is rarely
|
# this title is whatever the user types in, and is rarely
|
||||||
# the proper song title. Real metadata is in the api response
|
# the proper song title. Real metadata is in the api response
|
||||||
album_url_tag = self._match_id(url)
|
album_url_tag = self._match_id(url).replace('/song/', '/')
|
||||||
|
|
||||||
# Request the extended version of the api for extra fields like artist and title
|
# Request the extended version of the api for extra fields like artist and title
|
||||||
api_response = self._download_json(
|
api_response = self._download_json(
|
||||||
@ -73,13 +75,13 @@ class AudiomackIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class AudiomackAlbumIE(InfoExtractor):
|
class AudiomackAlbumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)'
|
||||||
IE_NAME = 'audiomack:album'
|
IE_NAME = 'audiomack:album'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Standard album playlist
|
# Standard album playlist
|
||||||
{
|
{
|
||||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||||
'playlist_count': 15,
|
'playlist_count': 11,
|
||||||
'info_dict':
|
'info_dict':
|
||||||
{
|
{
|
||||||
'id': '812251',
|
'id': '812251',
|
||||||
@ -95,24 +97,24 @@ class AudiomackAlbumIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
||||||
'id': '837577',
|
'id': '837580',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
'playliststart': 9,
|
'playliststart': 2,
|
||||||
'playlistend': 9,
|
'playlistend': 2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# URLs end with [uploader name]/[uploader title]
|
# URLs end with [uploader name]/album/[uploader title]
|
||||||
# this title is whatever the user types in, and is rarely
|
# this title is whatever the user types in, and is rarely
|
||||||
# the proper song title. Real metadata is in the api response
|
# the proper song title. Real metadata is in the api response
|
||||||
album_url_tag = self._match_id(url)
|
album_url_tag = self._match_id(url).replace('/album/', '/')
|
||||||
result = {'_type': 'playlist', 'entries': []}
|
result = {'_type': 'playlist', 'entries': []}
|
||||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||||
@ -134,7 +136,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
|||||||
# Pull out the album metadata and add to result (if it exists)
|
# Pull out the album metadata and add to result (if it exists)
|
||||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||||
if apikey in api_response and resultkey not in result:
|
if apikey in api_response and resultkey not in result:
|
||||||
result[resultkey] = api_response[apikey]
|
result[resultkey] = compat_str(api_response[apikey])
|
||||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||||
result['entries'].append({
|
result['entries'].append({
|
||||||
'id': compat_str(api_response.get('id', song_id)),
|
'id': compat_str(api_response.get('id', song_id)),
|
||||||
|
@ -12,6 +12,7 @@ from ..compat import (
|
|||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_error,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@ -39,7 +40,7 @@ from ..utils import (
|
|||||||
class BBCCoUkIE(InfoExtractor):
|
class BBCCoUkIE(InfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk'
|
IE_NAME = 'bbc.co.uk'
|
||||||
IE_DESC = 'BBC iPlayer'
|
IE_DESC = 'BBC iPlayer'
|
||||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?bbc\.co\.uk/
|
(?:www\.)?bbc\.co\.uk/
|
||||||
@ -395,9 +396,17 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
href, programme_id, mpd_id=format_id, fatal=False))
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
elif transfer_format == 'hls':
|
elif transfer_format == 'hls':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
# TODO: let expected_status be passed into _extract_xxx_formats() instead
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
try:
|
||||||
m3u8_id=format_id, fatal=False))
|
fmts = self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
|
||||||
|
and e.exc_info[1].code in (403, 404)):
|
||||||
|
raise
|
||||||
|
fmts = []
|
||||||
|
formats.extend(fmts)
|
||||||
elif transfer_format == 'hds':
|
elif transfer_format == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, programme_id, f4m_id=format_id, fatal=False))
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
@ -775,21 +784,33 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'timestamp': 1437785037,
|
'timestamp': 1437785037,
|
||||||
'upload_date': '20150725',
|
'upload_date': '20150725',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||||
|
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p0b71qth',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Why France is making this woman a national hero',
|
||||||
|
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1638230731,
|
||||||
|
'upload_date': '20211130',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# bbcthreeConfig
|
||||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p06556y7',
|
'id': 'p06556y7',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
'title': 'Things Not To Say to people that live on council estates',
|
||||||
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
|
||||||
|
'duration': 360,
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# window.__PRELOADED_STATE__
|
# window.__PRELOADED_STATE__
|
||||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||||
@ -1162,9 +1183,16 @@ class BBCIE(BBCCoUkIE):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
initial_data = self._parse_json(self._search_regex(
|
initial_data = self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||||
'preload state', default='{}'), playlist_id, fatal=False)
|
'quoted preload state', default=None)
|
||||||
|
if initial_data is None:
|
||||||
|
initial_data = self._search_regex(
|
||||||
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||||
|
'preload state', default={})
|
||||||
|
else:
|
||||||
|
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||||
|
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||||
if initial_data:
|
if initial_data:
|
||||||
def parse_media(media):
|
def parse_media(media):
|
||||||
if not media:
|
if not media:
|
||||||
@ -1205,7 +1233,10 @@ class BBCIE(BBCCoUkIE):
|
|||||||
if name == 'media-experience':
|
if name == 'media-experience':
|
||||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
elif name == 'article':
|
elif name == 'article':
|
||||||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
for block in (try_get(resp,
|
||||||
|
(lambda x: x['data']['blocks'],
|
||||||
|
lambda x: x['data']['content']['model']['blocks'],),
|
||||||
|
list) or []):
|
||||||
if block.get('type') != 'media':
|
if block.get('type') != 'media':
|
||||||
continue
|
continue
|
||||||
parse_media(block.get('model'))
|
parse_media(block.get('model'))
|
||||||
|
59
youtube_dl/extractor/bigo.py
Normal file
59
youtube_dl/extractor/bigo.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError, urlencode_postdata
|
||||||
|
|
||||||
|
|
||||||
|
class BigoIE(InfoExtractor):
    """Extractor for live broadcasts on bigo.tv user pages."""

    _VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.bigo.tv/ja/221338632',
        'info_dict': {
            'id': '6576287577575737440',
            'title': '土よ〜💁♂️ 休憩室/REST room',
            'thumbnail': r're:https?://.+',
            'uploader': '✨Shin💫',
            'uploader_id': '221338632',
            'is_live': True,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://www.bigo.tv/th/Tarlerm1304',
        'only_matching': True,
    }, {
        'url': 'https://bigo.tv/115976881',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the studio-info endpoint and build the live stream result.

        Raises ExtractorError when the response is not a JSON object, when
        the response carries a truthy 'code', when the user is not flagged
        as 'alive', or when no HLS URL is present in the response data.
        """
        user_id = self._match_id(url)

        info_raw = self._download_json(
            'https://bigo.tv/studio/getInternalStudioInfo',
            user_id, data=urlencode_postdata({'siteId': user_id}))

        if not isinstance(info_raw, dict):
            raise ExtractorError('Received invalid JSON data')
        if info_raw.get('code'):
            # a truthy code is reported to the user together with the
            # accompanying message from the response
            raise ExtractorError(
                'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
        info = info_raw.get('data') or {}

        if not info.get('alive'):
            raise ExtractorError('This user is offline.', expected=True)

        # Without this check a missing 'hls_src' would produce a format
        # whose 'url' is None, which only fails later with an unrelated error.
        hls_url = info.get('hls_src')
        if not hls_url:
            raise ExtractorError('Unable to find the HLS stream URL')

        return {
            'id': info.get('roomId') or user_id,
            'title': info.get('roomTopic') or info.get('nick_name') or user_id,
            'formats': [{
                'url': hls_url,
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
            'thumbnail': info.get('snapshot'),
            'uploader': info.get('nick_name'),
            'uploader_id': user_id,
            'is_live': True,
        }
|
@ -369,6 +369,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
|||||||
'filesize': int_or_none(play_data.get('size')),
|
'filesize': int_or_none(play_data.get('size')),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
for a_format in formats:
|
||||||
|
a_format.setdefault('http_headers', {}).update({
|
||||||
|
'Referer': url,
|
||||||
|
})
|
||||||
|
|
||||||
song = self._call_api('song/info', au_id)
|
song = self._call_api('song/info', au_id)
|
||||||
title = song['title']
|
title = song['title']
|
||||||
statistic = song.get('statistic') or {}
|
statistic = song.get('statistic') or {}
|
||||||
|
148
youtube_dl/extractor/cpac.py
Normal file
148
youtube_dl/extractor/cpac.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
# compat_range
|
||||||
|
try:
|
||||||
|
if callable(xrange):
|
||||||
|
range = xrange
|
||||||
|
except (NameError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class CPACIE(InfoExtractor):
    """Extractor for single cpac.ca episode pages (English and French URLs)."""

    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the episode content model and return its info dict.

        The language ('fr' or 'en') is derived from the URL and selects both
        the localized metadata fields and the preferred audio language of
        the HLS formats.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)
        # Resolve the details mapping once, independently of whether a video
        # URL exists, so later metadata lookups never touch unbound names.
        details = try_get(content, lambda x: x['page']['details'], dict) or {}
        video_url = try_get(details, lambda x: x['videoUrl'], compat_str)
        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10

        # NOTE(review): as in the original flow, this is called with an empty
        # list when the content model has no videoUrl.
        self._sort_formats(formats)

        def localized(prefix, suffix='t'):
            # fields are named <prefix>_<lang>_<suffix>, e.g. title_en_t
            return str_or_none(details.get('%s_%s_%s' % (prefix, url_lang, suffix)))

        def is_live(v_type):
            return (v_type == 'live') if v_type is not None else None

        category = localized('category')

        return {
            'id': video_id,
            'formats': formats,
            'title': localized('title'),
            'description': localized('description'),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            # 'categories' (a list) is the standard info-dict field name
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, localized('image', 's')),
            'is_live': is_live(details.get('type')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class CPACPlaylistIE(InfoExtractor):
    """Extractor for cpac.ca program and search listings (paged playlists)."""

    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect episode URLs from every result page and return a playlist.

        The first page is downloaded fatally; continuation pages are
        downloaded with fatal=False, so a failed continuation only loses
        that page's entries.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
        pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
        api_url = (
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
            % (pl_type, video_id, ))
        content = self._download_json(api_url, video_id)
        # Kept separately: `content` is overwritten by each continuation and
        # may end up falsy when the last (non-fatal) download fails.
        first_page = content
        entries = []
        total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1)
        for page in range(1, total_pages + 1):
            if page > 1:
                api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
                content = self._download_json(
                    api_url, video_id,
                    note='Downloading continuation - %d' % (page, ),
                    fatal=False)

            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
                if episode_url:
                    entries.append(episode_url)

        def program_field(name):
            # Look in the most recent page first (original behaviour), then
            # fall back to the first page if that lookup yields nothing.
            key = '%s_%s_t' % (name, url_lang)
            for source in (content, first_page):
                value = try_get(source, lambda x: x['page']['program'][key])
                if value:
                    return value
            return None

        return self.playlist_result(
            (self.url_result(entry) for entry in entries),
            playlist_id=video_id,
            playlist_title=program_field('title') or video_id.split('=')[-1],
            playlist_description=program_field('description'),
        )
|
@ -51,6 +51,10 @@ from .anvato import AnvatoIE
|
|||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
from .aliexpress import AliExpressLiveIE
|
from .aliexpress import AliExpressLiveIE
|
||||||
|
from .alsace20tv import (
|
||||||
|
Alsace20TVIE,
|
||||||
|
Alsace20TVEmbedIE,
|
||||||
|
)
|
||||||
from .apa import APAIE
|
from .apa import APAIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appleconnect import AppleConnectIE
|
from .appleconnect import AppleConnectIE
|
||||||
@ -71,6 +75,7 @@ from .arte import (
|
|||||||
ArteTVIE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
|
ArteTVCategoryIE,
|
||||||
)
|
)
|
||||||
from .arnes import ArnesIE
|
from .arnes import ArnesIE
|
||||||
from .asiancrush import (
|
from .asiancrush import (
|
||||||
@ -114,6 +119,7 @@ from .bfmtv import (
|
|||||||
)
|
)
|
||||||
from .bibeltv import BibelTVIE
|
from .bibeltv import BibelTVIE
|
||||||
from .bigflix import BigflixIE
|
from .bigflix import BigflixIE
|
||||||
|
from .bigo import BigoIE
|
||||||
from .bild import BildIE
|
from .bild import BildIE
|
||||||
from .bilibili import (
|
from .bilibili import (
|
||||||
BiliBiliIE,
|
BiliBiliIE,
|
||||||
@ -253,6 +259,10 @@ from .commonprotocols import (
|
|||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .contv import CONtvIE
|
from .contv import CONtvIE
|
||||||
from .corus import CorusIE
|
from .corus import CorusIE
|
||||||
|
from .cpac import (
|
||||||
|
CPACIE,
|
||||||
|
CPACPlaylistIE,
|
||||||
|
)
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
from .crooksandliars import CrooksAndLiarsIE
|
from .crooksandliars import CrooksAndLiarsIE
|
||||||
@ -547,6 +557,7 @@ from .kinja import KinjaEmbedIE
|
|||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
|
from .kth import KTHIE
|
||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
from .kusi import KUSIIE
|
from .kusi import KUSIIE
|
||||||
from .kuwo import (
|
from .kuwo import (
|
||||||
@ -789,7 +800,14 @@ from .nick import (
|
|||||||
NickNightIE,
|
NickNightIE,
|
||||||
NickRuIE,
|
NickRuIE,
|
||||||
)
|
)
|
||||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
from .niconico import (
|
||||||
|
NiconicoIE,
|
||||||
|
NiconicoPlaylistIE,
|
||||||
|
NiconicoUserIE,
|
||||||
|
NicovideoSearchIE,
|
||||||
|
NicovideoSearchDateIE,
|
||||||
|
NicovideoSearchURLIE,
|
||||||
|
)
|
||||||
from .ninecninemedia import NineCNineMediaIE
|
from .ninecninemedia import NineCNineMediaIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .ninenow import NineNowIE
|
from .ninenow import NineNowIE
|
||||||
@ -1606,7 +1624,7 @@ from .youtube import (
|
|||||||
YoutubeRecommendedIE,
|
YoutubeRecommendedIE,
|
||||||
YoutubeSearchDateIE,
|
YoutubeSearchDateIE,
|
||||||
YoutubeSearchIE,
|
YoutubeSearchIE,
|
||||||
#YoutubeSearchURLIE,
|
YoutubeSearchURLIE,
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTruncatedIDIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
|
@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
|
|||||||
'duration': info.get('duration'),
|
'duration': info.get('duration'),
|
||||||
'timestamp': info.get('createdAt'),
|
'timestamp': info.get('createdAt'),
|
||||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||||
'view_count': info.get('plays'),
|
'view_count': int_or_none(info.get('plays')),
|
||||||
}
|
}
|
||||||
|
31
youtube_dl/extractor/kth.py
Normal file
31
youtube_dl/extractor/kth.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
|
class KTHIE(InfoExtractor):
    """Extractor for play.kth.se media pages; delegates to the Kaltura extractor."""

    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
    _TEST = {
        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
        'info_dict': {
            'id': '0_uoop6oz9',
            'ext': 'mp4',
            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3516,
            'timestamp': 1647345358,
            'upload_date': '20220315',
            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
        }
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the Kaltura entry for this page.

        The entry id is taken from the URL and combined with partner id 308;
        the Kaltura service URL is passed along via the smuggled data.
        """
        entry_id = self._match_id(url)
        smuggled_data = {'service_url': 'https://api.kaltura.nordu.net'}
        kaltura_url = smuggle_url('kaltura:308:%s' % entry_id, smuggled_data)
        return self.url_result(kaltura_url, 'Kaltura')
|
@ -35,7 +35,9 @@ class MySpassIE(InfoExtractor):
|
|||||||
title = xpath_text(metadata, 'title', fatal=True)
|
title = xpath_text(metadata, 'title', fatal=True)
|
||||||
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||||
video_id_int = int(video_id)
|
video_id_int = int(video_id)
|
||||||
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
|
||||||
|
grps = re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url)
|
||||||
|
for group in grps.groups() if grps else []:
|
||||||
group_int = int(group)
|
group_int = int(group)
|
||||||
if group_int > video_id_int:
|
if group_int > video_id_int:
|
||||||
video_url = video_url.replace(
|
video_url = video_url.replace(
|
||||||
|
@ -4,8 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@ -20,13 +22,13 @@ class NDRBaseIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = next(group for group in mobj.groups() if group)
|
display_id = next(group for group in mobj.groups() if group)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
return self._extract_embed(webpage, display_id)
|
return self._extract_embed(webpage, display_id, url)
|
||||||
|
|
||||||
|
|
||||||
class NDRIE(NDRBaseIE):
|
class NDRIE(NDRBaseIE):
|
||||||
IE_NAME = 'ndr'
|
IE_NAME = 'ndr'
|
||||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||||
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# httpVideo, same content id
|
# httpVideo, same content id
|
||||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||||
@ -38,13 +40,14 @@ class NDRIE(NDRBaseIE):
|
|||||||
'title': 'Party, Pötte und Parade',
|
'title': 'Party, Pötte und Parade',
|
||||||
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
||||||
'uploader': 'ndrtv',
|
'uploader': 'ndrtv',
|
||||||
'timestamp': 1431108900,
|
'timestamp': 1431255671,
|
||||||
'upload_date': '20150510',
|
'upload_date': '20150510',
|
||||||
'duration': 3498,
|
'duration': 3498,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
# httpVideo, different content id
|
# httpVideo, different content id
|
||||||
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
||||||
@ -63,6 +66,7 @@ class NDRIE(NDRBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
# httpAudio, same content id
|
# httpAudio, same content id
|
||||||
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
||||||
@ -74,8 +78,8 @@ class NDRIE(NDRBaseIE):
|
|||||||
'title': 'La Valette entgeht der Hinrichtung',
|
'title': 'La Valette entgeht der Hinrichtung',
|
||||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||||
'uploader': 'ndrinfo',
|
'uploader': 'ndrinfo',
|
||||||
'timestamp': 1290626100,
|
'timestamp': 1631711863,
|
||||||
'upload_date': '20140729',
|
'upload_date': '20210915',
|
||||||
'duration': 884,
|
'duration': 884,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -89,9 +93,10 @@ class NDRIE(NDRBaseIE):
|
|||||||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||||
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
'description': 'md5:700f6de264010585012a72f97b0ac0c9',
|
||||||
'uploader': 'ndrtv',
|
'uploader': 'ndrtv',
|
||||||
'upload_date': '20201113',
|
'upload_date': '20201207',
|
||||||
|
'timestamp': 1614349457,
|
||||||
'duration': 1749,
|
'duration': 1749,
|
||||||
'subtitles': {
|
'subtitles': {
|
||||||
'de': [{
|
'de': [{
|
||||||
@ -109,19 +114,38 @@ class NDRIE(NDRBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_embed(self, webpage, display_id):
|
def _extract_embed(self, webpage, display_id, url):
|
||||||
embed_url = self._html_search_meta(
|
embed_url = (
|
||||||
'embedURL', webpage, 'embed URL',
|
self._html_search_meta(
|
||||||
default=None) or self._search_regex(
|
'embedURL', webpage, 'embed URL',
|
||||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
default=None)
|
||||||
'embed URL', group='url')
|
or self._search_regex(
|
||||||
|
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'embed URL', group='url', default=None)
|
||||||
|
or self._search_regex(
|
||||||
|
r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'embed URL', group='url', default=''))
|
||||||
|
# some more work needed if we only found sophoraID
|
||||||
|
if re.match(r'^[a-z]+\d+$', embed_url):
|
||||||
|
# get the initial part of the url path,. eg /panorama/archiv/2022/
|
||||||
|
parsed_url = compat_urllib_parse_urlparse(url)
|
||||||
|
path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
|
||||||
|
# find tell-tale image with the actual ID
|
||||||
|
ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
|
||||||
|
# or try to use special knowledge!
|
||||||
|
NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
|
||||||
|
embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
|
||||||
|
if not embed_url:
|
||||||
|
raise ExtractorError('Unable to extract embedUrl')
|
||||||
|
|
||||||
description = self._search_regex(
|
description = self._search_regex(
|
||||||
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
||||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
webpage, 'description', default=None) or self._og_search_description(webpage)
|
||||||
timestamp = parse_iso8601(
|
timestamp = parse_iso8601(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
|
(r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
|
||||||
webpage, 'upload date', default=None))
|
r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
|
||||||
|
webpage, 'upload date', group='cont', default=None))
|
||||||
info = self._search_json_ld(webpage, display_id, default={})
|
info = self._search_json_ld(webpage, display_id, default={})
|
||||||
return merge_dicts({
|
return merge_dicts({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
@ -153,19 +177,19 @@ class NJoyIE(NDRBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
# httpVideo, different content id
|
# httpVideo, different content id
|
||||||
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
||||||
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dockville882',
|
'id': 'livestream283',
|
||||||
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
||||||
'ext': 'mp4',
|
'ext': 'mp3',
|
||||||
'title': '"Ich hab noch nie" mit Felix Jaehn',
|
'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
|
||||||
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
|
'description': 'md5:681698f527b8601e511e7b79edde7d2c',
|
||||||
'uploader': 'njoy',
|
'uploader': 'njoy',
|
||||||
'upload_date': '20150822',
|
'upload_date': '20210830',
|
||||||
'duration': 211,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -175,18 +199,25 @@ class NJoyIE(NDRBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_embed(self, webpage, display_id):
|
def _extract_embed(self, webpage, display_id, url=None):
|
||||||
|
# find tell-tale URL with the actual ID, or ...
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
|
(r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
|
||||||
description = self._search_regex(
|
r'<iframe[^>]+id="pp_([\da-z]+)"', ),
|
||||||
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
webpage, 'NDR id', default=None)
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
|
description = (
|
||||||
|
self._html_search_meta('description', webpage)
|
||||||
|
or self._search_regex(
|
||||||
|
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||||
|
webpage, 'description', fatal=False))
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'NDREmbedBase',
|
'ie_key': 'NDREmbedBase',
|
||||||
'url': 'ndr:%s' % video_id,
|
'url': 'ndr:%s' % video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'title': display_id.replace('-', ' ').strip(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -291,7 +322,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class NDREmbedIE(NDREmbedBaseIE):
|
class NDREmbedIE(NDREmbedBaseIE):
|
||||||
IE_NAME = 'ndr:embed'
|
IE_NAME = 'ndr:embed'
|
||||||
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||||
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||||
@ -304,6 +335,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'upload_date': '20150907',
|
'upload_date': '20150907',
|
||||||
'duration': 132,
|
'duration': 132,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
||||||
'md5': '002085c44bae38802d94ae5802a36e78',
|
'md5': '002085c44bae38802d94ae5802a36e78',
|
||||||
@ -319,6 +351,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
||||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||||
@ -328,7 +361,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'title': 'La Valette entgeht der Hinrichtung',
|
'title': 'La Valette entgeht der Hinrichtung',
|
||||||
'is_live': False,
|
'is_live': False,
|
||||||
'uploader': 'ndrinfo',
|
'uploader': 'ndrinfo',
|
||||||
'upload_date': '20140729',
|
'upload_date': '20210915',
|
||||||
'duration': 884,
|
'duration': 884,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -349,15 +382,17 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
# httpVideoLive
|
# httpVideoLive
|
||||||
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'livestream217',
|
'id': 'livestream217',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'upload_date': '20150910',
|
'upload_date': '20210409',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -395,9 +430,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
||||||
'is_live': False,
|
'is_live': False,
|
||||||
'upload_date': '20150807',
|
'upload_date': '20200826',
|
||||||
'duration': 1011,
|
'duration': 1011,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
# httpAudio
|
# httpAudio
|
||||||
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
||||||
@ -414,6 +450,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available',
|
||||||
}, {
|
}, {
|
||||||
# httpAudioLive, no explicit ext
|
# httpAudioLive, no explicit ext
|
||||||
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
||||||
@ -423,7 +460,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
|||||||
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'uploader': 'njoy',
|
'uploader': 'njoy',
|
||||||
'upload_date': '20150810',
|
'upload_date': '20210830',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
|
|||||||
def _extract_episode_info(self, url, episode=None):
|
def _extract_episode_info(self, url, episode=None):
|
||||||
fetch_episode = episode is None
|
fetch_episode = episode is None
|
||||||
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||||
if episode_id.isdigit():
|
if len(episode_id) == 7:
|
||||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||||
|
|
||||||
is_video = m_type == 'video'
|
is_video = m_type == 'video'
|
||||||
@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NhkVodIE(NhkBaseIE):
|
class NhkVodIE(NhkBaseIE):
|
||||||
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
# the 7-character IDs can contain alphabetic chars too: assume [a-z] rather than just [a-f], e.g. 9999a34
|
||||||
|
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||||
# Content available only for a limited period of time. Visit
|
# Content available only for a limited period of time. Visit
|
||||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# video, alphabetic character in ID #29670
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
|
||||||
|
'only_matching': True,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'qfjay6cg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'DESIGN TALKS plus - Fishermen’s Finery',
|
||||||
|
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
|
||||||
|
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
|
||||||
|
'upload_date': '20210615',
|
||||||
|
'timestamp': 1623722008,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -2,25 +2,28 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import functools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import math
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
dict_get,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
dict_get,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
InAdvancePagedList,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
OnDemandPagedList,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
PostProcessingError,
|
||||||
remove_start,
|
remove_start,
|
||||||
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
'md5': 'a5bad06f1347452102953f323c69da34s',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sm22312215',
|
'id': 'sm22312215',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -157,11 +160,34 @@ class NiconicoIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# DMC video with heartbeat
|
||||||
|
'url': 'https://www.nicovideo.jp/watch/sm34815188',
|
||||||
|
'md5': '9360c6e1f1519d7759e2fe8e1326ae83',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sm34815188',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:aee93e9f3366db72f902f6cd5d389cb7',
|
||||||
|
'description': 'md5:7b9149fc7a00ab053cafaf5c19662704',
|
||||||
|
'thumbnail': r're:https?://.*',
|
||||||
|
'uploader': 'md5:2762e18fa74dbb40aa1ad27c6291ee32',
|
||||||
|
'uploader_id': '67449889',
|
||||||
|
'upload_date': '20190322',
|
||||||
|
'timestamp': int, # timestamp is unstable
|
||||||
|
'duration': 1082.0,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'X-Frontend-ID': '6',
|
||||||
|
'X-Frontend-Version': '0'
|
||||||
|
}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -191,37 +217,89 @@ class NiconicoIE(InfoExtractor):
|
|||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return login_ok
|
return login_ok
|
||||||
|
|
||||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
def _get_heartbeat_info(self, info_dict):
|
||||||
def yesno(boolean):
|
|
||||||
return 'yes' if boolean else 'no'
|
|
||||||
|
|
||||||
session_api_data = api_data['video']['dmcInfo']['session_api']
|
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||||
session_api_endpoint = session_api_data['urls'][0]
|
|
||||||
|
|
||||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
api_data = (
|
||||||
|
info_dict.get('_api_data')
|
||||||
|
or self._parse_json(
|
||||||
|
self._html_search_regex(
|
||||||
|
'data-api-data="([^"]+)"',
|
||||||
|
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||||
|
'API data', default='{}'),
|
||||||
|
video_id))
|
||||||
|
|
||||||
|
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||||
|
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||||
|
|
||||||
|
def ping():
|
||||||
|
status = try_get(
|
||||||
|
self._download_json(
|
||||||
|
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
|
||||||
|
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
|
||||||
|
note='Acquiring permission for downloading video',
|
||||||
|
headers=self._API_HEADERS),
|
||||||
|
lambda x: x['meta']['status'])
|
||||||
|
if status != 200:
|
||||||
|
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
|
||||||
|
|
||||||
|
yesno = lambda x: 'yes' if x else 'no'
|
||||||
|
|
||||||
|
# m3u8 (encryption)
|
||||||
|
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
|
||||||
|
protocol = 'm3u8'
|
||||||
|
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
|
||||||
|
session_api_http_parameters = {
|
||||||
|
'parameters': {
|
||||||
|
'hls_parameters': {
|
||||||
|
'encryption': {
|
||||||
|
encryption: {
|
||||||
|
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
|
||||||
|
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'transfer_preset': '',
|
||||||
|
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||||
|
'segment_duration': 6000,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# http
|
||||||
|
else:
|
||||||
|
protocol = 'http'
|
||||||
|
session_api_http_parameters = {
|
||||||
|
'parameters': {
|
||||||
|
'http_output_download_parameters': {
|
||||||
|
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
session_response = self._download_json(
|
session_response = self._download_json(
|
||||||
session_api_endpoint['url'], video_id,
|
session_api_endpoint['url'], video_id,
|
||||||
query={'_format': 'json'},
|
query={'_format': 'json'},
|
||||||
headers={'Content-Type': 'application/json'},
|
headers={'Content-Type': 'application/json'},
|
||||||
note='Downloading JSON metadata for %s' % format_id,
|
note='Downloading JSON metadata for %s' % info_dict['format_id'],
|
||||||
data=json.dumps({
|
data=json.dumps({
|
||||||
'session': {
|
'session': {
|
||||||
'client_info': {
|
'client_info': {
|
||||||
'player_id': session_api_data['player_id'],
|
'player_id': session_api_data.get('playerId'),
|
||||||
},
|
},
|
||||||
'content_auth': {
|
'content_auth': {
|
||||||
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
|
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||||
'content_key_timeout': session_api_data['content_key_timeout'],
|
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||||
'service_id': 'nicovideo',
|
'service_id': 'nicovideo',
|
||||||
'service_user_id': session_api_data['service_user_id']
|
'service_user_id': session_api_data.get('serviceUserId')
|
||||||
},
|
},
|
||||||
'content_id': session_api_data['content_id'],
|
'content_id': session_api_data.get('contentId'),
|
||||||
'content_src_id_sets': [{
|
'content_src_id_sets': [{
|
||||||
'content_src_ids': [{
|
'content_src_ids': [{
|
||||||
'src_id_to_mux': {
|
'src_id_to_mux': {
|
||||||
'audio_src_ids': [audio_quality['id']],
|
'audio_src_ids': [audio_src_id],
|
||||||
'video_src_ids': [video_quality['id']],
|
'video_src_ids': [video_src_id],
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
}],
|
}],
|
||||||
@ -229,52 +307,81 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'content_uri': '',
|
'content_uri': '',
|
||||||
'keep_method': {
|
'keep_method': {
|
||||||
'heartbeat': {
|
'heartbeat': {
|
||||||
'lifetime': session_api_data['heartbeat_lifetime']
|
'lifetime': session_api_data.get('heartbeatLifetime')
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'priority': session_api_data['priority'],
|
'priority': session_api_data.get('priority'),
|
||||||
'protocol': {
|
'protocol': {
|
||||||
'name': 'http',
|
'name': 'http',
|
||||||
'parameters': {
|
'parameters': {
|
||||||
'http_parameters': {
|
'http_parameters': session_api_http_parameters
|
||||||
'parameters': {
|
|
||||||
'http_output_download_parameters': {
|
|
||||||
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
|
||||||
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'recipe_id': session_api_data['recipe_id'],
|
'recipe_id': session_api_data.get('recipeId'),
|
||||||
'session_operation_auth': {
|
'session_operation_auth': {
|
||||||
'session_operation_auth_by_signature': {
|
'session_operation_auth_by_signature': {
|
||||||
'signature': session_api_data['signature'],
|
'signature': session_api_data.get('signature'),
|
||||||
'token': session_api_data['token'],
|
'token': session_api_data.get('token'),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'timing_constraint': 'unlimited'
|
'timing_constraint': 'unlimited'
|
||||||
}
|
}
|
||||||
}).encode())
|
}).encode())
|
||||||
|
|
||||||
resolution = video_quality.get('resolution', {})
|
info_dict['url'] = session_response['data']['session']['content_uri']
|
||||||
|
info_dict['protocol'] = protocol
|
||||||
|
|
||||||
|
# get heartbeat info
|
||||||
|
heartbeat_info_dict = {
|
||||||
|
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||||
|
'data': json.dumps(session_response['data']),
|
||||||
|
# interval: convert milliseconds to seconds, then take a third of it (scale=3000) to leave a safety buffer.
|
||||||
|
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
||||||
|
'ping': ping
|
||||||
|
}
|
||||||
|
|
||||||
|
return info_dict, heartbeat_info_dict
|
||||||
|
|
||||||
|
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||||
|
def parse_format_id(id_code):
|
||||||
|
mobj = re.match(r'''(?x)
|
||||||
|
(?:archive_)?
|
||||||
|
(?:(?P<codec>[^_]+)_)?
|
||||||
|
(?:(?P<br>[\d]+)kbps_)?
|
||||||
|
(?:(?P<res>[\d+]+)p_)?
|
||||||
|
''', '%s_' % id_code)
|
||||||
|
return mobj.groupdict() if mobj else {}
|
||||||
|
|
||||||
|
protocol = 'niconico_dmc'
|
||||||
|
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||||
|
vdict = parse_format_id(video_quality['id'])
|
||||||
|
adict = parse_format_id(audio_quality['id'])
|
||||||
|
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
||||||
|
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'url': session_response['data']['session']['content_uri'],
|
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
||||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
|
'vcodec': vdict.get('codec'),
|
||||||
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
|
'acodec': adict.get('codec'),
|
||||||
'height': resolution.get('height'),
|
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
||||||
'width': resolution.get('width'),
|
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
||||||
|
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
||||||
|
'width': int_or_none(resolution.get('width')),
|
||||||
|
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
|
||||||
|
'protocol': protocol,
|
||||||
|
'http_headers': {
|
||||||
|
'Origin': 'https://www.nicovideo.jp',
|
||||||
|
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# Get video webpage. We are not actually interested in it for normal
|
# Get video webpage for API data.
|
||||||
# cases, but need the cookies in order to be able to download the
|
|
||||||
# info webpage
|
|
||||||
webpage, handle = self._download_webpage_handle(
|
webpage, handle = self._download_webpage_handle(
|
||||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
if video_id.startswith('so'):
|
if video_id.startswith('so'):
|
||||||
@ -284,86 +391,136 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'data-api-data="([^"]+)"', webpage,
|
'data-api-data="([^"]+)"', webpage,
|
||||||
'API data', default='{}'), video_id)
|
'API data', default='{}'), video_id)
|
||||||
|
|
||||||
def _format_id_from_url(video_url):
|
def get_video_info_web(items):
|
||||||
return 'economy' if video_real_url.endswith('low') else 'normal'
|
return dict_get(api_data['video'], items)
|
||||||
|
|
||||||
try:
|
# Get video info
|
||||||
video_real_url = api_data['video']['smileInfo']['url']
|
video_info_xml = self._download_xml(
|
||||||
except KeyError: # Flash videos
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||||
# Get flv info
|
video_id, note='Downloading video info page')
|
||||||
flv_info_webpage = self._download_webpage(
|
|
||||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
|
||||||
video_id, 'Downloading flv info')
|
|
||||||
|
|
||||||
flv_info = compat_parse_qs(flv_info_webpage)
|
def get_video_info_xml(items):
|
||||||
if 'url' not in flv_info:
|
if not isinstance(items, list):
|
||||||
if 'deleted' in flv_info:
|
items = [items]
|
||||||
raise ExtractorError('The video has been deleted.',
|
for item in items:
|
||||||
expected=True)
|
ret = xpath_text(video_info_xml, './/' + item)
|
||||||
elif 'closed' in flv_info:
|
if ret:
|
||||||
raise ExtractorError('Niconico videos now require logging in',
|
return ret
|
||||||
expected=True)
|
|
||||||
elif 'error' in flv_info:
|
|
||||||
raise ExtractorError('%s reports error: %s' % (
|
|
||||||
self.IE_NAME, flv_info['error'][0]), expected=True)
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unable to find video URL')
|
|
||||||
|
|
||||||
video_info_xml = self._download_xml(
|
if get_video_info_xml('error'):
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
error_code = get_video_info_xml('code')
|
||||||
video_id, note='Downloading video info page')
|
|
||||||
|
|
||||||
def get_video_info(items):
|
if error_code == 'DELETED':
|
||||||
if not isinstance(items, list):
|
raise ExtractorError('The video has been deleted.',
|
||||||
items = [items]
|
expected=True)
|
||||||
for item in items:
|
elif error_code == 'NOT_FOUND':
|
||||||
ret = xpath_text(video_info_xml, './/' + item)
|
raise ExtractorError('The video is not found.',
|
||||||
if ret:
|
expected=True)
|
||||||
return ret
|
elif error_code == 'COMMUNITY':
|
||||||
|
self.to_screen('%s: The video is community members only.' % video_id)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
|
||||||
|
|
||||||
video_real_url = flv_info['url'][0]
|
# Start extracting video formats
|
||||||
|
formats = []
|
||||||
|
|
||||||
extension = get_video_info('movie_type')
|
# Get HTML5 videos info
|
||||||
if not extension:
|
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
||||||
extension = determine_ext(video_real_url)
|
if not quality_info:
|
||||||
|
raise ExtractorError('The video can\'t be downloaded', expected=True)
|
||||||
|
|
||||||
formats = [{
|
for audio_quality in quality_info.get('audios') or {}:
|
||||||
'url': video_real_url,
|
for video_quality in quality_info.get('videos') or {}:
|
||||||
'ext': extension,
|
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||||
'format_id': _format_id_from_url(video_real_url),
|
continue
|
||||||
}]
|
formats.append(self._extract_format_for_quality(
|
||||||
else:
|
api_data, video_id, audio_quality, video_quality))
|
||||||
formats = []
|
|
||||||
|
|
||||||
dmc_info = api_data['video'].get('dmcInfo')
|
# Get flv/swf info
|
||||||
if dmc_info: # "New" HTML5 videos
|
timestamp = None
|
||||||
quality_info = dmc_info['quality']
|
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
||||||
for audio_quality in quality_info['audios']:
|
if video_real_url:
|
||||||
for video_quality in quality_info['videos']:
|
is_economy = video_real_url.endswith('low')
|
||||||
if not audio_quality['available'] or not video_quality['available']:
|
|
||||||
continue
|
|
||||||
formats.append(self._extract_format_for_quality(
|
|
||||||
api_data, video_id, audio_quality, video_quality))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
if is_economy:
|
||||||
else: # "Old" HTML5 videos
|
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||||
formats = [{
|
|
||||||
|
# Invoking ffprobe to determine resolution
|
||||||
|
pp = FFmpegPostProcessor(self._downloader)
|
||||||
|
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||||
|
|
||||||
|
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||||
|
|
||||||
|
try:
|
||||||
|
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||||
|
except PostProcessingError as err:
|
||||||
|
raise ExtractorError(err.msg, expected=True)
|
||||||
|
|
||||||
|
v_stream = a_stream = {}
|
||||||
|
|
||||||
|
# Some complex swf files don't have a video stream (e.g. nm4809023)
|
||||||
|
for stream in metadata['streams']:
|
||||||
|
if stream['codec_type'] == 'video':
|
||||||
|
v_stream = stream
|
||||||
|
elif stream['codec_type'] == 'audio':
|
||||||
|
a_stream = stream
|
||||||
|
|
||||||
|
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||||
|
filesize = int(
|
||||||
|
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||||
|
or metadata['format']['size']
|
||||||
|
)
|
||||||
|
extension = (
|
||||||
|
get_video_info_xml('movie_type')
|
||||||
|
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||||
|
)
|
||||||
|
|
||||||
|
# The 'creation_time' tag on the video stream of re-encoded SMILEVIDEO mp4 files is '1970-01-01T00:00:00.000000Z'.
|
||||||
|
timestamp = (
|
||||||
|
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||||
|
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||||
|
)
|
||||||
|
metadata_timestamp = (
|
||||||
|
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||||
|
or timestamp if extension != 'mp4' else 0
|
||||||
|
)
|
||||||
|
|
||||||
|
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||||
|
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||||
|
|
||||||
|
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||||
|
|
||||||
|
# If movie file size is unstable, old server movie is not source movie.
|
||||||
|
if filesize > 1:
|
||||||
|
formats.append({
|
||||||
'url': video_real_url,
|
'url': video_real_url,
|
||||||
'ext': 'mp4',
|
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||||
'format_id': _format_id_from_url(video_real_url),
|
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||||
}]
|
'ext': extension,
|
||||||
|
'container': extension,
|
||||||
|
'vcodec': v_stream.get('codec_name'),
|
||||||
|
'acodec': a_stream.get('codec_name'),
|
||||||
|
# Some complex swf files don't have total bit rate metadata (e.g. nm6049209)
|
||||||
|
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||||
|
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||||
|
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||||
|
'height': int_or_none(v_stream.get('height')),
|
||||||
|
'width': int_or_none(v_stream.get('width')),
|
||||||
|
'source_preference': 5 if not is_economy else -2,
|
||||||
|
'quality': 5 if is_source and not is_economy else None,
|
||||||
|
'filesize': filesize
|
||||||
|
})
|
||||||
|
|
||||||
def get_video_info(items):
|
self._sort_formats(formats)
|
||||||
return dict_get(api_data['video'], items)
|
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
title = get_video_info('title')
|
title = (
|
||||||
if not title:
|
get_video_info_xml('title') # prefer to get the untranslated original title
|
||||||
title = self._og_search_title(webpage, default=None)
|
or get_video_info_web(['originalTitle', 'title'])
|
||||||
if not title:
|
or self._og_search_title(webpage, default=None)
|
||||||
title = self._html_search_regex(
|
or self._html_search_regex(
|
||||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||||
webpage, 'video title')
|
webpage, 'video title'))
|
||||||
|
|
||||||
watch_api_data_string = self._html_search_regex(
|
watch_api_data_string = self._html_search_regex(
|
||||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||||
@ -372,14 +529,15 @@ class NiconicoIE(InfoExtractor):
|
|||||||
video_detail = watch_api_data.get('videoDetail', {})
|
video_detail = watch_api_data.get('videoDetail', {})
|
||||||
|
|
||||||
thumbnail = (
|
thumbnail = (
|
||||||
get_video_info(['thumbnail_url', 'thumbnailURL'])
|
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
|
||||||
|
or dict_get( # choose highest from 720p to 240p
|
||||||
|
get_video_info_web('thumbnail'),
|
||||||
|
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
|
||||||
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
||||||
or video_detail.get('thumbnail'))
|
or video_detail.get('thumbnail'))
|
||||||
|
|
||||||
description = get_video_info('description')
|
description = get_video_info_web('description')
|
||||||
|
|
||||||
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
|
|
||||||
or unified_timestamp(get_video_info('postedDateTime')))
|
|
||||||
if not timestamp:
|
if not timestamp:
|
||||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||||
if match:
|
if match:
|
||||||
@ -388,19 +546,25 @@ class NiconicoIE(InfoExtractor):
|
|||||||
timestamp = parse_iso8601(
|
timestamp = parse_iso8601(
|
||||||
video_detail['postedAt'].replace('/', '-'),
|
video_detail['postedAt'].replace('/', '-'),
|
||||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||||
|
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
||||||
|
|
||||||
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
|
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
||||||
if not view_count:
|
if not view_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||||
webpage, 'view count', default=None)
|
webpage, 'view count', default=None)
|
||||||
if match:
|
if match:
|
||||||
view_count = int_or_none(match.replace(',', ''))
|
view_count = int_or_none(match.replace(',', ''))
|
||||||
view_count = view_count or video_detail.get('viewCount')
|
view_count = (
|
||||||
|
view_count
|
||||||
|
or video_detail.get('viewCount')
|
||||||
|
or try_get(api_data, lambda x: x['video']['count']['view']))
|
||||||
|
|
||||||
|
comment_count = (
|
||||||
|
int_or_none(get_video_info_web('comment_num'))
|
||||||
|
or video_detail.get('commentCount')
|
||||||
|
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
||||||
|
|
||||||
comment_count = (int_or_none(get_video_info('comment_num'))
|
|
||||||
or video_detail.get('commentCount')
|
|
||||||
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
|
||||||
if not comment_count:
|
if not comment_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||||
@ -409,22 +573,41 @@ class NiconicoIE(InfoExtractor):
|
|||||||
comment_count = int_or_none(match.replace(',', ''))
|
comment_count = int_or_none(match.replace(',', ''))
|
||||||
|
|
||||||
duration = (parse_duration(
|
duration = (parse_duration(
|
||||||
get_video_info('length')
|
get_video_info_web('length')
|
||||||
or self._html_search_meta(
|
or self._html_search_meta(
|
||||||
'video:duration', webpage, 'video duration', default=None))
|
'video:duration', webpage, 'video duration', default=None))
|
||||||
or video_detail.get('length')
|
or video_detail.get('length')
|
||||||
or get_video_info('duration'))
|
or get_video_info_web('duration'))
|
||||||
|
|
||||||
webpage_url = get_video_info('watch_url') or url
|
webpage_url = get_video_info_web('watch_url') or url
|
||||||
|
|
||||||
|
# for channel movie and community movie
|
||||||
|
channel_id = try_get(
|
||||||
|
api_data,
|
||||||
|
(lambda x: x['channel']['globalId'],
|
||||||
|
lambda x: x['community']['globalId']))
|
||||||
|
channel = try_get(
|
||||||
|
api_data,
|
||||||
|
(lambda x: x['channel']['name'],
|
||||||
|
lambda x: x['community']['name']))
|
||||||
|
|
||||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||||
# in the JSON, which will cause None to be returned instead of {}.
|
# in the JSON, which will cause None to be returned instead of {}.
|
||||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||||
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
|
uploader_id = str_or_none(
|
||||||
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
|
get_video_info_web(['ch_id', 'user_id'])
|
||||||
|
or owner.get('id')
|
||||||
|
or channel_id
|
||||||
|
)
|
||||||
|
uploader = (
|
||||||
|
get_video_info_web(['ch_name', 'user_nickname'])
|
||||||
|
or owner.get('nickname')
|
||||||
|
or channel
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'_api_data': api_data,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
@ -432,6 +615,8 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
'channel': channel,
|
||||||
|
'channel_id': channel_id,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
@ -440,7 +625,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistIE(InfoExtractor):
|
class NiconicoPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||||
@ -456,60 +641,185 @@ class NiconicoPlaylistIE(InfoExtractor):
|
|||||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 100
|
|
||||||
|
|
||||||
def _call_api(self, list_id, resource, query):
|
_API_HEADERS = {
|
||||||
return self._download_json(
|
'X-Frontend-ID': '6',
|
||||||
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
'X-Frontend-Version': '0'
|
||||||
'Downloading %s JSON metatdata' % resource, query=query,
|
}
|
||||||
headers={'X-Frontend-Id': 6})['data']['mylist']
|
|
||||||
|
|
||||||
def _parse_owner(self, item):
|
|
||||||
owner = item.get('owner') or {}
|
|
||||||
if owner:
|
|
||||||
return {
|
|
||||||
'uploader': owner.get('name'),
|
|
||||||
'uploader_id': owner.get('id'),
|
|
||||||
}
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _fetch_page(self, list_id, page):
|
|
||||||
page += 1
|
|
||||||
items = self._call_api(list_id, 'page %d' % page, {
|
|
||||||
'page': page,
|
|
||||||
'pageSize': self._PAGE_SIZE,
|
|
||||||
})['items']
|
|
||||||
for item in items:
|
|
||||||
video = item.get('video') or {}
|
|
||||||
video_id = video.get('id')
|
|
||||||
if not video_id:
|
|
||||||
continue
|
|
||||||
count = video.get('count') or {}
|
|
||||||
get_count = lambda x: int_or_none(count.get(x))
|
|
||||||
info = {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': video_id,
|
|
||||||
'title': video.get('title'),
|
|
||||||
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
|
||||||
'description': video.get('shortDescription'),
|
|
||||||
'duration': int_or_none(video.get('duration')),
|
|
||||||
'view_count': get_count('view'),
|
|
||||||
'comment_count': get_count('comment'),
|
|
||||||
'ie_key': NiconicoIE.ie_key(),
|
|
||||||
}
|
|
||||||
info.update(self._parse_owner(video))
|
|
||||||
yield info
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
mylist = self._call_api(list_id, 'list', {
|
|
||||||
'pageSize': 1,
|
def get_page_data(pagenum, pagesize):
|
||||||
})
|
return self._download_json(
|
||||||
entries = InAdvancePagedList(
|
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||||
functools.partial(self._fetch_page, list_id),
|
query={'page': 1 + pagenum, 'pageSize': pagesize},
|
||||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
headers=self._API_HEADERS).get('data').get('mylist')
|
||||||
self._PAGE_SIZE)
|
|
||||||
result = self.playlist_result(
|
data = get_page_data(0, 1)
|
||||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
title = data.get('name')
|
||||||
result.update(self._parse_owner(mylist))
|
description = data.get('description')
|
||||||
return result
|
uploader = data.get('owner').get('name')
|
||||||
|
uploader_id = data.get('owner').get('id')
|
||||||
|
|
||||||
|
def pagefunc(pagenum):
|
||||||
|
data = get_page_data(pagenum, 25)
|
||||||
|
return ({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
|
||||||
|
} for item in data.get('items'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': list_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'entries': OnDemandPagedList(pagefunc, 25),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NicovideoSearchBaseIE(InfoExtractor):
|
||||||
|
_MAX_RESULTS = float('inf')
|
||||||
|
|
||||||
|
def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
|
||||||
|
query = query or {}
|
||||||
|
pages = [query['page']] if 'page' in query else itertools.count(1)
|
||||||
|
for page_num in pages:
|
||||||
|
query['page'] = str(page_num)
|
||||||
|
webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
|
||||||
|
results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.+?)(?=["\'])', webpage)
|
||||||
|
for item in results:
|
||||||
|
yield self.url_result('http://www.nicovideo.jp/watch/%s' % item, 'Niconico', item)
|
||||||
|
if not results:
|
||||||
|
break
|
||||||
|
|
||||||
|
def _get_n_results(self, query, n):
|
||||||
|
entries = self._entries(self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
|
||||||
|
if n < self._MAX_RESULTS:
|
||||||
|
entries = itertools.islice(entries, 0, n)
|
||||||
|
return self.playlist_result(entries, query, query)
|
||||||
|
|
||||||
|
|
||||||
|
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
|
||||||
|
IE_DESC = 'Nico video search'
|
||||||
|
IE_NAME = 'nicovideo:search'
|
||||||
|
_SEARCH_KEY = 'nicosearch'
|
||||||
|
|
||||||
|
def _search_results(self, query):
|
||||||
|
return self._entries(
|
||||||
|
self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
|
||||||
|
|
||||||
|
|
||||||
|
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
|
||||||
|
IE_NAME = '%s_url' % NicovideoSearchIE.IE_NAME
|
||||||
|
IE_DESC = 'Nico video search URLs'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.nicovideo.jp/search/sm9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sm9',
|
||||||
|
'title': 'sm9'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 40,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sm9',
|
||||||
|
'title': 'sm9'
|
||||||
|
},
|
||||||
|
'playlist_count': 31,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
query = self._match_id(url)
|
||||||
|
return self.playlist_result(self._entries(url, query), query, query)
|
||||||
|
|
||||||
|
|
||||||
|
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
|
||||||
|
IE_DESC = 'Nico video search, newest first'
|
||||||
|
IE_NAME = '%s:date' % NicovideoSearchIE.IE_NAME
|
||||||
|
_SEARCH_KEY = 'nicosearchdate'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'nicosearchdateall:a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a',
|
||||||
|
'title': 'a'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1610,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_START_DATE = datetime.date(2007, 1, 1)
|
||||||
|
_RESULTS_PER_PAGE = 32
|
||||||
|
_MAX_PAGES = 50
|
||||||
|
|
||||||
|
def _entries(self, url, item_id, start_date=None, end_date=None):
|
||||||
|
start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
|
||||||
|
|
||||||
|
# If the last page has a full page of videos, we need to break down the query interval further
|
||||||
|
last_page_len = len(list(self._get_entries_for_date(
|
||||||
|
url, item_id, start_date, end_date, self._MAX_PAGES,
|
||||||
|
note='Checking number of videos from {0} to {1}'.format(start_date, end_date))))
|
||||||
|
if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
|
||||||
|
midpoint = start_date + ((end_date - start_date) // 2)
|
||||||
|
for entry in itertools.chain(
|
||||||
|
iter(self._entries(url, item_id, midpoint, end_date)),
|
||||||
|
iter(self._entries(url, item_id, start_date, midpoint))):
|
||||||
|
yield entry
|
||||||
|
else:
|
||||||
|
self.to_screen('{0}: Downloading results from {1} to {2}'.format(item_id, start_date, end_date))
|
||||||
|
for entry in iter(self._get_entries_for_date(
|
||||||
|
url, item_id, start_date, end_date, note=' Downloading page %(page)s')):
|
||||||
|
yield entry
|
||||||
|
|
||||||
|
def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
|
||||||
|
query = {
|
||||||
|
'start': compat_str(start_date),
|
||||||
|
'end': compat_str(end_date or start_date),
|
||||||
|
'sort': 'f',
|
||||||
|
'order': 'd',
|
||||||
|
}
|
||||||
|
if page_num:
|
||||||
|
query['page'] = compat_str(page_num)
|
||||||
|
|
||||||
|
for entry in iter(super(NicovideoSearchDateIE, self)._entries(url, item_id, query=query, note=note)):
|
||||||
|
yield entry
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoUserIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.nicovideo.jp/user/419948',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '419948',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 101,
|
||||||
|
}
|
||||||
|
_API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'X-Frontend-ID': '6',
|
||||||
|
'X-Frontend-Version': '0'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _entries(self, list_id):
|
||||||
|
total_count = 1
|
||||||
|
count = page_num = 0
|
||||||
|
while count < total_count:
|
||||||
|
json_parsed = self._download_json(
|
||||||
|
self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
|
||||||
|
headers=self._API_HEADERS,
|
||||||
|
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
|
||||||
|
if not page_num:
|
||||||
|
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||||
|
for entry in json_parsed["data"]["items"]:
|
||||||
|
count += 1
|
||||||
|
yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
|
||||||
|
page_num += 1
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
return self.playlist_result(self._entries(list_id), list_id)
|
||||||
|
@ -1,71 +1,113 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
class NuvidIE(InfoExtractor):
|
class NuvidIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://m.nuvid.com/video/1310741/',
|
'url': 'https://www.nuvid.com/video/6513023/italian-babe',
|
||||||
'md5': 'eab207b7ac4fccfb4e23c86201f11277',
|
'md5': '772d2f8288f3d3c5c45f7a41761c7844',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1310741',
|
'id': '6513023',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Horny babes show their awesome bodeis and',
|
'title': 'italian babe',
|
||||||
'duration': 129,
|
'format_id': '360p',
|
||||||
|
'duration': 321.0,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'thumbnails': list,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://m.nuvid.com/video/6523263',
|
||||||
|
'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6523263',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Slut brunette college student anal dorm',
|
||||||
|
'format_id': '720p',
|
||||||
|
'duration': 421.0,
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'thumbnails': list,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.nuvid.com/video/6415801/',
|
||||||
|
'md5': '638d5ececb138d5753593f751ae3f697',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6415801',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'My best friend wanted to fuck my wife for a long time',
|
||||||
|
'format_id': '720p',
|
||||||
|
'duration': 1882,
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'thumbnails': list,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
page_url = 'http://m.nuvid.com/video/%s' % video_id
|
qualities = {
|
||||||
|
'lq': '360p',
|
||||||
|
'hq': '720p',
|
||||||
|
}
|
||||||
|
|
||||||
|
json_url = 'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'.format(**locals())
|
||||||
|
video_data = self._download_json(
|
||||||
|
json_url, video_id, headers={
|
||||||
|
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
|
||||||
|
}) or {}
|
||||||
|
|
||||||
|
# nice to have, not required
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
page_url, video_id, 'Downloading video page')
|
'http://m.nuvid.com/video/%s' % (video_id, ),
|
||||||
# When dwnld_speed exists and has a value larger than the MP4 file's
|
video_id, 'Downloading video page', fatal=False) or ''
|
||||||
# bitrate, Nuvid returns the MP4 URL
|
|
||||||
# It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm
|
title = (
|
||||||
self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
|
try_get(video_data, lambda x: x['title'], compat_str)
|
||||||
mp4_webpage = self._download_webpage(
|
or self._html_search_regex(
|
||||||
page_url, video_id, 'Downloading video page for MP4 format')
|
(r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
|
||||||
|
r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
|
||||||
|
r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
|
||||||
|
webpage, 'title', group='title')).strip()
|
||||||
|
|
||||||
html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
|
|
||||||
video_url = self._html_search_regex(html5_video_re, webpage, video_id)
|
|
||||||
mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': video_url,
|
'url': source,
|
||||||
}]
|
'format_id': qualities.get(quality),
|
||||||
if mp4_video_url != video_url:
|
'height': int_or_none(qualities.get(quality)[:-1]),
|
||||||
formats.append({
|
} for quality, source in video_data.get('files').items() if source]
|
||||||
'url': mp4_video_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
self._check_formats(formats, video_id)
|
||||||
[r'<span title="([^"]+)">',
|
self._sort_formats(formats)
|
||||||
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
|
|
||||||
r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
|
duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))
|
||||||
thumbnails = [
|
thumbnails = [
|
||||||
{
|
{'url': thumb_url, }
|
||||||
'url': thumb_url,
|
for thumb_url in (
|
||||||
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
|
url_or_none(src) for src in re.findall(
|
||||||
|
r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>',
|
||||||
|
webpage))
|
||||||
]
|
]
|
||||||
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
|
||||||
duration = parse_duration(self._html_search_regex(
|
|
||||||
[r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
|
|
||||||
r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'thumbnail': url_or_none(video_data.get('poster')),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
@ -5,15 +5,16 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
remove_start,
|
remove_start,
|
||||||
@ -96,12 +97,100 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
if not formats and geoprotection is True:
|
if not formats and geoprotection is True:
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
|
formats.extend(self._create_http_urls(relinker_url, formats))
|
||||||
|
|
||||||
return dict((k, v) for k, v in {
|
return dict((k, v) for k, v in {
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}.items() if v is not None)
|
}.items() if v is not None)
|
||||||
|
|
||||||
|
def _create_http_urls(self, relinker_url, fmts):
|
||||||
|
_RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
|
||||||
|
_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
|
||||||
|
_QUALITY = {
|
||||||
|
# tbr: w, h
|
||||||
|
'250': [352, 198],
|
||||||
|
'400': [512, 288],
|
||||||
|
'700': [512, 288],
|
||||||
|
'800': [700, 394],
|
||||||
|
'1200': [736, 414],
|
||||||
|
'1800': [1024, 576],
|
||||||
|
'2400': [1280, 720],
|
||||||
|
'3200': [1440, 810],
|
||||||
|
'3600': [1440, 810],
|
||||||
|
'5000': [1920, 1080],
|
||||||
|
'10000': [1920, 1080],
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_url(url):
|
||||||
|
resp = self._request_webpage(
|
||||||
|
HEADRequest(url), None, headers={'User-Agent': 'Rai'},
|
||||||
|
fatal=False, errnote=False, note=False)
|
||||||
|
|
||||||
|
if resp is False:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if resp.code == 200:
|
||||||
|
return False if resp.url == url else resp.url
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_format_info(tbr):
|
||||||
|
import math
|
||||||
|
br = int_or_none(tbr)
|
||||||
|
if len(fmts) == 1 and not br:
|
||||||
|
br = fmts[0].get('tbr')
|
||||||
|
if br > 300:
|
||||||
|
tbr = compat_str(math.floor(br / 100) * 100)
|
||||||
|
else:
|
||||||
|
tbr = '250'
|
||||||
|
|
||||||
|
# try extracting info from available m3u8 formats
|
||||||
|
format_copy = None
|
||||||
|
for f in fmts:
|
||||||
|
if f.get('tbr'):
|
||||||
|
br_limit = math.floor(br / 100)
|
||||||
|
if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
|
||||||
|
format_copy = f.copy()
|
||||||
|
return {
|
||||||
|
'width': format_copy.get('width'),
|
||||||
|
'height': format_copy.get('height'),
|
||||||
|
'tbr': format_copy.get('tbr'),
|
||||||
|
'vcodec': format_copy.get('vcodec'),
|
||||||
|
'acodec': format_copy.get('acodec'),
|
||||||
|
'fps': format_copy.get('fps'),
|
||||||
|
'format_id': 'https-%s' % tbr,
|
||||||
|
} if format_copy else {
|
||||||
|
'width': _QUALITY[tbr][0],
|
||||||
|
'height': _QUALITY[tbr][1],
|
||||||
|
'format_id': 'https-%s' % tbr,
|
||||||
|
'tbr': int(tbr),
|
||||||
|
}
|
||||||
|
|
||||||
|
loc = test_url(_MP4_TMPL % (relinker_url, '*'))
|
||||||
|
if not isinstance(loc, compat_str):
|
||||||
|
return []
|
||||||
|
|
||||||
|
mobj = re.match(
|
||||||
|
_RELINKER_REG,
|
||||||
|
test_url(relinker_url) or '')
|
||||||
|
if not mobj:
|
||||||
|
return []
|
||||||
|
|
||||||
|
available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
|
||||||
|
available_qualities = [i for i in available_qualities if i]
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for q in available_qualities:
|
||||||
|
fmt = {
|
||||||
|
'url': _MP4_TMPL % (relinker_url, q),
|
||||||
|
'protocol': 'https',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}
|
||||||
|
fmt.update(get_format_info(q))
|
||||||
|
formats.append(fmt)
|
||||||
|
return formats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_subtitles(url, video_data):
|
def _extract_subtitles(url, video_data):
|
||||||
STL_EXT = 'stl'
|
STL_EXT = 'stl'
|
||||||
@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# 1080p direct mp4 url
|
||||||
|
'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
|
||||||
|
'md5': '2e501e8651d72f05ffe8f5d286ad560b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Leonardo - S1E1',
|
||||||
|
'alt_title': 'St 1 Ep 1 - Episodio 1',
|
||||||
|
'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Rai 1',
|
||||||
|
'duration': 3229,
|
||||||
|
'series': 'Leonardo',
|
||||||
|
'season': 'Season 1',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -158,6 +263,10 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
# subtitles at 'subtitlesArray' key (see #27698)
|
# subtitles at 'subtitlesArray' key (see #27698)
|
||||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# DRM protected
|
||||||
|
'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -166,6 +275,13 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
base + '.json', video_id, 'Downloading video JSON')
|
base + '.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
if try_get(
|
||||||
|
media,
|
||||||
|
(lambda x: x['rights_management']['rights']['drm'],
|
||||||
|
lambda x: x['program_info']['rights_management']['rights']['drm']),
|
||||||
|
dict):
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
title = media['name']
|
title = media['name']
|
||||||
|
|
||||||
video = media['video']
|
video = media['video']
|
||||||
@ -307,7 +423,7 @@ class RaiIE(RaiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# with ContentItem in og:url
|
# with ContentItem in og:url
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||||
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
'md5': '06345bd97c932f19ffb129973d07a020',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -6,7 +6,8 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none
|
int_or_none,
|
||||||
|
str_to_int
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -179,7 +180,7 @@ class RUTVIE(InfoExtractor):
|
|||||||
'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
|
'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
|
||||||
'rtmp_live': True,
|
'rtmp_live': True,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'vbr': int(quality),
|
'vbr': str_to_int(quality),
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
}
|
}
|
||||||
elif transport == 'm3u8':
|
elif transport == 'm3u8':
|
||||||
|
@ -1,105 +1,126 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import hashlib
|
import json
|
||||||
import time
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
merge_dicts,
|
||||||
|
parse_codecs,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _get_api_key(api_path):
|
|
||||||
if api_path.endswith('?'):
|
|
||||||
api_path = api_path[:-1]
|
|
||||||
|
|
||||||
api_key = 'fb5f58a820353bd7095de526253c14fd'
|
|
||||||
a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
|
|
||||||
return hashlib.md5(a.encode('ascii')).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
class StreamCZIE(InfoExtractor):
|
class StreamCZIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)'
|
||||||
_API_URL = 'http://www.stream.cz/API'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
|
||||||
'md5': '934bb6a6d220d99c010783c9719960d5',
|
'md5': '40c41ade1464a390a0b447e333df4239',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '765767',
|
'id': '57953890',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Peklo na talíři: Éčka pro děti',
|
'title': 'Bůh',
|
||||||
'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE',
|
'display_id': 'buh',
|
||||||
'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
|
'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
|
||||||
'duration': 256,
|
'duration': 1369.6,
|
||||||
},
|
'view_count': int,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
|
'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
|
||||||
'md5': '849a88c1e1ca47d41403c2ba5e59e261',
|
'md5': '41fd358000086a1ccdb068c77809b158',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10002447',
|
'id': '64087937',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Kancelář Blaník: Tři roky pro Mazánka',
|
'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
|
||||||
'description': 'md5:3862a00ba7bf0b3e44806b544032c859',
|
'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
|
||||||
'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000',
|
'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
|
||||||
'duration': 368,
|
'duration': 50.2,
|
||||||
},
|
'view_count': int,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
|
||||||
|
'md5': '3ee4d0be040e8f4a543e67e509d55e3f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '64147267',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
|
||||||
|
'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
|
||||||
|
'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
|
||||||
|
'duration': 442.84,
|
||||||
|
'view_count': int,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_formats(self, spl_url, video):
    """Yield one format dict per playable stream described by `video`.

    Two stream containers are read from `video`:
      * `http_stream` -> `qualities`, emitted with ext 'ts' and
        source_preference -1;
      * `mp4`, emitted with ext 'mp4' and source_preference 1.
    A container that is missing or null simply contributes no formats.
    Streams without a `url` are skipped; the remaining URLs are resolved
    against `spl_url`.
    """
    # `or {}` (rather than a .get() default) also covers keys that are
    # present but null in the JSON
    http_stream = video.get('http_stream') or {}
    for ext, pref, streams in (
            ('ts', -1, http_stream.get('qualities') or {}),
            ('mp4', 1, video.get('mp4') or {})):
        for format_id, stream in streams.items():
            if not stream.get('url'):
                continue
            # expected to be a [width, height] pair; zeros mean "unknown"
            resolution = stream.get('resolution') or 2 * [0]
            yield merge_dicts({
                'format_id': '-'.join((format_id, ext)),
                'ext': ext,
                'source_preference': pref,
                'url': urljoin(spl_url, stream['url']),
                # NOTE(review): scale=1000 presumably converts bit/s -> kbit/s
                # and ms -> s; confirm against the SPL payload
                'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
                'duration': float_or_none(stream.get('duration'), scale=1000),
                'width': resolution[0] or None,
                # fall back to the number in a format id such as '720p'
                'height': resolution[1] or int_or_none(format_id.replace('p', '')),
            }, parse_codecs(stream.get('codec')))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
    """Extract a stream.cz / televizeseznam.cz episode.

    Queries the site's GraphQL endpoint for the episode record, downloads
    the SPL playlist that record points to (following one JSON-level
    'Location' redirect if the first answer carries no 'data'), and builds
    the formats and subtitles from the playlist.
    """
    display_id, video_id = re.match(self._VALID_URL, url).groups()

    # The query text below is sent verbatim; it is left flush-left so the
    # request body stays exactly as it was.
    data = self._download_json(
        'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',
        data=json.dumps({
            'variables': {'urlName': video_id},
            'query': '''
query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } }
fragment VideoDetailFragmentOnEpisode on Episode {
id
spl
urlName
name
perex
duration
views
}'''
        }).encode('utf-8'),
        headers={'Content-Type': 'application/json;charset=UTF-8'}
    )['data']['episode']

    spl_url = data['spl'] + 'spl2,3'
    metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
    if 'Location' in metadata and 'data' not in metadata:
        # the playlist moved: the JSON answer names the new location
        spl_url = metadata['Location']
        metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist')
    video = metadata['data']

    subtitles = {}
    # tolerate a missing or null 'subtitles' / 'urls' entry instead of
    # failing the whole extraction on an AttributeError
    for subs in (video.get('subtitles') or {}).values():
        lang = subs.get('language')
        if not lang:
            continue
        for ext, sub_url in (subs.get('urls') or {}).items():
            subtitles.setdefault(lang, []).append({
                'ext': ext,
                'url': urljoin(spl_url, sub_url)
            })

    formats = list(self._extract_formats(spl_url, video))
    self._sort_formats(formats)

    return {
        'id': video_id,
        'display_id': display_id,
        'title': data.get('name'),
        'description': data.get('perex'),
        'duration': float_or_none(data.get('duration')),
        'view_count': int_or_none(data.get('views')),
        'formats': formats,
        'subtitles': subtitles,
    }
|
||||||
|
@ -1,19 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .jwplatform import JWPlatformIE
|
|
||||||
from .nexx import NexxIE
|
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
NO_DEFAULT,
|
ExtractorError,
|
||||||
smuggle_url,
|
extract_attributes,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .dplay import DPlayIE
|
||||||
|
|
||||||
class Tele5IE(InfoExtractor):
|
|
||||||
|
class Tele5IE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -28,6 +25,7 @@ class Tele5IE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available: "404 Seite nicht gefunden"',
|
||||||
}, {
|
}, {
|
||||||
# jwplatform, nexx unavailable
|
# jwplatform, nexx unavailable
|
||||||
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
||||||
@ -42,7 +40,20 @@ class Tele5IE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [JWPlatformIE.ie_key()],
|
'skip': 'No longer available, redirects to Filme page',
|
||||||
|
}, {
|
||||||
|
'url': 'https://tele5.de/mediathek/angel-of-mine/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1252360',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20220109',
|
||||||
|
'timestamp': 1641762000,
|
||||||
|
'title': 'Angel of Mine',
|
||||||
|
'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -64,45 +75,18 @@ class Tele5IE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
    """Extract a tele5.de video through the Discovery ("disco") API.

    The page's <hyoga-player> element carries the asset id, locale, realm
    and API endpoint as attributes; these are handed to
    `_get_disco_api_info` (not defined in this block; presumably inherited
    from DPlayIE).  An ExtractorError whose `message` attribute equals
    'Missing deviceId in context' is re-raised as an expected
    'DRM protected' error; any other error propagates unchanged.
    """
    page_id = self._match_id(url)
    html = self._download_webpage(url, page_id)
    attrs = extract_attributes(self._search_regex(
        r'(<hyoga-player\b[^>]+?>)', html, 'video player'))

    # all three attributes are mandatory (KeyError if one is absent)
    asset_id = attrs['assetid']
    country = attrs['locale']
    realm = attrs['realm']

    host = compat_urlparse.urlparse(attrs['endpoint']).hostname
    source_type = attrs.get('sourcetype')
    # a source type, when given, is prefixed to the API host name
    endpoint = '%s-%s' % (source_type, host) if source_type else host

    try:
        return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
    except ExtractorError as err:
        if getattr(err, 'message', '') != 'Missing deviceId in context':
            raise
        raise ExtractorError('DRM protected', cause=err, expected=True)
|
||||||
return nexx_result(video_id)
|
|
||||||
elif re.match(JWPLATFORM_ID_RE, video_id):
|
|
||||||
jwplatform_id = video_id
|
|
||||||
|
|
||||||
if not nexx_id:
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
def extract_id(pattern, name, default=NO_DEFAULT):
|
|
||||||
return self._html_search_regex(
|
|
||||||
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
|
|
||||||
r'\s+id\s*=\s*["\']player_(%s)' % pattern,
|
|
||||||
r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
|
|
||||||
default=default)
|
|
||||||
|
|
||||||
nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
|
|
||||||
if nexx_id:
|
|
||||||
return nexx_result(nexx_id)
|
|
||||||
|
|
||||||
if not jwplatform_id:
|
|
||||||
jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
|
|
||||||
|
|
||||||
return self.url_result(
|
|
||||||
smuggle_url(
|
|
||||||
'jwplatform:%s' % jwplatform_id,
|
|
||||||
{'geo_countries': self._GEO_COUNTRIES}),
|
|
||||||
ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
|
|
||||||
|
@ -41,8 +41,16 @@ class TV2DKIE(InfoExtractor):
|
|||||||
'duration': 1347,
|
'duration': 1347,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'add_ie': ['Kaltura'],
|
||||||
'skip_download': True,
|
}, {
|
||||||
|
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_7iwll9n0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20211027',
|
||||||
|
'title': 'Gadekamp #6 - Højhuse i København',
|
||||||
|
'uploader_id': 'tv2lorry',
|
||||||
|
'timestamp': 1635345229,
|
||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
}, {
|
}, {
|
||||||
@ -91,11 +99,14 @@ class TV2DKIE(InfoExtractor):
|
|||||||
add_entry(partner_id, kaltura_id)
|
add_entry(partner_id, kaltura_id)
|
||||||
if not entries:
|
if not entries:
|
||||||
kaltura_id = self._search_regex(
|
kaltura_id = self._search_regex(
|
||||||
r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
|
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
|
||||||
|
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
|
||||||
partner_id = self._search_regex(
|
partner_id = self._search_regex(
|
||||||
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
||||||
'partner id')
|
'partner id')
|
||||||
add_entry(partner_id, kaltura_id)
|
add_entry(partner_id, kaltura_id)
|
||||||
|
if len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
|
@ -95,7 +95,6 @@ class UOLIE(InfoExtractor):
|
|||||||
if v:
|
if v:
|
||||||
query[k] = v
|
query[k] = v
|
||||||
f_url = update_url_query(f_url, query)
|
f_url = update_url_query(f_url, query)
|
||||||
format_id = format_id
|
|
||||||
if format_id == 'HLS':
|
if format_id == 'HLS':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
f_url, media_id, 'mp4', 'm3u8_native',
|
f_url, media_id, 'mp4', 'm3u8_native',
|
||||||
|
@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
dict_get,
|
dict_get,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
ISO639Utils,
|
||||||
|
parse_age_limit,
|
||||||
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -23,9 +27,10 @@ class URPlayIE(InfoExtractor):
|
|||||||
'upload_date': '20171214',
|
'upload_date': '20171214',
|
||||||
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
||||||
'duration': 2269,
|
'duration': 2269,
|
||||||
'categories': ['Kultur & historia'],
|
'categories': ['Vetenskap & teknik'],
|
||||||
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
||||||
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
||||||
|
'age_limit': 15,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||||
@ -50,11 +55,19 @@ class URPlayIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = url.replace('skola.se/Produkter', 'play.se/program')
|
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
vid = int(video_id)
|
urplayer_data = self._search_regex(
|
||||||
accessible_episodes = self._parse_json(self._html_search_regex(
|
r'(?s)\bid\s*=\s*"__NEXT_DATA__"[^>]*>\s*({.+?})\s*</script',
|
||||||
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
webpage, 'urplayer next data', fatal=False) or {}
|
||||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
if urplayer_data:
|
||||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
|
urplayer_data = self._parse_json(urplayer_data, video_id, fatal=False)
|
||||||
|
urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
|
||||||
|
if not urplayer_data:
|
||||||
|
raise ExtractorError('Unable to parse __NEXT_DATA__')
|
||||||
|
else:
|
||||||
|
accessible_episodes = self._parse_json(self._html_search_regex(
|
||||||
|
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
||||||
|
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||||
|
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
|
||||||
episode = urplayer_data['title']
|
episode = urplayer_data['title']
|
||||||
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
||||||
host = self._download_json(
|
host = self._download_json(
|
||||||
@ -72,6 +85,30 @@ class URPlayIE(InfoExtractor):
|
|||||||
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
|
def parse_lang_code(code):
|
||||||
|
"3-character language code or None (utils candidate)"
|
||||||
|
if code is None:
|
||||||
|
return
|
||||||
|
lang = code.lower()
|
||||||
|
if not ISO639Utils.long2short(lang):
|
||||||
|
lang = ISO639Utils.short2long(lang)
|
||||||
|
return lang or None
|
||||||
|
|
||||||
|
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
|
||||||
|
if (k in ('sd', 'hd') or not isinstance(v, dict)):
|
||||||
|
continue
|
||||||
|
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
|
||||||
|
if not sttl_url:
|
||||||
|
continue
|
||||||
|
lang = parse_lang_code(lang)
|
||||||
|
if not lang:
|
||||||
|
continue
|
||||||
|
sttl = subtitles.get(lang) or []
|
||||||
|
sttl.append({'ext': k, 'url': sttl_url, })
|
||||||
|
subtitles[lang] = sttl
|
||||||
|
|
||||||
image = urplayer_data.get('image') or {}
|
image = urplayer_data.get('image') or {}
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for k, v in image.items():
|
for k, v in image.items():
|
||||||
@ -104,4 +141,7 @@ class URPlayIE(InfoExtractor):
|
|||||||
'season': series.get('label'),
|
'season': series.get('label'),
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||||
|
'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0
|
||||||
|
for a in urplayer_data.get('ageRanges', []))),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
urljoin,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
@ -19,6 +20,7 @@ from ..compat import (
|
|||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_ord,
|
compat_ord,
|
||||||
compat_struct_pack,
|
compat_struct_pack,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -45,10 +47,24 @@ class VideaIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
||||||
'only_matching': True,
|
'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'jAHDWfWSJH5XuFhH',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Supercars előzés',
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'duration': 64,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
||||||
'only_matching': True,
|
'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8YfIAjxwWGwT8HVQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||||
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'duration': 21,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -91,13 +107,20 @@ class VideaIE(InfoExtractor):
|
|||||||
k = S[(S[i] + S[j]) % 256]
|
k = S[(S[i] + S[j]) % 256]
|
||||||
res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
|
res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
|
||||||
|
|
||||||
return res.decode()
|
return res.decode('utf-8')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
query = {'v': video_id}
|
video_page = self._download_webpage(url, video_id)
|
||||||
player_page = self._download_webpage(
|
|
||||||
'https://videa.hu/player', video_id, query=query)
|
if 'videa.hu/player' in url:
|
||||||
|
player_url = url
|
||||||
|
player_page = video_page
|
||||||
|
else:
|
||||||
|
player_url = self._search_regex(
|
||||||
|
r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
|
||||||
|
player_url = urljoin(url, player_url)
|
||||||
|
player_page = self._download_webpage(player_url, video_id)
|
||||||
|
|
||||||
nonce = self._search_regex(
|
nonce = self._search_regex(
|
||||||
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
|
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
|
||||||
@ -107,6 +130,7 @@ class VideaIE(InfoExtractor):
|
|||||||
for i in range(0, 32):
|
for i in range(0, 32):
|
||||||
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
||||||
|
|
||||||
|
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
|
||||||
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
|
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
|
||||||
query['_s'] = random_seed
|
query['_s'] = random_seed
|
||||||
query['_t'] = result[:16]
|
query['_t'] = result[:16]
|
||||||
@ -121,13 +145,13 @@ class VideaIE(InfoExtractor):
|
|||||||
compat_b64decode(b64_info), key), video_id)
|
compat_b64decode(b64_info), key), video_id)
|
||||||
|
|
||||||
video = xpath_element(info, './video', 'video')
|
video = xpath_element(info, './video', 'video')
|
||||||
if not video:
|
if video is None:
|
||||||
raise ExtractorError(xpath_element(
|
raise ExtractorError(xpath_element(
|
||||||
info, './error', fatal=True), expected=True)
|
info, './error', fatal=True), expected=True)
|
||||||
sources = xpath_element(
|
sources = xpath_element(
|
||||||
info, './video_sources', 'sources', fatal=True)
|
info, './video_sources', 'sources', fatal=True)
|
||||||
hash_values = xpath_element(
|
hash_values = xpath_element(
|
||||||
info, './hash_values', 'hash values', fatal=True)
|
info, './hash_values', 'hash values', fatal=False)
|
||||||
|
|
||||||
title = xpath_text(video, './title', fatal=True)
|
title = xpath_text(video, './title', fatal=True)
|
||||||
|
|
||||||
@ -136,15 +160,16 @@ class VideaIE(InfoExtractor):
|
|||||||
source_url = source.text
|
source_url = source.text
|
||||||
source_name = source.get('name')
|
source_name = source.get('name')
|
||||||
source_exp = source.get('exp')
|
source_exp = source.get('exp')
|
||||||
if not (source_url and source_name and source_exp):
|
if not (source_url and source_name):
|
||||||
continue
|
continue
|
||||||
hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
|
hash_value = (
|
||||||
if not hash_value:
|
xpath_text(hash_values, 'hash_value_' + source_name)
|
||||||
continue
|
if hash_values is not None else None)
|
||||||
source_url = update_url_query(source_url, {
|
if hash_value and source_exp:
|
||||||
'md5': hash_value,
|
source_url = update_url_query(source_url, {
|
||||||
'expires': source_exp,
|
'md5': hash_value,
|
||||||
})
|
'expires': source_exp,
|
||||||
|
})
|
||||||
f = parse_codecs(source.get('codecs'))
|
f = parse_codecs(source.get('codecs'))
|
||||||
f.update({
|
f.update({
|
||||||
'url': self._proto_relative_url(source_url),
|
'url': self._proto_relative_url(source_url),
|
||||||
|
@ -1,38 +1,29 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
|
||||||
std_headers,
|
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VikiBaseIE(InfoExtractor):
|
class VikiBaseIE(InfoExtractor):
|
||||||
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
||||||
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
_API_URL_TEMPLATE = 'https://api.viki.io%s'
|
||||||
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
|
|
||||||
|
|
||||||
|
_DEVICE_ID = '112395910d'
|
||||||
_APP = '100005a'
|
_APP = '100005a'
|
||||||
_APP_VERSION = '6.0.0'
|
_APP_VERSION = '6.11.3'
|
||||||
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
_APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
|
||||||
|
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_NETRC_MACHINE = 'viki'
|
_NETRC_MACHINE = 'viki'
|
||||||
@ -45,43 +36,60 @@ class VikiBaseIE(InfoExtractor):
|
|||||||
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
def _stream_headers(self, timestamp, sig):
    """Return the HTTP headers for a signed API request.

    The device-describing headers are fixed values; `timestamp` and `sig`
    are stringified into the 'timestamp' and 'signature' headers, and the
    app version comes from `_APP_VERSION`.
    """
    # static description of the client device
    headers = {
        'X-Viki-manufacturer': 'vivo',
        'X-Viki-device-model': 'vivo 1606',
        'X-Viki-device-os-ver': '6.0.1',
        'X-Viki-connection-type': 'WIFI',
        'X-Viki-carrier': '',
        'X-Viki-as-id': '100005a-1625321982-3932',
    }
    # per-request values
    headers['timestamp'] = str(timestamp)
    headers['signature'] = str(sig)
    headers['x-viki-app-ver'] = self._APP_VERSION
    return headers
|
||||||
|
|
||||||
|
def _api_query(self, path, version=4, **kwargs):
    """Build the path-and-query part of a Viki API URL.

    path: API path; may already contain a query string.
    version: number used in the leading '/v<version>/' segment.
    kwargs: extra parameters, appended as '&name=value' in iteration
        order (values are inserted as-is, without URL escaping).

    Returns '/v<version>/<path>app=<_APP>', followed by '&token=<token>'
    when a session token is set, followed by the extra parameters.
    """
    # ensure that whatever follows can be appended as a query parameter
    path += '?' if '?' not in path else '&'
    query = '/v{version}/{path}app={app}'.format(
        version=version, path=path, app=self._APP)
    if self._token:
        query += '&token=%s' % self._token
    # format each pair explicitly: .format() has to be applied to the
    # template, not be part of the template text
    return query + ''.join(
        '&{name}={val}'.format(name=name, val=val)
        for name, val in kwargs.items())
|
||||||
|
|
||||||
|
def _sign_query(self, path):
    """Sign a version 5 API query for `path`.

    Returns a `(timestamp, signature, url)` tuple: the current Unix time
    in whole seconds, the hex HMAC-SHA1 (keyed with `_APP_SECRET`) of the
    query with '&t=<timestamp>' appended, and the full API URL.  The URL
    itself is built from the query without the timestamp.
    """
    timestamp = int(time.time())
    query = self._api_query(path, version=5)
    # the signature covers the query plus the timestamp parameter
    signed_text = '%s&t=%s' % (query, timestamp)
    digest = hmac.new(
        self._APP_SECRET.encode('ascii'),
        signed_text.encode('ascii'),
        hashlib.sha1)
    return timestamp, digest.hexdigest(), self._API_URL_TEMPLATE % query
|
||||||
url = self._API_URL_TEMPLATE % (query, sig)
|
|
||||||
return sanitized_Request(
|
|
||||||
url, json.dumps(post_data).encode('utf-8')) if post_data else url
|
|
||||||
|
|
||||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
def _call_api(
        self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
    """Call the Viki API and return the decoded JSON response.

    Without `query` the request goes to the signed (`_sign_query`) URL;
    with `query` it goes to the plain version 4 URL and `query` is passed
    on to `_download_json`.  A truthy `data` is sent JSON-encoded with only
    the app-version header; otherwise signed requests carry the
    `_stream_headers` set and unsigned ones no extra headers.  HTTP 400 is
    passed as an expected status.  An 'error' field in the response is
    handed to `_raise_error` together with `fatal`.  A falsy response is
    returned as an empty dict.
    """
    signed = query is None
    if signed:
        timestamp, sig, url = self._sign_query(path)
    else:
        url = self._API_URL_TEMPLATE % self._api_query(path, version=4)

    if data:
        payload = json.dumps(data).encode('utf-8')
        headers = {'x-viki-app-ver': self._APP_VERSION}
    else:
        payload = None
        # timestamp/sig only exist on the signed path
        headers = self._stream_headers(timestamp, sig) if signed else None

    resp = self._download_json(
        url, video_id, note, fatal=fatal, query=query, data=payload,
        headers=headers, expected_status=400) or {}

    self._raise_error(resp.get('error'), fatal)
    return resp
|
||||||
|
|
||||||
def _raise_error(self, error):
|
def _raise_error(self, error, fatal=True):
    """Report an API error message, if there is one.

    Does nothing when `error` is None.  Otherwise the message
    '<IE_NAME> said: <error>' is raised as an expected ExtractorError when
    `fatal` is true, or emitted through `report_warning` when it is not.
    """
    if error is not None:
        message = '%s said: %s' % (self.IE_NAME, error)
        if not fatal:
            self.report_warning(message)
            return
        raise ExtractorError(message, expected=True)
|
||||||
|
|
||||||
def _check_errors(self, data):
|
def _check_errors(self, data):
|
||||||
for reason, status in (data.get('blocking') or {}).items():
|
for reason, status in (data.get('blocking') or {}).items():
|
||||||
@ -90,9 +98,10 @@ class VikiBaseIE(InfoExtractor):
|
|||||||
if reason == 'geo':
|
if reason == 'geo':
|
||||||
self.raise_geo_restricted(msg=message)
|
self.raise_geo_restricted(msg=message)
|
||||||
elif reason == 'paywall':
|
elif reason == 'paywall':
|
||||||
|
if try_get(data, lambda x: x['paywallable']['tvod']):
|
||||||
|
self._raise_error('This video is for rent only or TVOD (Transactional Video On demand)')
|
||||||
self.raise_login_required(message)
|
self.raise_login_required(message)
|
||||||
raise ExtractorError('%s said: %s' % (
|
self._raise_error(message)
|
||||||
self.IE_NAME, message), expected=True)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
@ -102,35 +111,39 @@ class VikiBaseIE(InfoExtractor):
|
|||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
login_form = {
|
self._token = self._call_api(
|
||||||
'login_id': username,
|
'sessions.json', None, 'Logging in', fatal=False,
|
||||||
'password': password,
|
data={'username': username, 'password': password}).get('token')
|
||||||
}
|
|
||||||
|
|
||||||
login = self._call_api(
|
|
||||||
'sessions.json', None,
|
|
||||||
'Logging in', post_data=login_form)
|
|
||||||
|
|
||||||
self._token = login.get('token')
|
|
||||||
if not self._token:
|
if not self._token:
|
||||||
self.report_warning('Unable to get session token, login has probably failed')
|
self.report_warning('Login Failed: Unable to get session token')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def dict_selection(dict_obj, preferred_key, allow_fallback=True):
|
def dict_selection(dict_obj, preferred_key):
|
||||||
if preferred_key in dict_obj:
|
if preferred_key in dict_obj:
|
||||||
return dict_obj.get(preferred_key)
|
return dict_obj[preferred_key]
|
||||||
|
return (list(filter(None, dict_obj.values())) or [None])[0]
|
||||||
if not allow_fallback:
|
|
||||||
return
|
|
||||||
|
|
||||||
filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
|
|
||||||
return filtered_dict[0] if filtered_dict else None
|
|
||||||
|
|
||||||
|
|
||||||
class VikiIE(VikiBaseIE):
|
class VikiIE(VikiBaseIE):
|
||||||
IE_NAME = 'viki'
|
IE_NAME = 'viki'
|
||||||
_VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
|
_VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'note': 'Free non-DRM video with storyboards in MPD',
|
||||||
|
'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1175236v',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Choosing Spouse by Lottery - Episode 1',
|
||||||
|
'timestamp': 1606463239,
|
||||||
|
'age_limit': 12,
|
||||||
|
'uploader': 'FCC',
|
||||||
|
'upload_date': '20201127',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1023585v',
|
'id': '1023585v',
|
||||||
@ -146,7 +159,7 @@ class VikiIE(VikiBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
},
|
},
|
||||||
'skip': 'Blocked in the US',
|
'skip': 'Content is only available to Viki Pass Plus subscribers',
|
||||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
# clip
|
# clip
|
||||||
@ -178,11 +191,11 @@ class VikiIE(VikiBaseIE):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
},
|
},
|
||||||
'skip': 'Blocked in the US',
|
'skip': 'Page not found!',
|
||||||
}, {
|
}, {
|
||||||
# episode
|
# episode
|
||||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||||
'md5': '0a53dc252e6e690feccd756861495a8c',
|
'md5': '670440c79f7109ca6564d4c7f24e3e81',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '44699v',
|
'id': '44699v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -193,7 +206,7 @@ class VikiIE(VikiBaseIE):
|
|||||||
'upload_date': '20100405',
|
'upload_date': '20100405',
|
||||||
'uploader': 'group8',
|
'uploader': 'group8',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 15,
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -224,7 +237,7 @@ class VikiIE(VikiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# non-English description
|
# non-English description
|
||||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||||
'md5': '41faaba0de90483fb4848952af7c7d0d',
|
'md5': '78bf49fdaa51f9e7f9150262a9ef9bdf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '158036v',
|
'id': '158036v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -232,8 +245,8 @@ class VikiIE(VikiBaseIE):
|
|||||||
'upload_date': '20111122',
|
'upload_date': '20111122',
|
||||||
'timestamp': 1321985454,
|
'timestamp': 1321985454,
|
||||||
'description': 'md5:44b1e46619df3a072294645c770cef36',
|
'description': 'md5:44b1e46619df3a072294645c770cef36',
|
||||||
'title': 'Love In Magic',
|
'title': 'Love in Magic',
|
||||||
'age_limit': 13,
|
'age_limit': 15,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
@ -244,45 +257,53 @@ class VikiIE(VikiBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
resp = self._download_json(
|
video = self._call_api('videos/{0}.json'.format(video_id), video_id, 'Downloading video JSON', query={})
|
||||||
'https://www.viki.com/api/videos/' + video_id,
|
|
||||||
video_id, 'Downloading video JSON', headers={
|
|
||||||
'x-client-user-agent': std_headers['User-Agent'],
|
|
||||||
'x-viki-app-ver': '3.0.0',
|
|
||||||
})
|
|
||||||
video = resp['video']
|
|
||||||
|
|
||||||
self._check_errors(video)
|
self._check_errors(video)
|
||||||
|
|
||||||
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
title = try_get(video, lambda x: x['titles']['en'], str)
|
||||||
episode_number = int_or_none(video.get('number'))
|
episode_number = int_or_none(video.get('number'))
|
||||||
if not title:
|
if not title:
|
||||||
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
||||||
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
||||||
container_title = self.dict_selection(container_titles, 'en')
|
container_title = self.dict_selection(container_titles, 'en')
|
||||||
title = '%s - %s' % (container_title, title)
|
if container_title and title == video_id:
|
||||||
|
title = container_title
|
||||||
|
else:
|
||||||
|
title = '%s - %s' % (container_title, title)
|
||||||
|
|
||||||
|
resp = self._call_api(
|
||||||
|
'playback_streams/%s.json?drms=dt3&device_id=%s' % (video_id, self._DEVICE_ID),
|
||||||
|
video_id, 'Downloading video streams JSON')['main'][0]
|
||||||
|
|
||||||
|
mpd_url = resp['url']
|
||||||
|
# 720p is hidden in another MPD which can be found in the current manifest content
|
||||||
|
mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
|
||||||
|
mpd_url = self._search_regex(
|
||||||
|
r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
|
||||||
|
if 'mpdhd_high' not in mpd_url:
|
||||||
|
# Modify the URL to get 1080p
|
||||||
|
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
|
||||||
|
formats = self._extract_mpd_formats(mpd_url, video_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||||
|
thumbnails = [{
|
||||||
|
'id': thumbnail_id,
|
||||||
|
'url': thumbnail['url'],
|
||||||
|
} for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
|
||||||
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
||||||
|
|
||||||
thumbnails = []
|
stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
|
||||||
for thumbnail_id, thumbnail in (video.get('images') or {}).items():
|
subtitles = dict((lang, [{
|
||||||
thumbnails.append({
|
'ext': ext,
|
||||||
'id': thumbnail_id,
|
'url': self._API_URL_TEMPLATE % self._api_query(
|
||||||
'url': thumbnail.get('url'),
|
'videos/{0}/auth_subtitles/{1}.{2}'.format(video_id, lang, ext), stream_id=stream_id)
|
||||||
})
|
} for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys())
|
||||||
|
|
||||||
subtitles = {}
|
return {
|
||||||
for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
|
|
||||||
subtitles[subtitle_lang] = [{
|
|
||||||
'ext': subtitles_format,
|
|
||||||
'url': self._prepare_call(
|
|
||||||
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
|
|
||||||
} for subtitles_format in ('srt', 'vtt')]
|
|
||||||
|
|
||||||
result = {
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': int_or_none(video.get('duration')),
|
'duration': int_or_none(video.get('duration')),
|
||||||
@ -296,79 +317,6 @@ class VikiIE(VikiBaseIE):
|
|||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
def add_format(format_id, format_dict, protocol='http'):
|
|
||||||
# rtmps URLs does not seem to work
|
|
||||||
if protocol == 'rtmps':
|
|
||||||
return
|
|
||||||
format_url = format_dict.get('url')
|
|
||||||
if not format_url:
|
|
||||||
return
|
|
||||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
|
||||||
stream = qs.get('stream', [None])[0]
|
|
||||||
if stream:
|
|
||||||
format_url = base64.b64decode(stream).decode()
|
|
||||||
if format_id in ('m3u8', 'hls'):
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
|
||||||
# Despite CODECS metadata in m3u8 all video-only formats
|
|
||||||
# are actually video+audio
|
|
||||||
for f in m3u8_formats:
|
|
||||||
if '_drm/index_' in f['url']:
|
|
||||||
continue
|
|
||||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
|
||||||
f['acodec'] = None
|
|
||||||
formats.append(f)
|
|
||||||
elif format_id in ('mpd', 'dash'):
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
|
||||||
elif format_url.startswith('rtmp'):
|
|
||||||
mobj = re.search(
|
|
||||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
|
||||||
format_url)
|
|
||||||
if not mobj:
|
|
||||||
return
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'rtmp-%s' % format_id,
|
|
||||||
'ext': 'flv',
|
|
||||||
'url': mobj.group('url'),
|
|
||||||
'play_path': mobj.group('playpath'),
|
|
||||||
'app': mobj.group('app'),
|
|
||||||
'page_url': url,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': '%s-%s' % (format_id, protocol),
|
|
||||||
'height': int_or_none(self._search_regex(
|
|
||||||
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
|
||||||
})
|
|
||||||
|
|
||||||
for format_id, format_dict in (resp.get('streams') or {}).items():
|
|
||||||
add_format(format_id, format_dict)
|
|
||||||
if not formats:
|
|
||||||
streams = self._call_api(
|
|
||||||
'videos/%s/streams.json' % video_id, video_id,
|
|
||||||
'Downloading video streams JSON')
|
|
||||||
|
|
||||||
if 'external' in streams:
|
|
||||||
result.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': streams['external']['url'],
|
|
||||||
})
|
|
||||||
return result
|
|
||||||
|
|
||||||
for format_id, stream_dict in streams.items():
|
|
||||||
for protocol, format_dict in stream_dict.items():
|
|
||||||
add_format(format_id, format_dict, protocol)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
result['formats'] = formats
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
class VikiChannelIE(VikiBaseIE):
|
class VikiChannelIE(VikiBaseIE):
|
||||||
IE_NAME = 'viki:channel'
|
IE_NAME = 'viki:channel'
|
||||||
@ -378,9 +326,9 @@ class VikiChannelIE(VikiBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '50c',
|
'id': '50c',
|
||||||
'title': 'Boys Over Flowers',
|
'title': 'Boys Over Flowers',
|
||||||
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
'description': 'md5:f08b679c200e1a273c695fe9986f21d7',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 71,
|
'playlist_mincount': 51,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
|
'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -401,33 +349,38 @@ class VikiChannelIE(VikiBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PER_PAGE = 25
|
_video_types = ('episodes', 'movies', 'clips', 'trailers')
|
||||||
|
|
||||||
|
def _entries(self, channel_id):
|
||||||
|
params = {
|
||||||
|
'app': self._APP, 'token': self._token, 'only_ids': 'true',
|
||||||
|
'direction': 'asc', 'sort': 'number', 'per_page': 30
|
||||||
|
}
|
||||||
|
video_types = self._video_types
|
||||||
|
for video_type in video_types:
|
||||||
|
if video_type not in self._video_types:
|
||||||
|
self.report_warning('Unknown video_type: ' + video_type)
|
||||||
|
page_num = 0
|
||||||
|
while True:
|
||||||
|
page_num += 1
|
||||||
|
params['page'] = page_num
|
||||||
|
res = self._call_api(
|
||||||
|
'containers/{channel_id}/{video_type}.json'.format(**locals()), channel_id, query=params, fatal=False,
|
||||||
|
note='Downloading %s JSON page %d' % (video_type.title(), page_num))
|
||||||
|
|
||||||
|
for video_id in res.get('response') or []:
|
||||||
|
yield self.url_result('https://www.viki.com/videos/' + video_id, VikiIE.ie_key(), video_id)
|
||||||
|
if not res.get('more'):
|
||||||
|
break
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
channel = self._call_api(
|
channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON')
|
||||||
'containers/%s.json' % channel_id, channel_id,
|
|
||||||
'Downloading channel JSON')
|
|
||||||
|
|
||||||
self._check_errors(channel)
|
self._check_errors(channel)
|
||||||
|
|
||||||
title = self.dict_selection(channel['titles'], 'en')
|
return self.playlist_result(
|
||||||
|
self._entries(channel_id), channel_id,
|
||||||
description = self.dict_selection(channel['descriptions'], 'en')
|
self.dict_selection(channel['titles'], 'en'),
|
||||||
|
self.dict_selection(channel['descriptions'], 'en'))
|
||||||
entries = []
|
|
||||||
for video_type in ('episodes', 'clips', 'movies'):
|
|
||||||
for page_num in itertools.count(1):
|
|
||||||
page = self._call_api(
|
|
||||||
'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
|
|
||||||
% (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
|
|
||||||
'Downloading %s JSON page #%d' % (video_type, page_num))
|
|
||||||
for video in page['response']:
|
|
||||||
video_id = video['id']
|
|
||||||
entries.append(self.url_result(
|
|
||||||
'https://www.viki.com/videos/%s' % video_id, 'Viki'))
|
|
||||||
if not page['pagination']['next']:
|
|
||||||
break
|
|
||||||
|
|
||||||
return self.playlist_result(entries, channel_id, title, description)
|
|
||||||
|
@ -271,7 +271,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
)?
|
)?
|
||||||
vimeo(?:pro)?\.com/
|
vimeo(?:pro)?\.com/
|
||||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
(?:.*?/)?
|
(?:.*?/)??
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
play_redirect_hls|
|
play_redirect_hls|
|
||||||
@ -517,14 +517,28 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'url': 'https://vimeo.com/7809605',
|
'url': 'https://vimeo.com/7809605',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||||
'url': 'https://vimeo.com/392479337/a52724358e',
|
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# similar, but all numeric: ID must be 581039021, not 9603038895
|
||||||
|
# issue #29690
|
||||||
|
'url': 'https://vimeo.com/581039021/9603038895',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '581039021',
|
||||||
|
# these have to be provided but we don't care
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1627621014,
|
||||||
|
'title': 're:.+',
|
||||||
|
'uploader_id': 're:.+',
|
||||||
|
'uploader': 're:.+',
|
||||||
|
'upload_date': r're:\d+',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
|
@ -57,7 +57,7 @@ class WatIE(InfoExtractor):
|
|||||||
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
|
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
|
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
|
||||||
video_id, query={'context': 'MYTF1'})
|
video_id, query={'context': 'MYTF1', 'pver': '4001000'})
|
||||||
video_info = video_data['media']
|
video_info = video_data['media']
|
||||||
|
|
||||||
error_desc = video_info.get('error_desc')
|
error_desc = video_info.get('error_desc')
|
||||||
|
@ -10,6 +10,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
@ -22,9 +23,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class WDRIE(InfoExtractor):
|
class WDRIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
|
__API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'
|
||||||
|
_VALID_URL = (r'(?:https?:' + __API_URL_TPL) % (r'\d+', r'(?=\d+\.js)|wdr:)(?P<id>\d{6,})')
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1557833',
|
'id': 'mdb-1557833',
|
||||||
@ -32,11 +34,20 @@ class WDRIE(InfoExtractor):
|
|||||||
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
||||||
'upload_date': '20180112',
|
'upload_date': '20180112',
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _asset_url(self, wdr_id):
|
||||||
|
id_len = max(len(wdr_id), 5)
|
||||||
|
return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js'))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
if url.startswith('wdr:'):
|
||||||
|
video_id = url[4:]
|
||||||
|
url = self._asset_url(video_id)
|
||||||
|
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
url, video_id, transform_source=strip_jsonp)
|
url, video_id, transform_source=strip_jsonp)
|
||||||
|
|
||||||
@ -115,10 +126,10 @@ class WDRIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WDRPageIE(InfoExtractor):
|
class WDRPageIE(WDRIE):
|
||||||
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
_MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
|
||||||
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
||||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -159,11 +170,11 @@ class WDRPageIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1406149',
|
'id': 'mdb-2296252',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'alt_title': 'WDR Fernsehen Live',
|
'alt_title': 'WDR Fernsehen Live',
|
||||||
'upload_date': '20150101',
|
'upload_date': '20201112',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -172,7 +183,7 @@ class WDRPageIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 6,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aktuelle-stunde-120',
|
'id': 'aktuelle-stunde-120',
|
||||||
},
|
},
|
||||||
@ -180,10 +191,10 @@ class WDRPageIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1552552',
|
'id': 'mdb-2627637',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': 're:^[0-9]{8}$',
|
'upload_date': 're:^[0-9]{8}$',
|
||||||
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
|
||||||
},
|
},
|
||||||
'skip': 'The id changes from week to week because of the new episode'
|
'skip': 'The id changes from week to week because of the new episode'
|
||||||
},
|
},
|
||||||
@ -196,6 +207,7 @@ class WDRPageIE(InfoExtractor):
|
|||||||
'upload_date': '20130919',
|
'upload_date': '20130919',
|
||||||
'title': 'Sachgeschichte - Achterbahn ',
|
'title': 'Sachgeschichte - Achterbahn ',
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
||||||
@ -221,6 +233,7 @@ class WDRPageIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
||||||
@ -234,7 +247,7 @@ class WDRPageIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = mobj.group('display_id')
|
display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
@ -260,6 +273,14 @@ class WDRPageIE(InfoExtractor):
|
|||||||
jsonp_url = try_get(
|
jsonp_url = try_get(
|
||||||
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
||||||
if jsonp_url:
|
if jsonp_url:
|
||||||
|
# metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps
|
||||||
|
clip_id = media_link_obj['mediaObj'].get('ref')
|
||||||
|
if jsonp_url.endswith('.assetjsonp'):
|
||||||
|
asset = self._download_json(
|
||||||
|
jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
|
||||||
|
clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
|
||||||
|
if clip_id:
|
||||||
|
jsonp_url = self._asset_url(clip_id[4:])
|
||||||
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
||||||
|
|
||||||
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
||||||
@ -279,16 +300,14 @@ class WDRPageIE(InfoExtractor):
|
|||||||
class WDRElefantIE(InfoExtractor):
|
class WDRElefantIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
|
'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe',
|
||||||
|
# adaptive stream: unstable file MD5
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Folge Oster-Spezial 2015',
|
'title': 'Wippe',
|
||||||
'id': 'mdb-1088195',
|
'id': 'mdb-1198320',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': None,
|
'age_limit': None,
|
||||||
'upload_date': '20150406'
|
'upload_date': '20071003'
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -323,6 +342,7 @@ class WDRMobileIE(InfoExtractor):
|
|||||||
/[0-9]+/[0-9]+/
|
/[0-9]+/[0-9]+/
|
||||||
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
||||||
IE_NAME = 'wdr:mobile'
|
IE_NAME = 'wdr:mobile'
|
||||||
|
_WORKING = False # no such domain
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -82,7 +82,7 @@ class XVideosIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://www.xvideos.com/video%s/' % video_id, video_id)
|
'https://www.xvideos.com/video%s/0' % video_id, video_id)
|
||||||
|
|
||||||
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
|
@ -13,6 +13,7 @@ from .common import InfoExtractor, SearchInfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_map as map,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
@ -25,8 +26,10 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
|
error_to_compat_str,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@ -339,6 +342,60 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _search_results(self, query, params):
|
||||||
|
data = {
|
||||||
|
'context': {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'WEB',
|
||||||
|
'clientVersion': '2.20201021.03.00',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'query': query,
|
||||||
|
}
|
||||||
|
if params:
|
||||||
|
data['params'] = params
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
search = self._download_json(
|
||||||
|
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||||
|
video_id='query "%s"' % query,
|
||||||
|
note='Downloading page %s' % page_num,
|
||||||
|
errnote='Unable to download API page', fatal=False,
|
||||||
|
data=json.dumps(data).encode('utf8'),
|
||||||
|
headers={'content-type': 'application/json'})
|
||||||
|
if not search:
|
||||||
|
break
|
||||||
|
slr_contents = try_get(
|
||||||
|
search,
|
||||||
|
(lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
|
||||||
|
lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
|
||||||
|
list)
|
||||||
|
if not slr_contents:
|
||||||
|
break
|
||||||
|
for slr_content in slr_contents:
|
||||||
|
isr_contents = try_get(
|
||||||
|
slr_content,
|
||||||
|
lambda x: x['itemSectionRenderer']['contents'],
|
||||||
|
list)
|
||||||
|
if not isr_contents:
|
||||||
|
continue
|
||||||
|
for content in isr_contents:
|
||||||
|
if not isinstance(content, dict):
|
||||||
|
continue
|
||||||
|
video = content.get('videoRenderer')
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
video_id = video.get('videoId')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
yield self._extract_video(video)
|
||||||
|
token = try_get(
|
||||||
|
slr_contents,
|
||||||
|
lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||||
|
compat_str)
|
||||||
|
if not token:
|
||||||
|
break
|
||||||
|
data['continuation'] = token
|
||||||
|
|
||||||
|
|
||||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com'
|
IE_DESC = 'YouTube.com'
|
||||||
@ -415,6 +472,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
||||||
|
|shorts/
|
||||||
|(?: # or the v= param in all its forms
|
|(?: # or the v= param in all its forms
|
||||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||||
@ -1117,6 +1175,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# YT 'Shorts'
|
||||||
|
'url': 'https://youtube.com/shorts/4L2J27mJ3Dc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4L2J27mJ3Dc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20211025',
|
||||||
|
'uploader': 'Charlie Berens',
|
||||||
|
'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
|
||||||
|
'uploader_id': 'fivedlrmilkshake',
|
||||||
|
'title': 'Midwest Squid Game #Shorts',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
_formats = {
|
_formats = {
|
||||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
@ -1253,6 +1327,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
return id_m.group('id')
|
return id_m.group('id')
|
||||||
|
|
||||||
|
def _get_player_code(self, video_id, player_url, player_id=None):
|
||||||
|
if not player_id:
|
||||||
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
|
if player_id not in self._code_cache:
|
||||||
|
self._code_cache[player_id] = self._download_webpage(
|
||||||
|
player_url, video_id,
|
||||||
|
note='Downloading player ' + player_id,
|
||||||
|
errnote='Download of %s failed' % player_url)
|
||||||
|
return self._code_cache[player_id]
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
@ -1265,12 +1350,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if cache_spec is not None:
|
if cache_spec is not None:
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
|
||||||
if player_id not in self._code_cache:
|
code = self._get_player_code(video_id, player_url, player_id)
|
||||||
self._code_cache[player_id] = self._download_webpage(
|
|
||||||
player_url, video_id,
|
|
||||||
note='Downloading player ' + player_id,
|
|
||||||
errnote='Download of %s failed' % player_url)
|
|
||||||
code = self._code_cache[player_id]
|
|
||||||
res = self._parse_sig_js(code)
|
res = self._parse_sig_js(code)
|
||||||
|
|
||||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||||
@ -1323,10 +1403,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
funcname = self._search_regex(
|
funcname = self._search_regex(
|
||||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
|
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
|
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
||||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
|
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
|
||||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||||
# Obsolete patterns
|
# Obsolete patterns
|
||||||
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
@ -1349,11 +1429,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if player_url is None:
|
if player_url is None:
|
||||||
raise ExtractorError('Cannot decrypt signature without player_url')
|
raise ExtractorError('Cannot decrypt signature without player_url')
|
||||||
|
|
||||||
if player_url.startswith('//'):
|
|
||||||
player_url = 'https:' + player_url
|
|
||||||
elif not re.match(r'https?://', player_url):
|
|
||||||
player_url = compat_urlparse.urljoin(
|
|
||||||
'https://www.youtube.com', player_url)
|
|
||||||
try:
|
try:
|
||||||
player_id = (player_url, self._signature_cache_id(s))
|
player_id = (player_url, self._signature_cache_id(s))
|
||||||
if player_id not in self._player_cache:
|
if player_id not in self._player_cache:
|
||||||
@ -1370,6 +1445,105 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Signature extraction failed: ' + tb, cause=e)
|
'Signature extraction failed: ' + tb, cause=e)
|
||||||
|
|
||||||
|
def _extract_player_url(self, webpage):
    """Find the player JS URL in webpage and return it as an absolute URL.

    Returns None when the page (which may itself be None/empty) does not
    contain a PLAYER_JS_URL / jsUrl entry.
    """
    found = self._search_regex(
        r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
        webpage or '', 'player URL', fatal=False)
    if not found:
        return None
    # Protocol-relative URL: only the scheme is missing
    if found.startswith('//'):
        return 'https:' + found
    if re.match(r'https?://', found):
        return found
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', found)
|
||||||
|
|
||||||
|
# from yt-dlp
|
||||||
|
# See also:
|
||||||
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
|
||||||
|
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
||||||
|
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
||||||
|
def _extract_n_function_name(self, jscode):
    """Return the name of the player's "n" parameter transform function.

    The call site in jscode looks like `.get("n"))&&(b=NAME(x)` or
    `.get("n"))&&(b=NAME[IDX](x)`. In the indexed form NAME is an array
    variable and the real function name is its IDX-th element.

    Raises whatever self._search_regex raises when the call site, or the
    array declaration needed to resolve an index, cannot be found.
    """
    target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
    nfunc_and_idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
        jscode, 'Initial JS player n function name')
    nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
    if not idx:
        return nfunc
    if int_or_none(idx) == 0:
        # Fast path for the common single-element alias `var NAME=[real];`.
        # default=None makes a miss non-fatal so that the general array
        # parsing below is still attempted (previously a miss raised here
        # and the fallback could never run for index 0).
        real_nfunc = self._search_regex(
            r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
            'Initial JS player n function alias ({nfunc}[{idx}])'.format(nfunc=nfunc, idx=idx),
            default=None)
        if real_nfunc:
            return real_nfunc
    # General case: parse the whole array literal and pick element idx
    array_src = self._search_regex(
        r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
        'Initial JS player n function name ({nfunc}[{idx}])'.format(nfunc=nfunc, idx=idx))
    return self._parse_json(array_src, nfunc, transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
|
def _extract_n_function(self, video_id, player_url):
    """Build a callable that applies the player's "n" transform to a string.

    The extracted function code is loaded from / stored in the downloader
    cache under ('youtube-nsig', player_id), so the player JS is only
    fetched and searched on a cache miss.
    """
    player_id = self._extract_player_info(player_url)
    cache = self._downloader.cache
    func_code = cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._get_player_code(video_id, player_url, player_id)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self._downloader.params.get('youtube_print_sig_code'):
        self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(player_id, func_code[1]))

    def descramble(s):
        # The interpreter-level function is rebuilt on every call, as before
        return jsi.extract_function_from_code(*func_code)([s])

    return descramble
|
||||||
|
|
||||||
|
def _n_descramble(self, n_param, player_url, video_id):
|
||||||
|
"""Compute the response to YT's "n" parameter challenge
|
||||||
|
|
||||||
|
Args:
|
||||||
|
n_param -- challenge string that is the value of the
|
||||||
|
URL's "n" query parameter
|
||||||
|
player_url -- URL of YT player JS
|
||||||
|
video_id
|
||||||
|
"""
|
||||||
|
|
||||||
|
sig_id = ('nsig_value', n_param)
|
||||||
|
if sig_id in self._player_cache:
|
||||||
|
return self._player_cache[sig_id]
|
||||||
|
|
||||||
|
try:
|
||||||
|
player_id = ('nsig', player_url)
|
||||||
|
if player_id not in self._player_cache:
|
||||||
|
self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
|
||||||
|
func = self._player_cache[player_id]
|
||||||
|
self._player_cache[sig_id] = func(n_param)
|
||||||
|
if self._downloader.params.get('verbose', False):
|
||||||
|
self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
|
||||||
|
return self._player_cache[sig_id]
|
||||||
|
except Exception as e:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'[%s] %s (%s %s)' % (
|
||||||
|
self.IE_NAME,
|
||||||
|
'Unable to decode n-parameter: download likely to be throttled',
|
||||||
|
error_to_compat_str(e),
|
||||||
|
traceback.format_exc()))
|
||||||
|
|
||||||
|
def _unthrottle_format_urls(self, video_id, player_url, formats):
    """Replace the "n" query parameter of each format URL, in place.

    Formats whose URL carries no "n" parameter, or whose challenge could
    not be descrambled, are left untouched.
    """
    for fmt in formats:
        url_parts = compat_urlparse.urlparse(fmt['url'])
        query = compat_urlparse.parse_qs(url_parts.query)
        challenges = query.get('n')
        if challenges:
            # When the parameter is repeated the last occurrence is used
            response = self._n_descramble(challenges[-1], player_url, video_id)
            if response:
                query['n'] = [response]
                new_query = compat_urllib_parse_urlencode(query, True)
                fmt['url'] = compat_urlparse.urlunparse(
                    url_parts._replace(query=new_query))
|
||||||
|
|
||||||
def _mark_watched(self, video_id, player_response):
|
def _mark_watched(self, video_id, player_response):
|
||||||
playback_url = url_or_none(try_get(
|
playback_url = url_or_none(try_get(
|
||||||
player_response,
|
player_response,
|
||||||
@ -1631,11 +1805,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not (sc and fmt_url and encrypted_sig):
|
if not (sc and fmt_url and encrypted_sig):
|
||||||
continue
|
continue
|
||||||
if not player_url:
|
if not player_url:
|
||||||
if not webpage:
|
player_url = self._extract_player_url(webpage)
|
||||||
continue
|
|
||||||
player_url = self._search_regex(
|
|
||||||
r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
|
|
||||||
webpage, 'player URL', fatal=False)
|
|
||||||
if not player_url:
|
if not player_url:
|
||||||
continue
|
continue
|
||||||
signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
|
signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
|
||||||
@ -1781,6 +1951,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
is_live = video_details.get('isLive')
|
is_live = video_details.get('isLive')
|
||||||
owner_profile_url = microformat.get('ownerProfileUrl')
|
owner_profile_url = microformat.get('ownerProfileUrl')
|
||||||
|
|
||||||
|
if not player_url:
|
||||||
|
player_url = self._extract_player_url(webpage)
|
||||||
|
self._unthrottle_format_urls(video_id, player_url, formats)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(video_title) if is_live else video_title,
|
'title': self._live_title(video_title) if is_live else video_title,
|
||||||
@ -2020,7 +2194,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
(?:channel|c|user|feed|hashtag)/|
|
(?:channel|c|user|feed|hashtag)/|
|
||||||
(?:playlist|watch)\?.*?\blist=|
|
(?:playlist|watch)\?.*?\blist=|
|
||||||
(?!(?:watch|embed|v|e)\b)
|
(?!(?:watch|embed|v|e|results)\b)
|
||||||
)
|
)
|
||||||
(?P<id>[^/?\#&]+)
|
(?P<id>[^/?\#&]+)
|
||||||
'''
|
'''
|
||||||
@ -2323,6 +2497,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
|
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'Search tab',
|
||||||
|
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
|
||||||
|
'playlist_mincount': 40,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||||
|
'title': '3Blue1Brown - Search - linear algebra',
|
||||||
|
'description': 'md5:e1384e8a133307dd10edee76e875d62f',
|
||||||
|
'uploader': '3Blue1Brown',
|
||||||
|
'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -2720,8 +2905,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer dict of the tab flagged as selected.

    Each tab may wrap its renderer as 'tabRenderer' or
    'expandableTabRenderer'. Raises ExtractorError when no tab has
    'selected' set to True.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
|
||||||
|
|
||||||
@ -2778,6 +2964,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
title = channel_title or item_id
|
title = channel_title or item_id
|
||||||
if tab_title:
|
if tab_title:
|
||||||
title += ' - %s' % tab_title
|
title += ' - %s' % tab_title
|
||||||
|
if selected_tab.get('expandedText'):
|
||||||
|
title += ' - %s' % selected_tab['expandedText']
|
||||||
description = renderer.get('description')
|
description = renderer.get('description')
|
||||||
playlist_id = renderer.get('externalId')
|
playlist_id = renderer.get('externalId')
|
||||||
else:
|
else:
|
||||||
@ -3019,106 +3207,62 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    # Handles "ytsearchN:query" pseudo-URLs
    IE_DESC = 'YouTube.com searches'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'ytsearch10:youtube-dl test video',
        'playlist_count': 10,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

    def _get_n_results(self, query, n):
        """Return a playlist of the first n search results for query"""
        # islice() needs None, not infinity, to mean "no upper bound"
        limit = None if n == float('inf') else n
        results = self._search_results(query, self._SEARCH_PARAMS)
        return self.playlist_result(itertools.islice(results, 0, limit), query, query)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, with a different filter/sort token
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate10:youtube-dl test video',
        'playlist_count': 10,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
r"""
|
class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
    # Handles youtube.com/results?... URLs
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
        'params': {'playlistend': 5}
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the results for the search described by url"""
        url_query = compat_urllib_parse_urlparse(url).query
        qs = compat_parse_qs(url_query)
        # The search terms may arrive as either 'search_query' or 'q'
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        # 'sp' is optional and carries the sort/filter token
        params = qs.get('sp', ('',))[0]
        entries = self._search_results(query, params)
        return self.playlist_result(entries, query, query)
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
|
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
|
||||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
@ -145,6 +146,7 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
'timestamp': 1613948400,
|
'timestamp': 1613948400,
|
||||||
'upload_date': '20210221',
|
'upload_date': '20210221',
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||||
}, {
|
}, {
|
||||||
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
||||||
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
||||||
@ -158,6 +160,7 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
'timestamp': 1608604200,
|
'timestamp': 1608604200,
|
||||||
'upload_date': '20201222',
|
'upload_date': '20201222',
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
|
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -190,6 +193,17 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'video_artede_083871-001-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tödliche Flucht (1/6)',
|
||||||
|
'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
|
||||||
|
'duration': 3193.0,
|
||||||
|
'timestamp': 1641355200,
|
||||||
|
'upload_date': '20220105',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_entry(self, url, player, content, video_id):
|
def _extract_entry(self, url, player, content, video_id):
|
||||||
@ -197,12 +211,18 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
|
|
||||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||||
|
|
||||||
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
|
def get_ptmd_path(d):
|
||||||
|
return (
|
||||||
|
d.get('http://zdf.de/rels/streams/ptmd')
|
||||||
|
or d.get('http://zdf.de/rels/streams/ptmd-template',
|
||||||
|
'').replace('{playerId}', 'ngplayer_2_4'))
|
||||||
|
|
||||||
|
ptmd_path = get_ptmd_path(try_get(t, lambda x: x['streams']['default'], dict) or {})
|
||||||
|
if not ptmd_path:
|
||||||
|
ptmd_path = get_ptmd_path(t)
|
||||||
|
|
||||||
if not ptmd_path:
|
if not ptmd_path:
|
||||||
ptmd_path = t[
|
raise ExtractorError('Could not extract ptmd_path')
|
||||||
'http://zdf.de/rels/streams/ptmd-template'].replace(
|
|
||||||
'{playerId}', 'ngplayer_2_4')
|
|
||||||
|
|
||||||
info = self._extract_ptmd(
|
info = self._extract_ptmd(
|
||||||
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
||||||
|
@ -8,6 +8,16 @@ from .utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
)
|
)
|
||||||
|
from .compat import (
|
||||||
|
compat_collections_abc,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
MutableMapping = compat_collections_abc.MutableMapping
|
||||||
|
|
||||||
|
|
||||||
|
class Nonlocal:
    """Empty attribute holder; instances accept arbitrary attributes."""
|
||||||
|
|
||||||
|
|
||||||
_OPERATORS = [
|
_OPERATORS = [
|
||||||
('|', operator.or_),
|
('|', operator.or_),
|
||||||
@ -22,10 +32,56 @@ _OPERATORS = [
|
|||||||
('*', operator.mul),
|
('*', operator.mul),
|
||||||
]
|
]
|
||||||
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
|
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
|
||||||
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
|
_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
|
||||||
|
|
||||||
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|
||||||
|
|
||||||
|
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
||||||
|
|
||||||
|
|
||||||
|
class JS_Break(ExtractorError):
    """Raised with the fixed message 'Invalid break'."""

    def __init__(self):
        super(JS_Break, self).__init__('Invalid break')
|
||||||
|
|
||||||
|
|
||||||
|
class JS_Continue(ExtractorError):
    """Raised with the fixed message 'Invalid continue'."""

    def __init__(self):
        super(JS_Continue, self).__init__('Invalid continue')
|
||||||
|
|
||||||
|
|
||||||
|
class LocalNameSpace(MutableMapping):
    """Mapping view over a stack of scope dicts, searched first to last.

    Lookups return the value from the first scope containing the key.
    Assignments update the first scope that already has the key, or create
    the key in the first scope of the stack otherwise. Deleting is not
    supported.
    """

    def __init__(self, *stack):
        self.stack = tuple(stack)

    def __getitem__(self, key):
        for scope in self.stack:
            if key in scope:
                return scope[key]
        raise KeyError(key)

    def __setitem__(self, key, value):
        for scope in self.stack:
            if key in scope:
                scope[key] = value
                break
        else:
            # Not defined anywhere yet: declare it in the first scope
            self.stack[0][key] = value
        return value

    def __delitem__(self, key):
        raise NotImplementedError('Deleting is not supported')

    def __iter__(self):
        # A key shadowed by an earlier scope is reported once only, so that
        # iteration agrees with __getitem__ and __len__
        seen = set()
        for scope in self.stack:
            for name in scope:
                if name not in seen:
                    seen.add(name)
                    yield name

    def __len__(self):
        # Number of distinct keys visible through the stack. The previous
        # signature took a spurious `key` argument and called len() on a
        # generator, so len(namespace) always raised TypeError.
        return sum(1 for _ in self)

    def __repr__(self):
        return 'LocalNameSpace%s' % (self.stack, )
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
def __init__(self, code, objects=None):
|
def __init__(self, code, objects=None):
|
||||||
@ -34,11 +90,56 @@ class JSInterpreter(object):
|
|||||||
self.code = code
|
self.code = code
|
||||||
self._functions = {}
|
self._functions = {}
|
||||||
self._objects = objects
|
self._objects = objects
|
||||||
|
self.__named_object_counter = 0
|
||||||
|
|
||||||
|
def _named_object(self, namespace, obj):
|
||||||
|
self.__named_object_counter += 1
|
||||||
|
name = '__youtube_dl_jsinterp_obj%s' % (self.__named_object_counter, )
|
||||||
|
namespace[name] = obj
|
||||||
|
return name
|
||||||
|
|
||||||
|
@staticmethod
def _separate(expr, delim=',', max_split=None):
    """Lazily split expr on delim, skipping delimiters nested inside brackets.

    A delimiter only counts while every bracket counter is zero. When
    max_split is truthy, splitting stops after that many separations and
    the unsplit remainder is yielded as the final piece. Nothing is
    yielded for an empty expr.
    """
    if not expr:
        return
    # One nesting counter per closing bracket character
    depth = dict((closer, 0) for closer in _MATCHING_PARENS.values())
    last = len(delim) - 1
    piece_start = matched = n_splits = 0
    for idx, char in enumerate(expr):
        if char in _MATCHING_PARENS:
            depth[_MATCHING_PARENS[char]] += 1
        elif char in depth:
            depth[char] -= 1
        if char != delim[matched] or any(depth.values()):
            # Not (part of) a top-level delimiter: restart the match
            matched = 0
        elif matched != last:
            # Inside a multi-character delimiter: keep matching
            matched += 1
        else:
            yield expr[piece_start:idx - last]
            piece_start, matched = idx + 1, 0
            n_splits += 1
            if max_split and n_splits >= max_split:
                break
    yield expr[piece_start:]
|
||||||
|
|
||||||
|
@staticmethod
def _separate_at_paren(expr, delim):
    """Split expr, which starts with an opening bracket, at its closing delim.

    Returns (inner, rest): the stripped text between the first character
    and the first top-level delim, and the stripped text after that delim.
    Raises ExtractorError when no top-level delim is found.
    """
    pieces = list(JSInterpreter._separate(expr, delim, 1))
    if len(pieces) < 2:
        raise ExtractorError('No terminating paren {0} in {1}'.format(delim, expr))
    head, tail = pieces[0], pieces[1]
    # head still carries the opening bracket as its first character
    return head[1:].strip(), tail.strip()
|
||||||
|
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise ExtractorError('Recursion limit reached')
|
raise ExtractorError('Recursion limit reached')
|
||||||
|
|
||||||
|
sub_statements = list(self._separate(stmt, ';'))
|
||||||
|
stmt = (sub_statements or ['']).pop()
|
||||||
|
for sub_stmt in sub_statements:
|
||||||
|
ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
|
||||||
|
if should_abort:
|
||||||
|
return ret
|
||||||
|
|
||||||
should_abort = False
|
should_abort = False
|
||||||
stmt = stmt.lstrip()
|
stmt = stmt.lstrip()
|
||||||
stmt_m = re.match(r'var\s', stmt)
|
stmt_m = re.match(r'var\s', stmt)
|
||||||
@ -61,25 +162,124 @@ class JSInterpreter(object):
|
|||||||
if expr == '': # Empty expression
|
if expr == '': # Empty expression
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if expr.startswith('('):
|
if expr.startswith('{'):
|
||||||
parens_count = 0
|
inner, outer = self._separate_at_paren(expr, '}')
|
||||||
for m in re.finditer(r'[()]', expr):
|
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1)
|
||||||
if m.group(0) == '(':
|
if not outer or should_abort:
|
||||||
parens_count += 1
|
return inner
|
||||||
else:
|
|
||||||
parens_count -= 1
|
|
||||||
if parens_count == 0:
|
|
||||||
sub_expr = expr[1:m.start()]
|
|
||||||
sub_result = self.interpret_expression(
|
|
||||||
sub_expr, local_vars, allow_recursion)
|
|
||||||
remaining_expr = expr[m.end():].strip()
|
|
||||||
if not remaining_expr:
|
|
||||||
return sub_result
|
|
||||||
else:
|
|
||||||
expr = json.dumps(sub_result) + remaining_expr
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Premature end of parens in %r' % expr)
|
expr = json.dumps(inner) + outer
|
||||||
|
|
||||||
|
if expr.startswith('('):
|
||||||
|
inner, outer = self._separate_at_paren(expr, ')')
|
||||||
|
inner = self.interpret_expression(inner, local_vars, allow_recursion)
|
||||||
|
if not outer:
|
||||||
|
return inner
|
||||||
|
else:
|
||||||
|
expr = json.dumps(inner) + outer
|
||||||
|
|
||||||
|
if expr.startswith('['):
|
||||||
|
inner, outer = self._separate_at_paren(expr, ']')
|
||||||
|
name = self._named_object(local_vars, [
|
||||||
|
self.interpret_expression(item, local_vars, allow_recursion)
|
||||||
|
for item in self._separate(inner)])
|
||||||
|
expr = name + outer
|
||||||
|
|
||||||
|
m = re.match(r'try\s*', expr)
|
||||||
|
if m:
|
||||||
|
if expr[m.end()] == '{':
|
||||||
|
try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
|
||||||
|
else:
|
||||||
|
try_expr, expr = expr[m.end() - 1:], ''
|
||||||
|
ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1)
|
||||||
|
if should_abort:
|
||||||
|
return ret
|
||||||
|
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||||
|
|
||||||
|
m = re.match(r'(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
|
||||||
|
md = m.groupdict() if m else {}
|
||||||
|
if md.get('catch'):
|
||||||
|
# We ignore the catch block
|
||||||
|
_, expr = self._separate_at_paren(expr, '}')
|
||||||
|
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||||
|
|
||||||
|
elif md.get('for'):
|
||||||
|
def raise_constructor_error(c):
|
||||||
|
raise ExtractorError(
|
||||||
|
'Premature return in the initialization of a for loop in {0!r}'.format(c))
|
||||||
|
|
||||||
|
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
|
||||||
|
if remaining.startswith('{'):
|
||||||
|
body, expr = self._separate_at_paren(remaining, '}')
|
||||||
|
else:
|
||||||
|
m = re.match(r'switch\s*\(', remaining) # FIXME
|
||||||
|
if m:
|
||||||
|
switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
|
||||||
|
body, expr = self._separate_at_paren(remaining, '}')
|
||||||
|
body = 'switch(%s){%s}' % (switch_val, body)
|
||||||
|
else:
|
||||||
|
body, expr = remaining, ''
|
||||||
|
start, cndn, increment = self._separate(constructor, ';')
|
||||||
|
if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]:
|
||||||
|
raise_constructor_error(constructor)
|
||||||
|
while True:
|
||||||
|
if not self.interpret_expression(cndn, local_vars, allow_recursion):
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1)
|
||||||
|
if should_abort:
|
||||||
|
return ret
|
||||||
|
except JS_Break:
|
||||||
|
break
|
||||||
|
except JS_Continue:
|
||||||
|
pass
|
||||||
|
if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]:
|
||||||
|
raise_constructor_error(constructor)
|
||||||
|
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||||
|
|
||||||
|
elif md.get('switch'):
|
||||||
|
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
|
||||||
|
switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
|
||||||
|
body, expr = self._separate_at_paren(remaining, '}')
|
||||||
|
items = body.replace('default:', 'case default:').split('case ')[1:]
|
||||||
|
for default in (False, True):
|
||||||
|
matched = False
|
||||||
|
for item in items:
|
||||||
|
case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
|
||||||
|
if default:
|
||||||
|
matched = matched or case == 'default'
|
||||||
|
elif not matched:
|
||||||
|
matched = (case != 'default'
|
||||||
|
and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
|
||||||
|
if not matched:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1)
|
||||||
|
if should_abort:
|
||||||
|
return ret
|
||||||
|
except JS_Break:
|
||||||
|
break
|
||||||
|
if matched:
|
||||||
|
break
|
||||||
|
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||||
|
|
||||||
|
# Comma separated statements
|
||||||
|
sub_expressions = list(self._separate(expr))
|
||||||
|
expr = sub_expressions.pop().strip() if sub_expressions else ''
|
||||||
|
for sub_expr in sub_expressions:
|
||||||
|
self.interpret_expression(sub_expr, local_vars, allow_recursion)
|
||||||
|
|
||||||
|
for m in re.finditer(r'''(?x)
|
||||||
|
(?P<pre_sign>\+\+|--)(?P<var1>%(_NAME_RE)s)|
|
||||||
|
(?P<var2>%(_NAME_RE)s)(?P<post_sign>\+\+|--)''' % globals(), expr):
|
||||||
|
var = m.group('var1') or m.group('var2')
|
||||||
|
start, end = m.span()
|
||||||
|
sign = m.group('pre_sign') or m.group('post_sign')
|
||||||
|
ret = local_vars[var]
|
||||||
|
local_vars[var] += 1 if sign[0] == '+' else -1
|
||||||
|
if m.group('pre_sign'):
|
||||||
|
ret = local_vars[var]
|
||||||
|
expr = expr[:start] + json.dumps(ret) + expr[end:]
|
||||||
|
|
||||||
for op, opfunc in _ASSIGN_OPERATORS:
|
for op, opfunc in _ASSIGN_OPERATORS:
|
||||||
m = re.match(r'''(?x)
|
m = re.match(r'''(?x)
|
||||||
@ -88,14 +288,13 @@ class JSInterpreter(object):
|
|||||||
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
|
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
right_val = self.interpret_expression(
|
right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
|
||||||
m.group('expr'), local_vars, allow_recursion - 1)
|
|
||||||
|
|
||||||
if m.groupdict().get('index'):
|
if m.groupdict().get('index'):
|
||||||
lvar = local_vars[m.group('out')]
|
lvar = local_vars[m.group('out')]
|
||||||
idx = self.interpret_expression(
|
idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
|
||||||
m.group('index'), local_vars, allow_recursion)
|
if not isinstance(idx, int):
|
||||||
assert isinstance(idx, int)
|
raise ExtractorError('List indices must be integers: %s' % (idx, ))
|
||||||
cur = lvar[idx]
|
cur = lvar[idx]
|
||||||
val = opfunc(cur, right_val)
|
val = opfunc(cur, right_val)
|
||||||
lvar[idx] = val
|
lvar[idx] = val
|
||||||
@ -109,8 +308,13 @@ class JSInterpreter(object):
|
|||||||
if expr.isdigit():
|
if expr.isdigit():
|
||||||
return int(expr)
|
return int(expr)
|
||||||
|
|
||||||
|
if expr == 'break':
|
||||||
|
raise JS_Break()
|
||||||
|
elif expr == 'continue':
|
||||||
|
raise JS_Continue()
|
||||||
|
|
||||||
var_m = re.match(
|
var_m = re.match(
|
||||||
r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
|
r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
|
||||||
expr)
|
expr)
|
||||||
if var_m:
|
if var_m:
|
||||||
return local_vars[var_m.group('name')]
|
return local_vars[var_m.group('name')]
|
||||||
@ -124,91 +328,161 @@ class JSInterpreter(object):
|
|||||||
r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
|
r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
|
||||||
if m:
|
if m:
|
||||||
val = local_vars[m.group('in')]
|
val = local_vars[m.group('in')]
|
||||||
idx = self.interpret_expression(
|
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
|
||||||
m.group('idx'), local_vars, allow_recursion - 1)
|
|
||||||
return val[idx]
|
return val[idx]
|
||||||
|
|
||||||
|
def raise_expr_error(where, op, exp):
|
||||||
|
raise ExtractorError('Premature {0} return of {1} in {2!r}'.format(where, op, exp))
|
||||||
|
|
||||||
|
for op, opfunc in _OPERATORS:
|
||||||
|
separated = list(self._separate(expr, op))
|
||||||
|
if len(separated) < 2:
|
||||||
|
continue
|
||||||
|
right_val = separated.pop()
|
||||||
|
left_val = op.join(separated)
|
||||||
|
left_val, should_abort = self.interpret_statement(
|
||||||
|
left_val, local_vars, allow_recursion - 1)
|
||||||
|
if should_abort:
|
||||||
|
raise_expr_error('left-side', op, expr)
|
||||||
|
right_val, should_abort = self.interpret_statement(
|
||||||
|
right_val, local_vars, allow_recursion - 1)
|
||||||
|
if should_abort:
|
||||||
|
raise_expr_error('right-side', op, expr)
|
||||||
|
return opfunc(left_val or 0, right_val)
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
|
r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
|
||||||
expr)
|
expr)
|
||||||
if m:
|
if m:
|
||||||
variable = m.group('var')
|
variable = m.group('var')
|
||||||
member = remove_quotes(m.group('member') or m.group('member2'))
|
nl = Nonlocal()
|
||||||
arg_str = m.group('args')
|
|
||||||
|
|
||||||
if variable in local_vars:
|
nl.member = remove_quotes(m.group('member') or m.group('member2'))
|
||||||
obj = local_vars[variable]
|
arg_str = expr[m.end():]
|
||||||
|
if arg_str.startswith('('):
|
||||||
|
arg_str, remaining = self._separate_at_paren(arg_str, ')')
|
||||||
else:
|
else:
|
||||||
if variable not in self._objects:
|
arg_str, remaining = None, arg_str
|
||||||
self._objects[variable] = self.extract_object(variable)
|
|
||||||
obj = self._objects[variable]
|
|
||||||
|
|
||||||
if arg_str is None:
|
def assertion(cndn, msg):
|
||||||
# Member access
|
""" assert, but without risk of getting optimized out """
|
||||||
if member == 'length':
|
if not cndn:
|
||||||
return len(obj)
|
raise ExtractorError('{0} {1}: {2}'.format(nl.member, msg, expr))
|
||||||
return obj[member]
|
|
||||||
|
|
||||||
assert expr.endswith(')')
|
def eval_method():
|
||||||
# Function call
|
# nonlocal member
|
||||||
if arg_str == '':
|
member = nl.member
|
||||||
argvals = tuple()
|
if variable == 'String':
|
||||||
else:
|
obj = compat_str
|
||||||
argvals = tuple([
|
elif variable in local_vars:
|
||||||
|
obj = local_vars[variable]
|
||||||
|
else:
|
||||||
|
if variable not in self._objects:
|
||||||
|
self._objects[variable] = self.extract_object(variable)
|
||||||
|
obj = self._objects[variable]
|
||||||
|
|
||||||
|
if arg_str is None:
|
||||||
|
# Member access
|
||||||
|
if member == 'length':
|
||||||
|
return len(obj)
|
||||||
|
return obj[member]
|
||||||
|
|
||||||
|
# Function call
|
||||||
|
argvals = [
|
||||||
self.interpret_expression(v, local_vars, allow_recursion)
|
self.interpret_expression(v, local_vars, allow_recursion)
|
||||||
for v in arg_str.split(',')])
|
for v in self._separate(arg_str)]
|
||||||
|
|
||||||
if member == 'split':
|
if obj == compat_str:
|
||||||
assert argvals == ('',)
|
if member == 'fromCharCode':
|
||||||
return list(obj)
|
assertion(argvals, 'takes one or more arguments')
|
||||||
if member == 'join':
|
return ''.join(map(chr, argvals))
|
||||||
assert len(argvals) == 1
|
raise ExtractorError('Unsupported string method %s' % (member, ))
|
||||||
return argvals[0].join(obj)
|
|
||||||
if member == 'reverse':
|
|
||||||
assert len(argvals) == 0
|
|
||||||
obj.reverse()
|
|
||||||
return obj
|
|
||||||
if member == 'slice':
|
|
||||||
assert len(argvals) == 1
|
|
||||||
return obj[argvals[0]:]
|
|
||||||
if member == 'splice':
|
|
||||||
assert isinstance(obj, list)
|
|
||||||
index, howMany = argvals
|
|
||||||
res = []
|
|
||||||
for i in range(index, min(index + howMany, len(obj))):
|
|
||||||
res.append(obj.pop(index))
|
|
||||||
return res
|
|
||||||
|
|
||||||
return obj[member](argvals)
|
if member == 'split':
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
assertion(argvals == [''], 'with arguments is not implemented')
|
||||||
|
return list(obj)
|
||||||
|
elif member == 'join':
|
||||||
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
|
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||||
|
return argvals[0].join(obj)
|
||||||
|
elif member == 'reverse':
|
||||||
|
assertion(not argvals, 'does not take any arguments')
|
||||||
|
obj.reverse()
|
||||||
|
return obj
|
||||||
|
elif member == 'slice':
|
||||||
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
|
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||||
|
return obj[argvals[0]:]
|
||||||
|
elif member == 'splice':
|
||||||
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
index, howMany = map(int, (argvals + [len(obj)])[:2])
|
||||||
|
if index < 0:
|
||||||
|
index += len(obj)
|
||||||
|
add_items = argvals[2:]
|
||||||
|
res = []
|
||||||
|
for i in range(index, min(index + howMany, len(obj))):
|
||||||
|
res.append(obj.pop(index))
|
||||||
|
for i, item in enumerate(add_items):
|
||||||
|
obj.insert(index + i, item)
|
||||||
|
return res
|
||||||
|
elif member == 'unshift':
|
||||||
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
for item in reversed(argvals):
|
||||||
|
obj.insert(0, item)
|
||||||
|
return obj
|
||||||
|
elif member == 'pop':
|
||||||
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
|
assertion(not argvals, 'does not take any arguments')
|
||||||
|
if not obj:
|
||||||
|
return
|
||||||
|
return obj.pop()
|
||||||
|
elif member == 'push':
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
obj.extend(argvals)
|
||||||
|
return obj
|
||||||
|
elif member == 'forEach':
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||||
|
f, this = (argvals + [''])[:2]
|
||||||
|
return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)]
|
||||||
|
elif member == 'indexOf':
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||||
|
idx, start = (argvals + [0])[:2]
|
||||||
|
try:
|
||||||
|
return obj.index(idx, start)
|
||||||
|
except ValueError:
|
||||||
|
return -1
|
||||||
|
|
||||||
for op, opfunc in _OPERATORS:
|
if isinstance(obj, list):
|
||||||
m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
|
member = int(member)
|
||||||
if not m:
|
nl.member = member
|
||||||
continue
|
return obj[member](argvals)
|
||||||
x, abort = self.interpret_statement(
|
|
||||||
m.group('x'), local_vars, allow_recursion - 1)
|
|
||||||
if abort:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Premature left-side return of %s in %r' % (op, expr))
|
|
||||||
y, abort = self.interpret_statement(
|
|
||||||
m.group('y'), local_vars, allow_recursion - 1)
|
|
||||||
if abort:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Premature right-side return of %s in %r' % (op, expr))
|
|
||||||
return opfunc(x, y)
|
|
||||||
|
|
||||||
m = re.match(
|
if remaining:
|
||||||
r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
|
return self.interpret_expression(
|
||||||
|
self._named_object(local_vars, eval_method()) + remaining,
|
||||||
|
local_vars, allow_recursion)
|
||||||
|
else:
|
||||||
|
return eval_method()
|
||||||
|
|
||||||
|
m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
|
||||||
if m:
|
if m:
|
||||||
fname = m.group('func')
|
fname = m.group('func')
|
||||||
argvals = tuple([
|
argvals = tuple([
|
||||||
int(v) if v.isdigit() else local_vars[v]
|
int(v) if v.isdigit() else local_vars[v]
|
||||||
for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple()
|
for v in self._separate(m.group('args'))])
|
||||||
if fname not in self._functions:
|
if fname in local_vars:
|
||||||
|
return local_vars[fname](argvals)
|
||||||
|
elif fname not in self._functions:
|
||||||
self._functions[fname] = self.extract_function(fname)
|
self._functions[fname] = self.extract_function(fname)
|
||||||
return self._functions[fname](argvals)
|
return self._functions[fname](argvals)
|
||||||
|
|
||||||
raise ExtractorError('Unsupported JS expression %r' % expr)
|
if expr:
|
||||||
|
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||||
|
|
||||||
def extract_object(self, objname):
|
def extract_object(self, objname):
|
||||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||||
@ -233,30 +507,52 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def extract_function(self, funcname):
|
def extract_function_code(self, funcname):
|
||||||
|
""" @returns argnames, code """
|
||||||
func_m = re.search(
|
func_m = re.search(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
|
(?:function\s+%(f_n)s|[{;,]\s*%(f_n)s\s*=\s*function|var\s+%(f_n)s\s*=\s*function)\s*
|
||||||
\((?P<args>[^)]*)\)\s*
|
\((?P<args>[^)]*)\)\s*
|
||||||
\{(?P<code>[^}]+)\}''' % (
|
(?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % {'f_n': re.escape(funcname), },
|
||||||
re.escape(funcname), re.escape(funcname), re.escape(funcname)),
|
|
||||||
self.code)
|
self.code)
|
||||||
|
code, _ = self._separate_at_paren(func_m.group('code'), '}') # refine the match
|
||||||
if func_m is None:
|
if func_m is None:
|
||||||
raise ExtractorError('Could not find JS function %r' % funcname)
|
raise ExtractorError('Could not find JS function %r' % funcname)
|
||||||
argnames = func_m.group('args').split(',')
|
return func_m.group('args').split(','), code
|
||||||
|
|
||||||
return self.build_function(argnames, func_m.group('code'))
|
def extract_function(self, funcname):
|
||||||
|
return self.extract_function_from_code(*self.extract_function_code(funcname))
|
||||||
|
|
||||||
|
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||||
|
local_vars = {}
|
||||||
|
while True:
|
||||||
|
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
|
||||||
|
if mobj is None:
|
||||||
|
break
|
||||||
|
start, body_start = mobj.span()
|
||||||
|
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
|
||||||
|
name = self._named_object(
|
||||||
|
local_vars,
|
||||||
|
self.extract_function_from_code(
|
||||||
|
[x.strip() for x in mobj.group('args').split(',')],
|
||||||
|
body, local_vars, *global_stack))
|
||||||
|
code = code[:start] + name + remaining
|
||||||
|
return self.build_function(argnames, code, local_vars, *global_stack)
|
||||||
|
|
||||||
def call_function(self, funcname, *args):
|
def call_function(self, funcname, *args):
|
||||||
f = self.extract_function(funcname)
|
return self.extract_function(funcname)(args)
|
||||||
return f(args)
|
|
||||||
|
|
||||||
def build_function(self, argnames, code):
|
def build_function(self, argnames, code, *global_stack):
|
||||||
def resf(args):
|
global_stack = list(global_stack) or [{}]
|
||||||
local_vars = dict(zip(argnames, args))
|
local_vars = global_stack.pop(0)
|
||||||
for stmt in code.split(';'):
|
|
||||||
res, abort = self.interpret_statement(stmt, local_vars)
|
def resf(args, **kwargs):
|
||||||
if abort:
|
local_vars.update(dict(zip(argnames, args)))
|
||||||
|
local_vars.update(kwargs)
|
||||||
|
var_stack = LocalNameSpace(local_vars, *global_stack)
|
||||||
|
for stmt in self._separate(code.replace('\n', ''), ';'):
|
||||||
|
ret, should_abort = self.interpret_statement(stmt, var_stack)
|
||||||
|
if should_abort:
|
||||||
break
|
break
|
||||||
return res
|
return ret
|
||||||
return resf
|
return resf
|
||||||
|
@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--match-title',
|
'--match-title',
|
||||||
dest='matchtitle', metavar='REGEX',
|
dest='matchtitle', metavar='REGEX',
|
||||||
help='Download only matching titles (regex or caseless sub-string)')
|
help='Download only matching titles (case-insensitive regex or sub-string)')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--reject-title',
|
'--reject-title',
|
||||||
dest='rejecttitle', metavar='REGEX',
|
dest='rejecttitle', metavar='REGEX',
|
||||||
help='Skip download for matching titles (regex or caseless sub-string)')
|
help='Skip download for matching titles (case-insensitive regex or sub-string)')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--max-downloads',
|
'--max-downloads',
|
||||||
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2021.06.06'
|
__version__ = '2021.12.17'
|
||||||
|
Loading…
Reference in New Issue
Block a user