Compare commits

..

51 Commits

Author SHA1 Message Date
Sergey M․
e147619669 release 2020.12.05 2020-12-05 03:40:00 +07:00
Sergey M․
e7f93fbd85 [ChangeLog] Actualize
[ci skip]
2020-12-05 03:31:07 +07:00
Sergey M․
58f7ada235 [teachable:course] Improve extraction (closes #24507, closes #27286) 2020-12-05 03:24:56 +07:00
Sergey M․
c67b33888f [nrk] Improve error extraction 2020-12-05 03:00:10 +07:00
Sergey M․
e8c0af04b7 [nrktv] Relax _VALID_URL 2020-12-05 02:46:28 +07:00
Sergey M․
5d769860c3 [nrktv:series] Improve extraction (closes #21926) 2020-12-05 02:43:33 +07:00
Sergey M․
02b04785ee [nrktv:series] Improve extraction 2020-12-05 02:21:07 +07:00
Sergey M․
41c92b8d02 [nrktv:season] Improve extraction 2020-12-05 01:20:14 +07:00
Remita Amine
fe0c28f956 [nrk] fix call to moved method 2020-12-04 18:08:08 +01:00
Remita Amine
957c65b9ea [nrk] fix typo 2020-12-04 18:05:27 +01:00
Remita Amine
5e95e18ce9 [nrk] improve format extraction and geo-restriction detection (closes #24221) 2020-12-04 18:04:38 +01:00
Sergey M․
e91df0c550 [pornhub] Handle HTTP errors gracefully (closes #26414) 2020-12-04 23:56:50 +07:00
Sergey M․
c5636e9bca [nrktv] Relax _VALID_URL (closes #27299, closes #26185) 2020-12-04 23:38:42 +07:00
Remita Amine
2e47264235 [zdf] extract webm formats(closes #26659) 2020-12-04 14:16:32 +01:00
Sergey M․
1c78cb118c [travis] Disable download jobs
Until youtube-dl OSS approval by Travis or moving to GitHub Actions
[ci skip]
2020-12-03 23:25:36 +07:00
Matthew Rayermann
beab2f88c9 [nhk] Add audio clip test to NHK extractor (#27269) 2020-12-03 07:47:56 +00:00
Remita Amine
0025447369 [gamespot] Extract DASH and HTTP formats 2020-12-03 00:35:47 +01:00
Remita Amine
da4304609d [extractor/commons] improve Akamai HTTP formats extraction 2020-12-03 00:33:55 +01:00
Remita Amine
ea89680aea [tver] correct episode_number key 2020-12-02 22:49:51 +01:00
Remita Amine
664dd8ba85 [extractor/common] improve Akamai HTTP format extraction
- Allow m3u8 manifest without an additional audio format
- Fix extraction for qualities starting with a number
Solution provided by @nixxo based on: https://stackoverflow.com/a/5984688
2020-12-02 21:49:09 +01:00
Remita Amine
64554c12e1 [tver] Add new extractor (closes #26662)(closes #27284) 2020-12-02 21:49:09 +01:00
opusforlife2
4ded9c0f00 [pornhub] Add support for pornhub.org (#27276)
Most ISPs block the other two TLDs through deep packet inspection
2020-12-03 01:30:08 +07:00
Remita Amine
c0820dd52a [extractors] Add QubIE import 2020-12-02 00:41:21 +01:00
Sergey M․
2bb70750a9 release 2020.12.02 2020-12-02 01:37:40 +07:00
Sergey M․
09d923f2eb [ChangeLog] Actualize
[ci skip]
2020-12-02 01:22:43 +07:00
Remita Amine
37d979ad33 [tva] Add support for qub.ca (closes #27235) 2020-12-01 12:25:02 +01:00
Remita Amine
95ac4de229 [toggle] Detect DRM protected videos (closes #16479)(closes #20805) 2020-12-01 10:38:53 +01:00
Remita Amine
d3e142b3fa [toggle] Add support for new MeWatch URLs (closes #27256) 2020-12-01 10:13:06 +01:00
Sergey M․
132aece1ed [youtube:tab] Extract channels only from channels tab (closes #27266) 2020-12-01 04:44:10 +07:00
Sergey M․
3e4e338133 [cspan] Extract info from jwplayer data (closes #3672, closes #3734, closes #10638, closes #13030, closes #18806, closes #23148, closes #24461, closes #26171, closes #26800, closes #27263) 2020-12-01 01:53:12 +07:00
Roman Beránek
be19ae11fd [cspan] Pass Referer header with format's video URL (#26032) (closes #25729) 2020-12-01 01:14:29 +07:00
Sergey M․
59d63d8d4a [youtube] Improve age-gated videos extraction (closes #27259) 2020-12-01 00:49:26 +07:00
Remita Amine
cfeba5d17f [mediaset] add support for movie URLs(closes #27240) 2020-11-30 09:46:15 +01:00
Sergey M․
6da0e5e7a2 [yandexmusic] Refactor and add support for artist's tracks and albums (closes #11887, closes #22284) 2020-11-30 00:25:06 +07:00
Sergey M․
d6ce649f15 [yandexmusic:track] Fix extraction (closes #26449, closes #26669, closes #26747, closes #26748, closes #26762) 2020-11-29 22:15:51 +07:00
Sergey M․
b449b73dcc release 2020.11.29 2020-11-29 13:53:01 +07:00
Sergey M․
16c822e91e [ChangeLog] Actualize
[ci skip]
2020-11-29 13:49:12 +07:00
Michael Munch
4318170779 [drtv] Extend _VALID_URL (#27243) 2020-11-29 13:44:36 +07:00
Sergey M․
fb626c0586 [tiktok] Fix extraction (closes #20809, closes #22838, closes #22850, closes #25987, closes #26281, closes #26411, closes #26639, closes #26776, closes #27237) 2020-11-29 08:09:20 +07:00
bopol
717d1d2d5a [ina] Add support for mobile URLs (#27229) 2020-11-29 04:15:53 +07:00
Sergey M․
9585b376db [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug (closes #14579, closes #22593)
TODO: logging and verbosity needs major refactoring (refs #10894)
2020-11-29 04:04:06 +07:00
JChris246
f04cfe24e0 [pornhub] Fix like and dislike count extraction (closes #27227) (#27234) 2020-11-29 02:32:13 +07:00
Sergey M․
20c50c6556 [youtube] Improve yt initial player response extraction (closes #27216) 2020-11-28 15:02:31 +07:00
Remita Amine
f9f9699f2f [videa] improve extraction 2020-11-26 12:56:49 +01:00
Adrian Heine né Lang
a3cf22e590 [videa] Adapt to updates (#26301)
closes #25973, closes #25650.
2020-11-26 11:55:06 +00:00
Remita Amine
99de2f38d3 [spreaker] fix SpreakerShowIE test URL 2020-11-25 21:39:17 +01:00
Sergey M․
9fe50837c3 release 2020.11.26 2020-11-26 03:05:51 +07:00
Sergey M․
4dc545553f [ChangeLog] Actualize
[ci skip]
2020-11-26 03:03:51 +07:00
Sergey M․
686e898fde [spreaker] Add extractor (closes #13480, closes #13877) 2020-11-26 02:58:48 +07:00
Remita Amine
3a78198a96 [vlive] improve extraction for geo-restricted videos 2020-11-25 11:40:37 +01:00
Remita Amine
836c810716 [vlive] Add support for post URLs(closes #27122)(closes #27123) 2020-11-25 11:26:26 +01:00
32 changed files with 1281 additions and 461 deletions

View File

@@ -18,7 +18,7 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
- [ ] I've verified that I'm running youtube-dl version **2020.12.05**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.11.24
[debug] youtube-dl version 2020.12.05
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -19,7 +19,7 @@ labels: 'site-support-request'
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
- [ ] I've verified that I'm running youtube-dl version **2020.12.05**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@@ -18,13 +18,13 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->
- [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
- [ ] I've verified that I'm running youtube-dl version **2020.12.05**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@@ -18,7 +18,7 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
- [ ] I've verified that I'm running youtube-dl version **2020.12.05**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.11.24
[debug] youtube-dl version 2020.12.05
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -19,13 +19,13 @@ labels: 'request'
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->
- [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
- [ ] I've verified that I'm running youtube-dl version **2020.12.05**
- [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@@ -12,29 +12,29 @@ python:
dist: trusty
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
# - YTDL_TEST_SET=download
jobs:
include:
- python: 3.7
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.7
dist: xenial
env: YTDL_TEST_SET=download
# - python: 3.7
# dist: xenial
# env: YTDL_TEST_SET=download
- python: 3.8
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.8
dist: xenial
env: YTDL_TEST_SET=download
# - python: 3.8
# dist: xenial
# env: YTDL_TEST_SET=download
- python: 3.8-dev
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.8-dev
dist: xenial
env: YTDL_TEST_SET=download
# - python: 3.8-dev
# dist: xenial
# env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
# - env: JYTHON=true; YTDL_TEST_SET=download
- name: flake8
python: 3.8
dist: xenial
@@ -42,9 +42,9 @@ jobs:
script: flake8 .
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
# - env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
# - env: JYTHON=true; YTDL_TEST_SET=download
before_install:
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
script: ./devscripts/run_tests.sh

View File

@@ -1,3 +1,76 @@
version 2020.12.05
Core
* [extractor/common] Improve Akamai HTTP format extraction
* Allow m3u8 manifest without an additional audio format
* Fix extraction for qualities starting with a number
Extractors
* [teachable:course] Improve extraction (#24507, #27286)
* [nrk] Improve error extraction
* [nrktv:series] Improve extraction (#21926)
* [nrktv:season] Improve extraction
* [nrk] Improve format extraction and geo-restriction detection (#24221)
* [pornhub] Handle HTTP errors gracefully (#26414)
* [nrktv] Relax URL regular expression (#27299, #26185)
+ [zdf] Extract webm formats (#26659)
+ [gamespot] Extract DASH and HTTP formats
+ [tver] Add support for tver.jp (#26662, #27284)
+ [pornhub] Add support for pornhub.org (#27276)
version 2020.12.02
Extractors
+ [tva] Add support for qub.ca (#27235)
+ [toggle] Detect DRM protected videos (closes #16479)(closes #20805)
+ [toggle] Add support for new MeWatch URLs (#27256)
* [youtube:tab] Extract channels only from channels tab (#27266)
+ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030,
#18806, #23148, #24461, #26171, #26800, #27263)
* [cspan] Pass Referer header with format's video URL (#26032, #25729)
* [youtube] Improve age-gated videos extraction (#27259)
+ [mediaset] Add support for movie URLs (#27240)
* [yandexmusic] Refactor
+ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284)
* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762)
version 2020.11.29
Core
* [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug
(#14579, #22593)
Extractors
* [drtv] Extend URL regular expression (#27243)
* [tiktok] Fix extraction (#20809, #22838, #22850, #25987, #26281, #26411,
#26639, #26776, #27237)
+ [ina] Add support for mobile URLs (#27229)
* [pornhub] Fix like and dislike count extraction (#27227, #27234)
* [youtube] Improve yt initial player response extraction (#27216)
* [videa] Fix extraction (#25650, #25973, #26301)
version 2020.11.26
Core
* [downloader/fragment] Set final file's mtime according to last fragment's
Last-Modified header (#11718, #18384, #27138)
Extractors
+ [spreaker] Add support for spreaker.com (#13480, #13877)
* [vlive] Improve extraction for geo-restricted videos
+ [vlive] Add support for post URLs (#27122, #27123)
* [viki] Fix video API request (#27184)
* [bbc] Fix BBC Three clip extraction
* [bbc] Fix BBC News videos extraction
+ [medaltv] Add support for medal.tv (#27149)
* [youtube] Improve music metadata and license extraction (#26013)
* [nrk] Fix extraction
* [cda] Fix extraction (#17803, #24458, #24518, #26381)
version 2020.11.24
Core

View File

@@ -308,6 +308,7 @@
- **FrontendMasters**
- **FrontendMastersCourse**
- **FrontendMastersLesson**
- **FujiTVFODPlus7**
- **Funimation**
- **Funk**
- **Fusion**
@@ -471,6 +472,7 @@
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **MedalTV**
- **media.ccc.de**
- **media.ccc.de:lists**
- **Medialaan**
@@ -485,6 +487,7 @@
- **META**
- **metacafe**
- **Metacritic**
- **mewatch**
- **Mgoon**
- **MGTV**: 芒果TV
- **MiaoPai**
@@ -708,6 +711,7 @@
- **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜
- **QuantumTV**
- **Qub**
- **Quickline**
- **QuicklineLive**
- **R7**
@@ -839,6 +843,10 @@
- **Sport5**
- **SportBox**
- **SportDeutschland**
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
@@ -907,7 +915,7 @@
- **ThisAV**
- **ThisOldHouse**
- **TikTok**
- **TikTokUser**
- **TikTokUser** (Currently broken)
- **tinypic**: tinypic.com videos
- **TMZ**
- **TMZArticle**
@@ -946,6 +954,7 @@
- **TVANouvellesArticle**
- **TVC**
- **TVCArticle**
- **TVer**
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com**
- **TVN24**
@@ -1055,6 +1064,7 @@
- **vk:wallpost**
- **vlive**
- **vlive:channel**
- **vlive:post**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
@@ -1126,6 +1136,8 @@
- **yahoo:japannews**: Yahoo! Japan News
- **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
- **YandexVideo**

View File

@@ -1610,7 +1610,7 @@ class YoutubeDL(object):
if req_format is None:
req_format = self._default_format_spec(info_dict, download=download)
if self.params.get('verbose'):
self.to_stdout('[debug] Default format spec: %s' % req_format)
self._write_string('[debug] Default format spec: %s\n' % req_format)
format_selector = self.build_format_selector(req_format)
@@ -1871,7 +1871,7 @@ class YoutubeDL(object):
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
if info_dict.get('requested_formats') is not None:

View File

@@ -2614,33 +2614,32 @@ class InfoExtractor(object):
hls_host = hosts.get('hls')
if hls_host:
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
formats.extend(self._extract_m3u8_formats(
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
http_host = hosts.get('http')
if http_host and 'hdnea=' not in manifest_url:
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
qualities_length = len(qualities)
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
if len(m3u8_formats) in (qualities_length, qualities_length + 1):
i = 0
http_formats = []
for f in formats:
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
for f in m3u8_formats:
if f['vcodec'] != 'none':
for protocol in ('http', 'https'):
http_f = f.copy()
del http_f['manifest_url']
http_url = re.sub(
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url,
'protocol': protocol,
})
http_formats.append(http_f)
formats.append(http_f)
i += 1
formats.extend(http_formats)
return formats

View File

@@ -10,6 +10,8 @@ from ..utils import (
find_xpath_attr,
get_element_by_class,
int_or_none,
js_to_json,
merge_dicts,
smuggle_url,
unescapeHTML,
)
@@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
bc_attr['data-bcid'])
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
def add_referer(formats):
for f in formats:
f.setdefault('http_headers', {})['Referer'] = url
# As of 01.12.2020 this path looks to cover all cases making the rest
# of the code unnecessary
jwsetup = self._parse_json(
self._search_regex(
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if jwsetup:
info = self._parse_jwplayer_data(
jwsetup, video_id, require_title=False, m3u8_id='hls',
base_url=url)
add_referer(info['formats'])
ld_info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(info, ld_info)
# Obsolete
# We first look for clipid, because clipprog always appears before
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
@@ -165,6 +187,7 @@ class CSpanIE(InfoExtractor):
formats = self._extract_m3u8_formats(
path, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
add_referer(formats)
self._sort_formats(formats)
entries.append({
'id': '%s_%d' % (video_id, partnum + 1),

View File

@@ -29,7 +29,7 @@ class DRTVIE(InfoExtractor):
https?://
(?:
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
)
(?P<id>[\da-z_-]+)
'''
@@ -111,6 +111,9 @@ class DRTVIE(InfoExtractor):
}, {
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
'only_matching': True,
}, {
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -393,6 +393,7 @@ from .frontendmasters import (
FrontendMastersLessonIE,
FrontendMastersCourseIE
)
from .fujitv import FujiTVFODPlus7IE
from .funimation import FunimationIE
from .funk import FunkIE
from .fusion import FusionIE
@@ -1082,6 +1083,12 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
SpreakerShowIE,
SpreakerShowPageIE,
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .srgssr import (
@@ -1179,7 +1186,10 @@ from .tnaflix import (
EMPFlixIE,
MovieFapIE,
)
from .toggle import ToggleIE
from .toggle import (
ToggleIE,
MeWatchIE,
)
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
@@ -1212,7 +1222,10 @@ from .tv2dk import (
from .tv2hu import TV2HuIE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
from .tva import (
TVAIE,
QubIE,
)
from .tvanouvelles import (
TVANouvellesIE,
TVANouvellesArticleIE,
@@ -1221,6 +1234,7 @@ from .tvc import (
TVCIE,
TVCArticleIE,
)
from .tver import TVerIE
from .tvigle import TvigleIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
@@ -1375,6 +1389,7 @@ from .vk import (
)
from .vlive import (
VLiveIE,
VLivePostIE,
VLiveChannelIE,
)
from .vodlocker import VodlockerIE
@@ -1471,6 +1486,8 @@ from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,
YandexMusicPlaylistIE,
YandexMusicArtistTracksIE,
YandexMusicArtistAlbumsIE,
)
from .yandexvideo import YandexVideoIE
from .yapfiles import YapFilesIE

View File

@@ -0,0 +1,35 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class FujiTVFODPlus7IE(InfoExtractor):
_VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
_BASE_URL = 'http://i.fod.fujitv.co.jp/'
_BITRATE_MAP = {
300: (320, 180),
800: (640, 360),
1200: (1280, 720),
2000: (1280, 720),
}
def _real_extract(self, url):
video_id = self._match_id(url)
formats = self._extract_m3u8_formats(
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
for f in formats:
wh = self._BITRATE_MAP.get(f.get('tbr'))
if wh:
f.update({
'width': wh[0],
'height': wh[1],
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_id,
'formats': formats,
'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
}

View File

@@ -1,16 +1,7 @@
from __future__ import unicode_literals
import re
from .once import OnceIE
from ..compat import (
compat_urllib_parse_unquote,
)
from ..utils import (
unescapeHTML,
url_basename,
dict_get,
)
from ..compat import compat_urllib_parse_unquote
class GameSpotIE(OnceIE):
@@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
'title': 'Arma 3 - Community Guide: SITREP I',
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
},
'skip': 'manifest URL give HTTP Error 404: Not Found',
}, {
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
'md5': '173ea87ad762cf5d3bf6163dceb255a6',
'info_dict': {
'id': 'gs-2300-6424837',
'ext': 'mp4',
'title': 'Now Playing - The Witcher 3: Wild Hunt',
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
},
'params': {
'skip_download': True, # m3u8 downloads
},
}, {
'url': 'https://www.gamespot.com/videos/embed/6439218/',
'only_matching': True,
@@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
data_video_json = self._search_regex(
r'data-video=["\'](.*?)["\']', webpage, 'data video')
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
data_video = self._parse_json(self._html_search_regex(
r'data-video=(["\'])({.*?})\1', webpage,
'video data', group=2), page_id)
title = compat_urllib_parse_unquote(data_video['title'])
streams = data_video['videoStreams']
manifest_url = None
formats = []
f4m_url = streams.get('f4m_stream')
if f4m_url:
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
m3u8_url = streams.get('adaptive_stream')
if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, page_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
progressive_url = dict_get(
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',
manifest_url, 'qualities basename', default=None)
if qualities_basename:
QUALITIES_RE = r'((,\d+)+,?)'
qualities = self._search_regex(
QUALITIES_RE, qualities_basename,
'qualities', default=None)
if qualities:
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
qualities.sort()
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
http_url_basename = url_basename(progressive_url)
if m3u8_formats:
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none', m3u8_formats))
if len(qualities) == len(m3u8_formats):
for q, m3u8_format in zip(qualities, m3u8_formats):
f = m3u8_format.copy()
f.update({
'url': progressive_url.replace(
http_url_basename, http_template % q),
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
for q in qualities:
formats.append({
'url': progressive_url.replace(
http_url_basename, http_template % q),
'ext': 'mp4',
'format_id': 'http-%d' % q,
'tbr': q,
})
for f in m3u8_formats:
formats.append(f)
http_f = f.copy()
del http_f['manifest_url']
http_f.update({
'format_id': f['format_id'].replace('hls-', 'http-'),
'protocol': 'http',
'url': f['url'].replace('.m3u8', '.mp4'),
})
formats.append(http_f)
onceux_json = self._search_regex(
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
if onceux_json:
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
http_formats_preference=-1))
mpd_url = streams.get('adaptive_dash')
if mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url, page_id, mpd_id='dash', fatal=False))
if not formats:
for quality in ['sd', 'hd']:
# It's actually a link to a flv file
flv_url = streams.get('f4m_{0}'.format(quality))
if flv_url is not None:
formats.append({
'url': flv_url,
'ext': 'flv',
'format_id': quality,
})
self._sort_formats(formats)
return {
'id': data_video['guid'],
'id': data_video.get('guid') or page_id,
'display_id': page_id,
'title': compat_urllib_parse_unquote(data_video['title']),
'title': title,
'formats': formats,
'description': self._html_search_meta('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),

View File

@@ -12,7 +12,7 @@ from ..utils import (
class InaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
_VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
_TESTS = [{
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
@@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
}, {
'url': 'https://www.ina.fr/video/P16173408-video.html',
'only_matching': True,
}, {
'url': 'http://m.ina.fr/video/I12055569',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
https?://
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?:
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
player/index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9A-Z]{16,})
@@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
'only_matching': True,
}, {
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
'only_matching': True,
}]
@staticmethod

View File

@@ -10,7 +10,7 @@ class NhkVodIE(InfoExtractor):
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
# clip
# video clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
'md5': '256a1be14f48d960a7e61e2532d95ec3',
'info_dict': {
@@ -21,6 +21,19 @@ class NhkVodIE(InfoExtractor):
'timestamp': 1565965194,
'upload_date': '20190816',
},
}, {
# audio clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
'info_dict': {
'id': 'r_inventions-20201104-1-en',
'ext': 'm4a',
'title': "Japan's Top Inventions - Miniature Video Cameras",
'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
'only_matching': True,

View File

@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
@@ -17,6 +18,7 @@ from ..utils import (
parse_age_limit,
parse_duration,
try_get,
urljoin,
url_or_none,
)
@@ -24,6 +26,27 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
def _extract_nrk_formats(self, asset_url, video_id):
return self._extract_m3u8_formats(
re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url),
video_id, 'mp4', 'm3u8_native', fatal=False)
def _raise_error(self, data):
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type)
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
class NRKIE(NRKBaseIE):
_VALID_URL = r'''(?x)
@@ -82,6 +105,9 @@ class NRKIE(NRKBaseIE):
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
video_id, 'Downloading manifest JSON')
if manifest.get('playability') == 'nonPlayable':
self._raise_error(manifest['nonPlayable'])
playable = manifest['playable']
formats = []
@@ -94,9 +120,7 @@ class NRKIE(NRKBaseIE):
if not format_url:
continue
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_nrk_formats(format_url, video_id))
self._sort_formats(formats)
data = self._download_json(
@@ -143,14 +167,7 @@ class NRKIE(NRKBaseIE):
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
_VALID_URL = r'''(?x)
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
(?:serie(?:/[^/]+){1,2}|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117',
@@ -275,6 +292,9 @@ class NRKTVIE(NRKBaseIE):
}, {
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
'only_matching': True,
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
'only_matching': True,
}]
_api_host = None
@@ -295,6 +315,7 @@ class NRKTVIE(NRKBaseIE):
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
urls = []
entries = []
conviva = data.get('convivaStatistics') or {}
@@ -311,19 +332,13 @@ class NRKTVIE(NRKBaseIE):
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url')
if not asset_url:
if not asset_url or asset_url in urls:
continue
formats = self._extract_akamai_formats(asset_url, video_id)
formats = self._extract_nrk_formats(asset_url, video_id)
if not formats:
continue
self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
@@ -343,34 +358,20 @@ class NRKTVIE(NRKBaseIE):
if not entries:
media_url = data.get('mediaUrl')
if media_url:
formats = self._extract_akamai_formats(media_url, video_id)
self._sort_formats(formats)
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': make_title(title),
'duration': duration,
'formats': formats,
}]
if media_url and media_url not in urls:
formats = self._extract_nrk_formats(media_url, video_id)
if formats:
self._sort_formats(formats)
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': make_title(title),
'duration': duration,
'formats': formats,
}]
if not entries:
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
self._raise_error(data)
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
@@ -517,7 +518,8 @@ class NRKTVSerieBaseIE(InfoExtractor):
config = self._parse_json(
self._search_regex(
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>',
r'PRELOADED_STATE_*\s*=\s*({.+?})\s*\n'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False, transform_source=js_to_json)
if not config:
@@ -527,12 +529,26 @@ class NRKTVSerieBaseIE(InfoExtractor):
(lambda x: x['initialState']['series'], lambda x: x['series']),
dict)
def _extract_seasons(self, seasons):
def _extract_seasons(self, domain, series_id, seasons):
if isinstance(seasons, dict):
seasons = seasons.get('seasons')
if not isinstance(seasons, list):
return []
entries = []
for season in seasons:
entries.extend(self._extract_episodes(season))
if not isinstance(season, dict):
continue
episodes = self._extract_episodes(season)
if episodes:
entries.extend(episodes)
continue
season_name = season.get('name')
if season_name and isinstance(season_name, compat_str):
entries.append(self.url_result(
'https://%s.nrk.no/serie/%s/sesong/%s'
% (domain, series_id, season_name),
ie=NRKTVSeasonIE.ie_key(),
video_title=season.get('title')))
return entries
def _extract_episodes(self, season):
@@ -545,24 +561,94 @@ class NRKTVSerieBaseIE(InfoExtractor):
return []
entries = []
for episode in entry_list:
nrk_id = episode.get('prfId')
nrk_id = episode.get('prfId') or episode.get('episodeId')
if not nrk_id or not isinstance(nrk_id, compat_str):
continue
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
continue
entries.append(self.url_result(
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
return entries
_ASSETS_KEYS = ('episodes', 'instalments',)
def _extract_assets_key(self, embedded):
for asset_key in self._ASSETS_KEYS:
if embedded.get(asset_key):
return asset_key
def _entries(self, data, display_id):
for page_num in itertools.count(1):
embedded = data.get('_embedded')
if not isinstance(embedded, dict):
break
assets_key = self._extract_assets_key(embedded)
if not assets_key:
break
# Extract entries
entries = try_get(
embedded,
(lambda x: x[assets_key]['_embedded'][assets_key],
lambda x: x[assets_key]),
list)
for e in self._extract_entries(entries):
yield e
# Find next URL
next_url = urljoin(
'https://psapi.nrk.no/',
try_get(
data,
(lambda x: x['_links']['next']['href'],
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
compat_str))
if not next_url:
break
data = self._download_json(
next_url, display_id,
'Downloading %s JSON page %d' % (assets_key, page_num),
fatal=False)
if not data:
break
class NRKTVSeasonIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
'info_dict': {
'id': '1',
'id': 'backstage/1',
'title': 'Sesong 1',
},
'playlist_mincount': 30,
}
}, {
# no /sesong/ in path
'url': 'https://tv.nrk.no/serie/lindmo/2016',
'info_dict': {
'id': 'lindmo/2016',
'title': '2016',
},
'playlist_mincount': 29,
}, {
# weird nested _embedded in catalog JSON response
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
'info_dict': {
'id': 'dickie-dick-dickens/1',
'title': 'Sesong 1',
},
'playlist_mincount': 11,
}, {
# 841 entries, multi page
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
'info_dict': {
'id': 'dagsnytt/201509',
'title': 'September 2015',
},
'playlist_mincount': 841,
}, {
# 180 entries, single page
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
@@ -570,42 +656,26 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
else super(NRKTVSeasonIE, cls).suitable(url))
def _real_extract(self, url):
display_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
domain = mobj.group('domain')
serie = mobj.group('serie')
season_id = mobj.group('id')
display_id = '%s/%s' % (serie, season_id)
webpage = self._download_webpage(url, display_id)
data = self._download_json(
'https://psapi.nrk.no/%s/catalog/series/%s/seasons/%s'
% (domain, serie, season_id), display_id, query={'pageSize': 50})
series = self._extract_series(webpage, display_id)
season = next(
s for s in series['seasons']
if int(display_id) == s.get('seasonNumber'))
title = try_get(season, lambda x: x['titles']['title'], compat_str)
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
return self.playlist_result(
self._extract_episodes(season), display_id, title)
self._entries(data, display_id),
display_id, title)
class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{
'url': 'https://tv.nrk.no/serie/blank',
'info_dict': {
'id': 'blank',
'title': 'Blank',
'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
},
'playlist_mincount': 30,
}, {
# new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': {
'id': 'backstage',
'title': 'Backstage',
'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
},
'playlist_mincount': 60,
}, {
# new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
@@ -613,7 +683,30 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'playlist_mincount': 10,
'playlist_mincount': 90,
}, {
# new layout, instalments, more entries
'url': 'https://tv.nrk.no/serie/lindmo',
'only_matching': True,
}, {
'url': 'https://tv.nrk.no/serie/blank',
'info_dict': {
'id': 'blank',
'title': 'Blank',
'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
},
'playlist_mincount': 30,
'expected_warnings': ['HTTP Error 404: Not Found'],
}, {
# new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': {
'id': 'backstage',
'title': 'Backstage',
'description': 'md5:63692ceb96813d9a207e9910483d948b',
},
'playlist_mincount': 60,
'expected_warnings': ['HTTP Error 404: Not Found'],
}, {
# old layout
'url': 'https://tv.nrksuper.no/serie/labyrint',
@@ -632,6 +725,13 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
}, {
'url': 'https://tv.nrk.no/serie/postmann-pat',
'only_matching': True,
}, {
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
'info_dict': {
'id': 'dickie-dick-dickens',
},
'playlist_mincount': 8,
'expected_warnings': ['HTTP Error 404: Not Found'],
}]
@classmethod
@@ -642,18 +742,32 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
else super(NRKTVSeriesIE, cls).suitable(url))
def _real_extract(self, url):
series_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
domain = mobj.group('domain')
series_id = mobj.group('id')
title = description = None
webpage = self._download_webpage(url, series_id)
# New layout (e.g. https://tv.nrk.no/serie/backstage)
series = self._extract_series(webpage, series_id, fatal=False)
if series:
title = try_get(series, lambda x: x['titles']['title'], compat_str)
description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str)
data = self._download_json(
'https://psapi.nrk.no/%s/catalog/series/%s/instalments'
% (domain, series_id), series_id, query={'pageSize': 50},
fatal=False)
if data:
return self.playlist_result(
self._entries(data, series_id), series_id, title, description)
# New layout (e.g. https://tv.nrk.no/serie/backstage)
if series:
entries = []
entries.extend(self._extract_seasons(series.get('seasons')))
entries.extend(self._extract_seasons(domain, series_id, series.get('seasons')))
entries.extend(self._extract_entries(series.get('instalments')))
entries.extend(self._extract_episodes(series.get('extraMaterial')))
return self.playlist_result(entries, series_id, title, description)

View File

@@ -31,7 +31,12 @@ class PornHubBaseIE(InfoExtractor):
def dl(*args, **kwargs):
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
webpage, urlh = dl(*args, **kwargs)
ret = dl(*args, **kwargs)
if not ret:
return ret
webpage, urlh = ret
if any(re.search(p, webpage) for p in (
r'<body\b[^>]+\bonload=["\']go\(\)',
@@ -53,7 +58,7 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
@@ -152,6 +157,9 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
'only_matching': True,
}, {
'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
'only_matching': True,
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
'only_matching': True,
@@ -160,7 +168,7 @@ class PornHubIE(PornHubBaseIE):
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
webpage)
def _extract_count(self, pattern, webpage, name):
@@ -346,9 +354,9 @@ class PornHubIE(PornHubBaseIE):
view_count = self._extract_count(
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
like_count = self._extract_count(
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
dislike_count = self._extract_count(
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
@@ -422,7 +430,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118,
@@ -490,7 +498,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True,
@@ -605,7 +613,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
_TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': {

View File

@@ -0,0 +1,176 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
str_or_none,
try_get,
unified_timestamp,
url_or_none,
)
def _extract_episode(data, episode_id=None):
title = data['title']
download_url = data['download_url']
series = try_get(data, lambda x: x['show']['title'], compat_str)
uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
thumbnails = []
for image in ('image_original', 'image_medium', 'image'):
image_url = url_or_none(data.get('%s_url' % image))
if image_url:
thumbnails.append({'url': image_url})
def stats(key):
return int_or_none(try_get(
data,
(lambda x: x['%ss_count' % key],
lambda x: x['stats']['%ss' % key])))
def duration(key):
return float_or_none(data.get(key), scale=1000)
return {
'id': compat_str(episode_id or data['episode_id']),
'url': download_url,
'display_id': data.get('permalink'),
'title': title,
'description': data.get('description'),
'timestamp': unified_timestamp(data.get('published_at')),
'uploader': uploader,
'uploader_id': str_or_none(data.get('author_id')),
'creator': uploader,
'duration': duration('duration') or duration('length'),
'view_count': stats('play'),
'like_count': stats('like'),
'comment_count': stats('message'),
'format': 'MPEG Layer 3',
'format_id': 'mp3',
'container': 'mp3',
'ext': 'mp3',
'thumbnails': thumbnails,
'series': series,
'extractor_key': SpreakerIE.ie_key(),
}
class SpreakerIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
api\.spreaker\.com/
(?:
(?:download/)?episode|
v2/episodes
)/
(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://api.spreaker.com/episode/12534508',
'info_dict': {
'id': '12534508',
'display_id': 'swm-ep15-how-to-market-your-music-part-2',
'ext': 'mp3',
'title': 'EP:15 | Music Marketing (Likes) - Part 2',
'description': 'md5:0588c43e27be46423e183076fa071177',
'timestamp': 1502250336,
'upload_date': '20170809',
'uploader': 'SWM',
'uploader_id': '9780658',
'duration': 1063.42,
'view_count': int,
'like_count': int,
'comment_count': int,
'series': 'Success With Music (SWM)',
},
}, {
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
'only_matching': True,
}, {
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
'only_matching': True,
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
data = self._download_json(
'https://api.spreaker.com/v2/episodes/%s' % episode_id,
episode_id)['response']['episode']
return _extract_episode(data, episode_id)
class SpreakerPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
episode_id = self._search_regex(
(r'data-episode_id=["\'](?P<id>\d+)',
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
return self.url_result(
'https://api.spreaker.com/episode/%s' % episode_id,
ie=SpreakerIE.ie_key(), video_id=episode_id)
class SpreakerShowIE(InfoExtractor):
_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
_TESTS = [{
'url': 'https://api.spreaker.com/show/4652058',
'info_dict': {
'id': '4652058',
},
'playlist_mincount': 118,
}]
def _entries(self, show_id):
for page_num in itertools.count(1):
episodes = self._download_json(
'https://api.spreaker.com/show/%s/episodes' % show_id,
show_id, note='Downloading JSON page %d' % page_num, query={
'page': page_num,
'max_per_page': 100,
})
pager = try_get(episodes, lambda x: x['response']['pager'], dict)
if not pager:
break
results = pager.get('results')
if not results or not isinstance(results, list):
break
for result in results:
if not isinstance(result, dict):
continue
yield _extract_episode(result)
if page_num == pager.get('last_page'):
break
def _real_extract(self, url):
show_id = self._match_id(url)
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
class SpreakerShowPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/success-with-music',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
show_id = self._search_regex(
r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
return self.url_result(
'https://api.spreaker.com/show/%s' % show_id,
ie=SpreakerShowIE.ie_key(), video_id=show_id)

View File

@@ -269,7 +269,7 @@ class TeachableCourseIE(TeachableBaseIE):
r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
webpage):
li = mobj.group('li')
if 'fa-youtube-play' not in li:
if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li):
continue
lecture_url = self._search_regex(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,

View File

@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import (
compat_str,
ExtractorError,
float_or_none,
int_or_none,
str_or_none,
try_get,
@@ -13,7 +14,7 @@ from ..utils import (
class TikTokBaseIE(InfoExtractor):
def _extract_aweme(self, data):
def _extract_video(self, data, video_id=None):
video = data['video']
description = str_or_none(try_get(data, lambda x: x['desc']))
width = int_or_none(try_get(data, lambda x: video['width']))
@@ -21,43 +22,54 @@ class TikTokBaseIE(InfoExtractor):
format_urls = set()
formats = []
for format_id in (
'play_addr_lowbr', 'play_addr', 'play_addr_h264',
'download_addr'):
for format in try_get(
video, lambda x: x[format_id]['url_list'], list) or []:
format_url = url_or_none(format)
if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'ext': 'mp4',
'height': height,
'width': width,
})
for format_id in ('download', 'play'):
format_url = url_or_none(video.get('%sAddr' % format_id))
if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'ext': 'mp4',
'height': height,
'width': width,
'http_headers': {
'Referer': 'https://www.tiktok.com/',
}
})
self._sort_formats(formats)
thumbnail = url_or_none(try_get(
video, lambda x: x['cover']['url_list'][0], compat_str))
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
timestamp = int_or_none(data.get('create_time'))
comment_count = int_or_none(data.get('comment_count')) or int_or_none(
try_get(data, lambda x: x['statistics']['comment_count']))
repost_count = int_or_none(try_get(
data, lambda x: x['statistics']['share_count']))
thumbnail = url_or_none(video.get('cover'))
duration = float_or_none(video.get('duration'))
aweme_id = data['aweme_id']
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
uploader_id = try_get(data, lambda x: x['author']['id'], compat_str)
timestamp = int_or_none(data.get('createTime'))
def stats(key):
return int_or_none(try_get(
data, lambda x: x['stats']['%sCount' % key]))
view_count = stats('play')
like_count = stats('digg')
comment_count = stats('comment')
repost_count = stats('share')
aweme_id = data.get('id') or video_id
return {
'id': aweme_id,
'title': uploader or aweme_id,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'timestamp': timestamp,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
'repost_count': repost_count,
'formats': formats,
@@ -65,62 +77,56 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:m\.)?tiktok\.com/v|
(?:www\.)?tiktok\.com/share/video
)
/(?P<id>\d+)
'''
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@[^/]+/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://m.tiktok.com/v/6606727368545406213.html',
'md5': 'd584b572e92fcd48888051f238022420',
'url': 'https://www.tiktok.com/@zureeal/video/6606727368545406213',
'md5': '163ceff303bb52de60e6887fe399e6cd',
'info_dict': {
'id': '6606727368545406213',
'ext': 'mp4',
'title': 'Zureeal',
'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
'thumbnail': r're:^https?://.*~noop.image',
'thumbnail': r're:^https?://.*',
'duration': 15,
'uploader': 'Zureeal',
'uploader_id': '188294915489964032',
'timestamp': 1538248586,
'upload_date': '20180929',
'view_count': int,
'like_count': int,
'comment_count': int,
'repost_count': int,
}
}, {
'url': 'https://www.tiktok.com/share/video/6606727368545406213',
'only_matching': True,
}]
def _real_initialize(self):
# Setup session (will set necessary cookies)
self._request_webpage(
'https://www.tiktok.com/', None, note='Setting up session')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://m.tiktok.com/v/%s.html' % video_id, video_id)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(self._search_regex(
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
return self._extract_aweme(data)
r'<script[^>]+\bid=["\']__NEXT_DATA__[^>]+>\s*({.+?})\s*</script',
webpage, 'data'), video_id)['props']['pageProps']['itemInfo']['itemStruct']
return self._extract_video(data, video_id)
class TikTokUserIE(TikTokBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:m\.)?tiktok\.com/h5/share/usr|
(?:www\.)?tiktok\.com/share/user
)
/(?P<id>\d+)
'''
_VALID_URL = r'https://(?:www\.)?tiktok\.com/@(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'url': 'https://www.tiktok.com/@zureeal',
'info_dict': {
'id': '188294915489964032',
},
'playlist_mincount': 24,
}, {
'url': 'https://www.tiktok.com/share/user/188294915489964032',
'only_matching': True,
}]
_WORKING = False
@classmethod
def suitable(cls, url):
return False if TikTokIE.suitable(url) else super(TikTokUserIE, cls).suitable(url)
def _real_extract(self, url):
user_id = self._match_id(url)
@@ -130,7 +136,7 @@ class TikTokUserIE(TikTokBaseIE):
entries = []
for aweme in data['aweme_list']:
try:
entry = self._extract_aweme(aweme)
entry = self._extract_video(aweme)
except ExtractorError:
continue
entry['extractor_key'] = TikTokIE.ie_key()

View File

@@ -11,13 +11,13 @@ from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
sanitized_Request,
strip_or_none,
)
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
_VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
_VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
@@ -84,28 +84,12 @@ class ToggleIE(InfoExtractor):
'only_matching': True,
}]
_FORMAT_PREFERENCES = {
'wvm-STBMain': -10,
'wvm-iPadMain': -20,
'wvm-iPhoneMain': -30,
'wvm-Android': -40,
}
_API_USER = 'tvpapi_147'
_API_PASS = '11111'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, note='Downloading video page')
api_user = self._search_regex(
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
default=self._API_USER, group='user')
api_pass = self._search_regex(
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
default=self._API_PASS, group='pass')
params = {
'initObj': {
'Locale': {
@@ -118,17 +102,16 @@ class ToggleIE(InfoExtractor):
'SiteGuid': 0,
'DomainID': '0',
'UDID': '',
'ApiUser': api_user,
'ApiPass': api_pass
'ApiUser': self._API_USER,
'ApiPass': self._API_PASS
},
'MediaID': video_id,
'mediaType': 0,
}
req = sanitized_Request(
info = self._download_json(
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
json.dumps(params).encode('utf-8'))
info = self._download_json(req, video_id, 'Downloading video info json')
video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
title = info['MediaName']
@@ -141,11 +124,16 @@ class ToggleIE(InfoExtractor):
vid_format = vid_format.replace(' ', '')
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id=vid_format,
note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False))
fatal=False)
for f in m3u8_formats:
# Apple FairPlay Streaming
if '/fpshls/' in f['url']:
continue
formats.append(f)
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id=vid_format,
@@ -158,28 +146,21 @@ class ToggleIE(InfoExtractor):
note='Downloading %s ISM manifest' % vid_format,
errnote='Failed to download %s ISM manifest' % vid_format,
fatal=False))
elif ext in ('mp4', 'wvm'):
# wvm are drm-protected files
elif ext == 'mp4':
formats.append({
'ext': ext,
'url': video_url,
'format_id': vid_format,
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
'format_note': 'DRM-protected video' if ext == 'wvm' else None
})
if not formats:
for meta in (info.get('Metas') or []):
if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
raise ExtractorError(
'This video is DRM protected.', expected=True)
# Most likely because geo-blocked
raise ExtractorError('No downloadable videos found', expected=True)
self._sort_formats(formats)
duration = int_or_none(info.get('Duration'))
description = info.get('Description')
created_at = parse_iso8601(info.get('CreationDate') or None)
average_rating = float_or_none(info.get('Rating'))
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
thumbnails = []
for picture in info.get('Pictures', []):
if not isinstance(picture, dict):
@@ -199,15 +180,46 @@ class ToggleIE(InfoExtractor):
})
thumbnails.append(thumbnail)
def counter(prefix):
return int_or_none(
info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': created_at,
'average_rating': average_rating,
'view_count': view_count,
'like_count': like_count,
'description': strip_or_none(info.get('Description')),
'duration': int_or_none(info.get('Duration')),
'timestamp': parse_iso8601(info.get('CreationDate') or None),
'average_rating': float_or_none(info.get('Rating')),
'view_count': counter('View'),
'like_count': counter('Like'),
'thumbnails': thumbnails,
'formats': formats,
}
class MeWatchIE(InfoExtractor):
IE_NAME = 'mewatch'
_VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
'info_dict': {
'id': '1008625',
'ext': 'mp4',
'title': 'Recipe Of Life 味之道',
'timestamp': 1603306526,
'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
'upload_date': '20201021',
},
'params': {
'skip_download': 'm3u8 download',
},
}]
def _real_extract(self, url):
item_id = self._match_id(url)
custom_id = self._download_json(
'https://cdn.mewatch.sg/api/items/' + item_id,
item_id, query={'segments': 'all'})['customId']
return self.url_result(
'toggle:' + custom_id, ToggleIE.ie_key(), custom_id)

View File

@@ -4,7 +4,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
smuggle_url,
strip_or_none,
)
@@ -23,7 +25,8 @@ class TVAIE(InfoExtractor):
'params': {
# m3u8 download
'skip_download': True,
}
},
'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'https://video.tva.ca/details/_5596811470001',
'only_matching': True,
@@ -32,26 +35,54 @@ class TVAIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
'Accept': 'application/json',
}, query={
'appId': '5955fc5f23eec60006c951f1',
})
def get_attribute(key):
for attribute in video_data.get('attributes', []):
if attribute.get('key') == key:
return attribute.get('value')
return None
return {
'_type': 'url_transparent',
'id': video_id,
'title': get_attribute('title'),
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
'description': get_attribute('description'),
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
'duration': float_or_none(get_attribute('video-duration'), 1000),
'ie_key': 'BrightcoveNew',
}
class QubIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
'md5': '949490fd0e7aee11d0543777611fbd53',
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
'title': 'Épisode 01',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
},
}, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
'only_matching': True,
}]
# reference_id also works with old account_id(5481942443001)
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
def _real_extract(self, url):
entity_id = self._match_id(url)
entity = self._download_json(
'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
entity_id, query={'id': entity_id})
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
'id': video_id,
'title': episode,
# 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
'url': 'https://videos.tva.ca/details/_' + video_id,
'description': entity.get('longDescription'),
'duration': float_or_none(entity.get('durationMillis'), 1000),
'episode': episode,
'episode_number': int_or_none(entity.get('episodeNumber')),
# 'ie_key': 'BrightcoveNew',
'ie_key': TVAIE.ie_key(),
}

View File

@@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
remove_start,
smuggle_url,
try_get,
)
class TVerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
# videos are only available for 7 days
_TESTS = [{
'url': 'https://tver.jp/corner/f0062178',
'only_matching': True,
}, {
'url': 'https://tver.jp/feature/f0062413',
'only_matching': True,
}, {
'url': 'https://tver.jp/episode/79622438',
'only_matching': True,
}]
_TOKEN = None
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_initialize(self):
self._TOKEN = self._download_json(
'https://tver.jp/api/access_token.php', None)['token']
def _real_extract(self, url):
path, video_id = re.match(self._VALID_URL, url).groups()
main = self._download_json(
'https://api.tver.jp/v4/' + path, video_id,
query={'token': self._TOKEN})['main']
p_id = main['publisher_id']
service = remove_start(main['service'], 'ts_')
info = {
'_type': 'url_transparent',
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
}
if service == 'cx':
info.update({
'title': main.get('subtitle') or main['title'],
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
'ie_key': 'FujiTVFODPlus7',
})
else:
r_id = main['reference_id']
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
r_id = 'ref:' + r_id
bc_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
{'geo_countries': ['JP']})
info.update({
'url': bc_url,
'ie_key': 'BrightcoveNew',
})
return info

View File

@@ -1,16 +1,25 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import re
import string
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
mimetype2ext,
parse_codecs,
update_url_query,
xpath_element,
xpath_text,
)
from ..compat import (
compat_b64decode,
compat_ord,
compat_struct_pack,
)
class VideaIE(InfoExtractor):
@@ -19,7 +28,7 @@ class VideaIE(InfoExtractor):
videa(?:kid)?\.hu/
(?:
videok/(?:[^/]+/)*[^?#&]+-|
player\?.*?\bv=|
(?:videojs_)?player\?.*?\bv=|
player/v/
)
(?P<id>[^?#&]+)
@@ -53,6 +62,7 @@ class VideaIE(InfoExtractor):
'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
'only_matching': True,
}]
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
@staticmethod
def _extract_urls(webpage):
@@ -60,26 +70,84 @@ class VideaIE(InfoExtractor):
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
webpage)]
@staticmethod
def rc4(cipher_text, key):
res = b''
key_len = len(key)
S = list(range(256))
j = 0
for i in range(256):
j = (j + S[i] + ord(key[i % key_len])) % 256
S[i], S[j] = S[j], S[i]
i = 0
j = 0
for m in range(len(cipher_text)):
i = (i + 1) % 256
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
k = S[(S[i] + S[j]) % 256]
res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
return res.decode()
def _real_extract(self, url):
video_id = self._match_id(url)
query = {'v': video_id}
player_page = self._download_webpage(
'https://videa.hu/player', video_id, query=query)
info = self._download_xml(
'http://videa.hu/videaplayer_get_xml.php', video_id,
query={'v': video_id})
nonce = self._search_regex(
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
l = nonce[:32]
s = nonce[32:]
result = ''
for i in range(0, 32):
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
video = xpath_element(info, './/video', 'video', fatal=True)
sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
query['_s'] = random_seed
query['_t'] = result[:16]
b64_info, handle = self._download_webpage_handle(
'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
if b64_info.startswith('<?xml'):
info = self._parse_xml(b64_info, video_id)
else:
key = result[16:] + random_seed + handle.headers['x-videa-xs']
info = self._parse_xml(self.rc4(
compat_b64decode(b64_info), key), video_id)
video = xpath_element(info, './video', 'video')
if not video:
raise ExtractorError(xpath_element(
info, './error', fatal=True), expected=True)
sources = xpath_element(
info, './video_sources', 'sources', fatal=True)
hash_values = xpath_element(
info, './hash_values', 'hash values', fatal=True)
title = xpath_text(video, './title', fatal=True)
formats = []
for source in sources.findall('./video_source'):
source_url = source.text
if not source_url:
source_name = source.get('name')
source_exp = source.get('exp')
if not (source_url and source_name and source_exp):
continue
hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
if not hash_value:
continue
source_url = update_url_query(source_url, {
'md5': hash_value,
'expires': source_exp,
})
f = parse_codecs(source.get('codecs'))
f.update({
'url': source_url,
'url': self._proto_relative_url(source_url),
'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
'format_id': source.get('name'),
'width': int_or_none(source.get('width')),
@@ -88,8 +156,7 @@ class VideaIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
thumbnail = xpath_text(video, './poster_src')
duration = int_or_none(xpath_text(video, './duration'))
thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
age_limit = None
is_adult = xpath_text(video, './is_adult_content', default=None)
@@ -100,7 +167,7 @@ class VideaIE(InfoExtractor):
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'duration': int_or_none(xpath_text(video, './duration')),
'age_limit': age_limit,
'formats': formats,
}

View File

@@ -13,6 +13,8 @@ from ..utils import (
ExtractorError,
int_or_none,
merge_dicts,
str_or_none,
strip_or_none,
try_get,
urlencode_postdata,
)
@@ -66,6 +68,10 @@ class VLiveIE(VLiveBaseIE):
}, {
'url': 'https://www.vlive.tv/embed/1326',
'only_matching': True,
}, {
# works only with gcc=KR
'url': 'https://www.vlive.tv/video/225019',
'only_matching': True,
}]
def _real_initialize(self):
@@ -100,26 +106,26 @@ class VLiveIE(VLiveBaseIE):
raise ExtractorError('Unable to log in', expected=True)
def _call_api(self, path_template, video_id, fields=None):
query = {'appId': self._APP_ID}
query = {'appId': self._APP_ID, 'gcc': 'KR'}
if fields:
query['fields'] = fields
return self._download_json(
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
def _real_extract(self, url):
video_id = self._match_id(url)
try:
post = self._call_api(
'post/v1.0/officialVideoPost-%s', video_id,
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
return self._download_json(
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
raise
def _real_extract(self, url):
video_id = self._match_id(url)
post = self._call_api(
'post/v1.0/officialVideoPost-%s', video_id,
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
video = post['officialVideo']
def get_common_fields():
@@ -170,6 +176,83 @@ class VLiveIE(VLiveBaseIE):
raise ExtractorError('Unknown status ' + status)
class VLivePostIE(VLiveIE):
IE_NAME = 'vlive:post'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
_TESTS = [{
# uploadType = SOS
'url': 'https://www.vlive.tv/post/1-20088044',
'info_dict': {
'id': '1-20088044',
'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
},
'playlist_count': 3,
}, {
# uploadType = V
'url': 'https://www.vlive.tv/post/1-20087926',
'info_dict': {
'id': '1-20087926',
'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
},
'playlist_count': 1,
}]
_FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
_SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
_INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
def _real_extract(self, url):
post_id = self._match_id(url)
post = self._call_api(
'post/v1.0/post-%s', post_id,
'attachments{video},officialVideo{videoSeq},plainBody,title')
video_seq = str_or_none(try_get(
post, lambda x: x['officialVideo']['videoSeq']))
if video_seq:
return self.url_result(
'http://www.vlive.tv/video/' + video_seq,
VLiveIE.ie_key(), video_seq)
title = post['title']
entries = []
for idx, video in enumerate(post['attachments']['video'].values()):
video_id = video.get('videoId')
if not video_id:
continue
upload_type = video.get('uploadType')
upload_info = video.get('uploadInfo') or {}
entry = None
if upload_type == 'SOS':
download = self._call_api(
self._SOS_TMPL, video_id)['videoUrl']['download']
formats = []
for f_id, f_url in download.items():
formats.append({
'format_id': f_id,
'url': f_url,
'height': int_or_none(f_id[:-1]),
})
self._sort_formats(formats)
entry = {
'formats': formats,
'id': video_id,
'thumbnail': upload_info.get('imageUrl'),
}
elif upload_type == 'V':
vod_id = upload_info.get('videoId')
if not vod_id:
continue
inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
entry = self._extract_video_info(video_id, vod_id, inkey)
if entry:
entry['title'] = '%s_part%s' % (title, idx)
entries.append(entry)
return self.playlist_result(
entries, post_id, title, strip_or_none(post.get('plainBody')))
class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'

View File

@@ -46,57 +46,69 @@ class YandexMusicBaseIE(InfoExtractor):
self._handle_error(response)
return response
def _call_api(self, ep, tld, url, item_id, note, query):
return self._download_json(
'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
item_id, note,
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
'X-Retpath-Y': url,
},
query=query)
class YandexMusicTrackIE(YandexMusicBaseIE):
IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
_TESTS = [{
'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
'md5': 'dec8b661f12027ceaba33318787fff76',
'info_dict': {
'id': '4878838',
'ext': 'mp3',
'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
'filesize': 4628061,
'title': 'md5:c63e19341fdbe84e43425a30bc777856',
'filesize': int,
'duration': 193.04,
'track': 'Gypsy Eyes 1',
'album': 'Gypsy Soul',
'album_artist': 'Carlo Ambrosio',
'artist': 'Carlo Ambrosio & Fabio Di Bari',
'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
'release_year': 2009,
},
'skip': 'Travis CI servers blocked by YandexMusic',
# 'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# multiple disks
'url': 'http://music.yandex.ru/album/3840501/track/705105',
'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
'md5': '82a54e9e787301dd45aba093cf6e58c0',
'info_dict': {
'id': '705105',
'ext': 'mp3',
'title': 'Hooverphonic - Sometimes',
'filesize': 5743386,
'title': 'md5:f86d4a9188279860a83000277024c1a6',
'filesize': int,
'duration': 239.27,
'track': 'Sometimes',
'album': 'The Best of Hooverphonic',
'album_artist': 'Hooverphonic',
'artist': 'Hooverphonic',
'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
'release_year': 2016,
'genre': 'pop',
'disc_number': 2,
'track_number': 9,
},
'skip': 'Travis CI servers blocked by YandexMusic',
# 'skip': 'Travis CI servers blocked by YandexMusic',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
album_id, track_id = mobj.group('album_id'), mobj.group('id')
tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
track = self._download_json(
'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
track_id, 'Downloading track JSON')['track']
track = self._call_api(
'track', tld, url, track_id, 'Downloading track JSON',
{'track': '%s:%s' % (track_id, album_id)})['track']
track_title = track['title']
download_data = self._download_json(
@@ -109,8 +121,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
'Downloading track location JSON',
query={'format': 'json'})
key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
storage = track['storageDir'].split('.')
f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
thumbnail = None
cover_uri = track.get('albums', [{}])[0].get('coverUri')
@@ -180,42 +191,85 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
def _extract_tracks(self, source, item_id, url, tld):
tracks = source['tracks']
track_ids = [compat_str(track_id) for track_id in source['trackIds']]
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
# missing tracks should be retrieved manually.
if len(tracks) < len(track_ids):
present_track_ids = set([
compat_str(track['id'])
for track in tracks if track.get('id')])
missing_track_ids = [
track_id for track_id in track_ids
if track_id not in present_track_ids]
missing_tracks = self._call_api(
'track-entries', tld, url, item_id,
'Downloading missing tracks JSON', {
'entries': ','.join(missing_track_ids),
'lang': tld,
'external-domain': 'music.yandex.%s' % tld,
'overembed': 'false',
'strict': 'true',
})
if missing_tracks:
tracks.extend(missing_tracks)
return tracks
def _build_playlist(self, tracks):
return [
self.url_result(
'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
entries = []
for track in tracks:
track_id = track.get('id') or track.get('realId')
if not track_id:
continue
albums = track.get('albums')
if not albums or not isinstance(albums, list):
continue
album = albums[0]
if not isinstance(album, dict):
continue
album_id = album.get('id')
if not album_id:
continue
entries.append(self.url_result(
'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
return entries
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:album'
IE_DESC = 'Яндекс.Музыка - Альбом'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
_TESTS = [{
'url': 'http://music.yandex.ru/album/540508',
'info_dict': {
'id': '540508',
'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
},
'playlist_count': 50,
'skip': 'Travis CI servers blocked by YandexMusic',
# 'skip': 'Travis CI servers blocked by YandexMusic',
}, {
'url': 'https://music.yandex.ru/album/3840501',
'info_dict': {
'id': '3840501',
'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
},
'playlist_count': 33,
'skip': 'Travis CI servers blocked by YandexMusic',
# 'skip': 'Travis CI servers blocked by YandexMusic',
}]
def _real_extract(self, url):
album_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
tld = mobj.group('tld')
album_id = mobj.group('id')
album = self._download_json(
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
album_id, 'Downloading album JSON')
album = self._call_api(
'album', tld, url, album_id, 'Downloading album JSON',
{'album': album_id})
entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
@@ -236,21 +290,24 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
'info_dict': {
'id': '1245',
'title': 'Что слушают Enter Shikari',
'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
},
'playlist_count': 6,
'skip': 'Travis CI servers blocked by YandexMusic',
'playlist_count': 5,
# 'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
# https://github.com/ytdl-org/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
'only_matching': True,
}, {
# playlist exceeding the limit of 150 tracks (see
# https://github.com/ytdl-org/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
'info_dict': {
'id': '1036',
'title': 'Музыка 90-х',
'id': '1364',
'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
},
'playlist_mincount': 300,
'skip': 'Travis CI servers blocked by YandexMusic',
'playlist_mincount': 437,
# 'skip': 'Travis CI servers blocked by YandexMusic',
}]
def _real_extract(self, url):
@@ -259,16 +316,8 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
user = mobj.group('user')
playlist_id = mobj.group('id')
playlist = self._download_json(
'https://music.yandex.%s/handlers/playlist.jsx' % tld,
playlist_id, 'Downloading missing tracks JSON',
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
'X-Retpath-Y': url,
},
query={
playlist = self._call_api(
'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
'owner': user,
'kinds': playlist_id,
'light': 'true',
@@ -277,37 +326,103 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'overembed': 'false',
})['playlist']
tracks = playlist['tracks']
track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
# missing tracks should be retrieved manually.
if len(tracks) < len(track_ids):
present_track_ids = set([
compat_str(track['id'])
for track in tracks if track.get('id')])
missing_track_ids = [
track_id for track_id in track_ids
if track_id not in present_track_ids]
missing_tracks = self._download_json(
'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
playlist_id, 'Downloading missing tracks JSON',
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
},
query={
'entries': ','.join(missing_track_ids),
'lang': tld,
'external-domain': 'music.yandex.%s' % tld,
'overembed': 'false',
'strict': 'true',
})
if missing_tracks:
tracks.extend(missing_tracks)
tracks = self._extract_tracks(playlist, playlist_id, url, tld)
return self.playlist_result(
self._build_playlist(tracks),
compat_str(playlist_id),
playlist.get('title'), playlist.get('description'))
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
def _call_artist(self, tld, url, artist_id):
return self._call_api(
'artist', tld, url, artist_id,
'Downloading artist %s JSON' % self._ARTIST_WHAT, {
'artist': artist_id,
'what': self._ARTIST_WHAT,
'sort': self._ARTIST_SORT or '',
'dir': '',
'period': '',
'lang': tld,
'external-domain': 'music.yandex.%s' % tld,
'overembed': 'false',
})
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
tld = mobj.group('tld')
artist_id = mobj.group('id')
data = self._call_artist(tld, url, artist_id)
tracks = self._extract_tracks(data, artist_id, url, tld)
title = try_get(data, lambda x: x['artist']['name'], compat_str)
return self.playlist_result(
self._build_playlist(tracks), artist_id, title)
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
IE_NAME = 'yandexmusic:artist:tracks'
IE_DESC = 'Яндекс.Музыка - Артист - Треки'
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/tracks'
_TESTS = [{
'url': 'https://music.yandex.ru/artist/617526/tracks',
'info_dict': {
'id': '617526',
'title': 'md5:131aef29d45fd5a965ca613e708c040b',
},
'playlist_count': 507,
# 'skip': 'Travis CI servers blocked by YandexMusic',
}]
_ARTIST_SORT = ''
_ARTIST_WHAT = 'tracks'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
tld = mobj.group('tld')
artist_id = mobj.group('id')
data = self._call_artist(tld, url, artist_id)
tracks = self._extract_tracks(data, artist_id, url, tld)
artist = try_get(data, lambda x: x['artist']['name'], compat_str)
title = '%s - %s' % (artist or artist_id, 'Треки')
return self.playlist_result(
self._build_playlist(tracks), artist_id, title)
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
IE_NAME = 'yandexmusic:artist:albums'
IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/albums'
_TESTS = [{
'url': 'https://music.yandex.ru/artist/617526/albums',
'info_dict': {
'id': '617526',
'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
},
'playlist_count': 8,
# 'skip': 'Travis CI servers blocked by YandexMusic',
}]
_ARTIST_SORT = 'year'
_ARTIST_WHAT = 'albums'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
tld = mobj.group('tld')
artist_id = mobj.group('id')
data = self._call_artist(tld, url, artist_id)
entries = []
for album in data['albums']:
if not isinstance(album, dict):
continue
album_id = album.get('id')
if not album_id:
continue
entries.append(self.url_result(
'http://music.yandex.ru/album/%s' % album_id,
ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
artist = try_get(data, lambda x: x['artist']['name'], compat_str)
title = '%s - %s' % (artist or artist_id, 'Альбомы')
return self.playlist_result(entries, artist_id, title)

View File

@@ -283,6 +283,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
}
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
def _call_api(self, ep, query, video_id):
data = self._DEFAULT_API_DATA.copy()
@@ -601,7 +602,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
},
# Normal age-gate video (No vevo, embed allowed)
# Normal age-gate video (No vevo, embed allowed), available via embed page
{
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
@@ -617,6 +618,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'age_limit': 18,
},
},
{
# Age-gated video only available with authentication (unavailable
# via embed page workaround)
'url': 'XgnwCQzjau8',
'only_matching': True,
},
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
@@ -1068,7 +1075,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
},
{
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
# with '};' inside yt initial data (see [1])
# see [2] for an example with '};' inside ytInitialPlayerResponse
# 1. https://github.com/ytdl-org/youtube-dl/issues/27093
# 2. https://github.com/ytdl-org/youtube-dl/issues/27216
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
'info_dict': {
'id': 'CHqg6qOn4no',
@@ -1633,8 +1643,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video info
video_info = {}
embed_webpage = None
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
or re.search(r'player-age-gate-content">', video_webpage) is not None):
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
@@ -1686,7 +1696,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not video_info and not player_response:
player_response = extract_player_response(
self._search_regex(
r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
'initial player response', default='{}'),
video_id)
@@ -2785,12 +2796,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# TODO
pass
def _shelf_entries(self, shelf_renderer):
def _shelf_entries(self, shelf_renderer, skip_channels=False):
ep = try_get(
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str)
shelf_url = urljoin('https://www.youtube.com', ep)
if shelf_url:
# Skipping links to another channels, note that checking for
# endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
# will not work
if skip_channels and '/channels?' in shelf_url:
return
title = try_get(
shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
yield self.url_result(shelf_url, video_title=title)
@@ -2901,9 +2917,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}
def _entries(self, tab, identity_token):
slr_renderer = try_get(tab, lambda x: x['sectionListRenderer'], dict)
tab_content = try_get(tab, lambda x: x['content'], dict)
if not tab_content:
return
slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
if not slr_renderer:
return
is_channels_tab = tab.get('title') == 'Channels'
continuation = None
slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
for slr_content in slr_contents:
@@ -2930,7 +2950,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue
renderer = isr_content.get('shelfRenderer')
if renderer:
for entry in self._shelf_entries(renderer):
for entry in self._shelf_entries(renderer, not is_channels_tab):
yield entry
continue
renderer = isr_content.get('backstagePostThreadRenderer')
@@ -3060,7 +3080,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
description = None
playlist_id = item_id
playlist = self.playlist_result(
self._entries(selected_tab['content'], identity_token),
self._entries(selected_tab, identity_token),
playlist_id=playlist_id, playlist_title=title,
playlist_description=description)
playlist.update(self._extract_uploader(data))

View File

@@ -40,7 +40,7 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
_GEO_COUNTRIES = ['DE']
_TESTS = [{
@@ -119,7 +119,7 @@ class ZDFIE(ZDFBaseIE):
if not ptmd_path:
ptmd_path = t[
'http://zdf.de/rels/streams/ptmd-template'].replace(
'{playerId}', 'portal')
'{playerId}', 'ngplayer_2_4')
ptmd = self._call_api(
urljoin(url, ptmd_path), player, url, video_id, 'metadata')

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2020.11.24'
__version__ = '2020.12.05'