Compare commits

..

No commits in common. "13ec444a982fa0c3b2e61666792069d35111ff31" and "2e4726423544041887641df18e2f4c4c5e3f4f6d" have entirely different histories.

12 changed files with 92 additions and 239 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2020.12.05** - [ ] I've verified that I'm running youtube-dl version **2020.12.02**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.12.05 [debug] youtube-dl version 2020.12.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2020.12.05** - [ ] I've verified that I'm running youtube-dl version **2020.12.02**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2020.12.05** - [ ] I've verified that I'm running youtube-dl version **2020.12.02**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2020.12.05** - [ ] I've verified that I'm running youtube-dl version **2020.12.02**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.12.05 [debug] youtube-dl version 2020.12.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.02. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2020.12.05** - [ ] I've verified that I'm running youtube-dl version **2020.12.02**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

View File

@ -1,24 +1,3 @@
version 2020.12.05
Core
* [extractor/common] Improve Akamai HTTP format extraction
* Allow m3u8 manifest without an additional audio format
* Fix extraction for qualities starting with a number
Extractors
* [teachable:course] Improve extraction (#24507, #27286)
* [nrk] Improve error extraction
* [nrktv:series] Improve extraction (#21926)
* [nrktv:season] Improve extraction
* [nrk] Improve format extraction and geo-restriction detection (#24221)
* [pornhub] Handle HTTP errors gracefully (#26414)
* [nrktv] Relax URL regular expression (#27299, #26185)
+ [zdf] Extract webm formats (#26659)
+ [gamespot] Extract DASH and HTTP formats
+ [tver] Add support for tver.jp (#26662, #27284)
+ [pornhub] Add support for pornhub.org (#27276)
version 2020.12.02 version 2020.12.02
Extractors Extractors

View File

@ -308,7 +308,6 @@
- **FrontendMasters** - **FrontendMasters**
- **FrontendMastersCourse** - **FrontendMastersCourse**
- **FrontendMastersLesson** - **FrontendMastersLesson**
- **FujiTVFODPlus7**
- **Funimation** - **Funimation**
- **Funk** - **Funk**
- **Fusion** - **Fusion**
@ -711,7 +710,6 @@
- **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜 - **qqmusic:toplist**: QQ音乐 - 排行榜
- **QuantumTV** - **QuantumTV**
- **Qub**
- **Quickline** - **Quickline**
- **QuicklineLive** - **QuicklineLive**
- **R7** - **R7**
@ -954,7 +952,6 @@
- **TVANouvellesArticle** - **TVANouvellesArticle**
- **TVC** - **TVC**
- **TVCArticle** - **TVCArticle**
- **TVer**
- **tvigle**: Интернет-телевидение Tvigle.ru - **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com** - **tvland.com**
- **TVN24** - **TVN24**

View File

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -18,7 +17,6 @@ from ..utils import (
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
try_get, try_get,
urljoin,
url_or_none, url_or_none,
) )
@ -26,27 +24,6 @@ from ..utils import (
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO'] _GEO_COUNTRIES = ['NO']
def _extract_nrk_formats(self, asset_url, video_id):
return self._extract_m3u8_formats(
re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url),
video_id, 'mp4', 'm3u8_native', fatal=False)
def _raise_error(self, data):
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type)
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
class NRKIE(NRKBaseIE): class NRKIE(NRKBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -105,9 +82,6 @@ class NRKIE(NRKBaseIE):
'http://psapi.nrk.no/playback/manifest/%s' % video_id, 'http://psapi.nrk.no/playback/manifest/%s' % video_id,
video_id, 'Downloading manifest JSON') video_id, 'Downloading manifest JSON')
if manifest.get('playability') == 'nonPlayable':
self._raise_error(manifest['nonPlayable'])
playable = manifest['playable'] playable = manifest['playable']
formats = [] formats = []
@ -120,7 +94,9 @@ class NRKIE(NRKBaseIE):
if not format_url: if not format_url:
continue continue
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8': if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_nrk_formats(format_url, video_id)) formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
data = self._download_json( data = self._download_json(
@ -167,7 +143,14 @@ class NRKIE(NRKBaseIE):
class NRKTVIE(NRKBaseIE): class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio' IE_DESC = 'NRK TV and NRK Radio'
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})' _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE _VALID_URL = r'''(?x)
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
(?:serie(?:/[^/]+){1,2}|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117', 'url': 'https://tv.nrk.no/program/MDDP12000117',
@ -292,9 +275,6 @@ class NRKTVIE(NRKBaseIE):
}, { }, {
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller', 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
'only_matching': True,
}] }]
_api_host = None _api_host = None
@ -315,7 +295,6 @@ class NRKTVIE(NRKBaseIE):
title = data.get('fullTitle') or data.get('mainTitle') or data['title'] title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id video_id = data.get('id') or video_id
urls = []
entries = [] entries = []
conviva = data.get('convivaStatistics') or {} conviva = data.get('convivaStatistics') or {}
@ -332,13 +311,19 @@ class NRKTVIE(NRKBaseIE):
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1): for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url') asset_url = asset.get('url')
if not asset_url or asset_url in urls: if not asset_url:
continue continue
formats = self._extract_nrk_formats(asset_url, video_id) formats = self._extract_akamai_formats(asset_url, video_id)
if not formats: if not formats:
continue continue
self._sort_formats(formats) self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num) entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration')) duration = parse_duration(asset.get('duration'))
subtitles = {} subtitles = {}
@ -358,20 +343,34 @@ class NRKTVIE(NRKBaseIE):
if not entries: if not entries:
media_url = data.get('mediaUrl') media_url = data.get('mediaUrl')
if media_url and media_url not in urls: if media_url:
formats = self._extract_nrk_formats(media_url, video_id) formats = self._extract_akamai_formats(media_url, video_id)
if formats: self._sort_formats(formats)
self._sort_formats(formats) duration = parse_duration(data.get('duration'))
duration = parse_duration(data.get('duration')) entries = [{
entries = [{ 'id': video_id,
'id': video_id, 'title': make_title(title),
'title': make_title(title), 'duration': duration,
'duration': duration, 'formats': formats,
'formats': formats, }]
}]
if not entries: if not entries:
self._raise_error(data) MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
series = conviva.get('seriesName') or data.get('seriesTitle') series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
@ -518,8 +517,7 @@ class NRKTVSerieBaseIE(InfoExtractor):
config = self._parse_json( config = self._parse_json(
self._search_regex( self._search_regex(
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
r'PRELOADED_STATE_*\s*=\s*({.+?})\s*\n'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT), webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False, transform_source=js_to_json) display_id, fatal=False, transform_source=js_to_json)
if not config: if not config:
@ -529,26 +527,12 @@ class NRKTVSerieBaseIE(InfoExtractor):
(lambda x: x['initialState']['series'], lambda x: x['series']), (lambda x: x['initialState']['series'], lambda x: x['series']),
dict) dict)
def _extract_seasons(self, domain, series_id, seasons): def _extract_seasons(self, seasons):
if isinstance(seasons, dict):
seasons = seasons.get('seasons')
if not isinstance(seasons, list): if not isinstance(seasons, list):
return [] return []
entries = [] entries = []
for season in seasons: for season in seasons:
if not isinstance(season, dict): entries.extend(self._extract_episodes(season))
continue
episodes = self._extract_episodes(season)
if episodes:
entries.extend(episodes)
continue
season_name = season.get('name')
if season_name and isinstance(season_name, compat_str):
entries.append(self.url_result(
'https://%s.nrk.no/serie/%s/sesong/%s'
% (domain, series_id, season_name),
ie=NRKTVSeasonIE.ie_key(),
video_title=season.get('title')))
return entries return entries
def _extract_episodes(self, season): def _extract_episodes(self, season):
@ -561,94 +545,24 @@ class NRKTVSerieBaseIE(InfoExtractor):
return [] return []
entries = [] entries = []
for episode in entry_list: for episode in entry_list:
nrk_id = episode.get('prfId') or episode.get('episodeId') nrk_id = episode.get('prfId')
if not nrk_id or not isinstance(nrk_id, compat_str): if not nrk_id or not isinstance(nrk_id, compat_str):
continue continue
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
continue
entries.append(self.url_result( entries.append(self.url_result(
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)) 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
return entries return entries
_ASSETS_KEYS = ('episodes', 'instalments',)
def _extract_assets_key(self, embedded):
for asset_key in self._ASSETS_KEYS:
if embedded.get(asset_key):
return asset_key
def _entries(self, data, display_id):
for page_num in itertools.count(1):
embedded = data.get('_embedded')
if not isinstance(embedded, dict):
break
assets_key = self._extract_assets_key(embedded)
if not assets_key:
break
# Extract entries
entries = try_get(
embedded,
(lambda x: x[assets_key]['_embedded'][assets_key],
lambda x: x[assets_key]),
list)
for e in self._extract_entries(entries):
yield e
# Find next URL
next_url = urljoin(
'https://psapi.nrk.no/',
try_get(
data,
(lambda x: x['_links']['next']['href'],
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
compat_str))
if not next_url:
break
data = self._download_json(
next_url, display_id,
'Downloading %s JSON page %d' % (assets_key, page_num),
fatal=False)
if not data:
break
class NRKTVSeasonIE(NRKTVSerieBaseIE): class NRKTVSeasonIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)' _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
_TESTS = [{ _TEST = {
'url': 'https://tv.nrk.no/serie/backstage/sesong/1', 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
'info_dict': { 'info_dict': {
'id': 'backstage/1', 'id': '1',
'title': 'Sesong 1', 'title': 'Sesong 1',
}, },
'playlist_mincount': 30, 'playlist_mincount': 30,
}, { }
# no /sesong/ in path
'url': 'https://tv.nrk.no/serie/lindmo/2016',
'info_dict': {
'id': 'lindmo/2016',
'title': '2016',
},
'playlist_mincount': 29,
}, {
# weird nested _embedded in catalog JSON response
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
'info_dict': {
'id': 'dickie-dick-dickens/1',
'title': 'Sesong 1',
},
'playlist_mincount': 11,
}, {
# 841 entries, multi page
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
'info_dict': {
'id': 'dagsnytt/201509',
'title': 'September 2015',
},
'playlist_mincount': 841,
}, {
# 180 entries, single page
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
'only_matching': True,
}]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -656,39 +570,25 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
else super(NRKTVSeasonIE, cls).suitable(url)) else super(NRKTVSeasonIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
domain = mobj.group('domain')
serie = mobj.group('serie')
season_id = mobj.group('id')
display_id = '%s/%s' % (serie, season_id)
data = self._download_json( webpage = self._download_webpage(url, display_id)
'https://psapi.nrk.no/%s/catalog/series/%s/seasons/%s'
% (domain, serie, season_id), display_id, query={'pageSize': 50})
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id series = self._extract_series(webpage, display_id)
season = next(
s for s in series['seasons']
if int(display_id) == s.get('seasonNumber'))
title = try_get(season, lambda x: x['titles']['title'], compat_str)
return self.playlist_result( return self.playlist_result(
self._entries(data, display_id), self._extract_episodes(season), display_id, title)
display_id, title)
class NRKTVSeriesIE(NRKTVSerieBaseIE): class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'playlist_mincount': 90,
}, {
# new layout, instalments, more entries
'url': 'https://tv.nrk.no/serie/lindmo',
'only_matching': True,
}, {
'url': 'https://tv.nrk.no/serie/blank', 'url': 'https://tv.nrk.no/serie/blank',
'info_dict': { 'info_dict': {
'id': 'blank', 'id': 'blank',
@ -696,17 +596,24 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e', 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
}, },
'playlist_mincount': 30, 'playlist_mincount': 30,
'expected_warnings': ['HTTP Error 404: Not Found'],
}, { }, {
# new layout, seasons # new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage', 'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': { 'info_dict': {
'id': 'backstage', 'id': 'backstage',
'title': 'Backstage', 'title': 'Backstage',
'description': 'md5:63692ceb96813d9a207e9910483d948b', 'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
}, },
'playlist_mincount': 60, 'playlist_mincount': 60,
'expected_warnings': ['HTTP Error 404: Not Found'], }, {
# new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'playlist_mincount': 10,
}, { }, {
# old layout # old layout
'url': 'https://tv.nrksuper.no/serie/labyrint', 'url': 'https://tv.nrksuper.no/serie/labyrint',
@ -725,13 +632,6 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
}, { }, {
'url': 'https://tv.nrk.no/serie/postmann-pat', 'url': 'https://tv.nrk.no/serie/postmann-pat',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
'info_dict': {
'id': 'dickie-dick-dickens',
},
'playlist_mincount': 8,
'expected_warnings': ['HTTP Error 404: Not Found'],
}] }]
@classmethod @classmethod
@ -742,32 +642,18 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
else super(NRKTVSeriesIE, cls).suitable(url)) else super(NRKTVSeriesIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) series_id = self._match_id(url)
domain = mobj.group('domain')
series_id = mobj.group('id')
title = description = None
webpage = self._download_webpage(url, series_id) webpage = self._download_webpage(url, series_id)
# New layout (e.g. https://tv.nrk.no/serie/backstage)
series = self._extract_series(webpage, series_id, fatal=False) series = self._extract_series(webpage, series_id, fatal=False)
if series: if series:
title = try_get(series, lambda x: x['titles']['title'], compat_str) title = try_get(series, lambda x: x['titles']['title'], compat_str)
description = try_get( description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str) series, lambda x: x['titles']['subtitle'], compat_str)
data = self._download_json(
'https://psapi.nrk.no/%s/catalog/series/%s/instalments'
% (domain, series_id), series_id, query={'pageSize': 50},
fatal=False)
if data:
return self.playlist_result(
self._entries(data, series_id), series_id, title, description)
# New layout (e.g. https://tv.nrk.no/serie/backstage)
if series:
entries = [] entries = []
entries.extend(self._extract_seasons(domain, series_id, series.get('seasons'))) entries.extend(self._extract_seasons(series.get('seasons')))
entries.extend(self._extract_entries(series.get('instalments'))) entries.extend(self._extract_entries(series.get('instalments')))
entries.extend(self._extract_episodes(series.get('extraMaterial'))) entries.extend(self._extract_episodes(series.get('extraMaterial')))
return self.playlist_result(entries, series_id, title, description) return self.playlist_result(entries, series_id, title, description)

View File

@ -541,10 +541,6 @@ class PeerTubeIE(InfoExtractor):
'format_id': format_id, 'format_id': format_id,
'filesize': file_size, 'filesize': file_size,
}) })
if format_id == '0p':
f['vcodec'] = 'none'
else:
f['fps'] = int_or_none(file_.get('fps'))
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -31,12 +31,7 @@ class PornHubBaseIE(InfoExtractor):
def dl(*args, **kwargs): def dl(*args, **kwargs):
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
ret = dl(*args, **kwargs) webpage, urlh = dl(*args, **kwargs)
if not ret:
return ret
webpage, urlh = ret
if any(re.search(p, webpage) for p in ( if any(re.search(p, webpage) for p in (
r'<body\b[^>]+\bonload=["\']go\(\)', r'<body\b[^>]+\bonload=["\']go\(\)',

View File

@ -269,7 +269,7 @@ class TeachableCourseIE(TeachableBaseIE):
r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)', r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
webpage): webpage):
li = mobj.group('li') li = mobj.group('li')
if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li): if 'fa-youtube-play' not in li:
continue continue
lecture_url = self._search_regex( lecture_url = self._search_regex(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li, r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2020.12.05' __version__ = '2020.12.02'