Compare commits

..

2 Commits

Author SHA1 Message Date
Remita Amine
07333d0062 [telequebec] Fix Extraction and Add Support for video.telequebec.tv
closes #25733
closes #26883
closes #27339
2020-12-08 00:35:47 +01:00
Sergey M․
5bd7ad2e81
[youtube:tab] Capture and output alerts (closes #27340) 2020-12-08 01:12:00 +07:00
3 changed files with 110 additions and 73 deletions

View File

@ -1152,6 +1152,7 @@ from .telequebec import (
TeleQuebecSquatIE, TeleQuebecSquatIE,
TeleQuebecEmissionIE, TeleQuebecEmissionIE,
TeleQuebecLiveIE, TeleQuebecLiveIE,
TeleQuebecVideoIE,
) )
from .teletask import TeleTaskIE from .teletask import TeleTaskIE
from .telewebion import TelewebionIE from .telewebion import TelewebionIE

View File

@ -12,25 +12,16 @@ from ..utils import (
class TeleQuebecBaseIE(InfoExtractor): class TeleQuebecBaseIE(InfoExtractor):
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
@staticmethod @staticmethod
def _result(url, ie_key): def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url(url, {'geo_countries': ['CA']}), 'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
'ie_key': ie_key, 'ie_key': 'BrightcoveNew',
} }
@staticmethod
def _limelight_result(media_id):
return TeleQuebecBaseIE._result(
'limelight:media:' + media_id, 'LimelightMedia')
@staticmethod
def _brightcove_result(brightcove_id):
return TeleQuebecBaseIE._result(
'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
% brightcove_id, 'BrightcoveNew')
class TeleQuebecIE(TeleQuebecBaseIE): class TeleQuebecIE(TeleQuebecBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -44,14 +35,18 @@ class TeleQuebecIE(TeleQuebecBaseIE):
# available till 01.01.2023 # available till 01.01.2023
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
'info_dict': { 'info_dict': {
'id': '577116881b4b439084e6b1cf4ef8b1b3', 'id': '6155972771001',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Un petit choc et puis repart!', 'title': 'Un petit choc et puis repart!',
'description': 'md5:067bc84bd6afecad85e69d1000730907', 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
'timestamp': 1589262469,
'uploader_id': '6150020952001',
'upload_date': '20200512',
}, },
'params': { 'params': {
'skip_download': True, 'format': 'bestvideo',
}, },
'add_ie': ['BrightcoveNew'],
}, { }, {
'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout', 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
'info_dict': { 'info_dict': {
@ -65,7 +60,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
}, },
'params': { 'params': {
'format': 'bestvideo', 'format': 'bestvideo',
'skip_download': True,
}, },
'add_ie': ['BrightcoveNew'], 'add_ie': ['BrightcoveNew'],
}, { }, {
@ -79,25 +73,20 @@ class TeleQuebecIE(TeleQuebecBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
media_id = self._match_id(url) media_id = self._match_id(url)
media = self._download_json(
media_data = self._download_json( 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
media_id)['media'] media_id)['media']
source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
source_id = media_data['streamInfo']['sourceId'] info = self._brightcove_result(source_id, '22gPKdt7f')
source = (try_get( product = media.get('product') or {}
media_data, lambda x: x['streamInfo']['source'], season = product.get('season') or {}
compat_str) or 'limelight').lower()
if source == 'brightcove':
info = self._brightcove_result(source_id)
else:
info = self._limelight_result(source_id)
info.update({ info.update({
'title': media_data.get('title'), 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
'description': try_get( 'series': try_get(season, lambda x: x['serie']['titre']),
media_data, lambda x: x['descriptions'][0]['text'], compat_str), 'season': season.get('name'),
'duration': int_or_none( 'season_number': int_or_none(season.get('seasonNo')),
media_data.get('durationInMilliseconds'), 1000), 'episode': product.get('titre'),
'episode_number': int_or_none(product.get('episodeNo')),
}) })
return info return info
@ -148,7 +137,7 @@ class TeleQuebecSquatIE(InfoExtractor):
} }
class TeleQuebecEmissionIE(TeleQuebecBaseIE): class TeleQuebecEmissionIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
@ -160,15 +149,16 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente', 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
'info_dict': { 'info_dict': {
'id': '66648a6aef914fe3badda25e81a4d50a', 'id': '6154476028001',
'ext': 'mp4', 'ext': 'mp4',
'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?", 'title': 'Des soins esthétiques à 377 % dintérêts annuels, ça vous tente?',
'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014', 'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
'upload_date': '20171024', 'upload_date': '20200505',
'timestamp': 1508862118, 'timestamp': 1588713424,
'uploader_id': '6150020952001',
}, },
'params': { 'params': {
'skip_download': True, 'format': 'bestvideo',
}, },
}, { }, {
'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression', 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
@ -187,26 +177,26 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
media_id = self._search_regex( media_id = self._search_regex(
r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage, r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
'limelight id')
info = self._limelight_result(media_id) return self.url_result(
info.update({ 'http://zonevideo.telequebec.tv/media/' + media_id,
'title': self._og_search_title(webpage, default=None), TeleQuebecIE.ie_key())
'description': self._og_search_description(webpage, default=None),
})
return info
class TeleQuebecLiveIE(InfoExtractor): class TeleQuebecLiveIE(TeleQuebecBaseIE):
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)' _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
_TEST = { _TEST = {
'url': 'http://zonevideo.telequebec.tv/endirect/', 'url': 'http://zonevideo.telequebec.tv/endirect/',
'info_dict': { 'info_dict': {
'id': 'endirect', 'id': '6159095684001',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True, 'is_live': True,
'description': 'Canal principal de Télé-Québec',
'uploader_id': '6150020952001',
'timestamp': 1590439901,
'upload_date': '20200525',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -214,25 +204,49 @@ class TeleQuebecLiveIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) return self._brightcove_result('6159095684001', 'skCsmi2Uw')
m3u8_url = None
webpage = self._download_webpage(
'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
fatal=False)
if webpage:
m3u8_url = self._search_regex(
r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'm3u8 url', default=None, group='url')
if not m3u8_url:
m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls')
self._sort_formats(formats)
return { class TeleQuebecVideoIE(TeleQuebecBaseIE):
'id': video_id, _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
'title': self._live_title('Télé-Québec - En direct'), _TESTS = [{
'is_live': True, 'url': 'https://video.telequebec.tv/player/31110/stream',
'formats': formats, 'info_dict': {
} 'id': '6202570652001',
'ext': 'mp4',
'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
'upload_date': '20201019',
'timestamp': 1603115930,
'uploader_id': '6101674910001',
},
'params': {
'format': 'bestvideo',
},
}, {
'url': 'https://video.telequebec.tv/player-live/28527',
'only_matching': True,
}]
def _call_api(self, path, video_id):
    """Query the telequebec assets API and return the 'data' member.

    `path` is appended to the assets endpoint on
    beacon.playback.api.brightcove.com; `video_id` is forwarded to
    `_download_json` together with the fixed web device query.
    """
    api_url = 'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path
    response = self._download_json(
        api_url, video_id,
        query={
            'device_layout': 'web',
            'device_type': 'web',
        })
    return response['data']
def _real_extract(self, url):
    """Resolve a video.telequebec.tv player URL into a Brightcove result.

    Fetches the asset and its first listed stream through `_call_api`,
    builds the result with `_brightcove_result` and adds description,
    series, season number, episode and episode number from the asset.
    """
    asset_id = self._match_id(url)
    asset = self._call_api(asset_id, asset_id)['asset']

    # Only the first entry of the asset's stream list is queried.
    first_stream_id = asset['streams'][0]['id']
    stream = self._call_api(
        asset_id + '/streams/' + first_stream_id, asset_id)['stream']

    # The stream's 'url' value is what gets passed on as the Brightcove id.
    brightcove_id = stream['url']
    account_id = try_get(
        stream, lambda x: x['video_provider_details']['account_id'])
    if not account_id:
        # Fall back to the hard-coded account when the stream names none.
        account_id = '6101674910001'

    result = self._brightcove_result(brightcove_id, 'default', account_id)
    description = asset.get('long_description') or asset.get('short_description')
    result.update({
        'description': description,
        'series': asset.get('series_original_name'),
        'season_number': int_or_none(asset.get('season_number')),
        'episode': asset.get('original_name'),
        'episode_number': int_or_none(asset.get('episode_number')),
    })
    return result

View File

@ -3060,6 +3060,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
return uploader return uploader
@staticmethod
def _extract_alert(data):
    """Gather alert messages from `data` into one newline-joined string.

    Every dict entry of data['alerts'] is inspected for a dict under
    alertRenderer.text; its message is looked up with 'simpleText'
    tried before runs[0]['text']. Entries without a message are
    skipped, so the result is an empty string when nothing is found.
    """
    messages = []
    alert_entries = try_get(data, lambda x: x['alerts'], list) or []
    for entry in alert_entries:
        if not isinstance(entry, dict):
            continue
        renderer_text = try_get(
            entry, lambda x: x['alertRenderer']['text'], dict)
        if not renderer_text:
            continue
        message = try_get(
            renderer_text,
            (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
            compat_str)
        if message:
            messages.append(message)
    return '\n'.join(messages)
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token): def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
selected_tab = self._extract_selected_tab(tabs) selected_tab = self._extract_selected_tab(tabs)
renderer = try_get( renderer = try_get(
@ -3127,6 +3145,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
compat_str) or video_id compat_str) or video_id
if video_id: if video_id:
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id) return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
# Capture and output alerts
alert = self._extract_alert(data)
if alert:
raise ExtractorError(alert, expected=True)
# Failed to recognize # Failed to recognize
raise ExtractorError('Unable to recognize tab page') raise ExtractorError('Unable to recognize tab page')