From 8105f8ed844304a07806813ee89e461e61316d76 Mon Sep 17 00:00:00 2001 From: Volodymyr Date: Mon, 7 Feb 2022 18:30:39 +0200 Subject: [PATCH 1/3] Add extractor for teleportal.ua --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/teleportal.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/teleportal.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50b7cb4a0..2fbf921ec 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1227,6 +1227,7 @@ from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE +from .teleportal import TeleportalIE from .telequebec import ( TeleQuebecIE, TeleQuebecSquatIE, diff --git a/youtube_dl/extractor/teleportal.py b/youtube_dl/extractor/teleportal.py new file mode 100644 index 000000000..b254ee3dc --- /dev/null +++ b/youtube_dl/extractor/teleportal.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TeleportalIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teleportal\.ua(/ua)?/(?P<id>[0-9a-z-/]+)' + _TEST = { + 'url': 'https://teleportal.ua/ua/show/stb/master-cheff/bitva-sezonov/vypusk-3', + 'md5': '07bd056c45b515fa9cc0202b8403df41', + 'info_dict': { + 'id': 'show/stb/master-cheff/bitva-sezonov/vypusk-3', + 'ext': 'mp4', + 'title': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': r're:^

Не пропустіть.*', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + backend_url = 'https://tp-back.starlight.digital/ua/{}'.format(video_id) + metadata = self._download_json(backend_url, video_id) + api_metadata = self._download_json('https://vcms-api2.starlight.digital/player-api/{}?referer=https://teleportal.ua/&lang=ua'.format(metadata["hash"]), video_id) + + return { + 'id': video_id, + 'title': metadata['title'], + 'description': metadata['description'], + 'real_id': metadata['id'], + 'hash': metadata['hash'], + 'url': api_metadata['video'][0]['mediaHls'], + 'thumbnail': api_metadata['video'][0]['poster'], + 'formats': self._extract_m3u8_formats(api_metadata['video'][0]['mediaHls'], video_id, 'mp4'), + } From 35081ecf00d3c204f29daac04b9f608906cbc8dc Mon Sep 17 00:00:00 2001 From: Volodymyr Date: Wed, 9 Feb 2022 21:15:01 +0200 Subject: [PATCH 2/3] Minor fixes --- youtube_dl/extractor/teleportal.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/teleportal.py b/youtube_dl/extractor/teleportal.py index b254ee3dc..6c976e7f6 100644 --- a/youtube_dl/extractor/teleportal.py +++ b/youtube_dl/extractor/teleportal.py @@ -22,15 +22,19 @@ class TeleportalIE(InfoExtractor): video_id = self._match_id(url) backend_url = 'https://tp-back.starlight.digital/ua/{}'.format(video_id) metadata = self._download_json(backend_url, video_id) - api_metadata = self._download_json('https://vcms-api2.starlight.digital/player-api/{}?referer=https://teleportal.ua/&lang=ua'.format(metadata["hash"]), video_id) + api_metadata = self._download_json('https://vcms-api2.starlight.digital/player-api/{}?referer=https://teleportal.ua/&lang=ua'.format(metadata['hash']), video_id) + + try: + thumbnail = api_metadata['video'][0]['poster'] + except (KeyError, IndexError): + thumbnail = None return { 'id': video_id, - 'title': metadata['title'], - 'description': metadata['description'], - 'real_id': metadata['id'], - 
'hash': metadata['hash'], - 'url': api_metadata['video'][0]['mediaHls'], - 'thumbnail': api_metadata['video'][0]['poster'], + 'title': metadata.get('title'), + 'description': metadata.get('description'), + 'real_id': metadata.get('id'), + 'hash': metadata.get('hash'), + 'thumbnail': thumbnail, 'formats': self._extract_m3u8_formats(api_metadata['video'][0]['mediaHls'], video_id, 'mp4'), } From 3918da628d1848ed3ec0fe98c242b5c25defd6b1 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 24 Feb 2022 23:52:47 +0000 Subject: [PATCH 3/3] Extract further fields; implement some review comments --- youtube_dl/extractor/teleportal.py | 83 ++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/teleportal.py b/youtube_dl/extractor/teleportal.py index 6c976e7f6..3becc2648 100644 --- a/youtube_dl/extractor/teleportal.py +++ b/youtube_dl/extractor/teleportal.py @@ -2,39 +2,88 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + clean_html, + float_or_none, + parse_iso8601, + str_or_none, + str_to_int, + try_get, + url_or_none, +) class TeleportalIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teleportal\.ua(/ua)?/(?P<id>[0-9a-z-/]+)' + _VALID_URL = r'https?://(?:www\.)?teleportal\.ua/(?:ua/)?(?P<id>[0-9a-z-]+(?:/[0-9a-z-]+)*)' _TEST = { 'url': 'https://teleportal.ua/ua/show/stb/master-cheff/bitva-sezonov/vypusk-3', - 'md5': '07bd056c45b515fa9cc0202b8403df41', + # no permanent check on file contents as HLS may vary 'info_dict': { 'id': 'show/stb/master-cheff/bitva-sezonov/vypusk-3', 'ext': 'mp4', 'title': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': r're:^

Не пропустіть.*', - } + 'display_id': '2618466', + 'description': 'md5:4179bcc3a12edfa2f655888cd741ac09', + 'timestamp': 1644102480, + 'upload_date': '20220205', + 'thumbnail': r're:^https?://.+\.jpg$', + 'release_timestamp': 1643994000, + 'duration': 11254.0, + 'series_id': '20632', + 'series': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!', + 'season': 'Битва сезонів', + 'episode': 'Найогидніший випуск сезону!', + 'episode_num': 3, + 'categories': ['Шоу'], + }, + 'params': { + 'hls_prefer_native': True, + # 'skip_download': True, + }, } def _real_extract(self, url): video_id = self._match_id(url) - backend_url = 'https://tp-back.starlight.digital/ua/{}'.format(video_id) - metadata = self._download_json(backend_url, video_id) - api_metadata = self._download_json('https://vcms-api2.starlight.digital/player-api/{}?referer=https://teleportal.ua/&lang=ua'.format(metadata['hash']), video_id) + backend_url = 'https://tp-back.starlight.digital/ua/' + video_id + series_metadata = self._download_json(backend_url, video_id) or {} + title = series_metadata['title'] + _hash = series_metadata.get('hash', '') + api_url = 'https://vcms-api2.starlight.digital/player-api/' + _hash + api_metadata = self._download_json( + api_url, video_id, + query={ + 'referer': 'https://teleportal.ua/', + 'lang': 'ua', + } + ) + video_info = api_metadata['video'][0] + formats = [] + for media in ('mediaHlsNoAdv', 'mediaHls'): + media = url_or_none(try_get(video_info, lambda x: x[media])) + if not media: + continue + formats.extend(self._extract_m3u8_formats(media, video_id, 'mp4', fatal=False)) + break + self._sort_formats(formats) - try: - thumbnail = api_metadata['video'][0]['poster'] - except (KeyError, IndexError): - thumbnail = None + thumbnail = url_or_none(video_info.get('poster')) + category = series_metadata.get('typeTitle') return { 'id': video_id, - 'title': metadata.get('title'), - 'description': metadata.get('description'), - 'real_id': metadata.get('id'), - 'hash': 
metadata.get('hash'), + 'title': title, + 'formats': formats, + 'description': clean_html(series_metadata.get('description')) or series_metadata.get('seoDescription'), + 'display_id': str_or_none(video_info.get('vcmsId')), + 'hash': _hash, 'thumbnail': thumbnail, - 'formats': self._extract_m3u8_formats(api_metadata['video'][0]['mediaHls'], video_id, 'mp4'), + 'timestamp': parse_iso8601(video_info.get('time_upload_video'), delimiter=' '), + 'release_timestamp': parse_iso8601(video_info.get('publishDate'), delimiter=' '), + 'duration': float_or_none(video_info.get('duration')), + 'series_id': str_or_none(series_metadata.get('id')), + 'series': series_metadata.get('title'), + 'season': video_info.get('seasonName') or series_metadata.get('seasonGallery', {}).get('title'), + 'episode': video_info.get('name'), + 'episode_num': str_to_int(series_metadata.get('seriesTitle')), + 'categories': [category] if category else None, }