Compare commits

...

6 Commits

Author SHA1 Message Date
dirkf
11ca9ef561
Merge 3918da628d1848ed3ec0fe98c242b5c25defd6b1 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:19:39 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
dirkf
3918da628d Extract further fields; implement some review comments 2022-02-25 00:09:45 +00:00
Volodymyr
35081ecf00 Minor fixes 2022-02-09 21:15:01 +02:00
Volodymyr
8105f8ed84 Add extractor for teleportal.ua 2022-02-07 18:43:43 +02:00
4 changed files with 124 additions and 10 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO',
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
]

View File

@ -1264,6 +1264,7 @@ from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telemb import TeleMBIE
from .teleportal import TeleportalIE
from .telequebec import (
TeleQuebecIE,
TeleQuebecSquatIE,

View File

@ -0,0 +1,89 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
parse_iso8601,
str_or_none,
str_to_int,
try_get,
url_or_none,
)
class TeleportalIE(InfoExtractor):
    """Information extractor for pages on teleportal.ua.

    The page path (with any leading ``ua/`` language prefix dropped) is used
    as the video id.  Metadata is read from two JSON endpoints: a backend
    endpoint addressed by that path, and a player API addressed by the
    ``hash`` value found in the backend response.
    """

    _VALID_URL = r'https?://(?:www\.)?teleportal\.ua/(?:ua/)?(?P<id>[0-9a-z-]+(?:/[0-9a-z-]+)*)'
    _TEST = {
        'url': 'https://teleportal.ua/ua/show/stb/master-cheff/bitva-sezonov/vypusk-3',
        # no permanent check on file contents as HLS may vary
        'info_dict': {
            'id': 'show/stb/master-cheff/bitva-sezonov/vypusk-3',
            'ext': 'mp4',
            'title': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!',
            'display_id': '2618466',
            'description': 'md5:4179bcc3a12edfa2f655888cd741ac09',
            'timestamp': 1644102480,
            'upload_date': '20220205',
            'thumbnail': r're:^https?://.+\.jpg$',
            'release_timestamp': 1643994000,
            'duration': 11254.0,
            'series_id': '20632',
            'series': 'МастерШеф. Битва сезонів 3 випуск: найогидніший випуск сезону!',
            'season': 'Битва сезонів',
            'episode': 'Найогидніший випуск сезону!',
            'episode_number': 3,
            'categories': ['Шоу'],
        },
        'params': {
            'hls_prefer_native': True,
            # 'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the teleportal.ua page at *url*.

        Raises KeyError/IndexError (as the original code did) when the
        backend response has no 'title' or the player API response has no
        first 'video' entry.
        """
        video_id = self._match_id(url)

        # Page-level metadata, looked up by the same path as the public page.
        backend_url = 'https://tp-back.starlight.digital/ua/' + video_id
        series_metadata = self._download_json(backend_url, video_id) or {}
        title = series_metadata['title']

        # The player API is addressed by the hash taken from the page metadata.
        _hash = series_metadata.get('hash', '')
        api_url = 'https://vcms-api2.starlight.digital/player-api/' + _hash
        api_metadata = self._download_json(
            api_url, video_id,
            query={
                'referer': 'https://teleportal.ua/',
                'lang': 'ua',
            })
        video_info = api_metadata['video'][0]

        # Try 'mediaHlsNoAdv' first and fall back to 'mediaHls'.  Stop only
        # once a manifest actually produced formats: extraction is non-fatal,
        # so a valid-looking URL may still yield nothing.
        formats = []
        for media_key in ('mediaHlsNoAdv', 'mediaHls'):
            m3u8_url = url_or_none(try_get(video_info, lambda x: x[media_key]))
            if not m3u8_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', fatal=False))
            if formats:
                break
        self._sort_formats(formats)

        thumbnail = url_or_none(video_info.get('poster'))
        category = series_metadata.get('typeTitle')
        # 'seasonGallery' may be absent or null; try_get covers both cases.
        season = video_info.get('seasonName') or try_get(
            series_metadata, lambda x: x['seasonGallery']['title'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(series_metadata.get('description')) or series_metadata.get('seoDescription'),
            'display_id': str_or_none(video_info.get('vcmsId')),
            # NOTE(review): 'hash' does not look like a standard info-dict
            # field; kept for backward compatibility - confirm it is wanted.
            'hash': _hash,
            'thumbnail': thumbnail,
            'timestamp': parse_iso8601(video_info.get('time_upload_video'), delimiter=' '),
            'release_timestamp': parse_iso8601(video_info.get('publishDate'), delimiter=' '),
            'duration': float_or_none(video_info.get('duration')),
            'series_id': str_or_none(series_metadata.get('id')),
            'series': series_metadata.get('title'),
            'season': season,
            'episode': video_info.get('name'),
            # The info-dict field is 'episode_number', not 'episode_num'.
            'episode_number': str_to_int(series_metadata.get('seriesTitle')),
            'categories': [category] if category else None,
        }

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
'clientVersion': '19.45.4',
'clientVersion': '20.10.4',
'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone',
'osVersion': '18.1.0.22B83',
'osVersion': '18.3.2.22D82',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00',
'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
},
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00',
'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
},
},
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20241126.01.00',
'clientVersion': '2.20250312.04.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES),
}
# Regexes for pulling the player id (named group 'id') out of a player JS URL.
# Listed from most to least specific; the last one is a loose fallback that
# matches any 'vfl...' token followed by '.js' at the end of the URL.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # Also accept the 'tv-player' variant.  There must be exactly one slash
    # after the id: a doubled '//' never matches real player URLs, which
    # would then fall through to the loose 'vfl...' pattern below.
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])',
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code)