Compare commits

...

6 Commits

Author SHA1 Message Date
dirkf
0233170cf1
Merge 5c1e5a1160a3e65fd2673b6437f3882d348991cb into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-21 20:37:20 +01:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
dirkf
5c1e5a1160 [TubiTv] Add TubiTvShow series/season extractor based on yt-dlp 2024-10-08 16:18:39 +01:00
dirkf
76067fbdb1 [TubiTv] Update TubiTv extractor
* back-port login and extraction from yt-dlp
* further extract uploader, age_limit, cast, categories, series
2024-10-08 16:14:01 +01:00
dirkf
229f59e7c3 [core] Let Git ignore __pycache__, .pytest_cache 2024-10-07 15:52:33 +01:00
5 changed files with 241 additions and 49 deletions

2
.gitignore vendored
View File

@ -1,3 +1,4 @@
__pycache__/
*.pyc *.pyc
*.pyo *.pyo
*.class *.class
@ -5,6 +6,7 @@
*.DS_Store *.DS_Store
wine-py2exe/ wine-py2exe/
py2exe.log py2exe.log
.pytest_cache/
*.kate-swp *.kate-swp
build/ build/
dist/ dist/

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
), ),
( (
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', 'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
), ),
] ]

View File

@ -1328,7 +1328,10 @@ from .trovo import (
from .trunews import TruNewsIE from .trunews import TruNewsIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubitv import TubiTvIE from .tubitv import (
TubiTvIE,
TubiTvShowIE,
)
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tunein import ( from .tunein import (
TuneInClipIE, TuneInClipIE,

View File

@ -1,23 +1,62 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
get_element_by_id,
int_or_none, int_or_none,
join_nonempty,
js_to_json,
merge_dicts,
parse_age_limit,
sanitized_Request, sanitized_Request,
strip_or_none,
T,
traverse_obj,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
class TubiTvIE(InfoExtractor): class TubiTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)' IE_NAME = 'tubitv'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
_LOGIN_URL = 'http://tubitv.com/login' _LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv' _NETRC_MACHINE = 'tubitv'
_GEO_COUNTRIES = ['US']
_TESTS = [{ _TESTS = [{
'url': 'https://tubitv.com/movies/100004539/the-39-steps',
'info_dict': {
'id': '100004539',
'ext': 'mp4',
'title': 'The 39 Steps',
'description': 'md5:bb2f2dd337f0dc58c06cb509943f54c8',
'uploader_id': 'abc2558d54505d4f0f32be94f2e7108c',
'release_year': 1935,
'thumbnail': r're:^https?://.+\.(jpe?g|png)$',
'duration': 5187,
},
'params': {'skip_download': 'm3u8'},
'skip': 'This content is currently unavailable',
}, {
'url': 'https://tubitv.com/tv-shows/554628/s01-e01-rise-of-the-snakes',
'info_dict': {
'id': '554628',
'ext': 'mp4',
'title': 'S01:E01 - Rise of the Snakes',
'description': 'md5:ba136f586de53af0372811e783a3f57d',
'episode': 'Rise of the Snakes',
'episode_number': 1,
'season': 'Season 1',
'season_number': 1,
'uploader_id': '2a9273e728c510d22aa5c57d0646810b',
'release_year': 2011,
'thumbnail': r're:^https?://.+\.(jpe?g|png)$',
'duration': 1376,
},
'params': {'skip_download': 'm3u8'},
'skip': 'This content is currently unavailable',
}, {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320', 'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': { 'info_dict': {
@ -27,6 +66,7 @@ class TubiTvIE(InfoExtractor):
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
}, },
'skip': 'Content Unavailable',
}, { }, {
'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories', 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
'only_matching': True, 'only_matching': True,
@ -34,24 +74,42 @@ class TubiTvIE(InfoExtractor):
'url': 'http://tubitv.com/movies/383676/tracker', 'url': 'http://tubitv.com/movies/383676/tracker',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://tubitv.com/movies/560057/penitentiary?start=true', 'url': 'https://tubitv.com/tv-shows/200141623/s01-e01-episode-1',
'info_dict': { 'info_dict': {
'id': '560057', 'id': '200141623',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Penitentiary', 'title': 'Shameless S01:E01 - Episode 1',
'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9', 'description': 'Having her handbag stolen proves to be a blessing in disguise for Fiona when handsome stranger Steve comes to her rescue.',
'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2', 'timestamp': 1725148800,
'release_year': 1979, 'upload_date': '20240901',
'uploader': 'all3-media',
'uploader_id': '9b8e3a8d789b1c843f4b680c025a1853',
'release_year': 2004,
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 1',
'season_number': 1,
'series': 'Shameless',
'cast': list,
'age_limit': 17,
}, },
'params': { 'params': {
'skip_download': True, 'format': 'best/bestvideo',
'skip_download': 'm3u8'
}, },
}] }]
# DRM formats are included only to raise appropriate error
_UNPLAYABLE_FORMATS = ('hlsv6_widevine', 'hlsv6_widevine_nonclearlead', 'hlsv6_playready_psshv0',
'hlsv6_fairplay', 'dash_widevine', 'dash_widevine_nonclearlead')
def _login(self): def _login(self):
username, password = self._get_login_info() username, password = self._get_login_info()
if username is None: if username is None:
return return
self._perform_login(username, password)
def _perform_login(self, username, password):
self.report_login() self.report_login()
form_data = { form_data = {
'username': username, 'username': username,
@ -62,7 +120,7 @@ class TubiTvIE(InfoExtractor):
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_page = self._download_webpage( login_page = self._download_webpage(
request, None, False, 'Wrong login info') request, None, False, 'Wrong login info')
if not re.search(r'id="tubi-logout"', login_page): if get_element_by_id('tubi-logout', login_page) is None:
raise ExtractorError( raise ExtractorError(
'Login failed (invalid username/password)', expected=True) 'Login failed (invalid username/password)', expected=True)
@ -70,41 +128,146 @@ class TubiTvIE(InfoExtractor):
self._login() self._login()
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, video_type = self._match_valid_url(url).group('id', 'type')
video_data = self._download_json( webpage = self._download_webpage('https://tubitv.com/{0}/{1}/'.format(video_type, video_id), video_id)
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id) data = self._search_json(
title = video_data['title'] r'window\.__data\s*=', webpage, 'data', video_id,
transform_source=js_to_json)['video']['byId']
video_data = data[video_id]
info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
title = strip_or_none(info.get('title'))
info['title'] = title or strip_or_none(video_data['title'])
formats = self._extract_m3u8_formats( formats = []
self._proto_relative_url(video_data['url']), drm_formats = 0
video_id, 'mp4', 'm3u8_native')
for resource in traverse_obj(video_data, ('video_resources', lambda _, v: v['type'] and v['manifest']['url'])):
manifest_url = url_or_none(resource['manifest']['url'])
if not manifest_url:
continue
resource_type = resource['type']
if resource_type == 'dash':
formats.extend(self._extract_mpd_formats(manifest_url, video_id, mpd_id=resource_type, fatal=False))
elif resource_type in ('hlsv3', 'hlsv6'):
formats.extend(self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id=resource_type, fatal=False))
elif resource_type in self._UNPLAYABLE_FORMATS:
drm_formats += 1
else:
self.report_warning('Skipping unknown resource type "{0}"'.format(resource_type))
if not formats and drm_formats > 0:
self.report_drm(video_id)
elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed
raise ExtractorError('This content is currently unavailable', expected=True)
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = []
for thumbnail_url in video_data.get('thumbnails', []):
if not thumbnail_url:
continue
thumbnails.append({
'url': self._proto_relative_url(thumbnail_url),
})
subtitles = {} subtitles = {}
for sub in video_data.get('subtitles', []): for sub in traverse_obj(video_data, ('subtitles', lambda _, v: v['url'])):
sub_url = sub.get('url') sub_url = self._proto_relative_url(sub['url'])
if not sub_url: if not sub_url:
continue continue
subtitles.setdefault(sub.get('lang', 'English'), []).append({ subtitles.setdefault(sub.get('lang', 'English'), []).append({
'url': self._proto_relative_url(sub_url), 'url': sub_url,
}) })
return { season_number, episode_number, episode_title = self._search_regex(
r'\bS(\d+):E(\d+) - (.+)', info['title'], 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
return merge_dicts({
'id': video_id, 'id': video_id,
'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': thumbnails, 'season_number': int_or_none(season_number),
'description': video_data.get('description'), 'episode_number': int_or_none(episode_number),
'duration': int_or_none(video_data.get('duration')), 'episode': episode_title
'uploader_id': video_data.get('publisher_id'), }, traverse_obj(video_data, {
'release_year': int_or_none(video_data.get('year')), 'description': ('description', T(strip_or_none)),
} 'duration': ('duration', T(int_or_none)),
'uploader': ('import_id', T(strip_or_none)),
'uploader_id': ('publisher_id', T(strip_or_none)),
'release_year': ('year', T(int_or_none)),
'thumbnails': ('thumbnails', Ellipsis, T(self._proto_relative_url), {'url': T(url_or_none)}),
'cast': ('actors', Ellipsis, T(strip_or_none)),
'categories': ('tags', Ellipsis, T(strip_or_none)),
'age_limit': ('ratings', 0, 'value', T(parse_age_limit)),
}), traverse_obj(data, (lambda _, v: v['type'] == 's', {
'series': ('title', T(strip_or_none)),
# 'series_id': ('id', T(compat_str)),
}), get_all=False), info)
class TubiTvShowIE(InfoExtractor):
    """Playlist extractor for Tubi series pages, optionally limited to one season.

    Episodes are read from the ``window.__data`` JSON embedded in the series
    (or season) page and yielded as URL results delegated to ``TubiTvIE``.
    """
    IE_NAME = 'tubitv:series'
    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/\d+/(?P<show_name>[^/?#]+)(?:/season-(?P<season>\d+))?'
    _TESTS = [{
        'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
        'playlist_mincount': 390,
        'info_dict': {
            'id': 'the-joy-of-painting-with-bob-ross',
        },
    }, {
        'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross/season-1',
        'playlist_count': 13,
        'info_dict': {
            'id': 'the-joy-of-painting-with-bob-ross-season-1',
        },
    }, {
        'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross/season-3',
        'playlist_count': 13,
        'info_dict': {
            'id': 'the-joy-of-painting-with-bob-ross-season-3',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the episodes of the series (or single season) at `url`."""
        mobj = self._match_valid_url(url)
        playlist_id, selected_season = mobj.group('show_name', 'season')
        # Canonical series URL: everything up to the end of the show name, ie
        # without any `/season-N` suffix, query string or fragment, so that
        # season page URLs can safely be built by appending to it
        series_url = url[:mobj.end('show_name')]

        def entries(s_url, playlist_id, selected_season_num):
            """Yield a url_result for each episode found.

            s_url: canonical series URL (no season suffix, query or fragment)
            playlist_id: show name, used as the display ID when downloading
            selected_season_num: int season to extract, or None for all seasons
            """

            def get_season_data(s_num, fatal=False):
                # Fetch the series page (s_num is None) or a season page and
                # return the 'video' member of its embedded JSON
                if s_num is None:
                    page_url, s_id = s_url, playlist_id
                else:
                    page_url = '%s/season-%d' % (s_url, s_num)
                    s_id = '%s-season-%d' % (playlist_id, s_num)
                # a failed non-fatal download yields a false value, hence `or ''`
                webpage = self._download_webpage(page_url, s_id, fatal=fatal)
                data = self._search_json(
                    r'window\s*\.\s*__data\s*=', webpage or '', 'data', s_id,
                    transform_source=js_to_json, default={})
                return data['video'] if fatal else data.get('video', {})

            # Start from the requested season's page, or else the series page
            data = get_season_data(selected_season_num, fatal=True)

            # The {series_id}.seasons JSON may lack some episodes that are available
            # Iterate over the season numbers instead [1]
            # 1. https://github.com/yt-dlp/yt-dlp/issues/11170#issuecomment-2399918777
            if selected_season_num is None:
                seasons = traverse_obj(data, (
                    'byId', lambda _, v: v['type'] == 's', 'seasons', Ellipsis,
                    'number', T(int_or_none)))
            else:
                seasons = [selected_season_num]

            unavail_cnt = 0
            select_episodes = lambda _, v: v['type'] == 'v'
            for season_number in seasons:
                # The page data already fetched is used for the first season
                # iterated; each later season has its own page downloaded
                if not data:
                    data = get_season_data(season_number)

                # count the episodes whose `policy_match` is false or missing
                unavail_cnt += len(traverse_obj(data, ('byId', select_episodes, 'policy_match', T(lambda m: (not m) or None))))

                for episode_id, episode in traverse_obj(data, ('byId', select_episodes, T(lambda e: (e['id'], e)))):
                    yield merge_dicts(self.url_result(
                        'https://tubitv.com/tv-shows/{0}/'.format(episode_id), TubiTvIE.ie_key(), episode_id), {
                            'season_number': season_number,
                            'episode_number': int_or_none(episode.get('num')),
                    })

                # force a download for the next season
                data = None

            if unavail_cnt > 0:
                self.report_warning('%d items were marked as unavailable: check that the desired content is available or provide login parameters if needed' % unavail_cnt)

        return self.playlist_result(
            entries(series_url, playlist_id, int_or_none(selected_season)),
            join_nonempty(playlist_id, selected_season, delim='-season-'))

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
) )
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex( var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='') jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code) func_name = self._extract_n_function_name(jsi.code)