Compare commits

...

5 Commits

Author SHA1 Message Date
dirkf
6e8837d66f
Merge 5860937e17a249d80739efd07629dacd2392eba2 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:19:37 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
dirkf
5860937e17 New extraction tactic for Kaltura ID using image URL 2022-02-08 01:48:32 +00:00
dirkf
09476ecdde [UNO] Add extractor for United Nations 2022-02-07 20:19:38 +00:00
4 changed files with 99 additions and 10 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
), ),
( (
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', 'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
), ),
] ]

View File

@ -1430,6 +1430,7 @@ from .dlive import (
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unity import UnityIE from .unity import UnityIE
from .uno import UNOIE
from .uol import UOLIE from .uol import UOLIE
from .uplynk import ( from .uplynk import (
UplynkIE, UplynkIE,

View File

@ -0,0 +1,64 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_timestamp,
url_or_none,
)
class UNOIE(InfoExtractor):
_VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?P<id>k\d[\w]+)'
_TESTS = [{
'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk',
'md5': '981c41cb283227f079d1e5059fd0d30c',
'info_dict': {
'id': '1_r3vy9ikk',
'ext': 'mp4',
'title': 'md5:abde2a46d396051535e5e6fd6f627a19',
'description': 'md5:2cba11ee153ae3e6ae2c629e7c4e39b0',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 5768,
'timestamp': 1625216872,
'upload_date': '20210702',
'uploader_id': 'UNWebTV_New_York',
}
}, {
'url': 'https://media.un.org/en/asset/k12/k12gpkg3qx',
'md5': '5978503ca886a922a0f00cf5a7e82395',
'info_dict': {
'id': '1_vohfjqkj',
'ext': 'mp4',
'title': '1851st Meeting, 81st session Committee on the Elimination of Discrimination Against Women (CEDAW)',
'description': 'Informal meeting with NGOs and human rights institutions - 1851st Meeting, 81st session CEDAW',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 3502,
'timestamp': 1644235332,
'upload_date': '20220207',
'uploader_id': 'nathalie.minard@un.org',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(r'partnerId\s*:\s*(\d+)\b', webpage, 'Partner ID')
video_id = self._search_regex(r'/p/%s(?:/\w+)+?/entry_id/(\w+)/' % (partner_id, ), webpage, 'Kaltura ID')
title = (
self._html_search_meta(('title', 'og:title'), webpage)
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title').rsplit('|', 1)[0]).strip()
result = self.url_result(
'kaltura:%s:%s' % (partner_id, video_id), 'Kaltura',
video_title=title,
video_id=video_id)
if result:
result.update({
'_type': 'url_transparent',
'description': self._html_search_meta(('description', 'og:description'), webpage, 'description'),
'creator': self._html_search_meta('author', webpage),
'upoader_id': self._html_search_meta('publisher', webpage),
'thumbnail': url_or_none(self._og_search_thumbnail(webpage)),
'timestamp': unified_timestamp(self._og_search_property('updated_time', webpage)),
})
return result

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
) )
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex( var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='') jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code) func_name = self._extract_n_function_name(jsi.code)