Compare commits

...

8 Commits

Author SHA1 Message Date
Jia Rong
ba5329111e
Merge c950d57de684b3ab6613960565ccf4189221b9f1 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:00:56 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
fourjr
c950d57de6 [channelnewsasia] follow conventions 2020-11-22 18:04:45 +08:00
fourjr
59e9536b98 [channelnewsasia] fix tests, more relaxed regex 2020-11-22 18:03:12 +08:00
fourjr
357a506c20 [ChannelNewsAsia] follow conventions 2020-11-22 00:03:40 +08:00
fourjr
6ee229b4f1 [ChannelNewsAsia] add fallback 2020-11-22 00:02:27 +08:00
fourjr
9aeb231113 [ChannelNewsAsia] Add new extractor 2020-11-21 23:01:32 +08:00
4 changed files with 78 additions and 10 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO',
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
]

View File

@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class ChannelNewsAsiaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?channelnewsasia\.com/(?:(?:-|\w|\d)+)/(?:(?:-|\w|\d)+)/(?P<id>(?:-|\w|\d)+)'
_TESTS = [{
'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech/wizards-of-tech-body-13515106',
'md5': 'ed9ed143052f0da3ee8a8fa59ba16870',
'info_dict': {
'id': 'w0ZWRzajE6qDPXDb7DSeaOCJ3bJ3GDqC',
'ext': 'mp4',
'title': 'Wizards Of Tech_2020_0_1_Body',
'description': 'md5:b3882dd00e329e623a179465de9f5478',
},
}, {
'url': 'https://www.channelnewsasia.com/news/asia/removing-the-negative-influences-of-religion-in-tibet-video-13604084',
'md5': 'ed846cfca037823fa6d3d0d7af8a4e8f',
'info_dict': {
'id': 'ljZjd0ajE6NNMhVJ3Gb-QfL1l0p-qW6-',
'ext': 'mp4',
'title': "Removing the 'negative influences of religion' in Tibet | Video",
'description': 'md5:777989926133319de6f6501372175fbf',
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_id = (
self._search_regex(
r'id="ooyala-\d+-((?:\d|\w|-)+)--\d+', webpage, 'ooyala id',
default=None, fatal=False)
or self._search_regex(
r'video-asset-id="((?:\d|\w|-)+)', webpage, 'ooyala id',
default=None, fatal=False))
return self.url_result(
'ooyala:' + ooyala_id, 'Ooyala', ooyala_id)

View File

@ -213,6 +213,7 @@ from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .channelnewsasia import ChannelNewsAsiaIE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
'clientVersion': '19.45.4',
'clientVersion': '20.10.4',
'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone',
'osVersion': '18.1.0.22B83',
'osVersion': '18.3.2.22D82',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00',
'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
},
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00',
'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
},
},
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20241126.01.00',
'clientVersion': '2.20250312.04.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES),
}
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])',
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code)