Compare commits

...

6 Commits

Author SHA1 Message Date
Guredora
80a9ff1eb6
Merge 2730e55b36d299e17beacc4aaa8fa9441932c8aa into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:14:16 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
guredora
2730e55b36 [NhkRadio] fix error from flake8 2021-04-05 18:01:07 +09:00
guredora
7bb932b964 [NhkRadio] fix error from flake8 2021-04-05 17:50:39 +09:00
guredora
2bbe77d484 [NhkRadio] add new extractor 2021-04-04 23:42:45 +09:00
4 changed files with 144 additions and 10 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
), ),
( (
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', 'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
), ),
] ]

View File

@ -810,6 +810,10 @@ from .nhk import (
NhkVodIE, NhkVodIE,
NhkVodProgramIE, NhkVodProgramIE,
) )
from .nhkRadio import (
NhkRadioIE,
NhkRadioProgramIE
)
from .nhl import NHLIE from .nhl import NHLIE
from .nick import ( from .nick import (
NickIE, NickIE,

View File

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError, parse_iso8601
from ..compat import compat_HTTPError
import re
class NhkRadioBase(InfoExtractor):
    """Helpers shared by the NHK radio on-demand extractors."""

    def _get_json_meta(self, program_id, corner_id):
        """Fetch the metadata JSON of a program corner.

        The JSON is requested through ``self._download_json`` from
        ``.../radioondemand/json/<program_id>/bangumi_<program_id>_<corner_id>.json``.

        Raises ExtractorError (expected) when the server answers 404; any
        other download error is propagated unchanged.
        """
        program_corner_id = program_id + "_" + corner_id
        json_url = (
            "https://www.nhk.or.jp/radioondemand/json/" + program_id
            + "/bangumi_" + program_corner_id + ".json")
        try:
            return self._download_json(json_url, program_corner_id)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                raise ExtractorError("The invalid url", expected=True)
            # Anything other than a 404 must propagate; swallowing it would
            # leave this method without a value to return.
            raise

    def _extract_program(self, info, program_corner_id):
        """Build the info dict for one entry of the JSON ``detail_list``.

        ``info`` is a single detail dict; only its ``headline_id`` and the
        first element of its ``file_list`` are used. The resulting id is
        ``<program_corner_id>_<headline_id>``.

        Raises ExtractorError when ``headline_id`` or ``file_list`` is
        missing, instead of failing with an opaque TypeError/IndexError.
        """
        headline_id = info.get("headline_id")
        file_list = info.get("file_list")
        if headline_id is None or not file_list:
            raise ExtractorError("Unable to extract program information")

        video_id = program_corner_id + "_" + headline_id
        # Only the first file of the entry is extracted.
        audio = file_list[0]
        formats = self._extract_m3u8_formats(
            audio.get("file_name"), video_id, "m4a",
            entry_protocol="m3u8_native", m3u8_id="hls")
        self._sort_formats(formats)
        return {
            "id": video_id,
            "title": audio.get("file_title"),
            "formats": formats,
            "timestamp": parse_iso8601(audio.get("close_time")),
        }
class NhkRadioIE(NhkRadioBase):
    """Extractor for a single NHK radio on-demand player URL."""

    _VALID_URL = r"https?://www\.nhk\.or\.jp/radio/player/ondemand\.html\?p=(?P<program_id>\d+)_(?P<corner_id>\d+)_(?P<headline_id>\d+)"
    _TESTS = [
        {
            "url": "https://www.nhk.or.jp/radio/player/ondemand.html?p=4812_01_2898188",
            "info_dict": {
                "id": "4812_01_2898188",
                "ext": "m4a",
                "title": "世界へ発信!ニュースで英語術 #209▽“首相長男から接待” 総務省11人を処分",
                "upload_date": str,
                "timestamp": int,
            },
        },
        {
            "url": "https://www.nhk.or.jp/radio/player/ondemand.html?p=0444_01_2890944",
            "info_dict": {
                "ext": "m4a",
                "id": "0444_01_2890944",
                "title": "歌謡スクランブル 春色コレクション(3) ▽尾崎亜美",
                "upload_date": str,
                "timestamp": int,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict of the detail whose headline id is in the URL.

        Raises ExtractorError (expected) when no entry of the downloaded
        ``detail_list`` carries the requested headline id.
        """
        mobj = re.match(self._VALID_URL, url)
        program_id = mobj.group("program_id")
        corner_id = mobj.group("corner_id")
        headline_id = mobj.group("headline_id")

        program_corner_id = program_id + "_" + corner_id
        json_url = (
            "https://www.nhk.or.jp/radioondemand/json/"
            + program_id
            + "/bangumi_"
            + program_corner_id
            + ".json"
        )
        data = self._download_json(json_url, program_corner_id)

        # Pick the first detail matching the headline id, if any.
        wanted = next(
            (entry for entry in data["main"]["detail_list"]
             if headline_id == entry.get("headline_id")),
            None)
        if wanted is None:
            raise ExtractorError("The program not found", expected=True)
        return self._extract_program(wanted, program_corner_id)
class NhkRadioProgramIE(NhkRadioBase):
    """Extractor turning an NHK radio on-demand program page into a playlist."""

    _VALID_URL = r"https?://www\.nhk\.or\.jp/radio/ondemand/detail\.html\?p=(?P<program_id>\d+)_(?P<corner_id>\d+)"
    _TESTS = [
        {
            "url": "https://www.nhk.or.jp/radio/ondemand/detail.html?p=0164_01",
            "info_dict": {"title": "青春アドベンチャー", "id": "0164_01"},
            "playlist_mincount": 5,
        },
        {
            "url": "https://www.nhk.or.jp/radio/ondemand/detail.html?p=0455_01",
            "info_dict": {"id": "0455_01", "title": "弾き語りフォーユー"},
            "playlist_mincount": 5,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist with one entry per item of ``detail_list``.

        The playlist id is ``<program_id>_<corner_id>`` and its title is the
        ``program_name`` field of the downloaded metadata.
        """
        mobj = re.match(self._VALID_URL, url)
        program_id = mobj.group("program_id")
        corner_id = mobj.group("corner_id")
        playlist_id = program_id + "_" + corner_id

        meta = self._get_json_meta(program_id, corner_id)
        entries = [
            self._extract_program(detail, playlist_id)
            for detail in meta["main"]["detail_list"]
        ]
        return self.playlist_result(
            entries, playlist_id, meta["main"]["program_name"])

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
) )
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex( var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='') jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code) func_name = self._extract_n_function_name(jsi.code)