Compare commits

...

13 Commits

Author SHA1 Message Date
Tom-Oliver Heidel
22838f27d2
Merge 1a57b3c03f6c4b0ad25767e9c646fb67ba0b773b into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 06:56:44 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
Unknown
1a57b3c03f flake8 fix. 2020-08-07 01:50:00 +02:00
Unknown
c68a4ae679 auth fix. 2020-08-07 01:28:23 +02:00
Unknown
a58f3e7777 flake8 2020-08-06 23:58:43 +02:00
Unknown
fb4126a18c working login. aac hq is working if available 2020-08-06 23:31:12 +02:00
Unknown
2ab47fa3a8 updated test. sign in sc.py 2020-08-06 15:41:41 +02:00
Unknown
09cace68c2 hard web auth. 2020-08-06 00:47:37 +02:00
Unknown
66f48768b0 sc web-auth api 2020-08-05 19:10:25 +02:00
Unknown
ea4b8b6747 Revert "Merge remote-tracking branch 'origin/master'"
This reverts commit 2e6682d80e170e715adc68d5e79b6e946d16e622.
2020-08-04 23:22:27 +02:00
Unknown
2e6682d80e Merge remote-tracking branch 'origin/master' 2020-08-04 23:19:10 +02:00
Unknown
8fb38341ae oload.cloud (cloud extension support) 2018-09-26 00:43:52 +02:00
3 changed files with 121 additions and 20 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
), ),
( (
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', 'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
), ),
] ]

View File

@ -3,6 +3,8 @@ from __future__ import unicode_literals
import itertools import itertools
import re import re
import json
import random
from .common import ( from .common import (
InfoExtractor, InfoExtractor,
@ -28,6 +30,7 @@ from ..utils import (
update_url_query, update_url_query,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
sanitized_Request,
) )
@ -309,7 +312,81 @@ class SoundcloudIE(InfoExtractor):
raise raise
def _real_initialize(self): def _real_initialize(self):
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or "T5R4kgWS2PRf6lzLyIravUMnKlbIxQag" # 'EXLwg5lHTO2dslU5EePe3xkw0m1h86Cd' # 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
self._login()
# User-Agent string reported in the 'user_agent' field of the sign-in payload
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
# Query-string template; _login() fills it with the client ID
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
# Password sign-in endpoint; the '%s' receives the formatted query string
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
# Access token taken from the sign-in response by _login(); None until then
_access_token = None
# Extra request headers handed to the API download calls; _login() replaces
# this with an OAuth 'Authorization' header once a token has been obtained
_HEADERS = {}
# NOTE(review): presumably the netrc machine name consulted by
# _get_login_info() -- confirm against InfoExtractor
_NETRC_MACHINE = 'soundcloud'
def _login(self):
    """Sign in with username/password and remember the OAuth token.

    Does nothing when no username is configured.  Otherwise a JSON
    payload (credentials, signature, random device id, user agent) is
    POSTed to the web-auth password endpoint.  On success the access
    token is stored in ``self._access_token`` and ``self._HEADERS`` is
    replaced by an ``Authorization: OAuth <token>`` header; when the
    response carries no token a warning is reported and the headers are
    left unchanged.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    # Device id: four dash-separated blocks of six random decimal digits
    device_id = '-'.join(
        ''.join(str(random.randrange(10)) for _ in range(6))
        for _ in range(4))

    payload = {
        'client_id': self._CLIENT_ID,
        'recaptcha_pubkey': 'null',
        'recaptcha_response': 'null',
        'credentials': {
            'identifier': username,
            'password': password,
        },
        'signature': self.sign(username, password, self._CLIENT_ID),
        'device_id': device_id,
        'user_agent': self._USER_AGENT,
    }

    query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
    login = sanitized_Request(
        self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
    response = self._download_json(login, None)

    # The response may lack a 'session' object (or not be a dict at all);
    # treat that as "no token" instead of crashing with AttributeError so
    # that the warning below is actually reachable.
    session = response.get('session') if isinstance(response, dict) else None
    self._access_token = (
        session.get('access_token') if isinstance(session, dict) else None)

    if not self._access_token:
        self.report_warning('Unable to get access token, login may has failed')
        return
    self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
# signature generation
def sign(self, user, pw, clid):
    """Return the signature string for the web-auth password sign-in.

    The result has the form ``<rev>:<params>:<checksum hex>:<suffix>``.
    The checksum is a 24-bit rolling value computed over the key,
    revision, parameter string, user agent, user name and client id.

    :param user: account identifier; part of the hashed message
    :param pw: accepted for interface compatibility, not used here
    :param clid: client id; part of the hashed message
    """
    secret_key = "0763ed7314c69015fd4a0dc16bbf4b90"
    revision = "8"
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    suffix = 5  # only appended to the output, never hashed

    params = '-'.join(
        str(value) for value in (33, 1, 440123, 117, 1800000, 1042, 37, 37))
    message = ''.join((
        secret_key, revision, params, user_agent,
        user, clid, params, secret_key))

    checksum = 8011470
    for char in message:
        # rotate the 24-bit state right by one bit, then add the code point
        rotated = (checksum >> 1) + ((checksum & 1) << 23)
        checksum = (rotated + ord(char)) & 0xFFFFFF

    return '%s:%s:%x:%s' % (revision, params, checksum, suffix)
@classmethod @classmethod
def _resolv_url(cls, url): def _resolv_url(cls, url):
@ -389,7 +466,7 @@ class SoundcloudIE(InfoExtractor):
if not format_url: if not format_url:
continue continue
stream = self._download_json( stream = self._download_json(
format_url, track_id, query=query, fatal=False) format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
if not isinstance(stream, dict): if not isinstance(stream, dict):
continue continue
stream_url = url_or_none(stream.get('url')) stream_url = url_or_none(stream.get('url'))
@ -487,7 +564,7 @@ class SoundcloudIE(InfoExtractor):
info_json_url = self._resolv_url(self._BASE_URL + resolve_title) info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
info = self._download_json( info = self._download_json(
info_json_url, full_title, 'Downloading info JSON', query=query) info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
return self._extract_info_dict(info, full_title, token) return self._extract_info_dict(info, full_title, token)
@ -503,7 +580,7 @@ class SoundcloudPlaylistBaseIE(SoundcloudIE):
'ids': ','.join([compat_str(t['id']) for t in tracks]), 'ids': ','.join([compat_str(t['id']) for t in tracks]),
'playlistId': playlist_id, 'playlistId': playlist_id,
'playlistSecretToken': token, 'playlistSecretToken': token,
}) }, headers=self._HEADERS)
entries = [] entries = []
for track in tracks: for track in tracks:
track_id = str_or_none(track.get('id')) track_id = str_or_none(track.get('id'))
@ -547,7 +624,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
full_title += '/' + token full_title += '/' + token
info = self._download_json(self._resolv_url( info = self._download_json(self._resolv_url(
self._BASE_URL + full_title), full_title) self._BASE_URL + full_title), full_title, headers=self._HEADERS)
if 'errors' in info: if 'errors' in info:
msgs = (compat_str(err['error_message']) for err in info['errors']) msgs = (compat_str(err['error_message']) for err in info['errors'])
@ -574,7 +651,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
for i in itertools.count(): for i in itertools.count():
response = self._download_json( response = self._download_json(
next_href, playlist_id, next_href, playlist_id,
'Downloading track page %s' % (i + 1), query=query) 'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS)
collection = response['collection'] collection = response['collection']
@ -696,7 +773,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
user = self._download_json( user = self._download_json(
self._resolv_url(self._BASE_URL + uploader), self._resolv_url(self._BASE_URL + uploader),
uploader, 'Downloading user info') uploader, 'Downloading user info', headers=self._HEADERS)
resource = mobj.group('rsrc') or 'all' resource = mobj.group('rsrc') or 'all'
@ -721,7 +798,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
track_name = self._match_id(url) track_name = self._match_id(url)
track = self._download_json(self._resolv_url(url), track_name) track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS)
track_id = self._search_regex( track_id = self._search_regex(
r'soundcloud:track-stations:(\d+)', track['id'], 'track id') r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
@ -754,7 +831,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
data = self._download_json( data = self._download_json(
self._API_V2_BASE + 'playlists/' + playlist_id, self._API_V2_BASE + 'playlists/' + playlist_id,
playlist_id, 'Downloading playlist', query=query) playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
return self._extract_set(data, token) return self._extract_set(data, token)
@ -791,7 +868,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
for i in itertools.count(1): for i in itertools.count(1):
response = self._download_json( response = self._download_json(
next_url, collection_id, 'Downloading page {0}'.format(i), next_url, collection_id, 'Downloading page {0}'.format(i),
'Unable to download API page') 'Unable to download API page', headers=self._HEADERS)
collection = response.get('collection', []) collection = response.get('collection', [])
if not collection: if not collection:

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
# Regexes whose named group `id` captures the player ID from a player JS URL.
# The first pattern accepts both `player...` and `tv-player...` path segments;
# it must use a single `/` after the ID, otherwise real player URLs
# (`/s/player/<id>/tv-player-ias.vflset/...`) never match.
# NOTE(review): presumably tried in order by the caller -- confirm.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex( var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='') jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code) func_name = self._extract_n_function_name(jsi.code)