Compare commits

...

8 Commits

Author SHA1 Message Date
dirkf
d96975ac23
Merge da90df06c3b8e7e6cc66a124c196aef4eb856a89 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:20:23 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
dirkf
da90df06c3
Linty 2024-11-29 14:27:29 +00:00
dirkf
98e6231a25
Further small improvements 2024-11-29 14:17:10 +00:00
dirkf
ab9ad567d8
Incorporate changes from PR #32979
* update tests
* send `Referer` when fetching media link

Co-authored-by: mp3butcher <julienvalentin51@gmail.com>
2024-11-29 14:11:41 +00:00
dirkf
9dd91758ae [DoodStream] Support more TLDs
* also fix title extraction
2022-06-04 09:26:41 +01:00
dirkf
3687a4f9bd [DoodStream] Add extractor from yt-dlp back-port and improve 2022-06-04 08:19:09 +01:00
4 changed files with 167 additions and 10 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
), ),
( (
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', 'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
), ),
] ]

View File

@ -0,0 +1,132 @@
# coding: utf-8
from __future__ import unicode_literals
from random import choice as random_choice
from string import ascii_letters, digits
from time import time as time_time
from ..compat import compat_filter as filter
from ..utils import (
clean_html,
ExtractorError,
get_element_by_class,
parse_duration,
parse_filesize,
update_url_query,
unified_strdate,
url_or_none,
)
from .common import InfoExtractor
class DoodStreamIE(InfoExtractor):
# dood.* redirects
# .watch -> .re (but HEAD request fails in GenericIE)
# .so -> .li
_VALID_URL = r'https?://(?:www\.)?(?P<host>dood\.(?:to|la|li|pm|re|sh|watch|ws|one)|ds2play\.com)/[ed]/(?P<id>[a-z\d]+)'
_TESTS = [{
'url': 'https://dood.li/e/h7ecgw5oqn8k',
'md5': '90f2af170551c17fc78bee7426890054',
'info_dict': {
'id': 'h7ecgw5oqn8k',
'ext': 'mp4',
'title': 'Free-Slow-Music',
'upload_date': '20230814',
'thumbnail': 'https://img.doodcdn.co/splash/7mbnwydhb6kb7xyk.jpg',
},
}, {
'url': 'https://dood.to/d/is34uy8wvaet',
'md5': '04740d3ba93bcd638aa7a097d9226710',
'info_dict': {
'id': 'is34uy8wvaet',
'ext': 'mp4',
'title': 'Akhanda (2021) Telugu DVDScr MP3 700MB',
'upload_date': '20211202',
'thumbnail': r're:https?://img\.doodcdn\.com?/[\w/]+\.jpg',
'filesize_approx': int,
'duration': 9886,
},
'skip': 'Video not found',
}, {
'url': 'https://dood.sh/d/wlihoael8uog',
'md5': '2c14444c89788cc309738c1560abe278',
'info_dict': {
'id': 'wlihoael8uog',
'ext': 'mp4',
'title': 'VID 20220319 161659',
'thumbnail': r're:https?://img\.doodcdn\.com?/splash/rmpnhb8ckkk79cge\.jpg',
'upload_date': '20220319',
'filesize_approx': int,
'duration': 12.0,
},
}, {
'url': 'http://dood.ws/d/h7ecgw5oqn8k',
'only_matching': True,
}, {
'url': 'https://dood.li/d/wlihoael8uog',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
host = 'dood.li'
url = 'https://%s/e/%s' % (host, video_id)
webpage = self._download_webpage(url, video_id, note='Downloading "/e/" webpage')
def get_title(html, fatal=False):
return self._html_search_regex(r'<title\b[^>]*>([^<]+?)(?:[|-]\s+DoodStream\s*)?</title', html, 'title', fatal=fatal)
title = get_title(webpage)
if title == 'Video not found' or (
title == '' and 'Not Found' == self._html_search_regex(r'<h1\b[^>]*>([^<]+?)</h1', webpage, 'heading1', default=None)):
raise ExtractorError(title, expected=True)
pass_md5, token = self._search_regex(
r'["\']/(?P<pm>pass_md5/[\da-f-]+/(?P<tok>[\da-z]+))', webpage, 'tokens',
group=('pm', 'tok'))
headers = {
'Referer': url,
}
# construct the media link
final_url = self._download_webpage(
'https://%s/%s' % (host, pass_md5), video_id, headers={
'Referer': url,
}, note='Downloading authpage URL')
final_url += ''.join((random_choice(ascii_letters + digits)
for _ in range(10)))
final_url = update_url_query(final_url, {
'token': token,
'expiry': int(time_time() * 1000),
})
# get additional metadata
thumb = next(filter(None, (url_or_none(self._html_search_meta(x, webpage, default=None))
for x in ('og:image', 'twitter:image'))), None)
description = self._html_search_meta(
('og:description', 'description', 'twitter:description'), webpage, default=None)
webpage = self._download_webpage(
'https://%s/d/%s' % (host, video_id), video_id, fatal=False,
note='Downloading alternative "/d/" page') or ''
title = (
self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
or get_title(webpage, fatal=(title is not None))
or title)
def get_class_text(x):
return clean_html(get_element_by_class(x, webpage))
return {
'id': video_id,
'title': title,
'url': final_url,
'http_headers': headers,
'ext': 'mp4',
'upload_date': unified_strdate(get_class_text('uploadate')),
'duration': parse_duration(get_class_text('length')),
'filesize_approx': parse_filesize(get_class_text('size')),
'description': description,
'thumbnail': thumb,
}

View File

@ -304,6 +304,7 @@ from .dlf import (
from .dfb import DFBIE from .dfb import DFBIE
from .dhm import DHMIE from .dhm import DHMIE
from .digg import DiggIE from .digg import DiggIE
from .doodstream import DoodStreamIE
from .dotsub import DotsubIE from .dotsub import DotsubIE
from .douyutv import ( from .douyutv import (
DouyuShowIE, DouyuShowIE,

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
) )
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex( var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='') jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code) func_name = self._extract_n_function_name(jsi.code)