Compare commits

...

4 Commits

Author SHA1 Message Date
PRB0t
d5e09e3310
Merge 8edbee2a507a4337d99b5a1b9b73d4c6721dddd5 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:20:25 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
PRB0t
8edbee2a50 🤖 Add NoodleDude extractor 2022-07-22 12:14:48 +02:00
4 changed files with 105 additions and 10 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO',
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
]

View File

@ -833,6 +833,7 @@ from .nintendo import NintendoIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .nonktube import NonkTubeIE
from .noodledude import NoodleDudeIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE

View File

@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
import json
class NoodleDudeIE(InfoExtractor):
    """Information extractor for video pages on noodledude.io.

    The video page embeds a player through an <iframe>; the player page
    in turn references an HLS (m3u8) manifest and a poster image.
    """

    IE_NAME = 'NoodleDude'
    _VALID_URL = r'https?://(?:www\.)?noodledude\.io/videos/(?P<id>[0-9a-zA-Z_-]+)'
    _TEST = {
        'url': 'https://www.noodledude.io/videos/kawaii-vs-goth',
        'md5': '9d3465ea49d16860a531035517ea8aec',
        'info_dict': {
            'id': 'kawaii-vs-goth',
            'ext': 'mp4',
            'title': 'Kawaii VS Goth',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:f16fef1f758a4dc38041bd6648b9d3b2',
        },
    }

    # Referer sent with requests for the video page and the embedded player.
    _SITE_REFERER = 'https://www.noodledude.io/'
    # Referer sent with manifest and media requests.
    # NOTE(review): presumably the media host rejects requests without it;
    # confirm against the live site.
    _PLAYER_REFERER = 'https://iframe.mediadelivery.net/'

    def _real_extract(self, url):
        """Extract metadata and HLS formats for a single video page.

        Returns an info dict with 'id', 'title', 'description', 'formats'
        and 'thumbnail'.  A missing title, iframe or manifest URL is fatal;
        description and thumbnail are optional.
        """
        video_id = self._match_id(url)
        site_headers = {'Referer': self._SITE_REFERER}

        webpage = self._download_webpage(url, video_id, headers=site_headers)
        title = self._html_search_regex(
            r'<h1 id="video-title".*?>(.+?)</h1>', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description')

        # The actual player lives in an embedded iframe.
        iframe_url = self._search_regex(
            r'<iframe\s*src="(.+?)"', webpage, 'iframe url')
        player_page = self._download_webpage(
            iframe_url, video_id, note='Downloading player page',
            headers=site_headers)

        m3u8_url = self._search_regex(
            r'<source.*?src="(.+?)"', player_page, 'm3u8 url')
        # The poster is optional metadata: do not abort extraction without it.
        thumbnail = self._search_regex(
            r'<video.*?data-poster="(.+?)"', player_page, 'poster url',
            fatal=False)

        media_headers = {'Referer': self._PLAYER_REFERER}
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', headers=media_headers)
        # Make the downloader send the same Referer for every format.
        for fmt in formats:
            fmt.setdefault('http_headers', {})['Referer'] = self._PLAYER_REFERER
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
        }

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
'clientVersion': '19.45.4',
'clientVersion': '20.10.4',
'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone',
'osVersion': '18.1.0.22B83',
'osVersion': '18.3.2.22D82',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00',
'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
},
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00',
'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
},
},
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20241126.01.00',
'clientVersion': '2.20250312.04.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES),
}
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])',
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code)