Compare commits

...

17 Commits

Author SHA1 Message Date
DarkFighterLuke
04e6c4c41c
Merge f2a095884c293ebffd2363dc5b24c644bf6f4837 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:19:39 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
DarkFighterLuke
f2a095884c Set uploader default values to None 2022-02-27 01:41:22 +01:00
DarkFighterLuke
f124fd7f68 Apply refactoring suggested by @rautamiekka 2022-02-27 01:30:18 +01:00
DarkFighterLuke
caa0f4be1b Merge branch 'xvideos' of github.com:DarkFighterLuke/youtube-dl into xvideos 2022-02-26 16:52:09 +01:00
DarkFighterLuke
109b85c534 Fix checks on empty data
modified:   youtube_dl/extractor/xvideos.py
2022-02-26 16:51:59 +01:00
DarkFighterLuke
e048a6184b Fix checks on empty data 2022-02-26 16:50:40 +01:00
DarkFighterLuke
3aba2a7912 Add default fallback values for creator_data 2022-02-26 15:51:58 +01:00
DarkFighterLuke
2606c3108e Check whether actors_data has been found 2022-02-26 15:50:14 +01:00
DarkFighterLuke
48e5ff4bb1 Add default fallback values 2022-02-26 15:45:23 +01:00
DarkFighterLuke
fd7aaa6ae9 Rename views to view_count 2022-02-26 15:28:09 +01:00
DarkFighterLuke
81787fd733 Scrape uploader url 2022-02-26 15:16:41 +01:00
DarkFighterLuke
a4ca768169 Scrape views 2022-02-26 15:06:16 +01:00
DarkFighterLuke
16a2888b1a Scrape actors
modified:   youtube_dl/extractor/xvideos.py
2022-02-26 14:58:52 +01:00
DarkFighterLuke
b7340781df Scrape creator 2022-02-26 14:54:42 +01:00
DarkFighterLuke
80abd212b4 Scrape tags 2022-02-26 14:25:01 +01:00
3 changed files with 74 additions and 15 deletions

View File

@ -232,8 +232,32 @@ _NSIG_TESTS = [
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
), ),
( (
'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', 'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', 'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
), ),
] ]

View File

@ -10,6 +10,8 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
str_to_int,
urljoin,
) )
@ -25,14 +27,21 @@ class XVideosIE(InfoExtractor):
(?P<id>[0-9]+) (?P<id>[0-9]+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', 'url': 'https://www.xvideos.com/video50011247/when_girls_play_-_adriana_chechik_abella_danger_-_tradimento_-_twistys',
'md5': '14cea69fcb84db54293b1e971466c2e1', 'md5': 'aa54f96311768b3a8bfe54b8c8fda070',
'info_dict': { 'info_dict': {
'id': '4588838', 'id': '50011247',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Biker Takes his Girl', 'title': 'When Girls Play - (Adriana Chechik, Abella Danger) - Betrayal - Twistys',
'duration': 108, 'duration': 720,
'age_limit': 18, 'age_limit': 18,
'tags': ['lesbian', 'teen', 'hardcore', 'latina', 'rough', 'squirt', 'big-ass', 'cheater', 'twistys', 'cheat', 'ass-play', 'when-girls-play'],
'creator': 'Twistys',
'uploader': 'Twistys',
'uploader_id': 'Twistys',
'uploader_url': '/channels/twistys1',
'actors': [{'given_name': 'Adriana Chechik', 'url': 'https://www.xvideos.com/pornstars/adriana-chechik'}, {'given_name': 'Abella Danger', 'url': 'https://www.xvideos.com/pornstars/abella-danger'}],
'view_count': int,
} }
}, { }, {
'url': 'https://flashservice.xvideos.com/embedframe/4588838', 'url': 'https://flashservice.xvideos.com/embedframe/4588838',
@ -137,6 +146,25 @@ class XVideosIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
tags = self._search_regex(r'<meta name="keywords" content="xvideos,xvideos\.com, x videos,x video,porn,video,videos,(?P<tag>.+?)"', webpage, 'tags', group='tag', default='').split(',')
creator_data = re.findall(r'<a href="(?P<creator_url>.+?)" class="btn btn-default label main uploader-tag hover-name"><span class="name">(?P<creator>.+?)<', webpage)
creator = None
uploader_url = None
if creator_data != []:
uploader_url, creator = creator_data[0][0:2]
actors_data = re.findall(r'href="(?P<actor_url>/pornstars/.+?)" class="btn btn-default label profile hover-name"><span class="name">(?P<actor_name>.+?)</span>', webpage)
actors = []
if actors_data != []:
for actor_tuple in actors_data:
actors.append({
'given_name': actor_tuple[1],
'url': urljoin(url, actor_tuple[0]),
})
views = self._search_regex(r'<strong class="mobile-hide">(?P<views>.+?)<', webpage, 'views', group='views', default=None)
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@ -144,4 +172,11 @@ class XVideosIE(InfoExtractor):
'duration': duration, 'duration': duration,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'age_limit': 18, 'age_limit': 18,
'tags': tags,
'creator': creator,
'uploader': creator,
'uploader_id': creator,
'uploader_url': uploader_url,
'actors': actors,
'view_count': str_to_int(views),
} }

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20250120.19.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
@ -133,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -692,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})//(?:tv-)?player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
) )
@ -1857,7 +1857,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex( var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='') jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code) func_name = self._extract_n_function_name(jsi.code)