diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index ac1ccc404..808733ed1 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -6,9 +6,11 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + float_or_none, int_or_none, NO_DEFAULT, str_to_int, + urljoin, ) @@ -16,7 +18,7 @@ class XNXXIE(InfoExtractor): _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P[0-9a-z]+)/' _TESTS = [{ 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', - 'md5': '7583e96c15c0f21e9da3453d9920fbba', + 'md5': '73c071a361a09aae7e7d60008221fd13', 'info_dict': { 'id': '55awb78', 'ext': 'mp4', @@ -25,7 +27,29 @@ class XNXXIE(InfoExtractor): 'duration': 469, 'view_count': int, 'age_limit': 18, + 'tags': ['video game', 'skyrim', '3d', 'game', '3d game', 'video games', 'rule34', 'test', 'rough', 'sfm', 'fallout', 'porno game', 'skyrim hentai', 'h game', '3d horse', '3d porno anime', 'xx video wwxxx cartoon cartoons', 'gaming', 'games', '3d porno desenho'], + 'uploader': 'Glurp', + 'uploader_id': 'Glurp', + 'uploader_url': '/porn-maker/glurp', }, + }, { + 'url': 'https://www.xnxx.com/video-h46klf8/babes_-_come_back_to_bed_starring_abella_danger_and_darcie_dolce_clip', + 'md5': 'b8b4a594b4091de46ce05d0a9d45317c', + 'info_dict': { + 'id': 'h46klf8', + 'ext': 'mp4', + 'title': 'Babes - Come Back to Bed starring Abella Danger and Darcie Dolce clip', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 480, + 'view_count': int, + 'age_limit': 18, + 'tags': ['lesbicas', 'darcie dolce', 'darcie dolce lesbian', 'abella danger lesbian', 'babes', 'lesb', 'abella danger', 'mmf', 'come', 'darcie', 'lesbianas', 'mulher chupando peito da outra', 'abella', 'girl on girl', 'darcie dolce abella danger', 'chupando peitos', 'para', 'lesbian', 'chupando peitos lesbicas', 'black lesbians'], + 'uploader': 'Babes Network', + 'uploader_id': 'Babes Network', + 'uploader_url': '/porn-maker/babes3', + 'actors': [{'given_name': 'darcie dolce', 'url': 'https://www.xnxx.com/search/darcie%20dolce'}, {'given_name': 'darcie dolce lesbian', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20lesbian'}, {'given_name': 'abella danger lesbian', 'url': 'https://www.xnxx.com/search/abella%20danger%20lesbian'}, {'given_name': 'abella danger', 'url': 'https://www.xnxx.com/search/abella%20danger'}, {'given_name': 'darcie dolce abella danger', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20abella%20danger'}], + 'average_rating': float, + } }, { 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', 'only_matching': True, @@ -70,8 +94,27 @@ class XNXXIE(InfoExtractor): 'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False) duration = int_or_none(self._og_search_property('duration', webpage)) view_count = str_to_int(self._search_regex( - r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count', - default=None)) + r'-.+?\t+- (?P.+?) ', webpage, 'view count', group='views', + default=0)) + + tags = self._search_regex(r'(?P.+?)', webpage) + uploader_id = None + uploader_url = None + if uploader_data != []: + uploader_url, uploader_id = uploader_data[0][0:2] + + actors_data = re.findall(r'(?P.+?)', webpage) + actors = [] + if actors_data != []: + for actor_tuple in actors_data: + actors.append({ + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]), + }) + + rating = float_or_none(self._search_regex(r'(?P.+?)%', webpage, 'rating', group='rating')) return { 'id': video_id, @@ -81,4 +124,11 @@ class XNXXIE(InfoExtractor): 'view_count': view_count, 'age_limit': 18, 'formats': formats, + 'tags': tags, + 'creator': uploader_id, + 'uploader': uploader_id, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, + 'actors': actors, + 'average_rating': rating, }