2023-07-22 03:32:02 +09:00
# coding: utf-8
2014-01-21 22:16:44 +09:00
from __future__ import unicode_literals
2016-11-06 23:42:41 +09:00
import re
2013-06-24 05:27:16 +09:00
from . common import InfoExtractor
2023-07-22 03:32:02 +09:00
from . . compat import compat_str
2016-05-06 00:02:54 +09:00
from . . utils import (
2020-05-20 05:39:41 +09:00
determine_ext ,
2016-05-06 00:02:54 +09:00
ExtractorError ,
int_or_none ,
2023-07-22 03:32:02 +09:00
join_nonempty ,
2019-10-06 00:04:49 +09:00
merge_dicts ,
2016-05-06 00:02:54 +09:00
str_to_int ,
2023-07-22 03:32:02 +09:00
T ,
traverse_obj ,
2016-05-06 00:02:54 +09:00
unified_strdate ,
2018-07-21 21:08:28 +09:00
url_or_none ,
2023-07-22 03:32:02 +09:00
urljoin ,
2016-05-06 00:02:54 +09:00
)
2013-06-24 05:27:16 +09:00
class RedTubeIE(InfoExtractor):
    """Extractor for redtube.com watch pages and embed.redtube.com player URLs."""

    # Matches both "<subdomain>.redtube.com/<id>" and the embed form
    # "embed.redtube.com/?...id=<id>"; the numeric id is captured as `id`.
    _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
    # Patterns used by _extract_urls() to find embedded players in a page.
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
    _TESTS = [{
        'url': 'https://www.redtube.com/38864951',
        'md5': 'd7de9cb32e8adb3f6379f1a30f655fae',
        'info_dict': {
            'id': '38864951',
            'ext': 'mp4',
            'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
            'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: LeoLulu',
            'upload_date': '20210111',
            'timestamp': 1610343109,
            'duration': 646,
            'view_count': int,
            'age_limit': 18,
            'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
        },
        'expected_warnings': [
            'Failed to download m3u8 information: HTTP Error 404',
        ],
        'params': {
            'format': '[format_id !^= hls]',
        },
    }, {
        'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
        'only_matching': True,
    }, {
        'url': 'http://it.redtube.com/66418',
        'only_matching': True,
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        """Yield every embedded player URL found in `webpage`.

        Each pattern in `_EMBED_REGEX` has a single capturing group (`url`),
        so re.findall() returns the matched URL strings directly.
        """
        for embed_re in cls._EMBED_REGEX:
            for embed_url in re.findall(embed_re, webpage):
                yield embed_url

    def _real_extract(self, url):
        """Download the watch page for `url` and build the info dict.

        Raises ExtractorError (expected=True) when the page carries one of
        the "removed" or "private" markers.
        """
        video_id = self._match_id(url)
        # The canonical watch page is always fetched, whatever form `url` had.
        webpage = self._download_webpage(
            'http://www.redtube.com/%s' % video_id, video_id)

        # (page markers, message suffix): any marker present in the page
        # aborts extraction with "Video <id> <suffix>".
        ERRORS = (
            (('video-deleted-info', '>This video has been removed'), 'has been removed'),
            (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
        )

        for patterns, message in ERRORS:
            if any(p in webpage for p in patterns):
                raise ExtractorError(
                    'Video %s %s' % (video_id, message), expected=True)

        # JSON-LD metadata is the base; values scraped below are merged in.
        info = self._search_json_ld(webpage, video_id, default={})

        if not info.get('title'):
            info['title'] = self._html_search_regex(
                (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
                 r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
                webpage, 'title', group='title',
                default=None) or self._og_search_title(webpage)

        formats = []
        sources = self._parse_json(
            self._search_regex(
                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
            video_id, fatal=False)

        def full_url(u):
            # Resolve possibly relative media URLs against the requested URL.
            return urljoin(url, u)

        # `sources` is read as a mapping of quality id -> media URL.  The
        # key of each (key, value) pair supplies both format_id and height;
        # a pair has no index 2, so the key must be read directly.
        # _parse_json(..., fatal=False) may not return a dict, hence the check.
        if isinstance(sources, dict):
            for quality, source_url in sources.items():
                source_url = full_url(source_url)
                if not source_url:
                    continue
                formats.append({
                    'url': source_url,
                    'format_id': compat_str(quality),
                    'height': int_or_none(quality),
                })

        medias = self._search_regex(
            r'''mediaDefinitions?["']?\s*:\s*(\[[\s\S]+?}\s*\])''', webpage,
            'media definitions', default='{}')
        medias = self._parse_json(medias, video_id, fatal=False)
        for media in traverse_obj(medias, (Ellipsis, T(dict))):
            media_url = full_url(media.get('videoUrl'))
            if not media_url:
                continue
            more_media = None
            # 'hls' entries and 'mp4' entries without a quality are fetched
            # as JSON; the entries of that document are processed instead.
            # .get() keeps an entry lacking 'format' from raising KeyError.
            media_format = media.get('format')
            if media_format == 'hls' or (media_format == 'mp4' and not media.get('quality')):
                more_media = self._download_json(media_url, video_id, fatal=False)
            if more_media is None:
                # No indirection was attempted: use the entry itself.
                more_media = [media]
            for fmt in traverse_obj(more_media, (Ellipsis, {
                    'url': ('videoUrl', T(full_url)),
                    'ext': ('format', T(compat_str)),
                    'format_id': ('quality', T(compat_str)), })):
                format_url = fmt.get('url')
                if not format_url:
                    continue
                if fmt.get('ext') == 'hls' or determine_ext(format_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls',
                        fatal=False))
                    continue
                # Height comes from the quality string, before format_id is
                # rewritten to the joined ext/quality form.
                fmt['height'] = int_or_none(fmt.get('format_id'))
                fmt['format_id'] = join_nonempty('ext', 'format_id', from_dict=fmt)
                formats.append(fmt)

        if not formats:
            # Last resort: a plain <source> tag in the page.
            video_url = url_or_none(self._html_search_regex(
                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL'))
            if video_url:
                formats.append({'url': video_url})

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(self._search_regex(
            r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
            webpage, 'upload date', default=None))
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, default=None) or self._search_regex(
                r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
             r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
             r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
            webpage, 'view count', default=None))

        # No self-labeling, but they describe themselves as
        # "Home of Videos Porno"
        age_limit = 18

        return merge_dicts(info, {
            'id': video_id,
            'ext': 'mp4',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        })