Compare commits

..

5 Commits

Author SHA1 Message Date
Remita Amine
9d50f86232 [twitter] Add tests for more cards 2021-01-15 10:32:01 +01:00
Sergey M․
7e92f9015e
[youporn] Restrict fallback download URL (refs #27822) 2021-01-15 15:12:04 +07:00
Sergey M․
aa860b8016
[youporn] Improve height and tbr extraction (refs #23659, refs #20425) 2021-01-15 14:54:23 +07:00
Sergey M․
b484097b01
[youporn] Fix extraction (closes #27822) 2021-01-15 14:43:52 +07:00
Sergey M․
ab9001dab5
[twitter] Add support for unified cards (closes #27826) 2021-01-15 12:39:21 +07:00
2 changed files with 52 additions and 6 deletions

View File

@@ -373,6 +373,24 @@ class TwitterIE(TwitterBaseIE):
'uploader_id': '1eVjYOLGkGrQL', 'uploader_id': '1eVjYOLGkGrQL',
}, },
'add_ie': ['TwitterBroadcast'], 'add_ie': ['TwitterBroadcast'],
}, {
# unified card
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
'info_dict': {
'id': '1349794411333394432',
'ext': 'mp4',
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
'uploader': 'Brooklyn Nets',
'uploader_id': 'BrooklynNets',
'duration': 324.484,
'timestamp': 1610651040,
'upload_date': '20210114',
},
'params': {
'skip_download': True,
},
}, { }, {
# Twitch Clip Embed # Twitch Clip Embed
'url': 'https://twitter.com/GunB1g/status/1163218564784017422', 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
@@ -389,6 +407,22 @@ class TwitterIE(TwitterBaseIE):
# appplayer card # appplayer card
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832', 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
'only_matching': True, 'only_matching': True,
}, {
# video_direct_message card
'url': 'https://twitter.com/qarev001/status/1348948114569269251',
'only_matching': True,
}, {
# poll2choice_video card
'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
'only_matching': True,
}, {
# poll3choice_video card
'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
'only_matching': True,
}, {
# poll4choice_video card
'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -433,8 +467,7 @@ class TwitterIE(TwitterBaseIE):
'tags': tags, 'tags': tags,
} }
media = try_get(status, lambda x: x['extended_entities']['media'][0]) def extract_from_video_info(media):
if media and media.get('type') != 'photo':
video_info = media.get('video_info') or {} video_info = media.get('video_info') or {}
formats = [] formats = []
@@ -461,6 +494,10 @@ class TwitterIE(TwitterBaseIE):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': float_or_none(video_info.get('duration_millis'), 1000), 'duration': float_or_none(video_info.get('duration_millis'), 1000),
}) })
media = try_get(status, lambda x: x['extended_entities']['media'][0])
if media and media.get('type') != 'photo':
extract_from_video_info(media)
else: else:
card = status.get('card') card = status.get('card')
if card: if card:
@@ -493,7 +530,12 @@ class TwitterIE(TwitterBaseIE):
'_type': 'url', '_type': 'url',
'url': get_binding_value('card_url'), 'url': get_binding_value('card_url'),
}) })
# amplify, promo_video_website, promo_video_convo, appplayer, ... elif card_name == 'unified_card':
media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
extract_from_video_info(next(iter(media_entities.values())))
# amplify, promo_video_website, promo_video_convo, appplayer,
# video_direct_message, poll2choice_video, poll3choice_video,
# poll4choice_video, ...
else: else:
is_amplify = card_name == 'amplify' is_amplify = card_name == 'amplify'
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url') vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')

View File

@@ -60,6 +60,9 @@ class YouPornIE(InfoExtractor):
}, { }, {
'url': 'http://www.youporn.com/watch/505835', 'url': 'http://www.youporn.com/watch/505835',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@@ -88,7 +91,7 @@ class YouPornIE(InfoExtractor):
# Main source # Main source
definitions = self._parse_json( definitions = self._parse_json(
self._search_regex( self._search_regex(
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage, r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
'media definitions', default='[]'), 'media definitions', default='[]'),
video_id, fatal=False) video_id, fatal=False)
if definitions: if definitions:
@@ -100,7 +103,7 @@ class YouPornIE(InfoExtractor):
links.append(video_url) links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format # Fallback #1, this also contains extra low quality 180p format
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage): for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
links.append(link) links.append(link)
# Fallback #2 (unavailable as at 22.06.2017) # Fallback #2 (unavailable as at 22.06.2017)
@@ -128,8 +131,9 @@ class YouPornIE(InfoExtractor):
# Video URL's path looks like this: # Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata # We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url) mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj: if mobj:
height = int(mobj.group('height')) height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate')) bitrate = int(mobj.group('bitrate'))