[twitter] Add tests for more cards

[youporn] Restrict fallback download URL (refs #27822)
[youporn] Improve height and tbr extraction (refs #23659, refs #20425)
2025-07-12 22:44:14 +09:00 · 2021-01-15 10:32:01 +01:00 · 2021-01-15 15:12:04 +07:00 · 2021-01-15 14:54:23 +07:00 · 2021-01-15 14:43:52 +07:00 · 2021-01-15 12:39:21 +07:00
2 changed files with 52 additions and 6 deletions
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -373,6 +373,24 @@ class TwitterIE(TwitterBaseIE):
            'uploader_id': '1eVjYOLGkGrQL',
        },
        'add_ie': ['TwitterBroadcast'],
+    }, {
+        # unified card
+        'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
+        'info_dict': {
+            'id': '1349794411333394432',
+            'ext': 'mp4',
+            'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
+            'uploader': 'Brooklyn Nets',
+            'uploader_id': 'BrooklynNets',
+            'duration': 324.484,
+            'timestamp': 1610651040,
+            'upload_date': '20210114',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        # Twitch Clip Embed
        'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
@@ -389,6 +407,22 @@ class TwitterIE(TwitterBaseIE):
        # appplayer card
        'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
        'only_matching': True,
+    }, {
+        # video_direct_message card
+        'url': 'https://twitter.com/qarev001/status/1348948114569269251',
+        'only_matching': True,
+    }, {
+        # poll2choice_video card
+        'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
+        'only_matching': True,
+    }, {
+        # poll3choice_video card
+        'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
+        'only_matching': True,
+    }, {
+        # poll4choice_video card
+        'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
@@ -433,8 +467,7 @@ class TwitterIE(TwitterBaseIE):
            'tags': tags,
        }

-        media = try_get(status, lambda x: x['extended_entities']['media'][0])
-        if media and media.get('type') != 'photo':
+        def extract_from_video_info(media):
            video_info = media.get('video_info') or {}

            formats = []
@@ -461,6 +494,10 @@ class TwitterIE(TwitterBaseIE):
                'thumbnails': thumbnails,
                'duration': float_or_none(video_info.get('duration_millis'), 1000),
            })
+
+        media = try_get(status, lambda x: x['extended_entities']['media'][0])
+        if media and media.get('type') != 'photo':
+            extract_from_video_info(media)
        else:
            card = status.get('card')
            if card:
@@ -493,7 +530,12 @@ class TwitterIE(TwitterBaseIE):
                        '_type': 'url',
                        'url': get_binding_value('card_url'),
                    })
-                # amplify, promo_video_website, promo_video_convo, appplayer, ...
+                elif card_name == 'unified_card':
+                    media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
+                    extract_from_video_info(next(iter(media_entities.values())))
+                # amplify, promo_video_website, promo_video_convo, appplayer,
+                # video_direct_message, poll2choice_video, poll3choice_video,
+                # poll4choice_video, ...
                else:
                    is_amplify = card_name == 'amplify'
                    vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -60,6 +60,9 @@ class YouPornIE(InfoExtractor):
    }, {
        'url': 'http://www.youporn.com/watch/505835',
        'only_matching': True,
+    }, {
+        'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
+        'only_matching': True,
    }]

    @staticmethod
@@ -88,7 +91,7 @@ class YouPornIE(InfoExtractor):
        # Main source
        definitions = self._parse_json(
            self._search_regex(
-                r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
+                r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
                'media definitions', default='[]'),
            video_id, fatal=False)
        if definitions:
@@ -100,7 +103,7 @@ class YouPornIE(InfoExtractor):
                    links.append(video_url)

        # Fallback #1, this also contains extra low quality 180p format
-        for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
+        for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
            links.append(link)

        # Fallback #2 (unavailable as at 22.06.2017)
@@ -128,8 +131,9 @@ class YouPornIE(InfoExtractor):
            # Video URL's path looks like this:
            #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /videos/201703/11/109285532/1080P_4000K_109285532.mp4
            # We will benefit from it by extracting some metadata
-            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
            if mobj:
                height = int(mobj.group('height'))
                bitrate = int(mobj.group('bitrate'))
Author	SHA1	Message	Date
Remita Amine	9d50f86232	[twitter] Add tests for more cards	2021-01-15 10:32:01 +01:00
Sergey M․	7e92f9015e	[youporn] Restrict fallback download URL (refs #27822)	2021-01-15 15:12:04 +07:00
Sergey M․	aa860b8016	[youporn] Improve height and tbr extraction (refs #23659, refs #20425)	2021-01-15 14:54:23 +07:00
Sergey M․	b484097b01	[youporn] Fix extraction (closes #27822)	2021-01-15 14:43:52 +07:00
Sergey M․	ab9001dab5	[twitter] Add support for unified cards (closes #27826)	2021-01-15 12:39:21 +07:00