[pornhub] Detect flagged videos

[pornhub] Extract formats from get_media end point (#28395 )
2025-07-13 06:54:15 +09:00 · 2021-03-10 02:56:01 +07:00 · 2021-03-10 02:54:10 +07:00
1 changed files with 31 additions and 13 deletions
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
        'params': {
            'skip_download': True,
        },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
    }, {
        # subtitles
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@ -265,7 +266,8 @@ class PornHubIE(PornHubBaseIE):
        webpage = dl_webpage('pc')

        error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
            webpage, 'error message', default=None, group='error')
        if error_msg:
            error_msg = re.sub(r'\s+', ' ', error_msg)
@ -394,6 +396,21 @@ class PornHubIE(PornHubBaseIE):

        upload_date = None
        formats = []
+
+        def add_format(format_url, height=None):
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': format_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+
        for video_url, height in video_urls:
            if not upload_date:
                upload_date = self._search_regex(
@ -410,18 +427,19 @@ class PornHubIE(PornHubBaseIE):
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
-            tbr = None
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
-            if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                tbr = int(mobj.group('tbr'))
-            formats.append({
-                'url': video_url,
-                'format_id': '%dp' % height if height else None,
-                'height': height,
-                'tbr': tbr,
-            })
+            if '/video/get_media' in video_url:
+                medias = self._download_json(video_url, video_id, fatal=False)
+                if isinstance(medias, list):
+                    for media in medias:
+                        if not isinstance(media, dict):
+                            continue
+                        video_url = url_or_none(media.get('videoUrl'))
+                        if not video_url:
+                            continue
+                        height = int_or_none(media.get('quality'))
+                        add_format(video_url, height)
+                continue
+            add_format(video_url)
        self._sort_formats(formats)

        video_uploader = self._html_search_regex(
Author	SHA1	Message	Date
Sergey M․	1a1ccd9a6e	[pornhub] Detect flagged videos	2021-03-10 02:56:01 +07:00
Sergey M․	7dc513487f	[pornhub] Extract formats from get_media end point (#28395 )	2021-03-10 02:54:10 +07:00