[XVideos] Update XVideosSearchIE

2025-09-16 06:44:14 +09:00 · 2023-10-16 04:14:53 +01:00 · 2023-10-16 04:14:53 +01:00 · 596a6203f7
commit 596a6203f7
parent 45088c6301
1 changed files with 67 additions and 14 deletions
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@ -934,24 +934,77 @@ class XVideosChannelIE(XVideosPlaylistBaseIE):
        return super(XVideosChannelIE, self)._real_extract(url)
-class XVideosSearchIE(XVideosPlaylistIE):
+class XVideosSearchIE(XVideosPlaylistBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:[^/]+\.)?xvideos2?\.com/
                          \?k=(?P<id>[^#?/&]+)
                          (?:&[^&]+)*?(?:&p=(?P<pnum>\d+))?
                 '''
    _TESTS = [{
-        # uninteresting search with probably at least two pages of results,
+        'note': 'paginated search result',
-        # but not too many more
+        'url': 'http://www.xvideos.com/?k=lithuania',
-        'url': 'http://www.xvideos.com/?k=libya&sort=length',
+        'info_dict': {
-        'playlist_mincount': 30,
+            'id': 'lithuania',
-    }, ]
+            'title': 'lithuania (all)',
        },
        'playlist_mincount': 75,
    }, {
        'note': 'second page of paginated search result',
        'url': 'http://www.xvideos.com/?k=lithuania&p=1',
        'info_dict': {
            'id': 'lithuania/1',
            'title': 'lithuania (p2)',
        },
        'playlist_count': 27,
    }, {
        'note': 'search with sort',
        'url': 'http://www.xvideos.com/?k=lithuania&sort=length',
        'info_dict': {
            'id': 'lithuania/sort=length',
            'title': 'lithuania (sort=length,all)',
        },
        'playlist': [{
            'info_dict': {
                'id': r're:\d+',
                'ext': 'mp4',
                'title': r're:\w+',
                'uploader': r're:\w+',
                'age_limit': int,
                'duration': 'lambda d: d >= 4954',  # for video 56455303:
            },
        }],
    }]
-    def _get_next_page(self, url, num, page):
+    def _get_playlist_id(self, playlist_id, **kwargs):
-        parsed_url = compat_urlparse.urlparse(url)
+        url = kwargs['url']
-        qs = compat_parse_qs(parsed_url.query)
+        sub = compat_urlparse.urlsplit(url).query
-        qs['p'] = [num]
+        sub = re.sub(r'(^|&)k=[^&]+(?:&|$)', r'\1', sub)
-        parsed_url = (
+        sub = re.sub(r'(^|&)p=', r'\1', sub)
-            list(parsed_url[:4])
+        return join_nonempty(
-            + [compat_urllib_parse_urlencode(qs, True), None])
+            playlist_id, sub.replace('&', '/') or None, delim='/')
-        return compat_urlparse.urlunparse(parsed_url), False
+
    def _get_title(self, page, playlist_id, **kwargs):
        """Build the playlist title for a search result from its playlist ID.

        `playlist_id` is split on '/': the first component becomes the base
        of the title and the remaining components are qualifiers.  If the
        last qualifier ends (after its final '=') in an integer, it is taken
        to be a page number and removed from the qualifiers.

        The page number is chosen in this order of precedence:
          1. the `pnum` keyword argument, if it parses as an integer,
          2. the page number found in `playlist_id`,
          3. the number following ', page ' in the title returned by the
             superclass.

        The qualifiers, followed by 'p<pnum + 1>' (or 'all' when no page
        number was found), are joined with ',' and appended to the base in
        parentheses, e.g. 'term (sort=length,all)'.
        """
        explicit_pnum = int_or_none(kwargs.pop('pnum', None))
        page_title = super(XVideosSearchIE, self)._get_title(
            page, playlist_id, **kwargs)
        # only the page-number suffix of the page title is of interest;
        # the text before it is deliberately ignored
        title_pnum = (page_title.split(', page ') + [None])[:2][1]

        id_parts = playlist_id.split('/')
        base, qualifiers = id_parts[0], id_parts[1:]

        # a trailing qualifier such as '1' or 'p=1' carries the page number
        id_pnum = traverse_obj(qualifiers, (
            -1, T(lambda s: s.split('=')), -1, T(int_or_none)))
        if id_pnum is not None:
            qualifiers.pop()

        pnum = explicit_pnum
        if pnum is None:
            pnum = id_pnum
        if pnum is None:
            pnum = int_or_none(title_pnum)

        if pnum is None:
            page_label = 'all'
        else:
            page_label = 'p%s' % (pnum + 1)
        qualifiers.append(page_label)

        suffix = join_nonempty(*qualifiers, delim=',')
        if not suffix:
            return base
        return '%s (%s)' % (base, suffix)