Merge b804854e3bd4c663dfa6c264968a663d739a64d7 into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6

[JSInterp] Temporary fix for #33102
[Misc] Correct [_]IE_DESC/NAME in a few IEs
2025-09-16 06:44:14 +09:00 · 2025-04-01 09:00:19 +02:00 · 2025-03-31 04:21:09 +01:00 · 2025-03-26 12:47:19 +00:00 · 2025-03-26 12:17:49 +00:00 · 2022-02-27 01:40:37 +01:00
7 changed files with 65 additions and 18 deletions
--- a/youtube_dl/extractor/bokecc.py
+++ b/youtube_dl/extractor/bokecc.py
@@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):


 class BokeCCIE(BokeCCBaseIE):
-    _IE_DESC = 'CC视频'
+    IE_DESC = 'CC视频'
    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'

    _TESTS = [{
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dl/extractor/cloudy.py
@@ -9,7 +9,7 @@ from ..utils import (


 class CloudyIE(InfoExtractor):
-    _IE_DESC = 'cloudy.ec'
+    IE_DESC = 'cloudy.ec'
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -422,6 +422,8 @@ class InfoExtractor(object):
    _GEO_COUNTRIES = None
    _GEO_IP_BLOCKS = None
    _WORKING = True
+    # supply this in public subclasses: used in supported sites list, etc
+    # IE_DESC = 'short description of IE'

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -35,15 +35,6 @@ from ..utils import (

 class ITVBaseIE(InfoExtractor):

-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        transform_source = kw.pop('transform_source', None)
-        fatal = kw.pop('fatal', True)
-        return self._parse_json(
-            self._search_regex(
-                r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
-                webpage, 'next.js data', group='js', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
-
    def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
        if errnote is False:
            return False
@@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):

 class ITVIE(ITVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
-    _IE_DESC = 'ITVX'
+    IE_DESC = 'ITVX'
+    _WORKING = False
+
    _TESTS = [{
        'note': 'Hub URLs redirect to ITVX',
        'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
                'ext': determine_ext(href, 'vtt'),
            })

-        next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
+        next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
        video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
        title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
        info = self._og_extract(webpage, require_title=not title)
@@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):

 class ITVBTCCIE(ITVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
-    _IE_DESC = 'ITV articles: News, British Touring Car Championship'
+    IE_DESC = 'ITV articles: News, British Touring Car Championship'
    _TESTS = [{
        'note': 'British Touring Car Championship',
        'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dl/extractor/senateisvp.py
@@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
        ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
        ['arch', '', 'http://ussenate-f.akamaihd.net/']
    ]
-    _IE_NAME = 'senate.gov'
+    IE_NAME = 'senate.gov'
    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
    _TESTS = [{
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -6,9 +6,11 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
+    float_or_none,
    int_or_none,
    NO_DEFAULT,
    str_to_int,
+    urljoin,
 )


@@ -16,7 +18,7 @@ class XNXXIE(InfoExtractor):
    _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/'
    _TESTS = [{
        'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video',
-        'md5': '7583e96c15c0f21e9da3453d9920fbba',
+        'md5': '73c071a361a09aae7e7d60008221fd13',
        'info_dict': {
            'id': '55awb78',
            'ext': 'mp4',
@@ -25,7 +27,29 @@ class XNXXIE(InfoExtractor):
            'duration': 469,
            'view_count': int,
            'age_limit': 18,
+            'tags': ['video game', 'skyrim', '3d', 'game', '3d game', 'video games', 'rule34', 'test', 'rough', 'sfm', 'fallout', 'porno game', 'skyrim hentai', 'h game', '3d horse', '3d porno anime', 'xx video wwxxx cartoon cartoons', 'gaming', 'games', '3d porno desenho'],
+            'uploader': 'Glurp',
+            'uploader_id': 'Glurp',
+            'uploader_url': '/porn-maker/glurp',
        },
+    }, {
+        'url': 'https://www.xnxx.com/video-h46klf8/babes_-_come_back_to_bed_starring_abella_danger_and_darcie_dolce_clip',
+        'md5': 'b8b4a594b4091de46ce05d0a9d45317c',
+        'info_dict': {
+            'id': 'h46klf8',
+            'ext': 'mp4',
+            'title': 'Babes - Come Back to Bed  starring  Abella Danger and Darcie Dolce clip',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 480,
+            'view_count': int,
+            'age_limit': 18,
+            'tags': ['lesbicas', 'darcie dolce', 'darcie dolce lesbian', 'abella danger lesbian', 'babes', 'lesb', 'abella danger', 'mmf', 'come', 'darcie', 'lesbianas', 'mulher chupando peito da outra', 'abella', 'girl on girl', 'darcie dolce abella danger', 'chupando peitos', 'para', 'lesbian', 'chupando peitos lesbicas', 'black lesbians'],
+            'uploader': 'Babes Network',
+            'uploader_id': 'Babes Network',
+            'uploader_url': '/porn-maker/babes3',
+            'actors': [{'given_name': 'darcie dolce', 'url': 'https://www.xnxx.com/search/darcie%20dolce'}, {'given_name': 'darcie dolce lesbian', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20lesbian'}, {'given_name': 'abella danger lesbian', 'url': 'https://www.xnxx.com/search/abella%20danger%20lesbian'}, {'given_name': 'abella danger', 'url': 'https://www.xnxx.com/search/abella%20danger'}, {'given_name': 'darcie dolce abella danger', 'url': 'https://www.xnxx.com/search/darcie%20dolce%20abella%20danger'}],
+            'average_rating': float,
+        }
    }, {
        'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
        'only_matching': True,
@@ -70,8 +94,27 @@ class XNXXIE(InfoExtractor):
            'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
        duration = int_or_none(self._og_search_property('duration', webpage))
        view_count = str_to_int(self._search_regex(
-            r'id=["\']nb-views-number[^>]+>([\d,.]+)', webpage, 'view count',
-            default=None))
+            r'-.+?\t+- (?P<views>.+?) <span class="icon-f icf-eye">', webpage, 'view count', group='views',
+            default=0))
+
+        tags = self._search_regex(r'<meta name="keywords" content="porn,porn movies,free porn,free porn movies,sex,porno,free sex,tube porn,tube,videos,full porn,xxnx,xnxxx,xxx,pussy,(?P<tags>.+?)"', webpage, 'tags', group='tags', default='').split(',')
+
+        uploader_data = re.findall(r'<a class=".+?-plate" href="(?P<uploader_url>.+?)">(?P<uploader_name>.+?)</a>', webpage)
+        uploader_id = None
+        uploader_url = None
+        if uploader_data != []:
+            uploader_url, uploader_id = uploader_data[0][0:2]
+
+        actors_data = re.findall(r'<a class="is-pornstar" href="(?P<actor_url>.+?)">(?P<actor_name>.+?)</a>', webpage)
+        actors = []
+        if actors_data != []:
+            for actor_tuple in actors_data:
+                actors.append({
+                    'given_name': actor_tuple[1],
+                    'url': urljoin(url, actor_tuple[0]),
+                })
+
+        rating = float_or_none(self._search_regex(r'<span class="rating-box value">(?P<rating>.+?)%</span>', webpage, 'rating', group='rating'))

        return {
            'id': video_id,
@@ -81,4 +124,11 @@ class XNXXIE(InfoExtractor):
            'view_count': view_count,
            'age_limit': 18,
            'formats': formats,
+            'tags': tags,
+            'creator': uploader_id,
+            'uploader': uploader_id,
+            'uploader_id': uploader_id,
+            'uploader_url': uploader_url,
+            'actors': actors,
+            'average_rating': rating,
        }
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -686,6 +686,8 @@ class JSInterpreter(object):
            raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)

    def _dump(self, obj, namespace):
+        if obj is JS_Undefined:
+            return 'undefined'
        try:
            return json.dumps(obj)
        except TypeError:
Author	SHA1	Message	Date
DarkFighterLuke	0a2059479b	Merge b804854e3bd4c663dfa6c264968a663d739a64d7 into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6	2025-04-01 09:00:19 +02:00
dirkf	3eb8d22ddb	[JSInterp] Temporary fix for #33102	2025-03-31 04:21:09 +01:00
dirkf	4e714f9df1	[Misc] Correct [_]IE_DESC/NAME in a few IEs * thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c * also add documenting comment in `InfoExtractor`	2025-03-26 12:47:19 +00:00
dirkf	c1ea7f5a24	[ITV] Mark ITVX not working * update old shim * correct [_]IE_DESC	2025-03-26 12:17:49 +00:00
DarkFighterLuke	b804854e3b	Set uploader default values to None	2022-02-27 01:40:37 +01:00
DarkFighterLuke	fa32787f2b	Apply refactoring suggested by @rautamiekka	2022-02-27 01:32:20 +01:00
DarkFighterLuke	d9c98f01d5	Fix flake8 issues	2022-02-26 16:57:39 +01:00
DarkFighterLuke	068cd1b81c	Scrape creator	2022-02-26 16:53:11 +01:00
DarkFighterLuke	0e05a7d5d8	Scrape average_rating	2022-02-26 16:49:42 +01:00
DarkFighterLuke	4d93d7072d	Fix checks on empty data	2022-02-26 16:48:10 +01:00
DarkFighterLuke	9823824d7c	Scrape actors	2022-02-26 16:39:06 +01:00
DarkFighterLuke	072873437c	Fix uploader regex	2022-02-26 16:28:00 +01:00
DarkFighterLuke	441bafa2d1	Scrape tags modified: youtube_dl/extractor/xnxx.py	2022-02-26 16:20:04 +01:00
DarkFighterLuke	b6f851ba4d	Fix view_count regex	2022-02-26 16:13:05 +01:00
DarkFighterLuke	6e109f8ad4	Scrape uploader	2022-02-26 16:01:18 +01:00