Compare commits


12 Commits

Author SHA1 Message Date
dirkf 9d17948b5a
[myvideoge] Add new extractor (#31360)
NB download tests on CI servers blocked 

Co-authored-by: Alfonso Solbes <fonk666@gmail.com>
2023-02-02 23:25:44 +00:00
afterdelight f316f5d4e3
[xhamster] add support for new domain xhvid.com (#31370) 2023-02-02 23:20:14 +00:00
dirkf bc6f94e459
[FIFA] Back-port extractor from yt-dlp (#31385) 2023-02-02 23:19:03 +00:00
Epsilonator be3392a0d4
[Blerp] Add new extractor (#31398)
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:33:09 +00:00
zhangeric-15 6d829d8119
[YouTube] Fix not finding videos listed under a channel's "shorts" subpage. (#31409)
Resolves #31336

Co-authored-by: Jouni Järvinen <rautamiekka@users.noreply.github.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:26:31 +00:00
Ruowang Sun 98b0cf1cd0
[Callin] Add new extractor (#31414)
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:21:05 +00:00
Leon Etienne e9611a2a36
[pr0gramm] implement InfoExtractor, Resolves #31433 (#31434)
* [pr0gramm] implement infoextractor

* [pr0gramm] remove misplaced comment, uncapture regex-group

* [pr0gramm]: specify utf-8 coding

* [pr0gramm]: add trailing comma to lists for maintainability

* [pr0gramm]: ie only sets upload_date attribute

* [pr0gramm]: add video_id to title

* [pr0gramm]: more forgiving _valid_url regex

* [pr0gramm]: add uploader to title, if set

* Discriminate URL pattern

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:13:39 +00:00
JChris246 807e593a32
[cammodels] fix and improve extractor (#31453)
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:12:36 +00:00
Rodrigo Dias 297fbff23b
[doc] Fixed typo appearing to promise an example (#31489)
Resolves #31425 

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:10:09 +00:00
Brian Marks 37cbdfa0e7
[americastestkitchen] Add support for downloading entire series (#31493)
Also
* support new sites and URL patterns
* back-port from yt-dlp

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 16:58:21 +00:00
dirkf 295736c9cb [jsinterp] Improve parsing
* support subset `... else if ...`
* support `while`
* add `RegExp` class
* generalise `new` support
* limited more debug strings
* matching test changes
2023-02-02 16:31:49 +00:00
pukkandan 14ef89a8da Support `if` statements
Fix for yt-dlp/yt_dlp#6131
Closes #31509
2023-02-02 13:12:46 +00:00
14 changed files with 883 additions and 109 deletions

View File

@@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
 The current default template is `%(title)s-%(id)s.%(ext)s`.
 
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
 
 #### Output template and Windows batch files
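For embedders, the same switch is available through the Python API; a minimal sketch (the `outtmpl` and `restrictfilenames` option names are youtube-dl's own, the URL is its usual test video):

import youtube_dl

# 'restrictfilenames' is the API twin of --restrict-filenames: it keeps
# output filenames ASCII-only and avoids spaces and '&'.
opts = {
    'outtmpl': '%(title)s-%(id)s.%(ext)s',
    'restrictfilenames': True,
}
with youtube_dl.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])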

View File

@@ -11,8 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.compat import compat_re_Pattern
-
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
@@ -140,15 +138,23 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertTrue(math.isnan(jsi.call_function('x')))
 
     def test_Date(self):
         jsi = JSInterpreter('''
         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
         ''')
         self.assertEqual(jsi.call_function('x'), 86000)
 
         jsi = JSInterpreter('''
         function x(dt) { return new Date(dt) - 0; }
         ''')
         self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
 
+        # date format m/d/y
+        jsi = JSInterpreter('''
+        function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
@@ -158,6 +164,57 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('z'), 5)
         self.assertEqual(jsi.call_function('y'), 2)
 
+    def test_if(self):
+        jsi = JSInterpreter('''
+        function x() {
+            let a = 9;
+            if (0==0) {a++}
+            return a
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0==0) {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        """ # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        """
+
+    def test_elseif(self):
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        """ # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        # etc
+        """
+
     def test_for_loop(self):
         # function x() { a=0; for (i=0; i-10; i++) {a++} a }
         jsi = JSInterpreter('''
@@ -165,6 +222,13 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 10)
 
+    def test_while_loop(self):
+        # function x() { a=0; while (a<10) {a++} a }
+        jsi = JSInterpreter('''
+        function x() { a=0; while (a<10) {a++} return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
     def test_switch(self):
         jsi = JSInterpreter('''
         function x(f) { switch(f){
@@ -383,13 +447,28 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; return a; }
         ''')
-        self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
+        attrs = set(('findall', 'finditer', 'flags', 'groupindex',
+                     'groups', 'match', 'pattern', 'scanner',
+                     'search', 'split', 'sub', 'subn'))
+        self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+        jsi = JSInterpreter(r'''
+        function x() { let a=[/[)\\]/]; return a[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
+
+        """ # fails
+        jsi = JSInterpreter(r'''
+        function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+        """
+
     def test_char_code_at(self):
         jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
         self.assertEqual(jsi.call_function('x', 0), 116)
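Taken together with the interpreter changes in jsinterp.py at the end of this compare, control flow like the following now evaluates end-to-end; a quick sketch in the style of the tests above (only the brace-delimited `if`/`else if` subset is supported):

from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('''
function x(n) {
    a = 0;
    while (a < n) { a++ }
    if (a == 0) { return -1 }
    else if (a == 10) { return a }
    else { return 0 }
}''')
assert jsi.call_function('x', 10) == 10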

View File

@@ -135,6 +135,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
         'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
     ),
+    (
+        'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
+        'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
+    ),
 ]

View File

@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
         'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5b400b9ee338f922cb06450c',
             'title': 'Japanese Suppers',
             'ext': 'mp4',
+            'display_id': 'weeknight-japanese-suppers',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1523318400,
-            'upload_date': '20180410',
-            'release_date': '20180410',
+            'timestamp': 1523304000,
+            'upload_date': '20180409',
+            'release_date': '20180409',
             'series': "America's Test Kitchen",
+            'season': 'Season 18',
             'season_number': 18,
             'episode': 'Japanese Suppers',
             'episode_number': 15,
+            'duration': 1376,
+            'thumbnail': r're:^https?://',
+            'average_rating': 0,
+            'view_count': int,
         },
         'params': {
             'skip_download': True,
@@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5fbe8c61bda2010001c6763b',
             'title': 'Simple Chicken Dinner',
             'ext': 'mp4',
+            'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
             'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1610755200,
-            'upload_date': '20210116',
-            'release_date': '20210116',
+            'timestamp': 1610737200,
+            'upload_date': '20210115',
+            'release_date': '20210115',
             'series': "America's Test Kitchen",
+            'season': 'Season 21',
             'season_number': 21,
             'episode': 'Simple Chicken Dinner',
             'episode_number': 3,
+            'duration': 1397,
+            'thumbnail': r're:^https?://',
+            'view_count': int,
+            'average_rating': 0,
         },
         'params': {
             'skip_download': True,
@@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
     }, {
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
+        'only_matching': True,
     }, {
         'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
         'only_matching': True,
@@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
 
 
 class AmericasTestKitchenSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
     _TESTS = [{
         # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
         'playlist_count': 13,
     }, {
         # Cooks Country Season
-        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
         'info_dict': {
             'id': 'season_12',
             'title': 'Season 12',
         },
         'playlist_count': 13,
+    }, {
+        # America's Test Kitchen Series
+        'url': 'https://www.americastestkitchen.com/',
+        'info_dict': {
+            'id': 'americastestkitchen',
+            'title': 'America\'s Test Kitchen',
+        },
+        'playlist_count': 558,
+    }, {
+        # Cooks Country Series
+        'url': 'https://www.americastestkitchen.com/cookscountry',
+        'info_dict': {
+            'id': 'cookscountry',
+            'title': 'Cook\'s Country',
+        },
+        'playlist_count': 199,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cooksillustrated.com',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        show_name, season_number = re.match(self._VALID_URL, url).groups()
-        season_number = int(season_number)
+        match = re.match(self._VALID_URL, url).groupdict()
+        show = match.get('show2')
+        show_path = ('/' + show) if show else ''
+        show = show or match['show']
+        season_number = int_or_none(match.get('season'))
 
-        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+        slug, title = {
+            'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+            'cookscountry': ('cco', 'Cook\'s Country'),
+            'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+        }[show]
 
-        season = 'Season %d' % season_number
+        facet_filters = [
+            'search_document_klass:episode',
+            'search_show_slug:' + slug,
+        ]
+
+        if season_number:
+            playlist_id = 'season_%d' % season_number
+            playlist_title = 'Season %d' % season_number
+            facet_filters.append('search_season_list:' + playlist_title)
+        else:
+            playlist_id = show
+            playlist_title = title
 
         season_search = self._download_json(
             'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
-            season, headers={
-                'Origin': 'https://www.%s.com' % show_name,
+            playlist_id, headers={
+                'Origin': 'https://www.americastestkitchen.com',
                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                 'X-Algolia-Application-Id': 'Y1FNZXUI30',
             }, query={
-                'facetFilters': json.dumps([
-                    'search_season_list:' + season,
-                    'search_document_klass:episode',
-                    'search_show_slug:' + slug,
-                ]),
-                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'facetFilters': json.dumps(facet_filters),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
                'attributesToHighlight': '',
                 'hitsPerPage': 1000,
             })
 
         def entries():
             for episode in (season_search.get('hits') or []):
-                search_url = episode.get('search_url')
+                search_url = episode.get('search_url')  # always formatted like '/episode/123-title-of-episode'
                 if not search_url:
                     continue
                 yield {
                     '_type': 'url',
-                    'url': 'https://www.%s.com%s' % (show_name, search_url),
-                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+                    'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
+                    'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
                     'title': episode.get('title'),
                     'description': episode.get('description'),
                     'timestamp': unified_timestamp(episode.get('search_document_date')),
@@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
             }
 
         return self.playlist_result(
-            entries(), 'season_%d' % season_number, season)
+            entries(), playlist_id, playlist_title)
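The season/series split above comes down to the Algolia request; a sketch of the payload the new code builds (the values here are illustrative):

import json

slug = 'cco'  # 'atk', 'cco' or 'cio', from the show slug table above
facet_filters = [
    'search_document_klass:episode',
    'search_show_slug:' + slug,
    # appended only when the URL named a season:
    'search_season_list:Season 12',
]
query = {
    'facetFilters': json.dumps(facet_filters),
    'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
    'attributesToHighlight': '',
    'hitsPerPage': 1000,
}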

View File

@@ -0,0 +1,173 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from ..utils import (
strip_or_none,
traverse_obj,
)
from .common import InfoExtractor
class BlerpIE(InfoExtractor):
IE_NAME = 'blerp'
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
'info_dict': {
'id': '6320fe8745636cb4dd677a5a',
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
'uploader': 'luminousaj',
'uploader_id': '5fb81e51aa66ae000c395478',
'ext': 'mp3',
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
}
}, {
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
'info_dict': {
'id': '5bc94ef4796001000498429f',
'title': 'Yee',
'uploader': '179617322678353920',
'uploader_id': '5ba99cf71386730004552c42',
'ext': 'mp3',
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
}
}]
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
_GRAPHQL_QUERY = (
'''query webBitePageGetBite($_id: MongoID!) {
web {
biteById(_id: $_id) {
...bitePageFrag
__typename
}
__typename
}
}
fragment bitePageFrag on Bite {
_id
title
userKeywords
keywords
color
visibility
isPremium
owned
price
extraReview
isAudioExists
image {
filename
original {
url
__typename
}
__typename
}
userReactions {
_id
reactions
createdAt
__typename
}
topReactions
totalSaveCount
saved
blerpLibraryType
license
licenseMetaData
playCount
totalShareCount
totalFavoriteCount
totalAddedToBoardCount
userCategory
userAudioQuality
audioCreationState
transcription
userTranscription
description
createdAt
updatedAt
author
listingType
ownerObject {
_id
username
profileImage {
filename
original {
url
__typename
}
__typename
}
__typename
}
transcription
favorited
visibility
isCurated
sourceUrl
audienceRating
strictAudienceRating
ownerId
reportObject {
reportedContentStatus
__typename
}
giphy {
mp4
gif
__typename
}
audio {
filename
original {
url
__typename
}
mp3 {
url
__typename
}
__typename
}
__typename
}
''')
def _real_extract(self, url):
audio_id = self._match_id(url)
data = {
'operationName': self._GRAPHQL_OPERATIONNAME,
'query': self._GRAPHQL_QUERY,
'variables': {
'_id': audio_id
}
}
headers = {
'Content-Type': 'application/json'
}
json_result = self._download_json('https://api.blerp.com/graphql',
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
bite_json = json_result['data']['web']['biteById']
info_dict = {
'id': bite_json['_id'],
'url': bite_json['audio']['mp3']['url'],
'title': bite_json['title'],
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
'ext': 'mp3',
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
}
return info_dict
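Like any new extractor, Blerp can be exercised through the embedding API; a small usage sketch (URL taken from the _TESTS block above):

import youtube_dl

with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
    info = ydl.extract_info('https://blerp.com/soundbites/6320fe8745636cb4dd677a5a')
    print(info['id'], info['title'], info['uploader'])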

View File

@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
traverse_obj,
try_get,
)
class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
_TESTS = [{
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
'md5': '14ede27ee2c957b7e4db93140fc0745c',
'info_dict': {
'id': 'PrumRdSQJW',
'ext': 'mp4',
'title': 'FCC Commissioner Brendan Carr on Elons Starlink',
'description': 'Or, why the government doesnt like SpaceX',
'channel': 'The Pull Request',
'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
}
}, {
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
'md5': '16f704ddbf82a27e3930533b12062f07',
'info_dict': {
'id': 'lzxMidUnjA',
'ext': 'mp4',
'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
'description': 'Lets talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
'channel': 'The DEBRIEF With Briahna Joy Gray',
'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
}
}]
def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id)
episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
if not episode:
raise ExtractorError('Failed to find episode data')
title = episode.get('title') or self._og_search_title(webpage)
description = episode.get('description') or self._og_search_description(webpage)
formats = []
formats.extend(self._extract_m3u8_formats(
episode.get('m3u8'), video_id, 'mp4',
entry_protocol='m3u8_native', fatal=False))
self._sort_formats(formats)
channel = try_get(episode, lambda x: x['show']['title'], compat_str)
channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'channel': channel,
'channel_url': channel_url,
}
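The `_search_nextjs_data` helper above works because Next.js sites embed their page props as JSON in a `__NEXT_DATA__` script tag; the technique in isolation (the sample markup is illustrative):

import json
import re

webpage = ('<script id="__NEXT_DATA__" type="application/json">'
           '{"props": {"pageProps": {"episode": {"title": "demo"}}}}</script>')
m = re.search(
    r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', webpage)
next_data = json.loads(m.group(1))
print(next_data['props']['pageProps']['episode']['title'])  # demo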

View File

@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
     url_or_none,
 )
@@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            url, user_id, headers=self.geo_verification_headers())
-
-        manifest_root = self._html_search_regex(
-            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
-
-        if not manifest_root:
-            ERRORS = (
-                ("I'm offline, but let's stay connected", 'This user is currently offline'),
-                ('in a private show', 'This user is in a private show'),
-                ('is currently performing LIVE', 'This model is currently performing live'),
-            )
-            for pattern, message in ERRORS:
-                if pattern in webpage:
-                    error = message
-                    expected = True
-                    break
-            else:
-                error = 'Unable to find manifest URL root'
-                expected = False
-            raise ExtractorError(error, expected=expected)
-
         manifest = self._download_json(
-            '%s%s.json' % (manifest_root, user_id), user_id)
+            'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
 
         formats = []
+        thumbnails = []
         for format_id, format_dict in manifest['formats'].items():
             if not isinstance(format_dict, dict):
                 continue
@@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
                         'preference': -1,
                     })
                 else:
+                    if format_id == 'jpeg':
+                        thumbnails.append({
+                            'url': f['url'],
+                            'width': f['width'],
+                            'height': f['height'],
+                            'format_id': f['format_id'],
+                        })
                     continue
                 formats.append(f)
         self._sort_formats(formats)
@@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
         return {
             'id': user_id,
             'title': self._live_title(user_id),
+            'thumbnails': thumbnails,
             'is_live': True,
             'formats': formats,
             'age_limit': 18

View File

@@ -138,6 +138,7 @@ from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
 )
+from .blerp import BlerpIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE
@@ -158,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .callin import CallinIE
 from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE
@@ -374,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
     FilmOnChannelIE,
@@ -725,6 +728,7 @@ from .myvi import (
     MyviIE,
     MyviEmbedIE,
 )
+from .myvideoge import MyVideoGeIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
@@ -1667,3 +1671,7 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)

View File

@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
unified_timestamp,
)
if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
BaseInfoExtractor = InfoExtractor
import re
class InfoExtractor(BaseInfoExtractor):
@classmethod
def _match_valid_url(cls, url):
return re.match(cls._VALID_URL, url)
class FifaIE(InfoExtractor):
_VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
'info_dict': {
'id': '7on10qPcnyLajDDU3ntg6y',
'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
'ext': 'mp4',
'categories': ['FIFA Tournaments'],
'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
'duration': 8165,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
'info_dict': {
'id': '1cg5r5Qt6Qt12ilkDgb1sV',
'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
'description': 'md5:d908c74ee66322b804ae2e521b02a855',
'ext': 'mp4',
'categories': ['FIFA Tournaments', 'Highlights'],
'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
'duration': 902,
'release_timestamp': 1404777600,
'release_date': '20140708',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
'info_dict': {
'id': '3C6gQH9C2DLwzNx7BMRQdp',
'title': 'Josimar goal against Northern Ireland | Classic Goals',
'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
'ext': 'mp4',
'categories': ['FIFA Tournaments', 'Goal'],
'duration': 28,
'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id, locale = self._match_valid_url(url).group('id', 'locale')
webpage = self._download_webpage(url, video_id)
preconnect_link = self._search_regex(
r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
video_details = self._download_json(
'{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False)
preplay_parameters = self._download_json(
'{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters']
content_data = self._download_json(
# 1. query string is expected to be sent as-is
# 2. `sig` must be appended
# 3. if absent, the call appears to work but the manifest is bad (404)
'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
video_id, 'Downloading Content Data')
# formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
self._sort_formats(formats)
return {
'id': video_id,
'title': video_details['title'],
'description': video_details.get('description'),
'duration': int_or_none(video_details.get('duration')),
'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
'formats': formats,
'subtitles': subtitles,
}
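The three numbered comments above pin down the Uplynk preplay contract; a standalone sketch (all values are placeholders, the real ones come from the `preplayParameters` of the videoPlayerData response):

preplay_parameters = {
    'contentId': 'abc123',      # placeholder
    'queryStr': 'v=2&cid=xyz',  # sent as-is (comment 1)
    'signature': 'deadbeef',    # appended last (comment 2)
}
url = ('https://content.uplynk.com/preplay/{contentId}/multiple.json'
       '?{queryStr}&sig={signature}').format(**preplay_parameters)
# omitting &sig= appears to succeed but yields a manifest that 404s (comment 3)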

View File

@@ -0,0 +1,87 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_id,
get_element_by_class,
int_or_none,
js_to_json,
MONTH_NAMES,
qualities,
unified_strdate,
)
class MyVideoGeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://www.myvideo.ge/v/3941048',
'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
'info_dict': {
'id': '3941048',
'ext': 'mp4',
'title': 'The best prikol',
'upload_date': '20200611',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'chixa33',
'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
},
# working from local dev system
'skip': 'site blocks CI servers',
}
_MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
_quality = staticmethod(qualities(('SD', 'HD')))
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = (
self._og_search_title(webpage, default=None)
or clean_html(get_element_by_class('my_video_title', webpage))
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
jwplayer_sources = self._parse_json(
self._search_regex(
r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
or '',
video_id, transform_source=js_to_json, fatal=False)
formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
for f in formats or []:
f['preference'] = self._quality(f['format_id'])
self._sort_formats(formats)
description = (
self._og_search_description(webpage)
or get_element_by_id('long_desc_holder', webpage)
or self._html_search_meta('description', webpage))
uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
upload_date = get_element_by_class('mv_vid_upl_date', webpage)
# as ka locale may not be present roll a local date conversion
upload_date = (unified_strdate(
# translate any ka month to an en one
re.sub('|'.join(self._MONTH_NAMES_KA),
lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
upload_date, re.I))
if upload_date else None)
return {
'id': video_id,
'title': title,
'description': description,
'uploader': uploader,
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': upload_date,
'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
}
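The Georgian month shim above can be tried on its own; a sketch reusing the same tables (the date string is illustrative):

import re

from youtube_dl.utils import MONTH_NAMES, unified_strdate

MONTH_NAMES_KA = [
    'იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი',
    'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']

date_ka = '11 ივნისი 2020'
date_en = re.sub(
    '|'.join(MONTH_NAMES_KA),
    lambda m: MONTH_NAMES['en'][MONTH_NAMES_KA.index(m.group(0))],
    date_ka)
print(unified_strdate(date_en))  # 20200611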

View File

@@ -0,0 +1,105 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
from ..utils import (
merge_dicts,
)
class Pr0grammStaticIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/static/5466437
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://pr0gramm.com/static/5466437',
'md5': '52fa540d70d3edc286846f8ca85938aa',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# Fetch media sources
entries = self._parse_html5_media_entries(url, webpage, video_id)
media_info = entries[0]
# this raises if there are no formats
self._sort_formats(media_info.get('formats') or [])
# Fetch author
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
# Fetch approx upload timestamp from filename
# Have None-defaults in case the extraction fails
uploadDay = None
uploadMon = None
uploadYear = None
uploadTimestr = None
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
if (m):
# Up to a day of accuracy should suffice...
uploadDay = m.groupdict().get('day')
uploadMon = m.groupdict().get('mon')
uploadYear = m.groupdict().get('year')
uploadTimestr = uploadYear + uploadMon + uploadDay
return merge_dicts({
'id': video_id,
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
'uploader': uploader,
'upload_date': uploadTimestr
}, media_info)
# This extractor is for the primary url (used for sharing, and appears in the
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
# video information here. So let's redirect to a compatibility version of
# the site, which does contain the <video>-element by itself, without requiring
# js to be ran.
class Pr0grammIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/new/546637
# https://pr0gramm.com/new/video/546637
# https://pr0gramm.com/top/546637
# https://pr0gramm.com/top/video/546637
# https://pr0gramm.com/user/g11st/uploads/5466437
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
# https://pr0gramm.com/user/froschler/1elf/5232030
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
# https://pr0gramm.com/top/fruher war alles damals/5498175
_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
_TEST = {
'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _generic_title():
return "oof"
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'https://pr0gramm.com/static/' + video_id,
video_id=video_id,
ie=Pr0grammStaticIE.ie_key())

View File

@@ -24,7 +24,7 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -123,6 +123,9 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -433,6 +436,9 @@ class XHamsterUserIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/users/mobhunter',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/users/pelushe21',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):

View File

@@ -315,7 +315,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         title = try_get(
             renderer,
             (lambda x: x['title']['runs'][0]['text'],
-             lambda x: x['title']['simpleText']), compat_str)
+             lambda x: x['title']['simpleText'],
+             lambda x: x['headline']['simpleText']), compat_str)
         description = try_get(
             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
             compat_str)
@@ -2207,6 +2208,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:tab'
 
     _TESTS = [{
+        # Shorts
+        'url': 'https://www.youtube.com/@SuperCooperShorts/shorts',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'description': 'Short clips from Super Cooper Sundays!',
+            'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
+            'title': 'Super Cooper Shorts - Shorts',
+        }
+    }, {
+        # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
+        'url': 'https://www.youtube.com/@emergencyawesome/shorts',
+        'info_dict': {
+            'description': 'md5:592c080c06fef4de3c902c4a8eecd850',
+            'id': 'UCDiFRMQWpcp8_KD4vwIVicw',
+            'title': 'Emergency Awesome - Home',
+        },
+        'playlist_mincount': 5,
+    }, {
         # playlists, multipage
         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
         'playlist_mincount': 94,
@@ -2680,7 +2699,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _rich_grid_entries(self, contents):
         for content in contents:
-            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+            video_renderer = try_get(
+                content,
+                (lambda x: x['richItemRenderer']['content']['videoRenderer'],
+                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
+                dict)
             if video_renderer:
                 entry = self._video_entry(video_renderer)
                 if entry:
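The Shorts fix leans on `try_get` accepting a tuple of getter callables and returning the first hit of the expected type; a tiny sketch (the sample dict is illustrative):

from youtube_dl.utils import try_get

# Shorts live under reelItemRenderer instead of videoRenderer
content = {'richItemRenderer': {'content': {'reelItemRenderer': {'videoId': 'xyz'}}}}
renderer = try_get(
    content,
    (lambda x: x['richItemRenderer']['content']['videoRenderer'],
     lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
    dict)
assert renderer == {'videoId': 'xyz'}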

View File

@@ -187,19 +187,6 @@ class LocalNameSpace(ChainMap):
 
 class JSInterpreter(object):
     __named_object_counter = 0
 
-    _RE_FLAGS = {
-        # special knowledge: Python's re flags are bitmask values, current max 128
-        # invent new bitmask values well above that for literal parsing
-        # TODO: new pattern class to execute matches with these flags
-        'd': 1024,  # Generate indices for substring matches
-        'g': 2048,  # Global search
-        'i': re.I,  # Case-insensitive search
-        'm': re.M,  # Multi-line search
-        's': re.S,  # Allows . to match newline characters
-        'u': re.U,  # Treat a pattern as a sequence of unicode code points
-        'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
-    }
-
     _OBJ_NAME = '__youtube_dl_jsinterp_obj'
 
     OP_CHARS = None
@@ -214,12 +201,51 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
+                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
+    class JS_RegExp(object):
+        _RE_FLAGS = {
+            # special knowledge: Python's re flags are bitmask values, current max 128
+            # invent new bitmask values well above that for literal parsing
+            # TODO: new pattern class to execute matches with these flags
+            'd': 1024,  # Generate indices for substring matches
+            'g': 2048,  # Global search
+            'i': re.I,  # Case-insensitive search
+            'm': re.M,  # Multi-line search
+            's': re.S,  # Allows . to match newline characters
+            'u': re.U,  # Treat a pattern as a sequence of unicode code points
+            'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
+        }
+
+        def __init__(self, pattern_txt, flags=''):
+            if isinstance(flags, compat_str):
+                flags, _ = self.regex_flags(flags)
+            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
+            # First, avoid https://github.com/python/cpython/issues/74534
+            self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags)
+            for name in dir(self.__self):
+                # Only these? Obviously __class__, __init__.
+                # PyPy creates a __weakref__ attribute with value None
+                # that can't be setattr'd but also can't need to be copied.
+                if name in ('__class__', '__init__', '__weakref__'):
+                    continue
+                setattr(self, name, getattr(self.__self, name))
+
+        @classmethod
+        def regex_flags(cls, expr):
+            flags = 0
+            if not expr:
+                return flags, expr
+            for idx, ch in enumerate(expr):
+                if ch not in cls._RE_FLAGS:
+                    break
+                flags |= cls._RE_FLAGS[ch]
+            return flags, expr[idx + 1:]
+
     @classmethod
     def __op_chars(cls):
-        op_chars = set(';,')
+        op_chars = set(';,[')
         for op in cls._all_operators():
             for c in op[0]:
                 op_chars.add(c)
@@ -231,17 +257,6 @@ class JSInterpreter(object):
             namespace[name] = obj
         return name
 
-    @classmethod
-    def _regex_flags(cls, expr):
-        flags = 0
-        if not expr:
-            return flags, expr
-        for idx, ch in enumerate(expr):
-            if ch not in cls._RE_FLAGS:
-                break
-            flags |= cls._RE_FLAGS[ch]
-        return flags, expr[idx + 1:]
-
     @classmethod
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
@@ -268,7 +283,7 @@ class JSInterpreter(object):
             elif in_quote == '/' and char in '[]':
                 in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -301,7 +316,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod
@@ -328,7 +343,7 @@ class JSInterpreter(object):
         try:
             return opfunc(left_val, right_val)
         except Exception as e:
-            raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
+            raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
 
     def _index(self, obj, idx, allow_undefined=False):
         if idx == 'length':
@@ -338,7 +353,7 @@ class JSInterpreter(object):
         except Exception as e:
             if allow_undefined:
                 return JS_Undefined
-            raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
+            raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
         try:
@@ -352,6 +367,7 @@ class JSInterpreter(object):
         allow_recursion -= 1
 
         should_return = False
+        # fails on (eg) if (...) stmt1; else stmt2;
         sub_statements = list(self._separate(stmt, ';')) or ['']
         expr = stmt = sub_statements.pop().strip()
         for sub_stmt in sub_statements:
@@ -371,25 +387,30 @@ class JSInterpreter(object):
             if expr[0] in _QUOTES:
                 inner, outer = self._separate(expr, expr[0], 1)
                 if expr[0] == '/':
-                    flags, outer = self._regex_flags(outer)
-                    inner = re.compile(inner[1:], flags=flags)  # , strict=True))
+                    flags, outer = self.JS_RegExp.regex_flags(outer)
+                    inner = self.JS_RegExp(inner[1:], flags=flags)
                 else:
                     inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
                 if not outer:
                     return inner, should_return
                 expr = self._named_object(local_vars, inner) + outer
 
-        if expr.startswith('new '):
-            obj = expr[4:]
-            if obj.startswith('Date('):
-                left, right = self._separate_at_paren(obj[4:])
-                expr = unified_timestamp(
-                    self.interpret_expression(left, local_vars, allow_recursion), False)
+        new_kw, _, obj = expr.partition('new ')
+        if not new_kw:
+            for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
+                                  ('RegExp', self.JS_RegExp),
+                                  ('Error', self.Exception)):
+                if not obj.startswith(klass + '('):
+                    continue
+                left, right = self._separate_at_paren(obj[len(klass):])
+                argvals = self.interpret_iter(left, local_vars, allow_recursion)
+                expr = konstr(*argvals)
                 if not expr:
-                    raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
-                expr = self._dump(int(expr * 1000), local_vars) + right
+                    raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
+                expr = self._dump(expr, local_vars) + right
+                break
             else:
-                raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr)
+                raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
 
         if expr.startswith('void '):
             left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
@@ -428,11 +449,47 @@ class JSInterpreter(object):
 
         m = re.match(r'''(?x)
             (?P<try>try)\s*\{|
+            (?P<if>if)\s*\(|
             (?P<switch>switch)\s*\(|
-            (?P<for>for)\s*\(
+            (?P<for>for)\s*\(|
+            (?P<while>while)\s*\(
             ''', expr)
         md = m.groupdict() if m else {}
 
-        if md.get('try'):
+        if md.get('if'):
+            cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
+            if expr.startswith('{'):
+                if_expr, expr = self._separate_at_paren(expr)
+            else:
+                # may lose ... else ... because of ll.368-374
+                if_expr, expr = self._separate_at_paren(expr, delim=';')
+            else_expr = None
+            m = re.match(r'else\s*(?P<block>\{)?', expr)
+            if m:
+                if m.group('block'):
+                    else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                else:
+                    # handle subset ... else if (...) {...} else ...
+                    # TODO: make interpret_statement do this properly, if possible
+                    exprs = list(self._separate(expr[m.end():], delim='}', max_split=2))
+                    if len(exprs) > 1:
+                        if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]):
+                            else_expr = exprs[0] + '}' + exprs[1]
+                            expr = (exprs[2] + '}') if len(exprs) == 3 else None
+                        else:
+                            else_expr = exprs[0]
+                            exprs.append('')
+                            expr = '}'.join(exprs[1:])
+                    else:
+                        else_expr = exprs[0]
+                        expr = None
+                    else_expr = else_expr.lstrip() + '}'
+            cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
+            ret, should_abort = self.interpret_statement(
+                if_expr if cndn else else_expr, local_vars, allow_recursion)
+            if should_abort:
+                return ret, True
+
+        elif md.get('try'):
             try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
             err = None
             try:
@@ -469,8 +526,8 @@ class JSInterpreter(object):
             if err:
                 raise err
 
-        elif md.get('for'):
-            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
+        elif md.get('for') or md.get('while'):
+            init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:])
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining)
             else:
@@ -481,11 +538,12 @@ class JSInterpreter(object):
                 body = 'switch(%s){%s}' % (switch_val, body)
             else:
                 body, expr = remaining, ''
-            start, cndn, increment = self._separate(constructor, ';')
-            self.interpret_expression(start, local_vars, allow_recursion)
-            while True:
-                if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
-                    break
+            if md.get('for'):
+                start, cndn, increment = self._separate(init_or_cond, ';')
+                self.interpret_expression(start, local_vars, allow_recursion)
+            else:
+                cndn, increment = init_or_cond, None
+            while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                 try:
                     ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                     if should_abort:
@@ -494,7 +552,8 @@ class JSInterpreter(object):
                         break
                 except JS_Continue:
                     pass
-                self.interpret_expression(increment, local_vars, allow_recursion)
+                if increment:
+                    self.interpret_expression(increment, local_vars, allow_recursion)
 
         elif md.get('switch'):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
@@ -749,6 +808,10 @@ class JSInterpreter(object):
                 if idx >= len(obj):
                     return None
                 return ord(obj[idx])
+            elif member == 'replace':
+                assertion(isinstance(obj, compat_str), 'must be applied on a string')
+                assertion(len(argvals) == 2, 'takes exactly two arguments')
+                return re.sub(argvals[0], argvals[1], obj)
 
             idx = int(member) if isinstance(obj, list) else member
             return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -780,6 +843,10 @@ class JSInterpreter(object):
             raise self.Exception('Cannot return from an expression', expr)
         return ret
 
+    def interpret_iter(self, list_txt, local_vars, allow_recursion):
+        for v in self._separate(list_txt):
+            yield self.interpret_expression(v, local_vars, allow_recursion)
+
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
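With the JS_RegExp class above, JS regex literals come back as pattern-like objects exposing the underlying Python pattern's attributes; a closing sketch in the spirit of the updated tests:

import re

from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('function x() { return /ab+c/i; }')
pat = jsi.call_function('x')
assert pat.pattern == 'ab+c'
assert pat.flags & re.I  # the JS 'i' flag maps onto Python's re.I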