Merge 48d0994b5b into c5098961b0

[Youtube] Rework n function extraction pattern
Now also succeeds with player b12cc44b
2025-01-10 21:40:11 +09:00 · 2024-08-21 22:32:57 -04:00 · 2024-08-06 20:59:09 +01:00 · 2024-08-06 20:51:38 +01:00 · 2024-08-01 19:18:34 +01:00 · 2022-02-15 12:22:58 +01:00
10 changed files with 192 additions and 321 deletions
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
            self._test(jsi, [''], args=['', '-'])
            self._test(jsi, [], args=['', ''])
    def test_slice(self):
        """Run JS ``slice()`` snippets on an array and on a string.

        Covers calls with zero, one and two arguments, including negative
        and out-of-range indices.
        """
        # Each entry is (JS source, second argument handed to self._test);
        # the entries are checked in the order listed.
        cases = (
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', []),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', []),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', []),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7]),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', []),
            ('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7]),
            ('function f(){return "012345678".slice()}', '012345678'),
            ('function f(){return "012345678".slice(0)}', '012345678'),
            ('function f(){return "012345678".slice(5)}', '5678'),
            ('function f(){return "012345678".slice(99)}', ''),
            ('function f(){return "012345678".slice(-2)}', '78'),
            ('function f(){return "012345678".slice(-99)}', '012345678'),
            ('function f(){return "012345678".slice(0, 0)}', ''),
            ('function f(){return "012345678".slice(1, 0)}', ''),
            ('function f(){return "012345678".slice(0, 1)}', '0'),
            ('function f(){return "012345678".slice(3, 6)}', '345'),
            ('function f(){return "012345678".slice(1, -1)}', '1234567'),
            ('function f(){return "012345678".slice(-1, 1)}', ''),
            ('function f(){return "012345678".slice(-3, -1)}', '67'),
        )
        for js_code, expected in cases:
            self._test(js_code, expected)
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -174,6 +174,14 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
        '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
    ),
    (
        'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
        '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
    ),
    (
        'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
        'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
    ),
 ]
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -371,7 +371,6 @@ from .esri import EsriVideoIE
 from .europa import EuropaIE
 from .expotv import ExpoTVIE
 from .expressen import ExpressenIE
 from .extremetube import ExtremeTubeIE
 from .eyedotv import EyedoTVIE
 from .facebook import (
    FacebookIE,
@ -562,7 +561,6 @@ from .kaltura import KalturaIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
 from .khanacademy import (
    KhanAcademyIE,
@ -713,10 +711,6 @@ from .mlb import (
 )
 from .mnet import MnetIE
 from .moevideo import MoeVideoIE
 from .mofosex import (
    MofosexIE,
    MofosexEmbedIE,
 )
 from .mojvideo import MojvideoIE
 from .morningstar import MorningstarIE
 from .motherless import (
--- a/youtube_dl/extractor/extremetube.py
+++ b/youtube_dl/extractor/extremetube.py
@ -1,50 +0,0 @@
 from __future__ import unicode_literals
 from ..utils import str_to_int
 from .keezmovies import KeezMoviesIE
 class ExtremeTubeIE(KeezMoviesIE):
    # extremetube.com extractor: reuses the _extract_info() scraping inherited
    # from KeezMoviesIE and adds uploader and view count to its result.
    _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
    _TESTS = [
        {
            'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
            'md5': '92feaafa4b58e82f261e5419f39c60cb',
            'info_dict': {
                'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
                'ext': 'mp4',
                'title': 'Music Video 14 british euro brit european cumshots swallow',
                'uploader': 'anonim',
                'view_count': int,
                'age_limit': 18,
            }
        },
        {
            'url': 'http://www.extremetube.com/gay/video/abcde-1234',
            'only_matching': True,
        },
        {
            'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
            'only_matching': True,
        },
        {
            'url': 'http://www.extremetube.com/video/652431',
            'only_matching': True,
        },
    ]
    def _real_extract(self, url):
        """Return the info dict for `url`, completed with uploader and view count."""
        page, result = self._extract_info(url)
        # Fall back to the title attribute of the <h1> when no title came back.
        if not result['title']:
            result['title'] = self._search_regex(
                r'<h1[^>]+title="([^"]+)"[^>]*>', page, 'title')
        # Both lookups below are requested as non-fatal.
        result['uploader'] = self._html_search_regex(
            r'Uploaded by:\s*</[^>]+>\s*<a[^>]+>(.+?)</a>',
            page, 'uploader', fatal=False)
        raw_views = self._search_regex(
            r'Views:\s*</[^>]+>\s*<[^>]+>([\d,\.]+)</',
            page, 'view count', fatal=False)
        result['view_count'] = str_to_int(raw_views)
        return result
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -66,7 +66,6 @@ from .tnaflix import TNAFlixNetworkEmbedIE
 from .drtuber import DrTuberIE
 from .redtube import RedTubeIE
 from .tube8 import Tube8IE
 from .mofosex import MofosexEmbedIE
 from .spankwire import SpankwireIE
 from .youporn import YouPornIE
 from .vimeo import (
@ -3044,11 +3043,6 @@ class GenericIE(InfoExtractor):
        if tube8_urls:
            return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
        # Look for embedded Mofosex player
        mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
        if mofosex_urls:
            return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
        # Look for embedded Spankwire player
        spankwire_urls = SpankwireIE._extract_urls(webpage)
        if spankwire_urls:
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@ -1,133 +0,0 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..aes import aes_decrypt_text
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    str_to_int,
    strip_or_none,
    url_or_none,
 )
 class KeezMoviesIE(InfoExtractor):
    # Extractor for keezmovies.com video pages. The page scraping lives in
    # _extract_info() so that subclasses can call it and add their own fields.
    _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.keezmovies.com/video/arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money-18070681',
        'md5': '2ac69cdb882055f71d82db4311732a1a',
        'info_dict': {
            'id': '18070681',
            'display_id': 'arab-wife-want-it-so-bad-i-see-she-thirsty-and-has-tiny-money',
            'ext': 'mp4',
            'title': 'Arab wife want it so bad I see she thirsty and has tiny money.',
            'thumbnail': None,
            'view_count': int,
            'age_limit': 18,
        }
    }, {
        'url': 'http://www.keezmovies.com/video/18070681',
        'only_matching': True,
    }]
    def _extract_info(self, url, fatal=True):
        """Download the page behind `url` and scrape formats and basic metadata.

        `url` is matched against this class's _VALID_URL; the 'display_id'
        group is optional and falls back to 'id'.

        With fatal=False an ExtractorError raised while sorting the formats is
        swallowed (presumably that is what happens for an empty format list --
        verify against _sort_formats), so the caller must check 'formats'.

        Returns a (webpage, info_dict) tuple.
        Raises ExtractorError (expected=True) when no format was found and the
        page states that the video is no longer available.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Subclasses may use a _VALID_URL without a display_id group
        display_id = (mobj.group('display_id')
                      if 'display_id' in mobj.groupdict()
                      else None) or mobj.group('id')
        webpage = self._download_webpage(
            url, display_id, headers={'Cookie': 'age_verified=1'})
        formats = []
        format_urls = set()
        title = None
        thumbnail = None
        duration = None
        encrypted = False
        def extract_format(format_url, height=None):
            # Validate one candidate URL and append it to `formats` once.
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//')):
                return
            if format_url in format_urls:
                return
            format_urls.add(format_url)
            # Bitrate and height are read from path segments like /1200k/ or _720p_
            tbr = int_or_none(self._search_regex(
                r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
            if not height:
                height = int_or_none(self._search_regex(
                    r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
            if encrypted:
                # Decrypt the URL that is being processed. The enclosing
                # function's `video_url` must not be used here: it is still
                # unbound while the quality_* entries are handled (NameError)
                # and, once bound, may name a different URL.
                format_url = aes_decrypt_text(
                    format_url, title, 32).decode('utf-8')
            formats.append({
                'url': format_url,
                'format_id': '%dp' % height if height else None,
                'height': height,
                'tbr': tbr,
            })
        # A missing flashvars object yields '{}', i.e. an empty (falsy) dict
        flashvars = self._parse_json(
            self._search_regex(
                r'flashvars\s*=\s*({.+?});', webpage,
                'flashvars', default='{}'),
            display_id, fatal=False)
        if flashvars:
            title = flashvars.get('video_title')
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            encrypted = flashvars.get('encrypted') is True
            # One format per quality_<height>p key
            for key, value in flashvars.items():
                mobj = re.search(r'quality_(\d+)[pP]', key)
                if mobj:
                    extract_format(value, int(mobj.group(1)))
            video_url = flashvars.get('video_url')
            if video_url and determine_ext(video_url, None):
                extract_format(video_url)
        # Additional source: a literal `flashvars.video_url = "..."` assignment
        video_url = self._html_search_regex(
            r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
            webpage, 'video url', default=None, group='url')
        if video_url:
            extract_format(compat_urllib_parse_unquote(video_url))
        if not formats:
            if 'title="This video is no longer available"' in webpage:
                raise ExtractorError(
                    'Video %s is no longer available' % video_id, expected=True)
        try:
            self._sort_formats(formats)
        except ExtractorError:
            # Non-fatal callers decide themselves what to do without formats
            if fatal:
                raise
        if not title:
            title = self._html_search_regex(
                r'<h1[^>]*>([^<]+)', webpage, 'title')
        return webpage, {
            'id': video_id,
            'display_id': display_id,
            'title': strip_or_none(title),
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': 18,
            'formats': formats,
        }
    def _real_extract(self, url):
        """Return the info dict for `url`, with the view count added.

        When no formats were found, the URL is handed over to the 'Generic'
        extractor instead.
        """
        webpage, info = self._extract_info(url, fatal=False)
        if not info['formats']:
            return self.url_result(url, 'Generic')
        info['view_count'] = str_to_int(self._search_regex(
            r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
        return info
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@ -1,79 +0,0 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    str_to_int,
    unified_strdate,
 )
 from .keezmovies import KeezMoviesIE
 class MofosexIE(KeezMoviesIE):
    # Extractor for mofosex.com video pages: takes the base info from the
    # inherited _extract_info() and adds counters, upload date and thumbnail.
    _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
    _TESTS = [{
        'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
        'md5': '558fcdafbb63a87c019218d6e49daf8a',
        'info_dict': {
            'id': '318131',
            'display_id': 'amateur-teen-playing-and-masturbating-318131',
            'ext': 'mp4',
            'title': 'amateur teen playing and masturbating',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20121114',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
        }
    }, {
        # This video is no longer available
        'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Return the info dict for `url`.

        All page lookups made here are requested as non-fatal; the thumbnail
        from _extract_info() is replaced by the one from
        _og_search_thumbnail().
        """
        webpage, info = self._extract_info(url)
        view_count = str_to_int(self._search_regex(
            r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
        like_count = int_or_none(self._search_regex(
            r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
            'like count', fatal=False))
        # Labelled 'dislike count' so that a failed lookup is reported under
        # the right field name (it used to reuse the 'like count' label)
        dislike_count = int_or_none(self._search_regex(
            r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
            'dislike count', fatal=False))
        upload_date = unified_strdate(self._html_search_regex(
            r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
        info.update({
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'upload_date': upload_date,
            'thumbnail': self._og_search_thumbnail(webpage),
        })
        return info
 class MofosexEmbedIE(InfoExtractor):
    # Handles mofosex.com embed URLs by passing the video id on to MofosexIE.
    _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
            'only_matching': True,
        },
    ]
    @staticmethod
    def _extract_urls(webpage):
        """Return the src of every mofosex.com embed <iframe> found in `webpage`."""
        embed_re = r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)'
        return [found.group(1) for found in re.finditer(embed_re, webpage)]
    def _real_extract(self, url):
        """Build the regular video page URL from the embed id and delegate to MofosexIE."""
        embed_id = self._match_id(url)
        target = 'http://www.mofosex.com/videos/{0}/{0}.html'.format(embed_id)
        return self.url_result(
            target, ie=MofosexIE.ie_key(), video_id=embed_id)
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@ -3,32 +3,33 @@ from __future__ import unicode_literals
 import re
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    str_to_int,
    url_or_none,
 )
-from .keezmovies import KeezMoviesIE
+from .common import InfoExtractor
 from ..aes import aes_decrypt_text
 from ..compat import compat_urllib_parse_unquote
-class Tube8IE(KeezMoviesIE):
+class Tube8IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
-        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+        'url': 'https://www.tube8.com/erotic/playtime/81807731/',
-        'md5': '65e20c48e6abff62ed0c3965fff13a39',
+        'md5': 'fefa69ff76debaa63aa59374bfc51c95',
        'info_dict': {
-            'id': '229795',
+            'id': '81807731',
-            'display_id': 'kasia-music-video',
+            'display_id': 'playtime',
            'ext': 'mp4',
-            'description': 'hot teen Kasia grinding',
+            'uploader': 'kikkijay-ph',
-            'uploader': 'unknown',
+            'title': 'Playtime',
            'title': 'Kasia music video',
            'age_limit': 18,
-            'duration': 230,
+            'duration': 988,
-            'categories': ['Teen'],
+            'categories': ['Erotic'],
-            'tags': ['dancing'],
+            'tags': ['adult toys', 'big boobs', 'butt', 'masturbate'],
        },
    }, {
        'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
        'only_matching': True,
    }]
    @staticmethod
@ -38,49 +39,121 @@ class Tube8IE(KeezMoviesIE):
            webpage)
    def _real_extract(self, url):
-        webpage, info = self._extract_info(url)
+        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
-        if not info['title']:
+        webpage = self._download_webpage(
-            info['title'] = self._html_search_regex(
+            url, display_id, headers={'Cookie': 'age_verified=1'})
                r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
-        description = self._html_search_regex(
+        formats = []
-            r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
+        format_urls = set()
-        uploader = self._html_search_regex(
+
-            r'<span class="username">\s*(.+?)\s*<',
+        title = None
-            webpage, 'uploader', fatal=False)
+        thumbnail = None
        duration = None
        encrypted = False
        def extract_format(format_url, height=None):
            format_url = url_or_none(format_url)
            if not format_url or not format_url.startswith(('http', '//')):
                return
            if format_url in format_urls:
                return
            format_urls.add(format_url)
            tbr = int_or_none(self._search_regex(
                r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
            if not height:
                height = int_or_none(self._search_regex(
                    r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
            if encrypted:
                format_url = aes_decrypt_text(
                    video_url, title, 32).decode('utf-8')
            formats.append({
                'url': format_url,
                'format_id': '%dp' % height if height else None,
                'height': height,
                'tbr': tbr,
            })
        flashvars = self._parse_json(
            self._search_regex(
                r'flashvars\s*=\s*({.+?});', webpage,
                'flashvars', default='{}'),
            display_id, fatal=False)
        if flashvars:
            title = flashvars.get('video_title')
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            encrypted = flashvars.get('encrypted') is True
            uploader = flashvars.get('sponsor')
            for key, value in flashvars.items():
                mobj = re.search(r'quality_(\d+)[pP]', key)
                if mobj:
                    extract_format(value, int(mobj.group(1)))
            video_url = flashvars.get('video_url')
            for key, value in flashvars.items():
                mobj = re.search(r'quality_(\d+)[pP]', key)
                if mobj:
                    extract_format(value, int(mobj.group(1)))
            video_url = flashvars.get('video_url')
            if video_url and determine_ext(video_url, None):
                extract_format(video_url)
        video_url = self._html_search_regex(
            r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
            webpage, 'video url', default=None, group='url')
        if video_url:
            extract_format(compat_urllib_parse_unquote(video_url))
        if not formats:
            if 'title="This video is no longer available"' in webpage:
                raise ExtractorError(
                    'Video %s is no longer available' % video_id, expected=True)
        self._sort_formats(formats)
        if not title:
            title = self._html_search_regex([
                r'<h1[^>]*>([^<]+)'
                r'videoTitle\s*=\s*"([^"]+)'], webpage, 'title', default=display_id.capitalize())
        like_count = int_or_none(self._search_regex(
-            r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
+            r'rupVar\s*=\s*(\d+);', webpage, 'like count', fatal=False))
        dislike_count = int_or_none(self._search_regex(
-            r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
+            r'rdownVar\s*=\s*(\d+);', webpage, 'dislike count', fatal=False))
        view_count = str_to_int(self._search_regex(
            r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
            webpage, 'view count', fatal=False))
        comment_count = str_to_int(self._search_regex(
-            r'<span id="allCommentsCount">(\d+)</span>',
+            r'<span id="allCommentsCount">\((\d+)\)</span>',
            webpage, 'comment count', fatal=False))
        category = self._search_regex(
-            r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
+            r'videoCategoryByName\s*=\s*"([^"]+)";',
            webpage, 'category', fatal=False)
        categories = [category] if category else None
        tags_str = self._search_regex(
-            r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
+            r"(?s)<li\s+class\s*=\s*'video-tag'\s+data-esp-node\s*=\s*'tag'>(.*?)</div>",
            webpage, 'tags', fatal=False)
        tags = [t for t in re.findall(
            r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
-        info.update({
+        return {
            'description': description,
            'uploader': uploader,
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': 18,
            'formats': formats,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'categories': categories,
            'tags': tags,
-        })
+        }
        return info
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1659,17 +1659,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    def _extract_n_function_name(self, jscode):
        func_name, idx = self._search_regex(
            # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
-            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
+            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
-            # old: .get("n"))&&(b=nfunc[idx](b)
+            # or:  (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
-            # older: .get("n"))&&(b=nfunc(b)
+            # or:  (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
            # old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
            # older: (b=a.get("n"))&&(b=nfunc(b)
            r'''(?x)
-                (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
+                \((?:[\w$()\s]+,)*?\s*      # (
                (?P<b>[a-z])\s*=\s*         # b=
                (?:
                    (?:                     # expect ,c=a.get(b) (etc)
                        String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
                        "n+"\[\s*\+?s*[\w$.]+\s*]
-                )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+                    )\s*(?:,[\w$()\s]+(?=,))*|
-                \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+                       (?P<old>[\w$]+)      # a (old[er])
                   )\s*
                   (?(old)
                                            # b.get("n")
                       (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
                       (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
                       |                    # ,c=a.get(b)
                       ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
                       (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
                       (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
                   )
                                            # interstitial junk
                   \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
               (?(c)(?P=c)|(?P=b))\s*=\s*   # [c|b]=
                                            # nfunc|nfunc[idx]
                   (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
-            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
            default=(None, None))
        # thx bashonly: yt-dlp/yt-dlp/pull/10611
        if not func_name:
            self.report_warning('Falling back to generic n function search')
            return self._search_regex(
                r'''(?xs)
                    (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
                    (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
                    \s*\{(?:(?!};).)+?["']enhanced_except_
                ''', jscode, 'Initial JS player n function name', group='name')
        if not idx:
            return func_name
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -925,9 +925,16 @@ class JSInterpreter(object):
                    obj.reverse()
                    return obj
                elif member == 'slice':
-                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
-                    assertion(len(argvals) == 1, 'takes exactly one argument')
+                    # From [1]:
-                    return obj[argvals[0]:]
+                    # .slice() - like [:]
                    # .slice(n) - like [n:] (not [slice(n)])
                    # .slice(m, n) - like [m:n] or [slice(m, n)]
                    # [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
                    assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
                    if len(argvals) < 2:
                        argvals += (None,)
                    return obj[slice(*argvals)]
                elif member == 'splice':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(argvals, 'takes one or more arguments')
Author	SHA1	Message	Date
marieell	f46d98349d	Merge `48d0994b5b` into `c5098961b0`	2024-08-21 22:32:57 -04:00
dirkf	c5098961b0	[Youtube] Rework n function extraction pattern Now also succeeds with player b12cc44b	2024-08-06 20:59:09 +01:00
dirkf	dbc08fba83	[jsinterp] Improve slice implementation for player b12cc44b Partly taken from yt-dlp/yt-dlp#10664, thx seproDev Fixes #32896	2024-08-06 20:51:38 +01:00
Aiur Adept	71223bff39	[Youtube] Fix nsig extraction for player 20dfca59 (#32891 ) * dirkf's patch for nsig extraction * add generic search per yt-dlp/yt-dlp/pull/10611 - thx bashonly --------- Co-authored-by: dirkf <fieldhouse@gmx.net>	2024-08-01 19:18:34 +01:00
marieell	48d0994b5b	[tube8] Fix extractor a bit, replace broken test	2022-02-15 12:22:58 +01:00
marieell	7561727db6	Remove extractors for dead pages They all redirect to youporn.com.	2022-02-15 09:41:32 +01:00