Compare commits

...

5 Commits

Author SHA1 Message Date
ex0rb
02224de10f
Merge 47e5e72ec0 into c5098961b0 2024-08-21 22:33:12 -04:00
dirkf
c5098961b0 [Youtube] Rework n function extraction pattern
Now also succeeds with player b12cc44b
2024-08-06 20:59:09 +01:00
dirkf
dbc08fba83 [jsinterp] Improve slice implementation for player b12cc44b
Partly taken from yt-dlp/yt-dlp#10664, thx seproDev
        Fixes #32896
2024-08-06 20:51:38 +01:00
Aiur Adept
71223bff39
[Youtube] Fix nsig extraction for player 20dfca59 (#32891)
* dirkf's patch for nsig extraction
* add generic search per  yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-08-01 19:18:34 +01:00
ex0rb
47e5e72ec0 [SendFox] Add new extractor 2021-06-02 05:25:45 +02:00
6 changed files with 123 additions and 13 deletions

View File

@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, [''], args=['', '-']) self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', '']) self._test(jsi, [], args=['', ''])
def test_slice(self):
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
self._test('function f(){return "012345678".slice()}', '012345678')
self._test('function f(){return "012345678".slice(0)}', '012345678')
self._test('function f(){return "012345678".slice(5)}', '5678')
self._test('function f(){return "012345678".slice(99)}', '')
self._test('function f(){return "012345678".slice(-2)}', '78')
self._test('function f(){return "012345678".slice(-99)}', '012345678')
self._test('function f(){return "012345678".slice(0, 0)}', '')
self._test('function f(){return "012345678".slice(1, 0)}', '')
self._test('function f(){return "012345678".slice(0, 1)}', '0')
self._test('function f(){return "012345678".slice(3, 6)}', '345')
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -174,6 +174,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA', '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
), ),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
] ]

View File

@ -1106,6 +1106,7 @@ from .scte import (
) )
from .seeker import SeekerIE from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendfox import SendFoxIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servus import ServusIE from .servus import ServusIE
from .sevenplus import SevenPlusIE from .sevenplus import SevenPlusIE

View File

@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
from youtube_dl.utils import decode_packed_codes
from .common import InfoExtractor
class SendFoxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sendfox\.org/(?P<id>[0-9a-z]+)'
_TEST = {
'url': 'https://sendfox.org/bsw8k32fezg6/big_buck_bunny_1080p_surround.avi',
'md5': 'a1a3969c928f8c3850d34abc18c33cc6',
'info_dict': {
'id': 'bsw8k32fezg6',
'ext': 'mp4',
'title': 'big buck bunny 1080p surround',
'thumbnail': 'https://img.sendfox.org/bsw8k32fezg6_t.jpg'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
packed_code = self._search_regex(
r'(eval\(function\(p,a,c,k,e,d\){.+)', webpage, 'obfuscated code')
video_src = self._search_regex(
r'src:"(.+?)"', decode_packed_codes(packed_code), 'src')
return {
'id': video_id,
'title': self._html_search_regex(r'<h6 class="card-title">(.+?)</h6>', webpage, 'title'),
'formats': self._extract_m3u8_formats(video_src, video_id, ext='mp4'),
'thumbnail': 'https://img.sendfox.org/' + video_id + '_t.jpg',
'ext': 'mp4'
}

View File

@ -1659,17 +1659,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# old: .get("n"))&&(b=nfunc[idx](b) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
# older: .get("n"))&&(b=nfunc(b) # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
(?:\(\s*(?P<b>[a-z])\s*=\s*(?: \((?:[\w$()\s]+,)*?\s* # (
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| (?P<b>[a-z])\s*=\s* # b=
"n+"\[\s*\+?s*[\w$.]+\s*] (?:
)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? (?: # expect ,c=a.get(b) (etc)
\.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) "n+"\[\s*\+?s*[\w$.]+\s*]
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) )\s*(?:,[\w$()\s]+(?=,))*|
(?P<old>[\w$]+) # a (old[er])
)\s*
(?(old)
# b.get("n")
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name:
self.report_warning('Falling back to generic n function search')
return self._search_regex(
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_
''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name

View File

@ -925,9 +925,16 @@ class JSInterpreter(object):
obj.reverse() obj.reverse()
return obj return obj
elif member == 'slice': elif member == 'slice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
assertion(len(argvals) == 1, 'takes exactly one argument') # From [1]:
return obj[argvals[0]:] # .slice() - like [:]
# .slice(n) - like [n:] (not [slice(n)]
# .slice(m, n) - like [m:n] or [slice(m, n)]
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
if len(argvals) < 2:
argvals += (None,)
return obj[slice(*argvals)]
elif member == 'splice': elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')