Compare commits

...

14 Commits

Author SHA1 Message Date
Miha Frangež
353ef0e8fb
Merge d81793ea56 into c5098961b0 2024-08-21 22:33:22 -04:00
dirkf
c5098961b0 [Youtube] Rework n function extraction pattern
Now also succeeds with player b12cc44b
2024-08-06 20:59:09 +01:00
dirkf
dbc08fba83 [jsinterp] Improve slice implementation for player b12cc44b
Partly taken from yt-dlp/yt-dlp#10664, thx seproDev
        Fixes #32896
2024-08-06 20:51:38 +01:00
Aiur Adept
71223bff39
[Youtube] Fix nsig extraction for player 20dfca59 (#32891)
* dirkf's patch for nsig extraction
* add generic search per  yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-08-01 19:18:34 +01:00
Miha Frangež
d81793ea56 [RTV SLO 4D] Added support for subtitles 2021-03-25 11:49:56 +01:00
Miha Frangež
a302001725 [RTV SLO 4D] Extract both HTTPS and HTTP HLS URLs 2021-03-25 11:48:40 +01:00
Miha Frangež
944f674c6d [RTV SLO 4D] Reordered info extraction 2021-02-15 16:38:15 +01:00
Miha Frangež
8fb3a99c34 [RTV SLO 4D] Fixed test that failed due to different sorting 2021-02-15 15:33:21 +01:00
Miha Frangež
6435b66967 [RTV SLO 4D] Support for multiple audio formats 2021-02-15 15:31:03 +01:00
Miha Frangež
2e75b8092f [RTV SLO 4D] Cleanup, switched to HTTPS 2021-02-14 16:48:49 +01:00
Miha Frangež
5fe9de5f7a [RTV SLO 4D] Added support for audio, more tests 2021-02-11 13:52:52 +01:00
Miha Frangež
2cf78de692 [RTV SLO 4D] Improved URL regex 2021-02-11 13:51:54 +01:00
Miha Frangež
8e6eca6432 [RTV SLO 4D] Removed unnecessary requests, improved formatting 2021-02-11 12:49:53 +01:00
Miha Frangež
910ef313e1 [RTV SLO 4D] Add extractor 2021-02-10 19:43:17 +01:00
6 changed files with 184 additions and 13 deletions

View File

@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, [''], args=['', '-']) self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', '']) self._test(jsi, [], args=['', ''])
def test_slice(self):
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
self._test('function f(){return "012345678".slice()}', '012345678')
self._test('function f(){return "012345678".slice(0)}', '012345678')
self._test('function f(){return "012345678".slice(5)}', '5678')
self._test('function f(){return "012345678".slice(99)}', '')
self._test('function f(){return "012345678".slice(-2)}', '78')
self._test('function f(){return "012345678".slice(-99)}', '012345678')
self._test('function f(){return "012345678".slice(0, 0)}', '')
self._test('function f(){return "012345678".slice(1, 0)}', '')
self._test('function f(){return "012345678".slice(0, 1)}', '0')
self._test('function f(){return "012345678".slice(3, 6)}', '345')
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -174,6 +174,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA', '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
), ),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
] ]

View File

@ -1068,6 +1068,7 @@ from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
from .rtvnh import RTVNHIE from .rtvnh import RTVNHIE
from .rtvs import RTVSIE from .rtvs import RTVSIE
from .rtvslo import RTVSLO4DIE
from .ruhd import RUHDIE from .ruhd import RUHDIE
from .rumble import RumbleEmbedIE from .rumble import RumbleEmbedIE
from .rutube import ( from .rutube import (

View File

@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp
)
class RTVSLO4DIE(InfoExtractor):
    """Extractor for recordings served by the RTV SLO 4D archive.

    Matches archive and embed pages on 4d.rtvslo.si as well as the
    ``4d/arhiv`` and ``mmr/prispevek`` pages on www.rtvslo.si; the numeric
    recording id is taken from the end of the URL path.
    """

    _VALID_URL = r'https?://(?:4d\.rtvslo\.si/(?:arhiv/[^/]+|embed)|www\.rtvslo\.si/(?:4d/arhiv|mmr/prispevek))/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://4d.rtvslo.si/arhiv/seje-odbora-za-kmetijstvo-gozdarstvo-in-prehrano/174595438',
        'md5': '37ab1181292a08e0d6b7952545e6ce8b',
        'info_dict': {
            'id': '174595438',
            'ext': 'mp4',
            'title': 'Krajčič o tatvini sendviča',
            'thumbnail': r're:https://img.rtvslo.si/.+\.jpg',
            'timestamp': 1549999614,
            'upload_date': '20190212',
            'duration': 85
        },
    }, {
        'url': 'https://4d.rtvslo.si/arhiv/punto-e-a-capo/174752966',
        'md5': 'a1ce903ee0a4051e417c9357e3d51c71',
        'info_dict': {
            'id': '174752966',
            'ext': 'mp3',
            'title': 'Dante divulgatore della scienza, con Gian Italo Bischi. E un ricordo di Federico Roncoroni',
            'thumbnail': r're:https://img.rtvslo.si/.+\.jpg',
            'timestamp': 1613033635,
            'upload_date': '20210211',
            'duration': 1740
        },
    }, {
        'url': 'https://4d.rtvslo.si/arhiv/punto-e-a-capo/174752966',
        'only_matching': True,
    }, {
        'url': 'https://4d.rtvslo.si/embed/174595438',
        'only_matching': True,
    }, {
        'url': 'https://www.rtvslo.si/4d/arhiv/174752597?s=tv_ita',
        'only_matching': True,
    }, {
        'url': 'https://www.rtvslo.si/mmr/prispevek/174752987',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the recording metadata and build the info dict for ``url``.

        Video recordings get their formats from the two HLS manifests listed
        under ``addaptiveMedia``; audio recordings get one format per entry
        of ``mediaFiles``.  Returns a dict with id, title, formats and the
        optional description, thumbnail, timestamp, duration and subtitles.
        """
        media_id = self._match_id(url)

        media_info = self._download_json(
            'https://api.rtvslo.si/ava/getRecording/' + media_id, media_id,
            query={'client_id': '19cc0556a5ee31d0d52a0e30b0696b26'})['response']

        # Start from an empty list so an unrecognised mediaType does not
        # leave `formats` unbound further down.
        formats = []
        media_type = media_info['mediaType']
        if media_type == 'video':
            # Both the HTTPS ('hls_sec') and the HTTP ('hls') manifests are used.
            for proto in ('hls_sec', 'hls',):
                formats.extend(self._extract_m3u8_formats(
                    media_info['addaptiveMedia'][proto], media_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls'))
        elif media_type == 'audio':
            formats = [{
                'format_id': media_file['mediaType'],
                'url': media_file['streamers']['http'] + '/' + media_file['filename'],
                'ext': determine_ext(media_file['filename']),
                'tbr': int_or_none(media_file.get('bitrate')),
                'filesize': int_or_none(media_file.get('filesize')),
                'vcodec': 'none'
            } for media_file in media_info['mediaFiles']]

        self._sort_formats(formats)

        return {
            'id': media_id,
            'title': media_info['title'],
            'formats': formats,
            # Plain optional lookup: None when the key is absent.
            'description': media_info.get('description'),
            'thumbnail': media_info.get('thumbnail_sec'),
            'timestamp': unified_timestamp(media_info.get('broadcastDate')),
            'duration': media_info.get('duration'),
            'subtitles': self.extract_subtitles(media_info)
        }

    def _get_subtitles(self, media_info):
        """Map each subtitle language in ``media_info`` to its VTT tracks.

        A missing or null ``subtitles`` entry yields an empty dict; several
        tracks sharing one language are all kept, in their original order.
        """
        subs = {}
        for sub in media_info.get('subtitles') or []:
            subs.setdefault(sub['language'], []).append({
                'ext': 'vtt',
                'url': sub['file']
            })
        return subs

View File

@ -1659,17 +1659,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# old: .get("n"))&&(b=nfunc[idx](b) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
# older: .get("n"))&&(b=nfunc(b) # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
(?:\(\s*(?P<b>[a-z])\s*=\s*(?: \((?:[\w$()\s]+,)*?\s* # (
(?P<b>[a-z])\s*=\s* # b=
(?:
(?: # expect ,c=a.get(b) (etc)
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
"n+"\[\s*\+?s*[\w$.]+\s*] "n+"\[\s*\+?s*[\w$.]+\s*]
)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? )\s*(?:,[\w$()\s]+(?=,))*|
\.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* (?P<old>[\w$]+) # a (old[er])
)\s*
(?(old)
# b.get("n")
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name:
self.report_warning('Falling back to generic n function search')
return self._search_regex(
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_
''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name

View File

@ -925,9 +925,16 @@ class JSInterpreter(object):
obj.reverse() obj.reverse()
return obj return obj
elif member == 'slice': elif member == 'slice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
assertion(len(argvals) == 1, 'takes exactly one argument') # From [1]:
return obj[argvals[0]:] # .slice() - like [:]
# .slice(n) - like [n:] (not [slice(n)])
# .slice(m, n) - like [m:n] or [slice(m, n)]
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
if len(argvals) < 2:
argvals += (None,)
return obj[slice(*argvals)]
elif member == 'splice': elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')