Compare commits

...

6 Commits

Author SHA1 Message Date
Kaspar V.
e8c56ed58c
Merge 14a621cfbe into c5098961b0 2024-08-21 22:33:15 -04:00
dirkf
c5098961b0 [Youtube] Rework n function extraction pattern
Now also succeeds with player b12cc44b
2024-08-06 20:59:09 +01:00
dirkf
dbc08fba83 [jsinterp] Improve slice implementation for player b12cc44b
Partly taken from yt-dlp/yt-dlp#10664, thx seproDev
        Fixes #32896
2024-08-06 20:51:38 +01:00
Aiur Adept
71223bff39
[Youtube] Fix nsig extraction for player 20dfca59 (#32891)
* dirkf's patch for nsig extraction
* add generic search per yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-08-01 19:18:34 +01:00
Kaspar Vollenweider
14a621cfbe
fix(arte_extractor): make description complete and never missing
Arte.tv sometimes provides only a headline, sometimes only a description, and
often both.

This change grabs both, or whichever is available, and combines them into the
description field.
2021-07-17 21:56:00 +02:00
Kaspar Vollenweider
f861761a90
feat(arte_extractor): add alt_title for regular shows ❯❯❯
Most recurring Arte.tv shows have an essential subtitle (not to be confused with the one already in the extractor!).
Without that subtitle, all episodes of such a show (potentially hundreds)
get the same title.

Here is an example of the German version of the ARTE Reportage show:

https://www.arte.tv/de/videos/030273-820-A/arte-reportage/

Now: title is `ARTE Reportage` and no alt_title is available
With this: title is `ARTE Reportage` and
           alt_title is `Sudan: Die Tigray fliehen aus Äthiopien`
2021-07-17 21:52:43 +02:00
5 changed files with 137 additions and 15 deletions

View File

@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, [''], args=['', '-']) self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', '']) self._test(jsi, [], args=['', ''])
def test_slice(self):
# Checks JS `.slice()` called with 0, 1 and 2 arguments, first on an array
# and then on a string holding the same digits. The second argument passed
# to `_test` is presumably the expected return value of `f()` -- the helper
# is not shown here, TODO confirm.
#
# Array, no argument or start only: zero, in range, past the end, negative,
# and negative beyond the start of the array.
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
# Array, start and end: empty range, end before start, ordinary ranges, and
# negative bounds on either or both sides.
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
# String, no argument or start only: same argument values as the array cases.
self._test('function f(){return "012345678".slice()}', '012345678')
self._test('function f(){return "012345678".slice(0)}', '012345678')
self._test('function f(){return "012345678".slice(5)}', '5678')
self._test('function f(){return "012345678".slice(99)}', '')
self._test('function f(){return "012345678".slice(-2)}', '78')
self._test('function f(){return "012345678".slice(-99)}', '012345678')
# String, start and end: same argument pairs as the array cases.
self._test('function f(){return "012345678".slice(0, 0)}', '')
self._test('function f(){return "012345678".slice(1, 0)}', '')
self._test('function f(){return "012345678".slice(0, 1)}', '0')
self._test('function f(){return "012345678".slice(3, 6)}', '345')
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -174,6 +174,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA', '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
), ),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
] ]

View File

@ -34,11 +34,56 @@ class ArteTVIE(ArteTVBaseIE):
/(?P<id>\d{6}-\d{3}-[AF]) /(?P<id>\d{6}-\d{3}-[AF])
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
_TESTS = [{ _TESTS = [{
'url': 'https://www.arte.tv/de/videos/092724-001-A/lasst-mich-schlafen/',
'info_dict': {
'id': '092724-001-A',
'ext': 'mp4',
'title': 'Lasst mich schlafen!',
'alt_title': 'Wie schlafen wir?',
'description': 'Gegen Abend signalisiert die biologische Uhr dem Körper durch das Ausschütten von Melatonin, dass es Zeit ist, '
'herunterzufahren. Doch was geschieht dabei im Gehirn? Der Schlafforscher Raphael Heinzer vom Schlafforschungsze'
'ntrum Lausanne will dies herausfinden und beobachtet die Hirnströme in den verschiedenen Schlafphasen.',
'upload_date': '20200224'
},
}, {
'url': 'https://www.arte.tv/de/videos/030273-820-A/arte-reportage',
'info_dict': {
'id': '030273-820-A',
'ext': 'mp4',
'title': 'ARTE Reportage',
'alt_title': 'Sudan: Die Tigray fliehen aus Äthiopien',
'description': 'Sudan: In nur wenigen Stunden verloren viele Bewohner aus der Region Tigray alles im Konflikt gegen die Regierun'
'g.In diesem Konflikt geht es um die jahrzehntealten Spannungen zwischen den gut 80 Ethnien im Land. / Elfenbeink'
'üste: Die 1.000 Einwohner des Dorfs Trinlé-Diapleu integrieren Patienten eines Psychiatrischen Zentrums in ihr D'
'orfleben, um ihnen bei der Genesung zu helfen.\n\n(1): Sudan: Die Tigray fliehen aus ÄthiopienIn nur wenigen Stu'
'nden verloren viele Bewohner aus der Region Tigray alles im Konflikt gegen die Regierung.Ärzte und Bauern, Stude'
'nten und Händler, ganze Familien aus der Region Tigray mussten im Konflikt gegen die Regierung fliehen. In ihrer'
' Heimatregion hatten Tigray Rebellen die Regierung herausgefordert und die schlug hart zurück. In diesem Konflik'
't geht es um die jahrzehntealten Spannungen zwischen den gut 80 Ethnien im Land, es geht um politischen Einfluss'
' und um Landbesitz. Auch dem neuen und zunächst international hoch gelobten Ministerpräsidenten Abiy Ahmed Ali i'
'st es nicht gelungen, die Ethnien untereinander zu befrieden. Unsere Reporter begleiteten die Flüchtlinge aus Ät'
'hiopien im Sudan in ein Flüchtlingscamp in der Wüste, die meisten verbringen die ersten Nächte dort unter freiem'
' Himmel.(2): Elfenbeinküste: Das Dorf, das psychisch Kranken hilftDie 1.000 Einwohner des Dorfs Trinlé-Diapleu h'
'elfen Patienten in ihrem Psychiatrie Zentrum gesund zu werden.In Trinlé-Diapleu leben die psychisch Kranken nich'
't abgetrennt von den Leuten im Dorf, ganz im Gegenteil: Die Patienten des Psychiatrischen Zentrums Victor Houali'
' werden gleich nach ihrer Ankunft behutsam in das Dorfleben integriert. Das Prinzip der offenen Psychiatrie, in '
'dieser Form wohl nicht nur in der Elfenbeinküste einmalig, haben zwei Ärzte der in Frankreich sehr bekannten Cli'
'nique de La Borde, Philippe Bichon und Frédérique Drogoul, in den 80er Jahren hier eingeführt. Auch Patienten mi'
't Psychosen und Wahnvorstellungen oder schwere Fälle von Schizophrenie heilen sie hier mit der Hilfe von Medikam'
'enten, Therapiegesprächen und Mitmenschlichkeit. Für viele Kranke in der Elfenbeinküste ist das Victor Houali di'
'e letzte Hoffnung auf Genesung.',
'upload_date': '20210716'
}
}, {
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'info_dict': { 'info_dict': {
'id': '088501-000-A', 'id': '088501-000-A',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive', 'title': 'Mexico: Stealing Petrol to Survive',
'alt_title': 'ARTE Reportage',
'description': 'In Mexico, the black market in oil is more lucrative than drugs. Poor families drill into pipelines and syphon of'
'f the petrol that finds its way to illegal gas stations. The illicit trade in gasoline is highly dangerous and co'
'sts Mexico 3 billion euros a year.',
'upload_date': '20190628', 'upload_date': '20190628',
}, },
}, { }, {
@ -171,14 +216,19 @@ class ArteTVIE(ArteTVBaseIE):
self._sort_formats(formats) self._sort_formats(formats)
return { extracted_metadata = {
'id': player_info.get('VID') or video_id, 'id': player_info.get('VID') or video_id,
'title': title, 'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str), 'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
'formats': formats, 'formats': formats,
} }
if player_info.get('subtitle', '').strip():
extracted_metadata['alt_title'] = player_info.get('subtitle', '').strip()
description = "%s\n\n%s" % (player_info.get('V7T', '').strip(), player_info.get('VDE', '').strip())
if description.strip():
extracted_metadata['description'] = description.strip()
return extracted_metadata
class ArteTVEmbedIE(InfoExtractor): class ArteTVEmbedIE(InfoExtractor):

View File

@ -1659,17 +1659,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# old: .get("n"))&&(b=nfunc[idx](b) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
# older: .get("n"))&&(b=nfunc(b) # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
(?:\(\s*(?P<b>[a-z])\s*=\s*(?: \((?:[\w$()\s]+,)*?\s* # (
(?P<b>[a-z])\s*=\s* # b=
(?:
(?: # expect ,c=a.get(b) (etc)
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
"n+"\[\s*\+?s*[\w$.]+\s*] "n+"\[\s*\+?s*[\w$.]+\s*]
)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? )\s*(?:,[\w$()\s]+(?=,))*|
\.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* (?P<old>[\w$]+) # a (old[er])
)\s*
(?(old)
# b.get("n")
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name:
self.report_warning('Falling back to generic n function search')
return self._search_regex(
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_
''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name

View File

@ -925,9 +925,16 @@ class JSInterpreter(object):
obj.reverse() obj.reverse()
return obj return obj
elif member == 'slice': elif member == 'slice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
assertion(len(argvals) == 1, 'takes exactly one argument') # From [1]:
return obj[argvals[0]:] # .slice() - like [:]
# .slice(n) - like [n:] (not [slice(n)])
# .slice(m, n) - like [m:n] or [slice(m, n)]
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
if len(argvals) < 2:
argvals += (None,)
return obj[slice(*argvals)]
elif member == 'splice': elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')