mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-23 04:30:10 +09:00
Compare commits
9 Commits
958b3ccc36
...
a941f835c9
Author | SHA1 | Date | |
---|---|---|---|
|
a941f835c9 | ||
|
c5098961b0 | ||
|
dbc08fba83 | ||
|
71223bff39 | ||
|
72db217289 | ||
|
fc933e686b | ||
|
ea02c40539 | ||
|
7270ecf3d6 | ||
|
dade9111f1 |
@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test(jsi, [''], args=['', '-'])
|
self._test(jsi, [''], args=['', '-'])
|
||||||
self._test(jsi, [], args=['', ''])
|
self._test(jsi, [], args=['', ''])
|
||||||
|
|
||||||
|
def test_slice(self):
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
|
||||||
|
self._test('function f(){return "012345678".slice()}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(0)}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(5)}', '5678')
|
||||||
|
self._test('function f(){return "012345678".slice(99)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(-2)}', '78')
|
||||||
|
self._test('function f(){return "012345678".slice(-99)}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(0, 0)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(1, 0)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(0, 1)}', '0')
|
||||||
|
self._test('function f(){return "012345678".slice(3, 6)}', '345')
|
||||||
|
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
|
||||||
|
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -174,6 +174,14 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
|
||||||
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
|
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
|
||||||
|
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||||
|
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -413,8 +413,6 @@ from .foxnews import (
|
|||||||
FoxNewsArticleIE,
|
FoxNewsArticleIE,
|
||||||
)
|
)
|
||||||
from .foxsports import FoxSportsIE
|
from .foxsports import FoxSportsIE
|
||||||
from .franceculture import FranceCultureIE
|
|
||||||
from .franceinter import FranceInterIE
|
|
||||||
from .francetv import (
|
from .francetv import (
|
||||||
FranceTVIE,
|
FranceTVIE,
|
||||||
FranceTVSiteIE,
|
FranceTVSiteIE,
|
||||||
@ -1011,7 +1009,11 @@ from .radiocanada import (
|
|||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
from .radiojavan import RadioJavanIE
|
from .radiojavan import RadioJavanIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import (
|
||||||
|
RadioFrancePodcastEpisodeIE,
|
||||||
|
RadioFrancePodcastPlaylistIE,
|
||||||
|
RadioFranceWebradioIE,
|
||||||
|
)
|
||||||
from .rai import (
|
from .rai import (
|
||||||
RaiPlayIE,
|
RaiPlayIE,
|
||||||
RaiPlayLiveIE,
|
RaiPlayLiveIE,
|
||||||
|
@ -1,73 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
extract_attributes,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FranceCultureIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'rendez-vous-au-pays-des-geeks',
|
|
||||||
'display_id': 'rendez-vous-au-pays-des-geeks',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Rendez-vous au pays des geeks',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'upload_date': '20140301',
|
|
||||||
'timestamp': 1393700400,
|
|
||||||
'vcodec': 'none',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# no thumbnail
|
|
||||||
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_data = extract_attributes(self._search_regex(
|
|
||||||
r'''(?sx)
|
|
||||||
(?:
|
|
||||||
</h1>|
|
|
||||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
|
||||||
).*?
|
|
||||||
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
|
||||||
''',
|
|
||||||
webpage, 'video data'))
|
|
||||||
|
|
||||||
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
|
||||||
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
|
||||||
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
|
||||||
webpage, 'thumbnail', default=None)
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'(?s)<span class="author">(.*?)</span>',
|
|
||||||
webpage, 'uploader', default=None)
|
|
||||||
ext = determine_ext(video_url.lower())
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': display_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'ext': ext,
|
|
||||||
'vcodec': 'none' if ext == 'mp3' else None,
|
|
||||||
'uploader': uploader,
|
|
||||||
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
|
||||||
'duration': int_or_none(video_data.get('data-duration')),
|
|
||||||
}
|
|
@ -1,59 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import month_by_name
|
|
||||||
|
|
||||||
|
|
||||||
class FranceInterIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
|
||||||
'md5': '9e54d7bdb6fdc02a841007f8a975c094',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
|
||||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'upload_date': '20160907',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = self._search_regex(
|
|
||||||
r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
|
||||||
webpage, 'video url', group='url')
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
|
||||||
|
|
||||||
upload_date_str = self._search_regex(
|
|
||||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
|
||||||
webpage, 'upload date', fatal=False)
|
|
||||||
if upload_date_str:
|
|
||||||
upload_date_list = upload_date_str.split()
|
|
||||||
upload_date_list.reverse()
|
|
||||||
upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
|
|
||||||
upload_date_list[2] = '%02d' % int(upload_date_list[2])
|
|
||||||
upload_date = ''.join(upload_date_list)
|
|
||||||
else:
|
|
||||||
upload_date = None
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': [{
|
|
||||||
'url': video_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
}],
|
|
||||||
}
|
|
@ -4,56 +4,284 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
get_element_by_attribute,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
url_or_none
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RadioFranceIE(InfoExtractor):
|
class RadioFranceBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
_BASE_URL = r'https://www.radiofrance.fr/'
|
||||||
IE_NAME = 'radiofrance'
|
|
||||||
|
|
||||||
_TEST = {
|
def extract_api_data(self, api_path, id, html):
|
||||||
'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
|
pattern = r'<script [^>]*sveltekit:data-url="https://www\.radiofrance\.fr/api/v[\d.]+/%s[^>]*>(?P<json>.*)</script>' % api_path
|
||||||
'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
|
json = self._search_regex(pattern, html, 'API data', flags=re.DOTALL, group='json')
|
||||||
'info_dict': {
|
|
||||||
'id': 'one-one',
|
if not json:
|
||||||
'ext': 'ogg',
|
raise ExtractorError('%s: JSON data not found' % id)
|
||||||
'title': 'One to one',
|
|
||||||
'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
try:
|
||||||
'uploader': 'Thomas Hercouët',
|
json = self._parse_json(json, id)
|
||||||
},
|
json = self._parse_json(json['body'], id)
|
||||||
|
|
||||||
|
if api_path == 'path':
|
||||||
|
return json['content']
|
||||||
|
elif api_path == 'stations':
|
||||||
|
return json
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Coding error')
|
||||||
|
except KeyError:
|
||||||
|
raise ExtractorError('%s: Invalid JSON' % id)
|
||||||
|
|
||||||
|
def get_title(self, api_data, webpage=None):
    """Return the stripped ``title`` of *api_data*, falling back to *webpage*.

    The page is consulted only when the API title is empty and a webpage
    was supplied: first the ``get_element_by_attribute`` lookup, then the
    OpenGraph title.
    """
    api_title = strip_or_none(api_data.get('title'))
    if api_title or not webpage:
        return api_title

    # NOTE(review): 'h1' is passed in the attribute-name position with a
    # None value -- confirm this is the intended helper/arguments.
    heading = strip_or_none(get_element_by_attribute('h1', None, webpage, False))
    if heading:
        return heading
    return strip_or_none(self._og_search_title(webpage))
|
||||||
|
|
||||||
|
def get_description(self, api_data, webpage=None):
    """Return the stripped ``standFirst`` of *api_data*, else the page's OpenGraph description.

    The webpage is only consulted when the API value is empty and a
    webpage was supplied.
    """
    summary = strip_or_none(api_data.get('standFirst'))
    if summary or not webpage:
        return summary
    return strip_or_none(self._og_search_description(webpage))
|
||||||
|
|
||||||
|
def get_thumbnail(self, api_data, webpage=None):
    """Return ``api_data['visual']['src']`` as a URL, else the page's OpenGraph thumbnail.

    The webpage is only consulted when no usable URL came from the API
    data and a webpage was supplied.
    """
    visual = api_data.get('visual')
    image_url = url_or_none(visual.get('src')) if visual else None
    if image_url or not webpage:
        return image_url
    return self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
def get_timestamp(self, api_data, webpage=None):
    """Return ``publishedDate`` from *api_data*, else parse the page's publication time.

    The API value is returned as-is. The fallback reads the
    ``article:published_time`` meta tag and parses it with
    ``parse_iso8601``; it is used only when the API value is empty and a
    webpage was supplied.
    """
    published = api_data.get('publishedDate')
    if published or not webpage:
        return published
    meta_value = self._html_search_meta('article:published_time', webpage, 'publication time')
    return parse_iso8601(meta_value)
|
||||||
|
|
||||||
|
def get_brand(self, api_data, webpage=None):
    """Return the stripped ``brand`` of *api_data*, else the page's OpenGraph ``site_name``.

    The OpenGraph lookup is non-fatal and is used only when the API value
    is empty and a webpage was supplied.
    """
    station = strip_or_none(api_data.get('brand'))
    if station or not webpage:
        return station
    return self._og_search_property('site_name', webpage, 'Station name', fatal=False)
|
||||||
|
|
||||||
|
def extract_episode(self, episode_id, api_data):
    """Return ``(url, duration)`` for the first manifestation in *api_data*.

    ``(None, None)`` is returned when *api_data* has no manifestations.
    ``url`` is ``None`` when the first manifestation carries no usable
    URL; callers treat a ``None`` URL as "episode not available".

    *episode_id* is accepted for interface compatibility and is not used.
    """
    manifestations = api_data.get('manifestations')
    if not manifestations:
        return None, None

    first = manifestations[0]
    # .get() instead of ['url']: a manifestation without a URL should be
    # reported as unavailable by the caller, not abort with a KeyError.
    url = url_or_none(first.get('url'))
    duration = int_or_none(first.get('duration'))
    return url, duration
|
||||||
|
|
||||||
|
def get_playlist_entries(self, playlist_url, playlist_id, api_data, direction):
    """Build the entry dicts for a playlist page and its neighbouring pages.

    Episodes are read from ``api_data['expressions']['items']``. Items
    without a path, without a parsable id or without an audio URL are
    skipped with a message.

    *direction* selects which neighbouring pages are fetched recursively
    when the data carries a page number: ``'prev'``, ``'next'`` or
    ``'both'``. Entries from previous pages are placed before, and entries
    from following pages after, the current page's entries.
    """
    playlist_data = api_data['expressions']

    entries = []
    # A missing or null item list is treated as an empty page.
    for item in playlist_data.get('items') or []:
        episode_path = item.get('path')
        if episode_path is None:
            # Interpolate the title here; passing it as a second argument
            # would leave the "%s" placeholder unformatted.
            self.report_warning('No path found for episode "%s"' % item.get('title'))
            continue
        episode_id = RadioFrancePodcastEpisodeIE._match_id(self._BASE_URL + episode_path)
        if episode_id is None:
            self.report_warning('Could not parse id of episode from path: "%s"' % episode_path)
            continue
        episode_url, duration = self.extract_episode(episode_id, item)
        if episode_url is None:
            self.to_screen('Episode "%s" is not available' % episode_path)
            continue
        entries.append({
            'id': episode_id,
            'url': episode_url,
            'title': self.get_title(item),
            'description': self.get_description(item),
            'timestamp': self.get_timestamp(item),
            'thumbnail': self.get_thumbnail(item),
            'duration': duration,
        })

    page_number = int_or_none(playlist_data.get('pageNumber'))
    if page_number:
        if direction in ('both', 'prev') and playlist_data.get('prev') is not None:
            _, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number - 1)
            entries = self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='prev') + entries
        if direction in ('both', 'next') and playlist_data.get('next') is not None:
            _, other_api_data = self.get_data(playlist_url, 'path', playlist_id, page=page_number + 1)
            entries = entries + self.get_playlist_entries(playlist_url, playlist_id, other_api_data, direction='next')

    return entries
|
||||||
|
|
||||||
|
def get_data(self, url, api_path, id, page=None):
    """Download *url* and return ``(webpage, api_data)``.

    When *page* is truthy it is sent as the ``p`` query parameter and
    mentioned in the download note. The API data is obtained by passing
    the downloaded page to ``extract_api_data`` for *api_path*.
    """
    if page:
        request_query = {'p': page}
        progress_note = "Downloading page %i" % page
    else:
        request_query = {}
        progress_note = None
    html = self._download_webpage(url, id, query=request_query, note=progress_note)
    return html, self.extract_api_data(api_path, id, html)
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFrancePodcastEpisodeIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/.+/.+-(?P<id>\d+)$'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Podcast episode with audio from France Info',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-brief-eco/le-brief-eco-du-lundi-05-septembre-2022-8310713',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8310713',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'url': r're:^https?://.*\.mp3$',
|
||||||
|
'title': 'Pour la première fois en vingt ans, l’euro passe sous les 0,99\u00a0dollar',
|
||||||
|
'description': str,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': int,
|
||||||
|
'duration': int,
|
||||||
|
'upload_date': str
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from France Musique',
|
||||||
|
'url': 'https://www.radiofrance.fr/francemusique/podcasts/allegretto/lever-du-jour-9233228',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from FranceInter',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceinter/podcasts/rendez-vous-avec-x/un-mysterieux-echange-digne-de-la-guerre-froide-9343281',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from France Culture',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceculture/podcasts/la-science-cqfd/teotihuacan-la-plus-mysterieuse-des-cites-d-or-9224610',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from Le Mouv',
|
||||||
|
'url': 'https://www.radiofrance.fr/mouv/podcasts/mouv-dj-la-caution/ncr2a-ne-cherche-rien-d-autre-ailleurs-1197950',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'note': 'Podcast episode from FIP',
|
||||||
|
'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip/hommage-au-cinema-de-vangelis-4734742',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
id = self._match_id(url)
|
||||||
video_id = m.group('id')
|
webpage, api_data = self.get_data(url, 'path', id)
|
||||||
|
url, duration = self.extract_episode(id, api_data)
|
||||||
webpage = self._download_webpage(url, video_id)
|
if url is None:
|
||||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
msg = 'Podcast file is not available. If the show is too recent, the file may not have been uploaded yet: try again later.'
|
||||||
description = self._html_search_regex(
|
raise ExtractorError(msg, expected=True, video_id=id)
|
||||||
r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'<div class="credit"> © (.*?)</div>',
|
|
||||||
webpage, 'uploader', fatal=False)
|
|
||||||
|
|
||||||
formats_str = self._html_search_regex(
|
|
||||||
r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
|
|
||||||
webpage, 'audio URLs')
|
|
||||||
formats = [
|
|
||||||
{
|
|
||||||
'format_id': fm[0],
|
|
||||||
'url': fm[1],
|
|
||||||
'vcodec': 'none',
|
|
||||||
'preference': i,
|
|
||||||
}
|
|
||||||
for i, fm in
|
|
||||||
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
|
|
||||||
]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': id,
|
||||||
'title': title,
|
'url': url,
|
||||||
'formats': formats,
|
'title': self.get_title(api_data, webpage),
|
||||||
'description': description,
|
'description': self.get_description(api_data, webpage),
|
||||||
'uploader': uploader,
|
'timestamp': self.get_timestamp(api_data, webpage),
|
||||||
|
'thumbnail': self.get_thumbnail(api_data, webpage),
|
||||||
|
'channel_id': self.get_brand(api_data, webpage),
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFrancePodcastPlaylistIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/podcasts/(?P<id>[^/]+?)(?:[?#].*)?$'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Podcast show with multiple pages of episodes and some of them are missing',
|
||||||
|
'url': 'https://www.radiofrance.fr/franceculture/podcasts/une-semaine-dans-le-monde-10-11?p=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'une-semaine-dans-le-monde-10-11',
|
||||||
|
'title': 'Une semaine dans le monde | 10-11',
|
||||||
|
'description': str,
|
||||||
|
'timestamp': int
|
||||||
|
},
|
||||||
|
'playlist_count': 23,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Return a playlist result holding every available episode of the show.

    Entries are gathered from the requested page and its neighbouring
    pages (``direction='both'``), and the collected list is then reversed.
    """
    playlist_id = self._match_id(url)
    webpage, api_data = self.get_data(url, 'path', playlist_id)

    episodes = self.get_playlist_entries(url, playlist_id, api_data, direction='both')
    episodes.reverse()

    info = {
        'id': playlist_id,
        '_type': 'playlist',
        'entries': episodes,
    }
    info['title'] = self.get_title(api_data, webpage)
    info['description'] = self.get_description(api_data, webpage)
    info['timestamp'] = self.get_timestamp(api_data, webpage)
    info['thumbnail'] = self.get_thumbnail(api_data, webpage)
    info['channel_id'] = self.get_brand(api_data, webpage)
    return info
|
||||||
|
|
||||||
|
|
||||||
|
class RadioFranceWebradioIE(RadioFranceBaseIE):
|
||||||
|
_VALID_URL = r'https?://www\.radiofrance\.fr/(?:francemusique|franceinter|franceculture|franceinfo|mouv|fip)/(?P<id>radio-[^/]+)$'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list of webradios available at https://www.radiofrance.fr/ecouter-musique',
|
||||||
|
'url': 'https://www.radiofrance.fr/fip/radio-metal',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'radio-metal',
|
||||||
|
'ext': 'aac',
|
||||||
|
'title': str,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'aac',
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def get_livestream_formats(self, id, api_data):
    """Build the format list from ``api_data['media']['sources']``.

    Sources without a URL are skipped. ``mp3`` and ``aac`` sources get an
    audio codec, bitrate and preference; ``hls`` sources get a manifest
    URL; any other format id is passed through with the common fields only.

    Raises ExtractorError when no source yields a format, including when
    the ``media`` or ``sources`` data is missing altogether.
    """
    # Missing/null containers fall through to the "no URL found" error
    # below instead of surfacing as a KeyError/TypeError.
    media = api_data.get('media') or {}
    sources = media.get('sources') or []
    codec_preference = {'mp3': 1, 'aac': 2}

    formats = []
    for source in sources:
        stream_url = source.get('url')
        if not stream_url:
            continue

        format_id = source.get('format')
        fmt = {
            'url': stream_url,
            'format_id': format_id,
            'asr': 48000,
            'vcodec': 'none',
        }
        if format_id in ('mp3', 'aac'):
            fmt['preference'] = codec_preference[format_id]
            fmt['acodec'] = format_id
            fmt['abr'] = source.get('bitrate')
        elif format_id == 'hls':
            fmt['preference'] = 0
            fmt['manifest_url'] = stream_url
        formats.append(fmt)

    if not formats:
        raise ExtractorError('No live streaming URL found')
    return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Return the live-stream info dict for a webradio page.

    The station data is read from the page's ``stations`` API payload;
    title, thumbnail and channel fall back to the webpage when the payload
    lacks them.
    """
    station_id = self._match_id(url)
    webpage, api_data = self.get_data(url, 'stations', station_id)

    info = {'id': station_id}
    info['title'] = self.get_title(api_data, webpage)
    info['formats'] = self.get_livestream_formats(station_id, api_data)
    info['thumbnail'] = self.get_thumbnail(api_data, webpage)
    info['channel_id'] = self.get_brand(api_data, webpage)
    info['is_live'] = True
    return info
|
||||||
|
@ -1659,17 +1659,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
func_name, idx = self._search_regex(
|
func_name, idx = self._search_regex(
|
||||||
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
||||||
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
|
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
|
||||||
# old: .get("n"))&&(b=nfunc[idx](b)
|
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
|
||||||
# older: .get("n"))&&(b=nfunc(b)
|
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||||||
|
# old: (b=a.get("n"))&&(b=nfunc[idx](b)
|
||||||
|
# older: (b=a.get("n"))&&(b=nfunc(b)
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?:\(\s*(?P<b>[a-z])\s*=\s*(?:
|
\((?:[\w$()\s]+,)*?\s* # (
|
||||||
|
(?P<b>[a-z])\s*=\s* # b=
|
||||||
|
(?:
|
||||||
|
(?: # expect ,c=a.get(b) (etc)
|
||||||
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
||||||
"n+"\[\s*\+?s*[\w$.]+\s*]
|
"n+"\[\s*\+?s*[\w$.]+\s*]
|
||||||
)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
|
)\s*(?:,[\w$()\s]+(?=,))*|
|
||||||
\.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
|
(?P<old>[\w$]+) # a (old[er])
|
||||||
|
)\s*
|
||||||
|
(?(old)
|
||||||
|
# b.get("n")
|
||||||
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||||
|
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
|
||||||
|
| # ,c=a.get(b)
|
||||||
|
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||||
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||||
|
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
|
||||||
|
)
|
||||||
|
# interstitial junk
|
||||||
|
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
|
||||||
|
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
|
||||||
|
# nfunc|nfunc[idx]
|
||||||
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
|
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
|
||||||
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
||||||
|
default=(None, None))
|
||||||
|
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
||||||
|
if not func_name:
|
||||||
|
self.report_warning('Falling back to generic n function search')
|
||||||
|
return self._search_regex(
|
||||||
|
r'''(?xs)
|
||||||
|
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
||||||
|
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
||||||
|
\s*\{(?:(?!};).)+?["']enhanced_except_
|
||||||
|
''', jscode, 'Initial JS player n function name', group='name')
|
||||||
if not idx:
|
if not idx:
|
||||||
return func_name
|
return func_name
|
||||||
|
|
||||||
|
@ -925,9 +925,16 @@ class JSInterpreter(object):
|
|||||||
obj.reverse()
|
obj.reverse()
|
||||||
return obj
|
return obj
|
||||||
elif member == 'slice':
|
elif member == 'slice':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
|
||||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
# From [1]:
|
||||||
return obj[argvals[0]:]
|
# .slice() - like [:]
|
||||||
|
# .slice(n) - like [n:] (not [slice(n)])
|
||||||
|
# .slice(m, n) - like [m:n] or [slice(m, n)]
|
||||||
|
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
|
||||||
|
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
|
||||||
|
if len(argvals) < 2:
|
||||||
|
argvals += (None,)
|
||||||
|
return obj[slice(*argvals)]
|
||||||
elif member == 'splice':
|
elif member == 'splice':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
Loading…
Reference in New Issue
Block a user