Compare commits

...

4 Commits

Author SHA1 Message Date
Petr Tesarik
3ad6b5bc2e
Merge fe02a15b1c35aad2463dfc6ff160e50cd8c2a04c into 673277e510ebd996b62a2fcc76169bf3cce29910 2025-03-05 06:59:31 +00:00
dirkf
673277e510
[YouTube] Fix 91b1569 2025-02-28 01:02:20 +00:00
dirkf
91b1569f68
[YouTube] Fix channel playlist extraction (#33074)
* [YouTube] Extract playlist items from LOCKUP_VIEW_MODEL_...
* resolves #33073
* thx seproDev (yt-dlp/yt-dlp#11615)

Co-authored-by: sepro <sepro@sepr0.com>
2025-02-28 00:02:10 +00:00
Petr Tesarik
fe02a15b1c Add extractor for www.mujrozhlas.cz
Signed-off-by: Petr Tesarik <petr@tesarici.cz>
2023-02-05 17:04:02 +01:00
3 changed files with 143 additions and 2 deletions

View File

@ -736,6 +736,7 @@ from .mtv import (
MTVJapanIE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .mujrozhlas import MujRozhlasIE
from .mwave import MwaveIE, MwaveMeetGreetIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .mychannels import MyChannelsIE from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE

View File

@ -0,0 +1,95 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError,
js_to_json,
)
class MujRozhlasIE(InfoExtractor):
    """Extractor for single audio items published on www.mujrozhlas.cz.

    The web page embeds a `var dl = {...};` JavaScript object that names the
    content ID and entity type; the audio links themselves are then read
    from the JSON API at api.mujrozhlas.cz.
    """

    IE_NAME = 'mujRozhlas'
    IE_DESC = 'https://www.mujrozhlas.cz/'
    _VALID_URL = r'https?://www\.mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://www.mujrozhlas.cz/vinohradska-12/zadne-dalsi-pusy-konec-prchala-spol-muze-znamenat-babisuv-odchod-z-politiky-tipuje',
            'md5': '34ecaa47f64079a63d6b80498c280e9d',
            'info_dict': {
                'id': '0c54ba72-93dd-3a29-b567-910d3d8c71a8',
                'ext': 'mp3',
                'title': 'Žádné další pusy. Konec Prchala a spol. může znamenat Babišův odchod z politiky, tipuje politolog',
                'description': 'md5:ec0610bdb1f591061dbd224d2dd9c19e',
            },
        },
        {
            'url': 'https://www.mujrozhlas.cz/kazki/princi-ta-zliy-drakon',
            'md5': 'cbad6f68db6dc4d6d798d69b5d258aa5',
            'info_dict': {
                'id': 'ec5f53b2-3910-448e-8e7f-d6d1a19f4926',
                'ext': 'm4a',
                'title': 'Принці та злий дракон',
                'description': 'md5:b21701e09c2b509c4451194af7ac271b',
            },
            'params': {
                'format': 'hls-128',
            },
        },
    ]

    # Splits a direct media URL into scheme, base name and file extension.
    _MP3_URL_RE = r'(?P<proto>[^:]+):(?:.*/)*(?P<id>[^.]+)\.(?P<ext>[^/.]+)$'

    @classmethod
    def _mp3_format(cls, link_url, bitrate):
        """Build the format dict for a directly downloadable mp3 link.

        `protocol` and `ext` are only set when they can be read from the
        URL; otherwise they are left out rather than crashing on a URL
        that has no file extension.
        """
        fmt = {
            'url': link_url,
            'format_id': 'mp3-{0}'.format(bitrate) if bitrate is not None else 'mp3',
            'vcodec': 'none',
            'abr': bitrate,
            'tbr': bitrate,
        }
        m = re.search(cls._MP3_URL_RE, link_url)
        if m:
            fmt['protocol'] = m.group('proto')
            fmt['ext'] = m.group('ext')
        return fmt

    def _real_extract(self, url):
        """Return the info dict for the audio item at `url`.

        Raises ExtractorError when the page carries no player data or when
        the entity is neither an 'episode' nor a 'serialPart'.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        player_data = self._search_regex(
            r'\bvar dl = ({[^\n]+});',
            webpage, 'player data', default=None)
        if not player_data:
            raise ExtractorError('Could not find player data')
        # The embedded object is a JS literal, hence the js_to_json pass.
        player_data = self._parse_json(
            player_data, display_id, transform_source=js_to_json)

        audio_id = player_data['contentId']
        bundle = player_data['siteEntityBundle']
        if bundle not in ('episode', 'serialPart'):
            raise ExtractorError('Unsupported entity: {0}'.format(bundle))

        api_url = 'https://api.mujrozhlas.cz/episodes/{0}'.format(audio_id)
        attr = self._download_json(api_url, audio_id)['data']['attributes']

        formats = []
        for link in attr.get('audioLinks') or []:
            link_url = link.get('url')
            if not link_url:
                # Nothing to download for this entry; keep the other links.
                continue
            variant = link.get('variant')
            if variant == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    link_url, audio_id, 'm4a', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif variant == 'dash':
                formats.extend(self._extract_mpd_formats(
                    link_url, audio_id, mpd_id='dash', fatal=False))
            elif variant == 'mp3':
                formats.append(
                    self._mp3_format(link_url, link.get('bitrate')))
        self._sort_formats(formats)

        return {
            'id': audio_id,
            'title': attr['title'],
            'description': clean_html(attr.get('description')),
            'formats': formats,
        }

View File

@ -27,6 +27,7 @@ from ..compat import (
) )
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..utils import ( from ..utils import (
bug_reports_message,
clean_html, clean_html,
dict_get, dict_get,
error_to_compat_str, error_to_compat_str,
@ -65,6 +66,7 @@ from ..utils import (
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
variadic,
) )
@ -460,6 +462,26 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
} }
@staticmethod
def _extract_thumbnails(data, *path_list, **kw_final_key):
    """
    Extract thumbnail dicts from one or more places in `data`

    @param data: object handed to traverse_obj()
    @param path_list: paths to each level that contains the thumbnails
                      key; with no path given, `data` itself is that level
    @param final_key: (keyword only) name of the key holding the
                      thumbnail items, default 'thumbnails'
    @returns the traverse_obj() result for the url/height/width mapping,
             with entries lacking a 'url' filtered out
    """
    final_key = kw_final_key.get('final_key', 'thumbnails')

    def drop_query_from_maxres(u):
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if u and 'maxresdefault' in u:
            return update_url(u, query=None)
        return u

    def require_url(t):
        return t if t.get('url') else None

    # one branch per requested path, each descending into every item
    # stored under `final_key`
    branches = tuple(
        variadic(path) + (final_key, Ellipsis)
        for path in path_list or [()])

    fields = {
        'url': ('url', T(url_or_none), T(drop_query_from_maxres)),
        'height': ('height', T(int_or_none)),
        'width': ('width', T(int_or_none)),
    }

    return traverse_obj(data, (branches, fields, T(require_url)))
def _search_results(self, query, params): def _search_results(self, query, params):
data = { data = {
'context': { 'context': {
@ -3183,8 +3205,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
expected_type=txt_or_none) expected_type=txt_or_none)
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in traverse_obj(grid_renderer, ('items', Ellipsis, T(dict))):
if not isinstance(item, dict): lockup_view_model = traverse_obj(item, ('lockupViewModel', T(dict)))
if lockup_view_model:
entry = self._extract_lockup_view_model(lockup_view_model)
if entry:
yield entry
continue continue
renderer = self._extract_grid_item_renderer(item) renderer = self._extract_grid_item_renderer(item)
if not isinstance(renderer, dict): if not isinstance(renderer, dict):
@ -3268,6 +3294,25 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue continue
yield self._extract_video(renderer) yield self._extract_video(renderer)
def _extract_lockup_view_model(self, view_model):
    """
    Turn a lockupViewModel dict into a playlist url_result entry

    Returns None when the model has no contentId, or (after a one-time
    warning) when its contentType is not a playlist or podcast.
    """
    content_id = view_model.get('contentId')
    if not content_id:
        return

    content_type = view_model.get('contentType')
    supported_types = (
        'LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST')
    if content_type in supported_types:
        playlist_url = update_url_query(
            'https://www.youtube.com/playlist', {'list': content_id})
        entry = self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=content_id)
        title = traverse_obj(view_model, (
            'metadata', 'lockupMetadataViewModel', 'title', 'content',
            T(compat_str)))
        thumbnails = self._extract_thumbnails(
            view_model,
            ('contentImage', 'collectionThumbnailViewModel',
             'primaryThumbnail', 'thumbnailViewModel', 'image'),
            final_key='sources')
        return merge_dicts(entry, {
            'title': title,
            'thumbnails': thumbnails,
        })

    message = 'Unsupported lockup view model content type "{0}"{1}'.format(
        content_type, bug_reports_message())
    self.report_warning(message, only_once=True)
def _video_entry(self, video_renderer): def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId') video_id = video_renderer.get('videoId')
if video_id: if video_id: