This commit is contained in:
Petr Tesarik 2024-08-21 22:32:49 -04:00 committed by GitHub
commit 890a635f89
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 96 additions and 0 deletions

View File

@ -736,6 +736,7 @@ from .mtv import (
MTVJapanIE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .mujrozhlas import MujRozhlasIE
from .mwave import MwaveIE, MwaveMeetGreetIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .mychannels import MyChannelsIE from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE

View File

@ -0,0 +1,95 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError,
js_to_json,
)
class MujRozhlasIE(InfoExtractor):
IE_NAME = 'mujRozhlas'
IE_DESC = 'https://www.mujrozhlas.cz/'
_VALID_URL = r'https?://www\.mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [
{
'url': 'https://www.mujrozhlas.cz/vinohradska-12/zadne-dalsi-pusy-konec-prchala-spol-muze-znamenat-babisuv-odchod-z-politiky-tipuje',
'md5': '34ecaa47f64079a63d6b80498c280e9d',
'info_dict': {
'id': '0c54ba72-93dd-3a29-b567-910d3d8c71a8',
'ext': 'mp3',
'title': 'Žádné další pusy. Konec Prchala a spol. může znamenat Babišův odchod z politiky, tipuje politolog',
'description': 'md5:ec0610bdb1f591061dbd224d2dd9c19e',
},
},
{
'url': 'https://www.mujrozhlas.cz/kazki/princi-ta-zliy-drakon',
'md5': 'cbad6f68db6dc4d6d798d69b5d258aa5',
'info_dict': {
'id': 'ec5f53b2-3910-448e-8e7f-d6d1a19f4926',
'ext': 'm4a',
'title': 'Принці та злий дракон',
'description': 'md5:b21701e09c2b509c4451194af7ac271b',
},
'params': {
'format': 'hls-128',
},
},
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
player_data = self._search_regex(
r'\bvar dl = ({[^\n]+});',
webpage, 'player data', default=None)
if not player_data:
raise ExtractorError('Could not find player data')
player_data = self._parse_json(player_data, display_id, js_to_json)
audio_id = player_data['contentId']
bundle = player_data['siteEntityBundle']
if bundle not in ('episode', 'serialPart'):
raise ExtractorError('Unsupported entity: {0}'.format(bundle))
url = 'https://api.mujrozhlas.cz/episodes/{0}'.format(player_data['contentId'])
webpage = self._download_webpage(url, audio_id)
attr = self._parse_json(webpage, audio_id)['data']['attributes']
formats = []
for link in attr['audioLinks']:
variant = link['variant']
if variant == 'hls':
formats.extend(self._extract_m3u8_formats(
link['url'], audio_id, 'm4a', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif variant == 'dash':
formats.extend(self._extract_mpd_formats(
link['url'], audio_id, mpd_id='dash', fatal=False))
elif variant == 'mp3':
url = link['url']
m = re.search(
r'(?P<proto>[^:]+):(?:.*/)*(?P<id>[^.]+)\.(?P<ext>[^/.]+)$',
url)
bitrate = link['bitrate']
formats.append({
'url': link['url'],
'protocol': m.group('proto'),
'ext': m.group('ext'),
'format_id': '-'.join(('mp3', str(bitrate))),
'vcodec': 'none',
'abr': bitrate,
'tbr': bitrate,
})
self._sort_formats(formats)
return {
'id': audio_id,
'title': attr['title'],
'description': clean_html(attr['description']),
'formats': formats,
}