Compare commits

...

9 Commits

Author SHA1 Message Date
Martin Vician
3ed9b7b7c3
Merge e4759fdffe7e0cfa204fb83324552ab31503d597 into 4e714f9df1ed2cccd51df60d45ff5504abe827b7 2025-03-26 16:13:02 +00:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
Martin Vician
e4759fdffe
Update youtube_dl/extractor/mujrozhlas.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-06 21:32:01 +01:00
Martin Vician
5fd5b4737f
Update youtube_dl/extractor/mujrozhlas.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-06 21:31:56 +01:00
Martin Vician
f935e0585b
Added thumbnail and timestamp 2022-05-08 19:11:20 +01:00
Martin Vician
d4ac3ac47f
No default for content_id, better content parsing 2022-03-25 16:18:21 +00:00
Martin Vician
fb13bb6598
Updated to flake8 2022-03-16 17:51:50 +00:00
Martin Vician
9a6b3d9dac
Added mujrozhlas.cz extractor 2022-03-16 17:44:10 +00:00
7 changed files with 74 additions and 15 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -736,6 +736,7 @@ from .mtv import (
MTVJapanIE, MTVJapanIE,
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .mujrozhlas import MujRozhlasIE
from .mwave import MwaveIE, MwaveMeetGreetIE from .mwave import MwaveIE, MwaveMeetGreetIE
from .mychannels import MyChannelsIE from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE from .myspace import MySpaceIE, MySpaceAlbumIE

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
    """Locate the Next.js ``__NEXT_DATA__`` script in a page and parse it.

    ``fatal`` (default True) and ``transform_source`` (default None) are
    taken out of ``kw``; ``fatal`` is applied to both the regex search and
    the JSON parsing, ``transform_source`` only to the JSON parsing.  Any
    remaining keyword arguments are forwarded to ``_search_regex``.

    Returns whatever ``_parse_json`` returns for the matched script body.
    """
    fatal = kw.pop('fatal', True)
    transform_source = kw.pop('transform_source', None)
    # the script body is captured by the named group 'js'
    next_data_json = self._search_regex(
        r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
        webpage, 'next.js data', group='js', fatal=fatal, **kw)
    return self._parse_json(
        next_data_json, video_id,
        transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    parse_iso8601,
)
class MujRozhlasIE(InfoExtractor):
    """Extractor for audio episode pages on mujrozhlas.cz.

    The episode page is downloaded to find its ``contentId``; the episode
    metadata (title, description, audio links) is then read from the JSON
    served at ``https://api.mujrozhlas.cz/episodes/<contentId>``.
    Thumbnail and timestamp come from the page's Open Graph properties.
    """
    _VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?P<id>[a-zA-Z0-9/-]+)'
    _TESTS = [{
        'url': 'https://www.mujrozhlas.cz/meteor/meteor-o-nejvetsim-matematikovi-nekonecnem-vesmiru-skakajicim-pavoukovi-hrani-surikat',
        'info_dict': {
            'id': 'meteor/meteor-o-nejvetsim-matematikovi-nekonecnem-vesmiru-skakajicim-pavoukovi-hrani-surikat',
            'ext': 'mp3',
            'title': 'Meteor o největším matematikovi, nekonečném vesmíru, skákajícím pavoukovi a hraní surikat',
            'description': 'Poslechněte si:01:00 Vymírající pták roku07:57 Největší experimentátor všech dob14:06 Největší matematik 20. století27:40 Jak si představit nekonečný vesmír?35:12 Pavouk, který skáče bunjee jumping41:34 Jak si hrají surikaty?Hovoří ornitolog Zdeněk Vermouzek, matematik Václav Chvátal nebo astronom Norbert Werner. Rubriku Stalo se tento den připravil ing. František Houdek.\xa0Z knihy\xa0Pozoruhodné objevy ze světa zvířat čte Zuzana Slavíková.\nPetr Sobotka',
            'timestamp': 1647272701,
            'upload_date': '20220314',
        }
    }, {
        'url': 'https://www.mujrozhlas.cz/podcast-vinohradska-12/its-humanitarian-disaster-mariupol-we-want-help-says-msfs-alex-wade',
        'info_dict': {
            'id': 'podcast-vinohradska-12/its-humanitarian-disaster-mariupol-we-want-help-says-msfs-alex-wade',
            'ext': 'mp3',
            'title': 'It\'s a humanitarian disaster in Mariupol. We want to help, says MSFs Alex Wade',
            'description': 'It is a humanitarian catastrophe. People in the southeast of Ukraine are dying in the streets and their neighbors have to bury them in the gardens. Those who are still alive, dont have food, water or medicine. I spoke to Alex Wade, an emergency coordinator for Doctors without Borders, who is in Dnipro. He is doing his best to help Ukrainians who are fleeing the war or staying to fight back.\nMatěj Skalický\n\nEdited by: Kateřina Pospíšilová\nSound design: Tomáš Černý\nResearched by:\xa0Alžběta Jurčová\nMusic:\xa0Martin Hůla',
            'timestamp': 1647442501,
            'upload_date': '20220316',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the episode page at ``url``.

        Raises ExtractorError when the API response lists no audio links.
        """
        # the id is the whole URL path, so it may contain '/'
        audio_id = self._match_id(url)
        webpage = self._download_webpage(url, audio_id)

        # the page embeds the API id of the episode as a JSON fragment
        content_id = self._html_search_regex(
            r'"contentId":"(.+?)"', webpage, 'content_id')
        content = self._download_json(
            'https://api.mujrozhlas.cz/episodes/' + content_id, content_id)
        attrs = content['data']['attributes']

        # fail with a readable message rather than a bare IndexError
        # when the episode has no audio attached
        audio_links = attrs.get('audioLinks')
        if not audio_links:
            raise ExtractorError('No audio links found', video_id=audio_id)
        # only the first listed audio link is used
        audio_info = audio_links[0]

        return {
            'id': audio_id,
            'url': audio_info['url'],
            'title': attrs['title'],
            'description': clean_html(attrs.get('description')),
            'duration': int_or_none(audio_info.get('duration')),
            # audio-only content
            'vcodec': 'none',
            'thumbnail': self._og_search_thumbnail(webpage),
            'timestamp': parse_iso8601(
                self._og_search_property('updated_time', webpage, fatal=False)),
        }

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',