Compare commits

...

13 Commits

Author SHA1 Message Date
Miha Frangež
bd2cb48336
Merge d81793ea564e44219aa06f806caa8d46a242d2f3 into 4e714f9df1ed2cccd51df60d45ff5504abe827b7 2025-03-28 18:18:09 +01:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
Miha Frangež
d81793ea56 [RTV SLO 4D] Added support for subtitles 2021-03-25 11:49:56 +01:00
Miha Frangež
a302001725 [RTV SLO 4D] Extract both HTTPS and HTTP HLS URLs 2021-03-25 11:48:40 +01:00
Miha Frangež
944f674c6d [RTV SLO 4D] Reordered info extraction 2021-02-15 16:38:15 +01:00
Miha Frangež
8fb3a99c34 [RTV SLO 4D] Fixed test that failed due to different sorting 2021-02-15 15:33:21 +01:00
Miha Frangež
6435b66967 [RTV SLO 4D] Support for multiple audio formats 2021-02-15 15:31:03 +01:00
Miha Frangež
2e75b8092f [RTV SLO 4D] Cleanup, switched to HTTPS 2021-02-14 16:48:49 +01:00
Miha Frangež
5fe9de5f7a [RTV SLO 4D] Added support for audio, more tests 2021-02-11 13:52:52 +01:00
Miha Frangež
2cf78de692 [RTV SLO 4D] Improved URL regex 2021-02-11 13:51:54 +01:00
Miha Frangež
8e6eca6432 [RTV SLO 4D] Removed unnecessary requests, improved formatting 2021-02-11 12:49:53 +01:00
Miha Frangež
910ef313e1 [RTV SLO 4D] Add extractor 2021-02-10 19:43:17 +01:00
7 changed files with 109 additions and 15 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -1068,6 +1068,7 @@ from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
from .rtvnh import RTVNHIE from .rtvnh import RTVNHIE
from .rtvs import RTVSIE from .rtvs import RTVSIE
from .rtvslo import RTVSLO4DIE
from .ruhd import RUHDIE from .ruhd import RUHDIE
from .rumble import RumbleEmbedIE from .rumble import RumbleEmbedIE
from .rutube import ( from .rutube import (

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp
)
class RTVSLO4DIE(InfoExtractor):
    """Extractor for recordings served through the RTV SLO 4D API.

    Matches archive and embed pages on 4d.rtvslo.si as well as the
    4d/arhiv and mmr/prispevek pages on www.rtvslo.si, and handles
    recordings whose API ``mediaType`` is either ``video`` or ``audio``.
    """

    _VALID_URL = r'https?://(?:4d\.rtvslo\.si/(?:arhiv/[^/]+|embed)|www\.rtvslo\.si/(?:4d/arhiv|mmr/prispevek))/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://4d.rtvslo.si/arhiv/seje-odbora-za-kmetijstvo-gozdarstvo-in-prehrano/174595438',
        'md5': '37ab1181292a08e0d6b7952545e6ce8b',
        'info_dict': {
            'id': '174595438',
            'ext': 'mp4',
            'title': 'Krajčič o tatvini sendviča',
            'thumbnail': r're:https://img.rtvslo.si/.+\.jpg',
            'timestamp': 1549999614,
            'upload_date': '20190212',
            'duration': 85,
        },
    }, {
        'url': 'https://4d.rtvslo.si/arhiv/punto-e-a-capo/174752966',
        'md5': 'a1ce903ee0a4051e417c9357e3d51c71',
        'info_dict': {
            'id': '174752966',
            'ext': 'mp3',
            'title': 'Dante divulgatore della scienza, con Gian Italo Bischi. E un ricordo di Federico Roncoroni',
            'thumbnail': r're:https://img.rtvslo.si/.+\.jpg',
            'timestamp': 1613033635,
            'upload_date': '20210211',
            'duration': 1740,
        },
    }, {
        'url': 'https://4d.rtvslo.si/arhiv/punto-e-a-capo/174752966',
        'only_matching': True,
    }, {
        'url': 'https://4d.rtvslo.si/embed/174595438',
        'only_matching': True,
    }, {
        'url': 'https://www.rtvslo.si/4d/arhiv/174752597?s=tv_ita',
        'only_matching': True,
    }, {
        'url': 'https://www.rtvslo.si/mmr/prispevek/174752987',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the recording metadata and build the info dict.

        The numeric ID is taken from the URL and used to query the
        ``getRecording`` API endpoint; formats are then derived from the
        ``response`` object depending on its ``mediaType``.
        """
        media_id = self._match_id(url)

        media_info = self._download_json(
            'https://api.rtvslo.si/ava/getRecording/' + media_id, media_id,
            query={'client_id': '19cc0556a5ee31d0d52a0e30b0696b26'})['response']

        # Bind `formats` up front so that an unrecognised mediaType does not
        # raise UnboundLocalError below; the empty list is then handed to
        # _sort_formats like any other result.
        formats = []
        media_type = media_info['mediaType']
        if media_type == 'video':
            # Both the HTTPS ('hls_sec') and the plain HTTP ('hls') manifest
            # are extracted.  NB: 'addaptiveMedia' is the key as spelled by
            # the API.
            for proto in ('hls_sec', 'hls'):
                formats.extend(self._extract_m3u8_formats(
                    media_info['addaptiveMedia'][proto], media_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls'))
        elif media_type == 'audio':
            for media_file in media_info['mediaFiles']:
                filename = media_file['filename']
                formats.append({
                    'format_id': media_file['mediaType'],
                    'url': media_file['streamers']['http'] + '/' + filename,
                    'ext': determine_ext(filename),
                    'tbr': int_or_none(media_file.get('bitrate')),
                    'filesize': int_or_none(media_file.get('filesize')),
                    'vcodec': 'none',
                })
        self._sort_formats(formats)

        return {
            'id': media_id,
            'title': media_info['title'],
            'formats': formats,
            # try_get() needs a callable getter; a bare key string is
            # "called", fails, and silently yields None every time.
            'description': try_get(media_info, lambda x: x['description']),
            'thumbnail': media_info.get('thumbnail_sec'),
            'timestamp': unified_timestamp(media_info.get('broadcastDate')),
            'duration': media_info.get('duration'),
            'subtitles': self.extract_subtitles(media_info),
        }

    def _get_subtitles(self, media_info):
        """Map the API's subtitle entries to a {language: [tracks]} dict.

        Each entry of ``media_info['subtitles']`` contributes one VTT track
        under its ``language``; a missing or null list gives an empty dict.
        """
        subs = {}
        # `or []` also covers the key being present with a null value
        for sub in media_info.get('subtitles') or []:
            # append rather than assign, so that several tracks sharing a
            # language are all kept
            subs.setdefault(sub['language'], []).append({
                'ext': 'vtt',
                'url': sub['file'],
            })
        return subs

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',