Compare commits

...

4 Commits

Author SHA1 Message Date
dirkf
b1fb17021f
Merge 964f2cc5bd7b87fede8da24d4cc4e993004bc8db into 4e714f9df1ed2cccd51df60d45ff5504abe827b7 2025-03-30 06:35:36 +00:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
dirkf
964f2cc5bd Add new extractor for ERTFlix.gr: single shows, series as playlists 2022-01-16 20:04:08 +00:00
7 changed files with 179 additions and 15 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -0,0 +1,165 @@
# coding: utf-8
from __future__ import unicode_literals
from ..compat import compat_str
from ..utils import (
determine_ext,
dict_get,
int_or_none,
parse_age_limit,
parse_iso8601,
str_or_none,
try_get,
url_or_none,
)
from .common import InfoExtractor
class ERTFlixBaseIE(InfoExtractor):
_VALID_URL = r'ertflix:(?P<id>[\w-]+)'
_TESTS = [{
'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
'info_dict': {
'id': 'monogramma-praxitelis-tzanoylinos',
'ext': 'mp4',
'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e',
},
},
]
def _call_api(self, video_id, method='Player/AcquireContent', **params):
query = {'platformCodename': 'www', }
query.update(params)
json = self._download_json(
'https://api.app.ertflix.gr/v1/%s' % (method, ),
video_id, fatal=False, query=query)
if try_get(json, lambda x: x['Result']['Success']) is True:
return json
def _extract_formats(self, video_id, allow_none=True):
media_info = self._call_api(video_id, codename=video_id)
formats = []
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
for media in try_get(media_file, lambda x: x['Formats'], list) or []:
fmt_url = url_or_none(try_get(media, lambda x: x['Url']))
if not fmt_url:
continue
ext = determine_ext(fmt_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(fmt_url, video_id, m3u8_id='hls', ext='mp4', entry_protocol='m3u8_native', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(fmt_url, video_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': fmt_url,
'format_id': str_or_none(media.get('Id')),
})
if formats or not allow_none:
self._sort_formats(formats)
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
formats = self._extract_formats(video_id)
if formats:
return {
'id': video_id,
'formats': formats,
'title': self._generic_title(url),
}
class ERTFlixIE(ERTFlixBaseIE):
_VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
_TESTS = [{
'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
'md5': '388f47a70e1935c8dabe454e871446bb',
'info_dict': {
'id': 'aoratoi-ergates',
'ext': 'mp4',
'title': 'md5:c1433d598fbba0211b0069021517f8b4',
'description': 'md5:8cc02e5cdb31058c8f6e0423db813770',
'thumbnail': r're:https?://.+\.jpg',
},
}, {
'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
'info_dict': {
'id': 'ser.3448',
'age_limit': 8,
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.',
'title': 'Μονόγραμμα',
},
'playlist_mincount': 64,
},
]
def _extract_series(self, video_id):
media_info = self._call_api(video_id, method='Tile/GetSeriesDetails', id=video_id)
series = try_get(media_info, lambda x: x['Series'], dict) or {}
series_info = {
'age_limit': parse_age_limit(series.get('AgeRating', series.get('IsAdultContent') and 18)),
'title': series.get('Title'),
'description': dict_get(series, ('ShortDescription', 'TinyDescription', )),
}
def gen_episode(m_info):
for episode_group in try_get(m_info, lambda x: x['EpisodeGroups'], list) or []:
episodes = try_get(episode_group, lambda x: x['Episodes'], list)
if not episodes:
continue
season_info = {
'season': episode_group.get('Title'),
'season_number': int_or_none(episode_group.get('SeasonNumber')),
}
for n, episode in enumerate(episodes, 1):
codename = try_get(episode, lambda x: x['Codename'], compat_str)
title = episode.get('Title')
if not codename and title and episode.get('HasPlayableStream', True):
continue
info = {
'_type': 'url_transparent',
'thumbnail': url_or_none(try_get(episode, lambda x: x['Image']['Url'])),
'id': codename,
'episode_id': episode.get('Id'),
'title': title,
'alt_title': episode.get('Subtitle'),
'description': episode.get('ShortDescription'),
'timestamp': parse_iso8601(episode.get('PublishDate')),
'episode_number': n,
'age_limit': parse_age_limit(episode.get('AgeRating', episode.get('IsAdultContent') and 18)) or series_info.get('age_limit'),
'url': 'ertflix:%s' % (codename, ),
}
info.update(season_info)
yield info
result = self.playlist_result((e for e in gen_episode(media_info)), playlist_id=video_id)
result.update(series_info)
return result
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if video_id.startswith('ser'):
return self._extract_series(video_id)
video_id = self._search_regex(
r'"codenameToId"\s*:\s*\{\s*"([\w-]+)"\s*:\s*"%s"' % (video_id, ),
webpage, video_id, default=False) or video_id
title = self._og_search_title(webpage)
formats = self._extract_formats(video_id, False)
return {
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
}

View File

@ -361,6 +361,10 @@ from .engadget import EngadgetIE
from .epidemicsound import EpidemicSoundIE from .epidemicsound import EpidemicSoundIE
from .eporner import EpornerIE from .eporner import EpornerIE
from .eroprofile import EroProfileIE from .eroprofile import EroProfileIE
from .ertgr import (
ERTFlixBaseIE,
ERTFlixIE,
)
from .escapist import EscapistIE from .escapist import EscapistIE
from .espn import ( from .espn import (
ESPNIE, ESPNIE,

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',