Compare commits

...

9 Commits

Author SHA1 Message Date
Morgan Harris
ff4d654a26
Merge 7ac17d4b4469d23e575c3483695a790a309b2ff8 into 4e714f9df1ed2cccd51df60d45ff5504abe827b7 2025-03-29 11:19:01 -07:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
dirkf
7ac17d4b44
Match merged yt-dlp extractor 2024-09-14 23:18:32 +01:00
dirkf
3d122d40c1
Disable TenPlaySeasonIE tests by default 2024-09-11 18:06:00 +01:00
dirkf
3ae35e2933
Support TenPlaySeasonIE 2024-09-11 17:38:07 +01:00
dirkf
def8836ca9
Back-port PR yt-dlp/yt-dlp#10928
* back-port PR
* improve geo-block detection
2024-09-11 17:34:13 +01:00
Morgan Harris
8b11dbba00 Add support for downloading an entire season as a playlist 2023-03-21 17:25:12 +11:00
Morgan Harris
552e30e988 Allow downloading videos requiring login on 10play.com.au
As far as I can tell this is now essentially every episode of every
show.
2023-03-21 15:27:31 +11:00
7 changed files with 187 additions and 56 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -1274,7 +1274,10 @@ from .telequebec import (
from .teletask import TeleTaskIE from .teletask import TeleTaskIE
from .telewebion import TelewebionIE from .telewebion import TelewebionIE
from .tennistv import TennisTVIE from .tennistv import TennisTVIE
from .tenplay import TenPlayIE from .tenplay import (
TenPlayIE,
TenPlaySeasonIE,
)
from .testurl import TestURLIE from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .tfo import TFOIE from .tfo import TFOIE

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View File

@ -1,70 +1,203 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_itertools_count,
compat_kwargs,
)
from ..utils import ( from ..utils import (
get_elements_by_attribute,
HEADRequest, HEADRequest,
parse_age_limit, float_or_none,
parse_iso8601, int_or_none,
# smuggle_url, merge_dicts,
NO_DEFAULT,
T,
traverse_obj,
txt_or_none,
url_or_none,
urljoin,
) )
class TenPlayIE(InfoExtractor): class TenPlayBase(InfoExtractor):
_GEO_COUNTRIES = ['AU']
_GEO_BYPASS = False
def raise_geo_restricted(self, *args, **kwargs):
countries = args[1] if len(args) > 1 else kwargs.get('countries', NO_DEFAULT)
if countries is NO_DEFAULT:
kwargs['countries'] = self._GEO_COUNTRIES
kwargs = compat_kwargs(kwargs)
super(TenPlayBase, self).raise_geo_restricted(*args, **kwargs)
def _download_webpage_handle(self, url_or_request, video_id, *args, **kwargs):
res = super(TenPlayBase, self)._download_webpage_handle(url_or_request, video_id, *args, **kwargs)
if res and any('Sorry, 10 play is not available in your region.' in e
for e in get_elements_by_attribute('class', 'iserror__text', res[0])):
self.raise_geo_restricted()
return res
class TenPlayIE(TenPlayBase):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})' _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
_NETRC_MACHINE = '10play'
_TESTS = [{ _TESTS = [{
'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga', 'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
'info_dict': { 'info_dict': {
'id': '6060533435001', 'id': '6336940246112',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MasterChef - S1 Ep. 1', 'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c', 'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'age_limit': 10, 'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
'timestamp': 1240828200, 'duration': 74,
'upload_date': '20090427', 'season': 'Season 41',
'season_number': 41,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10',
'age_limit': 15,
'timestamp': 1694386800,
'upload_date': '20230910',
'uploader_id': '2199827728001', 'uploader_id': '2199827728001',
}, },
'params': { 'params': {
# 'format': 'bestvideo',
'skip_download': True, 'skip_download': True,
} },
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
'info_dict': {
'id': '9000000000091177',
'ext': 'mp4',
'title': 'Neighbours - S42 Ep. 9107',
'alt_title': 'Thu 05 Sep',
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
'duration': 1388,
'episode': 'Episode 9107',
'episode_number': 9107,
'season': 'Season 42',
'season_number': 42,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'age_limit': 15,
'timestamp': 1725517860,
'upload_date': '20240905',
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
},
'params': {
'skip_download': True,
},
'skip': 'Only available in Australia',
}, { }, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True, 'only_matching': True,
}] }]
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
_GEO_BYPASS = False _AUS_AGES = {
_FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect' 'G': 0,
'PG': 15,
'M': 15,
'MA': 15,
'MA15+': 15,
'R': 18,
'X': 18,
}
def _real_extract(self, url): def _real_extract(self, url):
content_id = self._match_id(url) content_id = self._match_id(url)
data = self._download_json( data = self._download_json(
'https://10play.com.au/api/video/' + content_id, content_id) 'https://10play.com.au/api/v1/videos/' + content_id, content_id)
video = data.get('video') or {}
metadata = data.get('metaData') or {} video_data = self._download_json(
brightcove_id = video.get('videoId') or metadata['showContentVideoId'] 'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={0}'.format(data['altId']),
# brightcove_url = smuggle_url( content_id, 'Downloading video JSON')
# self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, m3u8_url = self._request_webpage(
# {'geo_countries': ['AU']}) HEADRequest(video_data['items'][0]['HLSURL']),
m3u8_url = self._request_webpage(HEADRequest( content_id, 'Checking stream URL').url
self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
if '10play-not-in-oz' in m3u8_url: if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU']) self.raise_geo_restricted()
formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4') # Attempt to get a higher quality stream
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)
return { return merge_dicts({
# '_type': 'url_transparent', 'id': content_id,
# 'url': brightcove_url,
'formats': formats, 'formats': formats,
'id': brightcove_id, 'uploader': 'Channel 10',
'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
'description': video.get('description'),
'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
'series': metadata.get('showName'),
'season': metadata.get('showContentSeason'),
'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
'thumbnail': video.get('poster'),
'uploader_id': '2199827728001', 'uploader_id': '2199827728001',
# 'ie_key': 'BrightcoveNew', }, traverse_obj(data, {
} 'subtitles': ('captionUrl', T(lambda x: None if x is None
else {'en': [{'url': x}]})),
'id': ('altId', T(txt_or_none)),
'duration': ('duration', T(float_or_none)),
'title': ('subtitle', T(txt_or_none)),
'alt_title': ('title', T(txt_or_none)),
'description': ('description', T(txt_or_none)),
'age_limit': ('classification', T(self._AUS_AGES.get)),
'series': ('tvShow', T(txt_or_none)),
'season_number': ('season', T(int_or_none)),
'episode_number': ('episode', T(int_or_none)),
'timestamp': ('published', T(int_or_none)),
'thumbnail': ('imageUrl', T(url_or_none)),
}), rev=True)
class TenPlaySeasonIE(TenPlayBase):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://10play.com.au/masterchef/episodes/season-14',
'info_dict': {
'title': 'Season 14',
'id': 'MjMyOTIy',
},
'playlist_mincount': 64,
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022',
'info_dict': {
'title': 'Season 2022',
'id': 'Mjc0OTIw',
},
'playlist_mincount': 256,
'skip': 'Only available in Australia',
}]
def _entries(self, load_more_url, display_id=None):
skip_ids = []
for page in compat_itertools_count(1):
episodes_carousel = self._download_json(
load_more_url, display_id, query={'skipIds[]': skip_ids},
note='Fetching episodes page {0}'.format(page))
episodes_chunk = episodes_carousel['items']
skip_ids.extend(ep['id'] for ep in episodes_chunk)
for ep in episodes_chunk:
yield ep['cardLink']
if not episodes_carousel.get('hasMore'):
break
def _real_extract(self, url):
show, season = self._match_valid_url(url).group('show', 'season')
season_info = self._download_json(*(s.format(show=show, season=season) for s in (
'https://10play.com.au/api/shows/{show}/episodes/{season}', '{show}/{season}')
))
episodes_carousel = traverse_obj(season_info, (
'content', 0, 'components', (
lambda _, v: v['title'].lower() == 'episodes',
(Ellipsis, T(dict)),
)), any) or {}
playlist_id = episodes_carousel['tpId']
return self.playlist_from_matches(
self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),
playlist_id, traverse_obj(season_info, ('content', 0, 'title', T(txt_or_none))),
getter=functools.partial(urljoin, url))