mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-06-06 04:52:41 +09:00
Compare commits
6 Commits
83eedc093c
...
84a9c17455
Author | SHA1 | Date | |
---|---|---|---|
![]() |
84a9c17455 | ||
![]() |
4e714f9df1 | ||
![]() |
c1ea7f5a24 | ||
![]() |
1204472ade | ||
![]() |
a1466c4996 | ||
![]() |
5c76372835 |
@ -359,7 +359,9 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
|
||||
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(((?:[^/]+/)?(?:player|live|video|serie|sendung)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+))|(((?P<sender>[a-zA-Z0-9\-]+)([/]))?(?P<name>[a-zA-Z0-9\-]+)))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
@ -395,73 +397,189 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://ardmediathek.de/sendung/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://ardmediathek.de/serie/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/br/dahoam-is-dahoam',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_name = self._match_id(url, group_name='name')
|
||||
sender = self._match_id(url, group_name='sender')
|
||||
|
||||
if '/serie/' in url or '/sendung/' in url:
|
||||
return self._real_extract_serie(video_id)
|
||||
elif 'none' != video_name.lower():
|
||||
return self._real_extract_named_serie(video_name, sender if 'none' != sender.lower() else "ard")
|
||||
else:
|
||||
return self._real_extract_video(video_id)
|
||||
|
||||
def _real_extract_video(self, video_id):
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
video_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client: "ard", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
mediaCollection {
|
||||
_duration
|
||||
_geoblocked
|
||||
_isLive
|
||||
_mediaArray {
|
||||
_mediaStreamArray {
|
||||
_quality
|
||||
_server
|
||||
_stream
|
||||
}
|
||||
}
|
||||
_previewImage
|
||||
_subtitleUrl
|
||||
_type
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
atiCustomVars {
|
||||
contentId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % video_id,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}',
|
||||
video_id
|
||||
)
|
||||
|
||||
title = player_page['title']
|
||||
content_id = str_or_none(try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||
media_collection = player_page.get('mediaCollection') or {}
|
||||
content_id = str_or_none(
|
||||
try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']
|
||||
)
|
||||
)
|
||||
media_collection = player_page['widgets'][0].get('mediaCollection') or {}
|
||||
if not media_collection and content_id:
|
||||
media_collection = self._download_json(
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False) or {}
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False
|
||||
) or {}
|
||||
|
||||
info = self._parse_media_info(
|
||||
media_collection, content_id or video_id,
|
||||
player_page.get('blockedByFsk'))
|
||||
media_collection['embedded'], content_id or video_id,
|
||||
player_page['widgets'][0].get('blockedByFsk')
|
||||
)
|
||||
|
||||
age_limit = None
|
||||
description = player_page.get('synopsis')
|
||||
maturity_content_rating = player_page.get('maturityContentRating')
|
||||
description = player_page['widgets'][0].get('synopsis')
|
||||
maturity_content_rating = player_page['widgets'][0].get('maturityContentRating')
|
||||
if maturity_content_rating:
|
||||
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||
if not age_limit and description:
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
})
|
||||
age_limit = int_or_none(
|
||||
self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None
|
||||
)
|
||||
)
|
||||
|
||||
session_episode_match = re.search(r"\(S(\d+)/E(\d+)\)", title)
|
||||
episode_match = re.search(r"\((\d+)\)", title)
|
||||
episode_name_match = re.search(r"(Folge\s\d+)", title)
|
||||
|
||||
if session_episode_match:
|
||||
season_number = session_episode_match.group(1)
|
||||
episode_number = session_episode_match.group(2)
|
||||
|
||||
alt_title = re.sub(r"\(S\d+/E\d+\)", "", title)
|
||||
alt_title = re.sub(r"(Folge \d+(\:|\s\-))", "", alt_title)
|
||||
alt_title = alt_title.replace("|", "").replace(" ", " ").replace(" .", "").strip()
|
||||
|
||||
info.update(
|
||||
{
|
||||
'season_number': int(season_number),
|
||||
'episode_number': int(episode_number),
|
||||
'alt_title': alt_title
|
||||
}
|
||||
)
|
||||
elif episode_name_match:
|
||||
episode_number = episode_name_match.group(1).replace("Folge ", "")
|
||||
|
||||
alt_title = re.sub(r"(Folge\s\d+)", "", title)
|
||||
alt_title = alt_title.replace("|", "").replace(" ", " ").replace(" .", "").strip()
|
||||
|
||||
info.update(
|
||||
{
|
||||
'season_number': int(0),
|
||||
'episode_number': int(episode_number),
|
||||
'alt_title': alt_title
|
||||
}
|
||||
)
|
||||
|
||||
elif episode_match:
|
||||
episode_number = episode_match.group(1)
|
||||
info.update(
|
||||
{
|
||||
'season_number': int(0),
|
||||
'episode_number': int(episode_number),
|
||||
'alt_title':re.sub(r"\(\d+\)", "", title).replace(" ", "").strip()
|
||||
}
|
||||
)
|
||||
else:
|
||||
info.update(
|
||||
{
|
||||
'alt_title': title
|
||||
}
|
||||
)
|
||||
|
||||
info.update(
|
||||
{
|
||||
'age_limit': age_limit,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page['widgets'][0].get('broadcastedOn')),
|
||||
'series': try_get(player_page['widgets'][0], lambda x: x['show']['title']),
|
||||
'channel': player_page['widgets'][0]['publicationService']['name'],
|
||||
'channel_id': player_page['widgets'][0]['publicationService']['id'],
|
||||
'channel_url': f"https://www.ardmediathek.de/{player_page['widgets'][0]['publicationService']['id']}",
|
||||
}
|
||||
)
|
||||
return info
|
||||
|
||||
def _real_extract_serie(self, video_id):
|
||||
entries = []
|
||||
|
||||
page_number = 0
|
||||
page_size = 100
|
||||
|
||||
while True:
|
||||
|
||||
widgets = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/widgets/ard/asset/{video_id}',
|
||||
video_id,
|
||||
query={'pageSize':str(page_size), 'pageNumber':page_number}
|
||||
)
|
||||
|
||||
for teaser in widgets['teasers']:
|
||||
if 'EPISODE' == teaser['coreAssetType']:
|
||||
item = self._real_extract_video(teaser['id'])
|
||||
item['webpage_url'] = f"https://www.ardmediathek.de/video/{teaser['id']}"
|
||||
|
||||
entries.append(item)
|
||||
|
||||
total = widgets['pagination']['totalElements']
|
||||
if (page_number + 1) * page_size > total:
|
||||
break
|
||||
page_number += 1
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
||||
def _real_extract_named_serie(self, video_id, sender):
|
||||
|
||||
entries = []
|
||||
page_size = 100
|
||||
|
||||
widgets = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/{sender}/editorial/{video_id}',
|
||||
video_id,
|
||||
query={'pageSize': str(10), 'pageNumber': 0}
|
||||
)['widgets']
|
||||
|
||||
for widget in widgets:
|
||||
widget_id = widget['id']
|
||||
page_number = 0
|
||||
|
||||
while True:
|
||||
page_data = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/widgets/{sender}/editorials/{widget_id}',
|
||||
video_id,
|
||||
query={'pageSize': page_size, 'pageNumber': page_number}
|
||||
)
|
||||
|
||||
for teaser in page_data['teasers']:
|
||||
if 'EPISODE' == teaser.get('coreAssetType', None) and teaser['type'] not in ['poster'] and ':' not in teaser['id']:
|
||||
|
||||
item = self._real_extract_video(teaser['id'])
|
||||
item['webpage_url'] = f"https://www.ardmediathek.de/video/{teaser['id']}"
|
||||
entries.append(item)
|
||||
|
||||
total = page_data['pagination']['totalElements']
|
||||
if (page_number + 1) * page_size > total:
|
||||
break
|
||||
page_number += 1
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
|
||||
_IE_DESC = 'CC视频'
|
||||
IE_DESC = 'CC视频'
|
||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
|
@ -422,6 +422,8 @@ class InfoExtractor(object):
|
||||
_GEO_COUNTRIES = None
|
||||
_GEO_IP_BLOCKS = None
|
||||
_WORKING = True
|
||||
# supply this in public subclasses: used in supported sites list, etc
|
||||
# IE_DESC = 'short description of IE'
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
@ -454,10 +456,10 @@ class InfoExtractor(object):
|
||||
return cls.__match_valid_url(url) is not None
|
||||
|
||||
@classmethod
|
||||
def _match_id(cls, url):
|
||||
def _match_id(cls, url, group_name = 'id'):
|
||||
m = cls.__match_valid_url(url)
|
||||
assert m
|
||||
return compat_str(m.group('id'))
|
||||
return compat_str(m.group(group_name))
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
|
@ -35,15 +35,6 @@ from ..utils import (
|
||||
|
||||
class ITVBaseIE(InfoExtractor):
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||
transform_source = kw.pop('transform_source', None)
|
||||
fatal = kw.pop('fatal', True)
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
|
||||
webpage, 'next.js data', group='js', fatal=fatal, **kw),
|
||||
video_id, transform_source=transform_source, fatal=fatal)
|
||||
|
||||
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
|
||||
if errnote is False:
|
||||
return False
|
||||
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
|
||||
|
||||
class ITVIE(ITVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
|
||||
_IE_DESC = 'ITVX'
|
||||
IE_DESC = 'ITVX'
|
||||
_WORKING = False
|
||||
|
||||
_TESTS = [{
|
||||
'note': 'Hub URLs redirect to ITVX',
|
||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
|
||||
'ext': determine_ext(href, 'vtt'),
|
||||
})
|
||||
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
|
||||
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
|
||||
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
|
||||
info = self._og_extract(webpage, require_title=not title)
|
||||
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
|
||||
|
||||
class ITVBTCCIE(ITVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||
IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||
_TESTS = [{
|
||||
'note': 'British Touring Car Championship',
|
||||
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
|
||||
|
@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
|
||||
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
|
||||
['arch', '', 'http://ussenate-f.akamaihd.net/']
|
||||
]
|
||||
_IE_NAME = 'senate.gov'
|
||||
IE_NAME = 'senate.gov'
|
||||
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||
|
Loading…
x
Reference in New Issue
Block a user