Compare commits

...

6 Commits

Author SHA1 Message Date
shouldsee
623d461e97
Merge d5de6295cf294cc5f93b07dad4f868b4d12b380a into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6 2025-04-01 08:12:49 +02:00
dirkf
3eb8d22ddb
[JSInterp] Temporary fix for #33102 2025-03-31 04:21:09 +01:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
shouldsee
d5de6295cf format 2023-05-10 02:21:27 +08:00
shouldsee
1ebcbb7812 adapt to work for mewatch.sg 2023-05-07 20:54:40 +08:00
7 changed files with 253 additions and 187 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频'
IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec'
IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False:
return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX'
IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{
'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'),
})
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{
'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/']
]
_IE_NAME = 'senate.gov'
IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View File

@ -1,13 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
@ -16,219 +12,292 @@ from ..utils import (
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
_VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
'id': '343115',
'ext': 'mp4',
'title': 'Lion Moms Premiere',
'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
'upload_date': '20150910',
'timestamp': 1441858274,
IE_NAME = "toggle"
_VALID_URL = r"(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:[^/]+/){1,}|toggle:)[^/]+-(?P<id>[0-9]+)"
_TESTS = [
{
### to be updated
### deprecated
# 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
"info_dict": {
"id": "343115",
"ext": "mp4",
"title": "Lion Moms Premiere",
"description": "md5:aea1149404bff4d7f7b6da11fafd8e6b",
"upload_date": "20150910",
"timestamp": 1441858274,
},
"params": {
"skip_download": "m3u8 download",
},
},
'params': {
'skip_download': 'm3u8 download',
}
}, {
'note': 'DRM-protected video',
'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
'info_dict': {
'id': '341413',
'ext': 'wvm',
'title': 'Dug\'s Special Mission',
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
'upload_date': '20150827',
'timestamp': 1440644006,
{
"note": "DRM-protected video",
### url deprecated
# 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
"info_dict": {
"id": "341413",
"ext": "wvm",
"title": "Dug's Special Mission",
"description": "md5:e86c6f4458214905c1772398fabc93e0",
"upload_date": "20150827",
"timestamp": 1440644006,
},
"params": {
"skip_download": "DRM-protected wvm download",
},
},
'params': {
'skip_download': 'DRM-protected wvm download',
}
}, {
# this also tests correct video id extraction
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
'info_dict': {
'id': '332861',
'ext': 'mp4',
'title': '28th SEA Games (5 Show) - Episode 11',
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
'upload_date': '20150605',
'timestamp': 1433480166,
{
# this also tests correct video id extraction
"note": "m3u8 links are geo-restricted, but Android/mp4 is okay",
### url deprecated
# 'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
"info_dict": {
"id": "332861",
"ext": "mp4",
"title": "28th SEA Games (5 Show) - Episode 11",
"description": "md5:3cd4f5f56c7c3b1340c50a863f896faa",
"upload_date": "20150605",
"timestamp": 1433480166,
},
"params": {
"skip_download": "DRM-protected wvm download",
},
"skip": "m3u8 links are geo-restricted",
},
'params': {
'skip_download': 'DRM-protected wvm download',
{
### video.toggle.sg seems deprecated
# 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
"only_matching": True,
},
'skip': 'm3u8 links are geo-restricted'
}, {
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
'only_matching': True,
}, {
'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
'only_matching': True,
}, {
'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
'only_matching': True,
}, {
'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
'only_matching': True,
}, {
'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
'only_matching': True,
}]
{
"url": "https://www.mewatch.sg/clips/Seraph-Sun-Aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set-84901",
"only_matching": True,
},
{
"url": "https://www.mewatch.sg/episode/Zero-Calling-S2-E13-The-End-of-The-Beginning-55137",
"only_matching": True,
},
{
### webisodes no longer used
"url": "https://www.mewatch.sg/clips/Jeeva-is-an-orphan-Vetri-S2-Webisode-7-84944",
# 'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
"only_matching": True,
},
{
### only available in Singapore 403 forbidden
"url": "https://www.mewatch.sg/movie/Seven-Days-79641",
"only_matching": True,
},
{
### not working for this list, but not a big issue since old path deprecated
"url": "https://www.mewatch.sg/list/CNA-Singapore-Tonight-154904",
# 'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
"only_matching": True,
},
{
### not working. seems deprecated
# 'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
"only_matching": True,
},
{
### [20230507:shouldsee] working
"url": "https://www.mewatch.sg/watch/Seraph-Sun-Aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set-84901",
"only_matching": True,
},
]
_API_USER = 'tvpapi_147'
_API_PASS = '11111'
_API_USER = "tvpapi_147"
_API_PASS = "11111"
def _real_extract(self, url):
video_id = self._match_id(url)
lang = "en"
params = {
'initObj': {
'Locale': {
'LocaleLanguage': '',
'LocaleCountry': '',
'LocaleDevice': '',
'LocaleUserState': 0
},
'Platform': 0,
'SiteGuid': 0,
'DomainID': '0',
'UDID': '',
'ApiUser': self._API_USER,
'ApiPass': self._API_PASS
},
'MediaID': video_id,
'mediaType': 0,
"delivery": "stream,progressive",
"resolution": "External",
"segments": "all",
"lang": lang,
"ff": "idp,ldp,rpt,cd",
}
video_info = self._download_json(
"https://cdn.mewatch.sg/api/items/" + video_id + "/videos",
video_id,
"Downloading video info json",
query=params,
)
info = self._download_json(
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
params = {
# 'delivery':'stream,progressive',
# 'resolution':'External',
"segments": "all",
"lang": lang,
"ff": "idp,ldp,rpt,cd",
}
meta_info = self._download_json(
"https://cdn.mewatch.sg/api/items/" + video_id,
video_id,
"Downloading video info json",
query=params,
)
title = info['MediaName']
# urls = info
info = {"Files": video_info}
info.update(meta_info)
title = info["path"].rsplit("/", 1)[-1]
formats = []
for video_file in info.get('Files', []):
video_url, vid_format = video_file.get('URL'), video_file.get('Format')
if not video_url or video_url == 'NA' or not vid_format:
for video_file in info.get("Files", []):
video_url, vid_format = video_file.get("url"), video_file.get("format")
if not video_url or video_url == "NA" or not vid_format:
continue
ext = determine_ext(video_url)
vid_format = vid_format.replace(' ', '')
vid_format = vid_format.replace(" ", "")
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
if ext == 'm3u8':
if ext == "m3u8":
m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id=vid_format,
note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False)
video_url,
video_id,
ext="mp4",
m3u8_id=vid_format,
note="Downloading %s m3u8 information" % vid_format,
errnote="Failed to download %s m3u8 information" % vid_format,
fatal=False,
)
for f in m3u8_formats:
# Apple FairPlay Streaming
if '/fpshls/' in f['url']:
if "/fpshls/" in f["url"]:
continue
formats.append(f)
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id=vid_format,
note='Downloading %s MPD manifest' % vid_format,
errnote='Failed to download %s MPD manifest' % vid_format,
fatal=False))
elif ext == 'ism':
formats.extend(self._extract_ism_formats(
video_url, video_id, ism_id=vid_format,
note='Downloading %s ISM manifest' % vid_format,
errnote='Failed to download %s ISM manifest' % vid_format,
fatal=False))
elif ext == 'mp4':
formats.append({
'ext': ext,
'url': video_url,
'format_id': vid_format,
})
elif ext == "mpd":
formats.extend(
self._extract_mpd_formats(
video_url,
video_id,
mpd_id=vid_format,
note="Downloading %s MPD manifest" % vid_format,
errnote="Failed to download %s MPD manifest" % vid_format,
fatal=False,
)
)
elif ext == "ism":
formats.extend(
self._extract_ism_formats(
video_url,
video_id,
ism_id=vid_format,
note="Downloading %s ISM manifest" % vid_format,
errnote="Failed to download %s ISM manifest" % vid_format,
fatal=False,
)
)
elif ext == "mp4":
formats.append(
{
"ext": ext,
"url": video_url,
"format_id": vid_format,
}
)
if not formats:
for meta in (info.get('Metas') or []):
if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
raise ExtractorError(
'This video is DRM protected.', expected=True)
# Most likely because geo-blocked
raise ExtractorError('No downloadable videos found', expected=True)
self._sort_formats(formats)
for meta in info.get("Metas") or []:
if (
not self.get_param("allow_unplayable_formats")
and meta.get("Key") == "Encryption"
and meta.get("Value") == "1"
):
self.report_drm(video_id)
# Most likely because geo-blocked if no formats and no DRM
thumbnails = []
for picture in info.get('Pictures', []):
for picture in info.get("images", []):
if not isinstance(picture, dict):
continue
pic_url = picture.get('URL')
pic_url = picture.get("tile")
if not pic_url:
continue
thumbnail = {
'url': pic_url,
"url": pic_url,
}
pic_size = picture.get('PicSize', '')
m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
pic_size = picture.get("PicSize", "")
m = re.search(r"(?P<width>\d+)[xX](?P<height>\d+)", pic_size)
if m:
thumbnail.update({
'width': int(m.group('width')),
'height': int(m.group('height')),
})
thumbnail.update(
{
"width": int(m.group("width")),
"height": int(m.group("height")),
}
)
thumbnails.append(thumbnail)
def counter(prefix):
return int_or_none(
info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
info.get(prefix + "Counter") or info.get(prefix.lower() + "_counter")
)
return {
'id': video_id,
'title': title,
'description': strip_or_none(info.get('Description')),
'duration': int_or_none(info.get('Duration')),
'timestamp': parse_iso8601(info.get('CreationDate') or None),
'average_rating': float_or_none(info.get('Rating')),
'view_count': counter('View'),
'like_count': counter('Like'),
'thumbnails': thumbnails,
'formats': formats,
"id": video_id,
"title": title,
"description": strip_or_none(info.get("Description")),
"duration": int_or_none(info.get("Duration")),
"timestamp": parse_iso8601(info.get("CreationDate") or None),
"average_rating": float_or_none(info.get("Rating")),
"view_count": counter("View"),
"like_count": counter("Like"),
"thumbnails": thumbnails,
"formats": formats,
}
class MeWatchIE(InfoExtractor):
IE_NAME = 'mewatch'
_VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
'info_dict': {
'id': '1008625',
'ext': 'mp4',
'title': 'Recipe Of Life 味之道',
'timestamp': 1603306526,
'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
'upload_date': '20201021',
IE_NAME = "mewatch"
_VALID_URL = (
r"https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)"
)
_TESTS = [
{
"url": "https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371",
"info_dict": {
"id": "1008625",
"ext": "mp4",
"title": "Recipe Of Life 味之道",
"timestamp": 1603306526,
"description": "md5:6e88cde8af2068444fc8e1bc3ebf257c",
"upload_date": "20201021",
},
"params": {
"skip_download": "m3u8 download",
},
},
'params': {
'skip_download': 'm3u8 download',
{
"url": "https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232",
"only_matching": True,
},
}, {
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
'only_matching': True,
}, {
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
'only_matching': True,
}, {
'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
'only_matching': True,
}]
{
"url": "https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232",
"only_matching": True,
},
{
"url": "https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759",
"only_matching": True,
},
]
def _real_extract(self, url):
item_id = self._match_id(url)
custom_id = self._download_json(
'https://cdn.mewatch.sg/api/items/' + item_id,
item_id, query={'segments': 'all'})['customId']
return self.url_result(
'toggle:' + custom_id, ToggleIE.ie_key(), custom_id)
return self.url_result("toggle:" + item_id, ToggleIE.ie_key(), item_id)
print(f"[debug]{item_id}")
xdata = self._download_json(
"https://cdn.mewatch.sg/api/items/" + item_id,
item_id,
query={"segments": "all", "lang": "en", "ff": "idp,ldp,rpt,cd"},
)
from pprint import pprint
pprint(xdata)
custom_id = xdata["customId"]
print(f"[debug]{custom_id}")
return self.url_result("toggle:" + custom_id, ToggleIE.ie_key(), custom_id)

View File

@ -686,6 +686,8 @@ class JSInterpreter(object):
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace):
if obj is JS_Undefined:
return 'undefined'
try:
return json.dumps(obj)
except TypeError: