Compare commits

...

12 Commits

Author SHA1 Message Date
Yuval Hager
4272e78d97
Merge 57c3cb420c31e78732566b77dfb16fd78ebfd310 into 4e714f9df1ed2cccd51df60d45ff5504abe827b7 2025-03-30 11:58:11 +00:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
Yuval Hager
57c3cb420c remove geo_countries 2021-05-19 07:38:25 -07:00
Yuval Hager
803b071036 add skip_download to tests using ffmpeg 2021-05-18 18:32:26 -07:00
Yuval Hager
279539e995 fix trailing parentheses 2021-05-18 18:17:43 -07:00
Yuval Hager
440aba21de fix typo 2021-02-13 20:37:15 -08:00
Yuval Hager
c0fd80c113 code review fixes 2021-02-13 15:52:37 -08:00
Yuval Hager
e6c7b3c154 code review fixes 2021-01-27 16:03:01 -08:00
Yuval Hager
e3a900e707 minor fixes 2021-01-26 00:44:23 -08:00
Yuval Hager
5507979402 typo fix 2021-01-25 01:19:43 -08:00
Yuval Hager
fecc1dc727 [kan] Add new extractor 2021-01-25 00:06:57 -08:00
7 changed files with 130 additions and 15 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -559,6 +559,10 @@ from .joj import JojIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .kakao import KakaoIE from .kakao import KakaoIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .kan import (
KanEpisodeIE,
KanPlaylistIE
)
from .kankan import KankanIE from .kankan import KankanIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE from .karrierevideos import KarriereVideosIE

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

116
youtube_dl/extractor/kan.py Normal file
View File

@ -0,0 +1,116 @@
# coding: utf-8
from __future__ import unicode_literals
from ..compat import compat_str
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
try_get,
unified_strdate)
def get_thumbnail(data):
    """Return the first non-empty image URL found in ``data``.

    Walks ``data['media_group']``, considers only groups whose ``type`` is
    ``'image'``, and returns the first truthy ``src`` among that group's
    ``media_item`` entries.

    Missing keys and keys holding ``None`` (JSON ``null``) are both treated
    as empty lists, so incomplete metadata yields ``None`` instead of an
    exception.

    :param data: dict parsed from the page's JSON metadata
    :return: the thumbnail URL, or ``None`` when no image is available
    """
    # `or []` rather than a .get() default: the default only applies when the
    # key is absent, not when it is present with a null value.
    for media in data.get('media_group') or []:
        if media.get('type') != 'image':
            continue
        for item in media.get('media_item') or []:
            thumbnail = item.get('src')
            if thumbnail:
                return thumbnail
    return None
class KanBaseIE(InfoExtractor):
    """Shared helpers for the kan.org.il extractors."""

    def download_webpage(self, url, video_id):
        """Download ``url`` and return the page content.

        Thin wrapper around ``_download_webpage`` so subclasses share one
        place to fetch pages.
        """
        return self._download_webpage(url, video_id)

    def extract_item(self, video_id, webpage):
        """Build the info dict for a single item page.

        The metadata is read from the JSON embedded in the
        ``<script id="kan_app_search_data">`` element; Open Graph tags are
        used as a fallback for title, description and creator.

        :param video_id: ID reported in the resulting info dict
        :param webpage: HTML of the item page
        :return: info dict with formats and descriptive metadata
        :raises ExtractorError: when no stream URL or no formats are found
        """
        data = self._parse_json(
            self._search_regex(
                r'<script[^>]+id="kan_app_search_data"[^>]*>([^<]+)</script>',
                webpage, 'data'),
            video_id)

        title = data.get('title') or self._og_search_title(webpage)

        m3u8_url = try_get(data, lambda x: x['content']['src'], compat_str)
        # Fail with a clear message instead of passing None downstream.
        if not m3u8_url:
            raise ExtractorError('Unable to extract video formats')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
        if not formats:
            raise ExtractorError('Unable to extract video formats')
        # Formats are expected to be ordered from worst to best quality.
        self._sort_formats(formats)

        # _og_search_description is already non-fatal; passing fatal=False
        # again would supply the keyword twice.
        description = data.get('summary') or self._og_search_description(webpage)
        creator = (
            try_get(data, lambda x: x['author']['name'], compat_str)
            or self._og_search_property('site_name', webpage, fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': get_thumbnail(data),
            'formats': formats,
            'description': description,
            'creator': creator,
            'release_date': unified_strdate(data.get('published')),
            'duration': parse_duration(
                try_get(data, lambda x: x['extensions']['duration'])),
        }
class KanEpisodeIE(KanBaseIE):
    """Extractor for a single kan.org.il item page (``/Item/?itemId=N``)."""

    _VALID_URL = r'https?://(?:www\.)?kan\.org\.il/[iI]tem/\?item[iI]d=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.kan.org.il/Item/?itemId=74658',
        'md5': 'c28763bdb61c1bb7823528dd024e6129',
        'info_dict': {
            'id': '74658',
            'ext': 'mp4',
            'title': 'העד - פרק 2',
            'thumbnail': r're:^https://.*36805_A\.jpeg$',
            'description': 'הגופות ממשיכות להיערם, אך איזי עדיין מפקפק בחשדות נגד ברק',
            'creator': 'מערכת כאן',
            'release_date': '20200803',
            'duration': 2393,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the item page for ``url`` and return its info dict."""
        item_id = self._match_id(url)
        page = self.download_webpage(url, item_id)
        return self.extract_item(item_id, page)
class KanPlaylistIE(KanBaseIE):
    """Extractor for kan.org.il program pages (``/program/?catId=N``)."""

    _VALID_URL = r'https?://(?:www\.)?kan\.org\.il/program/\?cat[iI]d=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.kan.org.il/program/?catId=1636',
        'playlist_mincount': 9,
        'info_dict': {
            'id': '1636',
            'title': 'מנאייכ - פרקים מלאים לצפייה ישירה | כאן',
            'description': 'md5:9dfbd501189d08674d20762464c5301b',
        },
    }

    def _real_extract(self, url):
        """Collect every item referenced by the program page at ``url``.

        Item IDs are taken from the ``onclick="playVideo(...)"`` handlers on
        the page; each item page is then downloaded and extracted in turn.

        :raises ExtractorError: when the page references no items
        """
        list_id = self._match_id(url)
        webpage = self.download_webpage(url, list_id)

        # Non-greedy `.*?`: a greedy `.*` would run to the last handler on a
        # line and drop the IDs of any earlier handlers sharing that line.
        video_ids = re.findall(
            r'onclick="playVideo\(.*?,\'([0-9]+)\'\)', webpage)

        entries = []
        for video_id in video_ids:
            video_url = 'https://www.kan.org.il/Item/?itemId=%s' % video_id
            entries.append(self.extract_item(
                video_id,
                self.download_webpage(video_url, video_id)))
        if not entries:
            raise ExtractorError('Unable to extract playlist entries')

        return {
            '_type': 'playlist',
            'id': list_id,
            'entries': entries,
            'title': self._og_search_title(webpage, fatal=False),
            'description': self._og_search_description(webpage),
        }

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',