Compare commits

...

12 Commits

Author SHA1 Message Date
Moritz Bunse
21cec7c134
Merge 638ec38eb53c1e0c90d5e2feb82a32bc789835a4 into 4e714f9df1ed2cccd51df60d45ff5504abe827b7 2025-03-29 16:41:11 +08:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
dirkf
638ec38eb5
Linted 2023-01-23 13:27:52 +00:00
Moritz Bunse
47390b1355
Update youtube_dl/extractor/videocdn.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-01-21 20:28:53 +01:00
Moritz Bunse
5d37e5e19d
Update youtube_dl/extractor/videocdn.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-01-21 20:28:23 +01:00
mbunse
bdd0cf4611 apply suggested improvements 2023-01-21 20:27:41 +01:00
Moritz Bunse
6ff4cde8e8
Update youtube_dl/extractor/videocdn.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-01-21 20:17:48 +01:00
mbunse
62e96f48db Merge branch 'feature/add_videocdn_extractor' of https://github.com/mbunse/youtube-dl into feature/add_videocdn_extractor 2023-01-21 20:16:36 +01:00
mbunse
bfca7e849c remove duplicate video id extraction 2023-01-21 20:16:21 +01:00
Moritz Bunse
aec8c6b570
Update youtube_dl/extractor/videocdn.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-01-21 20:11:25 +01:00
mbunse
6b54fcef20 [VideoCdn] Add new extractor 2023-01-21 01:10:22 +01:00
7 changed files with 73 additions and 15 deletions

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -645,6 +645,7 @@ from .livestream import (
) )
from .lnkgo import LnkGoIE from .lnkgo import LnkGoIE
from .localnews8 import LocalNews8IE from .localnews8 import LocalNews8IE
from .videocdn import VideoCdnIE
from .lovehomeporn import LoveHomePornIE from .lovehomeporn import LoveHomePornIE
from .lrt import LRTIE from .lrt import LRTIE
from .lynda import ( from .lynda import (

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View File

@ -0,0 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
urljoin,
)
class VideoCdnIE(InfoExtractor):
_VALID_URL = r'https?://e\.video-cdn\.net/video?.*video-id=(?P<id>[a-zA-Z0-9-_]+).*'
_TESTS = [
{
'url': 'https://e.video-cdn.net/video?video-id=8eBUrWaMJFS38A5X-j2CgY&player-id=53Tun3ZZpZpVuvaTvsm3jU',
'info_dict': {
'id': '8eBUrWaMJFS38A5X-j2CgY',
'ext': 'mp4',
'title': 'RiskBuster FireFighter VI - Adventskranz',
'thumbnail': r're:(?i)https://.*\.jpeg',
},
},
{
'url': 'https://e.video-cdn.net/video?video-id=91imQ_wKjkTFghe-3mmBAA&player-id=7nCLZ_ESM8rT9YUw6qUGA9',
'info_dict': {
'id': '91imQ_wKjkTFghe-3mmBAA',
'ext': 'mp4',
'title': 'SCC2019_Talk_Tychsen_TXL.mp4',
'thumbnail': r're:(?i)https://.*\.jpeg',
},
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
thumbnail = self._search_regex(
r'\"thumbnailUrl\":\"(?P<thumbnail>[^\"]+)',
webpage, 'thumbnail', group='thumbnail',
default=None)
title = self._search_regex(r'"name"\s*:\s*"((?:\\"|[^"])+)', webpage, 'title')
manifest_url = self._search_regex(r'"contentUrl"\s*:\s*"((?:\\"|[^"])+)', webpage, 'manifest_url')
manifest_url = urljoin(url, manifest_url)
formats = []
if manifest_url and determine_ext(manifest_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='m3u8'))
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}