Fix extraction

This commit is contained in:
dirkf 2022-11-17 19:23:43 +00:00 committed by GitHub
parent 8d2881a442
commit 001d5b8395
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -2,7 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
url_or_none,
)
class VOEIE(InfoExtractor): class VOEIE(InfoExtractor):
IE_NAME = 'voe' IE_NAME = 'voe'
@@ -12,7 +16,7 @@ class VOEIE(InfoExtractor):
'url': 'https://voe.sx/e/ng7ja5n5n2y8', 'url': 'https://voe.sx/e/ng7ja5n5n2y8',
'info_dict': { 'info_dict': {
'id': 'ng7ja5n5n2y8', 'id': 'ng7ja5n5n2y8',
'title': 'md5:05ab15eb43a32f0f5918755156c5fb34', 'title': 'md5:a86687fb962742f04652aee19ad34e06',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'ext': 'm3u8', 'ext': 'm3u8',
}, },
@@ -24,21 +28,34 @@ class VOEIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'https://voe.sx/e/%s' % video_id, video_id) 'https://voe.sx/e/%s' % video_id, video_id)
m3u8 = self._search_regex( sources = self._parse_json(
r'(https.+m3u8)', self._search_regex(r'\bsources\s*=\s*(\{[^}]+\})', webpage, 'sources'),
webpage, 'm3u8') video_id, transform_source=js_to_json)
title = self._search_regex( title = self._search_regex(
r'<title>Watch (?P<title>.+)<\/title>', r'<title>(?:Watch\s+)?(?P<title>.+?)(?:-\s+VOE\s+\|.+)?</title>',
webpage, 'title', group='title') webpage, 'title', group='title')
thumbnail = self._search_regex( formats = []
r'VOEPlayer.poster="(?P<thumbnail>https.+)"',
webpage, 'thumbnail', group='thumbnail') f_url = url_or_none(sources.get('hls'))
if f_url:
formats.extend(self._extract_m3u8_formats(
f_url, video_id, entry_protocol='m3u8_native', fatal=False))
f_url = url_or_none(sources.get('mp4'))
if f_url:
formats.append({
'url': f_url,
'ext': 'mp4',
'height': int_or_none(sources.get('video_height')),
})
formats = self._extract_m3u8_formats(m3u8, video_id)
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = url_or_none(self._search_regex(
r'(?:VOEPlayer.|data-)poster\s*=\s*(["\'])(?P<thumbnail>(?:(?!\1)\S)+)\1',
webpage, 'thumbnail', group='thumbnail', default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,