apply more suggestions

This commit is contained in:
bellesafan 2023-02-25 03:26:10 +01:00
parent 801766a1ae
commit 8ef3fe138d

View File

@ -1,12 +1,13 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
from ..utils import ( from ..utils import (
clean_html, compat_str,
ExtractorError, ExtractorError,
int_or_none,
strip_or_none,
try_get, try_get,
url_or_none
) )
from .common import InfoExtractor from .common import InfoExtractor
@ -41,39 +42,41 @@ class BellesaIE(InfoExtractor):
# videos on this page are embedded into a container called VideoCard - if there is # videos on this page are embedded into a container called VideoCard - if there is
# nothing on the page referencing a VideoCard we cannot extract the information and # nothing on the page referencing a VideoCard we cannot extract the information and
# thus need to raise an error # thus need to raise an error
# the VideoCard container is not specific html element but rather only mentioned in
# CSS styles; hence we cannot use get_element_by_id and the like to find our info
# but instead just quickly check whether or not we have a page with a video
if 'VideoCard' not in webpage: if 'VideoCard' not in webpage:
title = self._html_search_regex( raise ExtractorError('[%s] %s: page does not contain a VideoCard', self.IE_NAME, video_id, expected=True)
r'(?s)<title\b[^>]*>(?P<title>.+?)(?:\|\s+Bellesa)?</title',
webpage, 'title', default=None,
group='title', fatal=False)
raise ExtractorError('[%s] %s: %s' % (self.IE_NAME, video_id, clean_html(title)), expected=True)
initial_data_raw = self._search_regex(r'(?s)window\s*\.\s*__INITIAL_DATA__\s*=\s*(\{.+?\})\s*;\s*</script>', webpage, 'initial_data') initial_data_raw = self._search_regex(r'(?s)window\s*\.\s*__INITIAL_DATA__\s*=\s*(\{.+?\})\s*;\s*</script>', webpage, 'initial_data')
try: initial_data = self._parse_json(initial_data_raw, video_id)
initial_data = json.loads(initial_data_raw) if not initial_data:
except json.JSONDecodeError: raise ExtractorError('[%s] %s: cannot decode initial data', self.IE_NAME, video_id, expected=True)
raise ExtractorError('%s said: cannot decode initial data', self.IE_NAME, expected=True)
video = try_get(initial_data, lambda x: x['video'], dict) or {} video = try_get(initial_data, lambda x: x['video'])
if not video:
raise ExtractorError('[%s] %s: initial data malformed' % self.IE_NAME, video_id, expected=True)
resolutions = video.get('resolutions') resolutions = video.get('resolutions')
source = video.get('source') source = video.get('source')
if not resolutions or not source: if not resolutions or not source:
raise ExtractorError('%s said: cannot extract playlist information from meta data' % self.IE_NAME, expected=True) raise ExtractorError('[%s] %s: cannot extract playlist information from meta data' % self.IE_NAME, video_id, expected=True)
m3u8_url = 'https://s.bellesa.co/hls/v/%s/,%s,.mp4.urlset/master.m3u8' % (source, resolutions) m3u8_url = 'https://s.bellesa.co/hls/v/%s/,%s,.mp4.urlset/master.m3u8' % (source, resolutions)
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False) fatal=False)
if not formats:
raise ExtractorError('[%s] %s: cannot extract formats from m3u8 file', self.IE_NAME, video_id, expected=True)
self._sort_formats(formats) self._sort_formats(formats)
# get from video meta data first # get from video meta data first
title = video.get('title')
title = strip_or_none(video.get('title')) title = strip_or_none(video.get('title'))
if not title: if not title:
# fallback to og:title, which needs some treatment # fallback to og:title, which needs some treatment
@ -82,11 +85,7 @@ class BellesaIE(InfoExtractor):
title = title.split('|')[0].strip() title = title.split('|')[0].strip()
tags = list(filter(None, map(lambda s: s.strip(), (video.get('tags') or '').split(',')))) tags = list(filter(None, map(lambda s: s.strip(), (video.get('tags') or '').split(','))))
categories = list(filter(None, map(lambda d: strip_or_none(d['name']), (video.get('categories') or []))))
categories = None
if 'categories' in video:
categories = [c['name'] for c in video.get('categories')]
list(filter(None, map(lambda d: strip_or_none(d['name']), (video.get('categories') or []))))
return { return {
'id': video_id, 'id': video_id,