diff --git a/youtube_dl/extractor/bellesa.py b/youtube_dl/extractor/bellesa.py
index 6ae3042d7..7e17156b6 100644
--- a/youtube_dl/extractor/bellesa.py
+++ b/youtube_dl/extractor/bellesa.py
@@ -1,12 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-
from ..utils import (
- clean_html,
+ compat_str,
ExtractorError,
+ int_or_none,
+ strip_or_none,
try_get,
+ url_or_none
)
from .common import InfoExtractor
@@ -41,39 +42,41 @@ class BellesaIE(InfoExtractor):
# videos on this page are embedded into a container called VideoCard - if there is
# nothing on the page referencing a VideoCard we cannot extract the information and
# thus need to raise an error
+ # the VideoCard container is not specific html element but rather only mentioned in
+ # CSS styles; hence we cannot use get_element_by_id and the like to find our info
+ # but instead just quickly check whether or not we have a page with a video
if 'VideoCard' not in webpage:
- title = self._html_search_regex(
- r'(?s)
]*>(?P.+?)(?:\|\s+Bellesa)?', webpage, 'initial_data')
- try:
- initial_data = json.loads(initial_data_raw)
- except json.JSONDecodeError:
- raise ExtractorError('%s said: cannot decode initial data', self.IE_NAME, expected=True)
+ initial_data = self._parse_json(initial_data_raw, video_id)
+ if not initial_data:
+ raise ExtractorError('[%s] %s: cannot decode initial data', self.IE_NAME, video_id, expected=True)
- video = try_get(initial_data, lambda x: x['video'], dict) or {}
+ video = try_get(initial_data, lambda x: x['video'])
+ if not video:
+ raise ExtractorError('[%s] %s: initial data malformed' % self.IE_NAME, video_id, expected=True)
resolutions = video.get('resolutions')
source = video.get('source')
if not resolutions or not source:
- raise ExtractorError('%s said: cannot extract playlist information from meta data' % self.IE_NAME, expected=True)
+ raise ExtractorError('[%s] %s: cannot extract playlist information from meta data' % self.IE_NAME, video_id, expected=True)
m3u8_url = 'https://s.bellesa.co/hls/v/%s/,%s,.mp4.urlset/master.m3u8' % (source, resolutions)
+
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False)
+ if not formats:
+ raise ExtractorError('[%s] %s: cannot extract formats from m3u8 file', self.IE_NAME, video_id, expected=True)
+
self._sort_formats(formats)
# get from video meta data first
- title = video.get('title')
title = strip_or_none(video.get('title'))
if not title:
# fallback to og:title, which needs some treatment
@@ -82,11 +85,7 @@ class BellesaIE(InfoExtractor):
title = title.split('|')[0].strip()
tags = list(filter(None, map(lambda s: s.strip(), (video.get('tags') or '').split(','))))
-
- categories = None
- if 'categories' in video:
- categories = [c['name'] for c in video.get('categories')]
- list(filter(None, map(lambda d: strip_or_none(d['name']), (video.get('categories') or []))))
+ categories = list(filter(None, map(lambda d: strip_or_none(d['name']), (video.get('categories') or []))))
return {
'id': video_id,