From 7a1e11c0456387f09e0218960423e7a5945059c1 Mon Sep 17 00:00:00 2001
From: dirkf
Date: Wed, 3 Mar 2021 18:06:35 +0000
Subject: [PATCH] Extend _MEDIA_SETS to get HD Reel videos

---
Review notes (placed after the "---" separator, so they are not part of the
commit message):
* Added the missing comma after `'title': title`; without it the returned
  dict literal is a SyntaxError.
* Guarded the `_MEDIA_SETS.insert(0, 'iptv-all')` call so that repeated Reel
  extractions do not keep prepending duplicate entries to the same list.
* The diffstat and the second hunk's new-side length are adjusted for the one
  extra added line; the post-image blob id in the `index` line was NOT
  recomputed.
* The first hunk is whitespace-only; the removed line is assumed to differ
  only by trailing whitespace -- TODO confirm against the pre-image.
* `programme_id` is no longer read anywhere in these hunks -- TODO confirm
  whether the lookup is still needed.

 youtube_dl/extractor/bbc.py | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index d9d2fb77c..df035ed1e 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -995,7 +995,7 @@ class BBCIE(BBCCoUkIE):
             }
 
         # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
-        programme_id = self._search_regex( 
+        programme_id = self._search_regex(
             r'/reel/video/(?P<id>%s)/' % self._ID_REGEX, url, 'Reel pid', default=None)
         initial_data = self._parse_json(self._html_search_regex(
             r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
@@ -1007,40 +1007,28 @@ class BBCIE(BBCCoUkIE):
             clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
             version_id = clip_data.get('versionID')
             if version_id:
+                title = smp_data['title']
+                # also try for higher resolutions
+                if 'iptv-all' not in self._MEDIA_SETS:
+                    self._MEDIA_SETS.insert(0, 'iptv-all')
+                formats, subtitles = self._download_media_selector(version_id)
+                self._sort_formats(formats)
                 image_url = smp_data.get('holdingImageURL')
                 display_date = init_data.get('displayDate')
                 topic_title = init_data.get('topicTitle')
 
-                ret = {
-                    'title': smp_data.get('title', playlist_id),
+                return {
                     'id': version_id,
+                    'title': title,
+                    'formats': formats,
                     'alt_title': init_data.get('shortTitle'),
                     'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                     'description': smp_data.get('summary') or init_data.get('shortSummary'),
                     'upload_date': display_date.replace('-', '') if display_date else None,
+                    'subtitles': subtitles,
                     'duration': int_or_none(clip_data.get('duration')),
                     'categories': [topic_title] if topic_title else None,
                 }
-                if not programme_id:
-                    # get the formats from the reel page
-                    formats, subtitles = self._download_media_selector(version_id)
-                    self._sort_formats(formats)
-                    ret.update({
-                        'formats': formats,
-                        'subtitles': subtitles,
-                    })
-                else:
-                    # get the formats (including HD) from the programmes page
-                    # avoid https: to help proxying
-                    ret.update({
-                        '_type': 'url_transparent',
-                        'url': 'http://bbc.co.uk/programmes/%s' % programme_id
-                    })
-                return ret
-            elif programme_id:
-                # the Reel page was not as expected: try the programmes page
-                return self._url_result(programme_id)
-
         # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
         # There are several setPayload calls may be present but the video
         # seems to be always related to the first one