Fall back to default page-config extraction for channel videos with no data-config-url

This commit is contained in:
dirkf 2021-08-12 14:25:24 +01:00 committed by df
parent a803582717
commit 517af8b6e0

View File

@ -384,6 +384,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
'expected_warnings': ['Unable to download JSON metadata'],
},
{
'url': 'https://vimeo.com/channels/bestofstaffpicks/543188947',
'note': 'channel video with no data-config-url',
'info_dict': {
'id': '543188947',
'ext': 'mp4',
'title': "THE CHEMICAL BROTHERS 'THE DARKNESS THAT YOU FEAR' - OFFICIAL VIDEO",
'description': 'md5:a3949dd6e4a3dc5161871d195ee46cf0',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/ruffmercy',
'uploader_id': 'ruffmercy',
'uploader': 'RUFFMERCY',
'channel_id': 'bestofstaffpicks',
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/bestofstaffpicks',
'timestamp': 1619693770,
'upload_date': '20210429',
'duration': 237,
},
'params': {
# avoid selecting DASH/HLS which only send 1 fragment and fail expected size check
'format': 'best[protocol=https]',
},
'expected_warnings': ['Unable to download JSON metadata'],
},
{
'url': 'http://vimeo.com/76979871',
'note': 'Video with subtitles',
@ -679,14 +702,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
channel_id = self._search_regex(
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
if channel_id:
# look for data-config-url=, but it may not be present
config_url = self._html_search_regex(
r'\bdata-config-url="([^"]+)"', webpage, 'config URL')
r'\bdata-config-url\s*=\s*("|\')(?P<config_url>[^"\']+)\1',
webpage, 'config URL', group='config_url', default=None)
video_description = clean_html(get_element_by_class('description', webpage))
info_dict.update({
'channel_id': channel_id,
'channel_url': 'https://vimeo.com/channels/' + channel_id,
})
else:
config_url = None
if not config_url:
page_config = self._parse_json(self._search_regex(
r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
webpage, 'page config', default='{}'), video_id, fatal=False)
@ -696,8 +723,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
cc_license = page_config.get('cc_license')
clip = page_config.get('clip') or {}
timestamp = clip.get('uploaded_on')
video_description = clean_html(
clip.get('description') or page_config.get('description_html_escaped'))
if not video_description:
video_description = clean_html(
clip.get('description') or page_config.get('description_html_escaped'))
config = self._download_json(config_url, video_id)
video = config.get('video') or {}
vod = video.get('vod') or {}