[vvvvid] improve info extraction

This commit is contained in:
Remita Amine 2020-12-30 18:16:47 +01:00
parent 2e21b06ea2
commit 7b0f04ed1f

View File

@ -22,6 +22,16 @@ class VVVVIDIE(InfoExtractor):
'id': '489048', 'id': '489048',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ping Pong', 'title': 'Ping Pong',
'duration': 239,
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
'season_id': '437',
'season_number': 1,
'episode': 'Ping Pong',
'episode_number': 1,
'episode_id': '3334',
'view_count': int,
'like_count': int,
'repost_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -38,6 +48,9 @@ class VVVVIDIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True
}] }]
_conn_id = None _conn_id = None
@ -48,24 +61,34 @@ class VVVVIDIE(InfoExtractor):
def _download_info(self, show_id, path, video_id, fatal=True): def _download_info(self, show_id, path, video_id, fatal=True):
response = self._download_json( response = self._download_json(
'https://www.vvvvid.it/vvvvid/ondemand/%s%s' % (show_id, path), 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
video_id, headers=self.geo_verification_headers(), query={ video_id, headers=self.geo_verification_headers(), query={
'conn_id': self._conn_id, 'conn_id': self._conn_id,
}, fatal=fatal) }, fatal=fatal)
if response['result'] == 'error': if not (response or fatal):
return
if response.get('result') == 'error':
raise ExtractorError('%s said: %s' % ( raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True) self.IE_NAME, response['message']), expected=True)
return response['data'] return response['data']
def _extract_common_video_info(self, video_data):
return {
'thumbnail': video_data.get('thumbnail'),
'episode_number': int_or_none(video_data.get('number')),
'episode_id': str_or_none(video_data.get('id')),
}
def _real_extract(self, url): def _real_extract(self, url):
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups() show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
response = self._download_info( response = self._download_info(
show_id, '/season/%s' % season_id, video_id) show_id, 'season/%s' % season_id, video_id)
vid = int(video_id) vid = int(video_id)
video_data = list(filter( video_data = list(filter(
lambda episode: episode.get('video_id') == vid, response))[0] lambda episode: episode.get('video_id') == vid, response))[0]
title = video_data['title']
formats = [] formats = []
# vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
@ -148,25 +171,25 @@ class VVVVIDIE(InfoExtractor):
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
self._sort_formats(formats) self._sort_formats(formats)
return { info = self._extract_common_video_info(video_data)
info.update({
'id': video_id, 'id': video_id,
'title': video_data['title'], 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': video_data.get('thumbnail'),
'duration': int_or_none(video_data.get('length')), 'duration': int_or_none(video_data.get('length')),
'series': video_data.get('show_title'), 'series': video_data.get('show_title'),
'season_id': season_id, 'season_id': season_id,
'season_number': video_data.get('season_number'), 'season_number': video_data.get('season_number'),
'episode_id': str_or_none(video_data.get('id')), 'episode': title,
'episode_number': int_or_none(video_data.get('number')),
'episode_title': video_data['title'],
'view_count': int_or_none(video_data.get('views')), 'view_count': int_or_none(video_data.get('views')),
'like_count': int_or_none(video_data.get('video_likes')), 'like_count': int_or_none(video_data.get('video_likes')),
} 'repost_count': int_or_none(video_data.get('video_shares')),
})
return info
class VVVVIDShowIE(VVVVIDIE): class VVVVIDShowIE(VVVVIDIE):
_VALID_URL = r'(?P<base_url>%s(?P<show_id>\d+)/(?P<show_title>[^/]+))/?(?:$|[\?&].*$)?$' % VVVVIDIE._VALID_URL_BASE _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
_TESTS = [{ _TESTS = [{
'url': 'https://www.vvvvid.it/show/156/psyco-pass', 'url': 'https://www.vvvvid.it/show/156/psyco-pass',
'info_dict': { 'info_dict': {
@ -175,37 +198,40 @@ class VVVVIDShowIE(VVVVIDIE):
'description': 'md5:94d572c0bd85894b193b8aebc9a3a806', 'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
}, },
'playlist_count': 46, 'playlist_count': 46,
}, {
'url': 'https://www.vvvvid.it/show/156',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
base_url, show_id, show_title = re.match(self._VALID_URL, url).groups() base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
response = self._download_info( seasons = self._download_info(
show_id, '/seasons/', show_title) show_id, 'seasons/', show_title)
show_infos = self._download_info( show_info = self._download_info(
show_id, '/info/', show_title, fatal=False) show_id, 'info/', show_title, fatal=False)
entries = [] entries = []
for season in response: for season in (seasons or []):
season_number = int_or_none(season.get('number'))
episodes = season.get('episodes') or [] episodes = season.get('episodes') or []
for episode in episodes: for episode in episodes:
season_id = str_or_none(episode.get('season_id')) season_id = str_or_none(episode.get('season_id'))
video_id = str_or_none(episode.get('video_id')) video_id = str_or_none(episode.get('video_id'))
if not (season_id and video_id): if not (season_id and video_id):
continue continue
info = self._extract_common_video_info(episode)
video_url = '/'.join([base_url, season_id, video_id]) info.update({
'_type': 'url',
entries.append({
'_type': 'url_transparent',
'ie_key': VVVVIDIE.ie_key(), 'ie_key': VVVVIDIE.ie_key(),
'url': video_url, 'url': '/'.join([base_url, season_id, video_id]),
'title': episode.get('title'), 'title': episode.get('title'),
'thumbnail': episode.get('thumbnail'),
'description': episode.get('description'), 'description': episode.get('description'),
'season_number': int_or_none(episode.get('season_number')), 'season_number': season_number,
'episode_number': int_or_none(episode.get('number')), 'season_id': season_id,
}) })
entries.append(info)
return self.playlist_result( return self.playlist_result(
entries, show_id, show_infos.get('title'), show_infos.get('description')) entries, show_id, show_info.get('title'), show_info.get('description'))