Compare commits

...

5 Commits

Author SHA1 Message Date
Remita Amine
7b0f04ed1f [vvvvid] improve info extraction 2020-12-30 18:16:47 +01:00
nixxo
2e21b06ea2
[vvvvid] add playlists support (#27574)
closes #18130
2020-12-30 18:12:17 +01:00
Remita Amine
a6f75e6e89 [yandexdisk] extract info from webpage
the public API does not return metadata when download limit is reached
2020-12-30 16:45:53 +01:00
Remita Amine
bd18824c2a [yandexdisk] fix extraction(closes #17861)(closes #27131) 2020-12-30 13:43:56 +01:00
Remita Amine
bdd044e67b [yandexvideo] use old api call as fallback 2020-12-30 13:30:11 +01:00
4 changed files with 193 additions and 78 deletions

View File

@ -1425,7 +1425,10 @@ from .vshare import VShareIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE from .vvvvid import (
VVVVIDIE,
VVVVIDShowIE,
)
from .vyborymos import VyboryMosIE from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE from .vzaar import VzaarIE
from .wakanim import WakanimIE from .wakanim import WakanimIE

View File

@ -12,7 +12,8 @@ from ..utils import (
class VVVVIDIE(InfoExtractor): class VVVVIDIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
_VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
_TESTS = [{ _TESTS = [{
# video_type == 'video/vvvvid' # video_type == 'video/vvvvid'
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong', 'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
@ -21,6 +22,16 @@ class VVVVIDIE(InfoExtractor):
'id': '489048', 'id': '489048',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ping Pong', 'title': 'Ping Pong',
'duration': 239,
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
'season_id': '437',
'season_number': 1,
'episode': 'Ping Pong',
'episode_number': 1,
'episode_id': '3334',
'view_count': int,
'like_count': int,
'repost_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -37,6 +48,9 @@ class VVVVIDIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True
}] }]
_conn_id = None _conn_id = None
@ -45,20 +59,36 @@ class VVVVIDIE(InfoExtractor):
'https://www.vvvvid.it/user/login', 'https://www.vvvvid.it/user/login',
None, headers=self.geo_verification_headers())['data']['conn_id'] None, headers=self.geo_verification_headers())['data']['conn_id']
def _real_extract(self, url): def _download_info(self, show_id, path, video_id, fatal=True):
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
response = self._download_json( response = self._download_json(
'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id), 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
video_id, headers=self.geo_verification_headers(), query={ video_id, headers=self.geo_verification_headers(), query={
'conn_id': self._conn_id, 'conn_id': self._conn_id,
}) }, fatal=fatal)
if response['result'] == 'error': if not (response or fatal):
return
if response.get('result') == 'error':
raise ExtractorError('%s said: %s' % ( raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True) self.IE_NAME, response['message']), expected=True)
return response['data']
def _extract_common_video_info(self, video_data):
    """Build the metadata fields shared by single videos and playlist entries.

    Reads 'thumbnail', 'number' and 'id' from the episode dict
    `video_data` and returns them as 'thumbnail', 'episode_number'
    (coerced with int_or_none) and 'episode_id' (coerced with str_or_none).
    """
    thumbnail = video_data.get('thumbnail')
    episode_number = int_or_none(video_data.get('number'))
    episode_id = str_or_none(video_data.get('id'))
    return {
        'thumbnail': thumbnail,
        'episode_number': episode_number,
        'episode_id': episode_id,
    }
def _real_extract(self, url):
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
response = self._download_info(
show_id, 'season/%s' % season_id, video_id)
vid = int(video_id) vid = int(video_id)
video_data = list(filter( video_data = list(filter(
lambda episode: episode.get('video_id') == vid, response['data']))[0] lambda episode: episode.get('video_id') == vid, response))[0]
title = video_data['title']
formats = [] formats = []
# vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
@ -141,18 +171,67 @@ class VVVVIDIE(InfoExtractor):
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
self._sort_formats(formats) self._sort_formats(formats)
return { info = self._extract_common_video_info(video_data)
info.update({
'id': video_id, 'id': video_id,
'title': video_data['title'], 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': video_data.get('thumbnail'),
'duration': int_or_none(video_data.get('length')), 'duration': int_or_none(video_data.get('length')),
'series': video_data.get('show_title'), 'series': video_data.get('show_title'),
'season_id': season_id, 'season_id': season_id,
'season_number': video_data.get('season_number'), 'season_number': video_data.get('season_number'),
'episode_id': str_or_none(video_data.get('id')), 'episode': title,
'episode_number': int_or_none(video_data.get('number')),
'episode_title': video_data['title'],
'view_count': int_or_none(video_data.get('views')), 'view_count': int_or_none(video_data.get('views')),
'like_count': int_or_none(video_data.get('video_likes')), 'like_count': int_or_none(video_data.get('video_likes')),
} 'repost_count': int_or_none(video_data.get('video_shares')),
})
return info
class VVVVIDShowIE(VVVVIDIE):
    """Playlist extractor for a whole VVVVID show.

    Matches show URLs with or without the title slug after the numeric show
    id and yields one 'url' entry per episode, each delegated to VVVVIDIE.
    """
    _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.vvvvid.it/show/156/psyco-pass',
        'info_dict': {
            'id': '156',
            'title': 'Psycho-Pass',
            'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
        },
        'playlist_count': 46,
    }, {
        'url': 'https://www.vvvvid.it/show/156',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one entry per episode of the show.

        The seasons request is fatal; the show info request is best effort
        and only supplies the playlist title and description.
        """
        base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()

        seasons = self._download_info(
            show_id, 'seasons/', show_title)

        # _download_info returns None when a non-fatal request fails, so
        # fall back to an empty dict instead of crashing on .get() below.
        show_info = self._download_info(
            show_id, 'info/', show_title, fatal=False) or {}

        if not show_title:
            # The slug is optional here, but VVVVIDIE._VALID_URL requires a
            # path segment between the show id and the season id; add a
            # placeholder so the generated episode URLs can be matched.
            base_url += '/title'

        entries = []
        for season in (seasons or []):
            season_number = int_or_none(season.get('number'))
            for episode in (season.get('episodes') or []):
                season_id = str_or_none(episode.get('season_id'))
                video_id = str_or_none(episode.get('video_id'))
                # Both ids are needed to build the episode URL.
                if not (season_id and video_id):
                    continue
                info = self._extract_common_video_info(episode)
                info.update({
                    '_type': 'url',
                    'ie_key': VVVVIDIE.ie_key(),
                    'url': '/'.join([base_url, season_id, video_id]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),
                    'season_number': season_number,
                    'season_id': season_id,
                })
                entries.append(info)

        return self.playlist_result(
            entries, show_id, show_info.get('title'), show_info.get('description'))

View File

@ -1,23 +1,43 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
mimetype2ext,
try_get, try_get,
urlencode_postdata, urljoin,
) )
class YandexDiskIE(InfoExtractor): class YandexDiskIE(InfoExtractor):
_VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)' _VALID_URL = r'''(?x)https?://
(?P<domain>
yadi\.sk|
disk\.yandex\.
(?:
az|
by|
co(?:m(?:\.(?:am|ge|tr))?|\.il)|
ee|
fr|
k[gz]|
l[tv]|
md|
t[jm]|
u[az]|
ru
)
)/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
'md5': '33955d7ae052f15853dc41f35f17581c', 'md5': 'a4a8d52958c8fddcf9845935070402ae',
'info_dict': { 'info_dict': {
'id': 'VdOeDou8eZs6Y', 'id': 'VdOeDou8eZs6Y',
'ext': 'mp4', 'ext': 'mp4',
@ -27,92 +47,101 @@ class YandexDiskIE(InfoExtractor):
'uploader_id': '300043621', 'uploader_id': '300043621',
'view_count': int, 'view_count': int,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, { }, {
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce', 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) domain, video_id = re.match(self._VALID_URL, url).groups()
status = self._download_webpage(
'https://disk.yandex.com/auth/status', video_id, query={
'urlOrigin': url,
'source': 'public',
'md5': 'false',
})
sk = self._search_regex(
r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
status, 'sk', group='value')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
store = self._parse_json(self._search_regex(
r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
webpage, 'store'), video_id)
resource = store['resources'][store['rootResourceId']]
models = self._parse_json( title = resource['name']
self._search_regex( meta = resource.get('meta') or {}
r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
webpage, 'video JSON'),
video_id)
data = next( public_url = meta.get('short_url')
model['data'] for model in models if public_url:
if model.get('model') == 'resource') video_id = self._match_id(public_url)
video_hash = data['id'] source_url = (self._download_json(
title = data['name'] 'https://cloud-api.yandex.net/v1/disk/public/resources/download',
video_id, query={'public_key': url}, fatal=False) or {}).get('href')
video_streams = resource.get('videoStreams') or {}
video_hash = resource.get('hash') or url
environment = store.get('environment') or {}
sk = environment.get('sk')
yandexuid = environment.get('yandexuid')
if sk and yandexuid and not (source_url and video_streams):
self._set_cookie(domain, 'yandexuid', yandexuid)
models = self._download_json( def call_api(action):
'https://disk.yandex.com/models/', video_id, return (self._download_json(
data=urlencode_postdata({ urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
'_model.0': 'videoInfo', 'hash': video_hash,
'id.0': video_hash,
'_model.1': 'do-get-resource-url',
'id.1': video_hash,
'version': '13.6',
'sk': sk, 'sk': sk,
}), query={'_m': 'videoInfo'})['models'] }).encode(), headers={
'Content-Type': 'text/plain',
videos = try_get(models, lambda x: x[0]['data']['videos'], list) or [] }, fatal=False) or {}).get('data') or {}
source_url = try_get( if not source_url:
models, lambda x: x[1]['data']['file'], compat_str) # TODO: figure out how to detect if download limit has
# been reached and then avoid unnecessary source format
# extraction requests
source_url = call_api('download-url').get('url')
if not video_streams:
video_streams = call_api('get-video-streams')
formats = [] formats = []
if source_url: if source_url:
formats.append({ formats.append({
'url': source_url, 'url': source_url,
'format_id': 'source', 'format_id': 'source',
'ext': determine_ext(title, 'mp4'), 'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
'quality': 1, 'quality': 1,
'filesize': int_or_none(meta.get('size'))
}) })
for video in videos:
for video in (video_streams.get('videos') or []):
format_url = video.get('url') format_url = video.get('url')
if not format_url: if not format_url:
continue continue
if determine_ext(format_url) == 'm3u8': if video.get('dimension') == 'adaptive':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native', format_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
else: else:
size = video.get('size') or {}
height = int_or_none(size.get('height'))
format_id = 'hls'
if height:
format_id += '-%dp' % height
formats.append({ formats.append({
'ext': 'mp4',
'format_id': format_id,
'height': height,
'protocol': 'm3u8_native',
'url': format_url, 'url': format_url,
'width': int_or_none(size.get('width')),
}) })
self._sort_formats(formats) self._sort_formats(formats)
duration = float_or_none(try_get( uid = resource.get('uid')
models, lambda x: x[0]['data']['duration']), 1000) display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
uploader = try_get(
data, lambda x: x['user']['display_name'], compat_str)
uploader_id = try_get(
data, lambda x: x['user']['uid'], compat_str)
view_count = int_or_none(try_get(
data, lambda x: x['meta']['views_counter']))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'duration': duration, 'duration': float_or_none(video_streams.get('duration'), 1000),
'uploader': uploader, 'uploader': display_name,
'uploader_id': uploader_id, 'uploader_id': uid,
'view_count': view_count, 'view_count': int_or_none(meta.get('views_counter')),
'formats': formats, 'formats': formats,
} }

View File

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
try_get,
url_or_none, url_or_none,
) )
@ -64,12 +65,7 @@ class YandexVideoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
content = self._download_json( player = try_get((self._download_json(
# 'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
# video_id, query={
# 'stream_options': 'hires',
# 'disable_trackings': 1,
# })['content']
'https://frontend.vh.yandex.ru/graphql', video_id, data=b'''{ 'https://frontend.vh.yandex.ru/graphql', video_id, data=b'''{
player(content_id: "%s") { player(content_id: "%s") {
computed_title computed_title
@ -90,7 +86,15 @@ class YandexVideoIE(InfoExtractor):
title title
views_count views_count
} }
}''' % video_id.encode())['player']['content']['content'] }''' % video_id.encode(), fatal=False)), lambda x: x['player']['content'])
if not player or player.get('error'):
player = self._download_json(
'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
video_id, query={
'stream_options': 'hires',
'disable_trackings': 1,
})
content = player['content']
title = content.get('title') or content['computed_title'] title = content.get('title') or content['computed_title']