Compare commits

..

No commits in common. "7b0f04ed1f72fd4e4b5b3e935e08a912857fa8c4" and "f7e95fb2a0516f90edffe72d9911222d1ed1a2bc" have entirely different histories.

4 changed files with 81 additions and 196 deletions

View File

@ -1425,10 +1425,7 @@ from .vshare import VShareIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vvvvid import ( from .vvvvid import VVVVIDIE
VVVVIDIE,
VVVVIDShowIE,
)
from .vyborymos import VyboryMosIE from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE from .vzaar import VzaarIE
from .wakanim import WakanimIE from .wakanim import WakanimIE

View File

@ -12,8 +12,7 @@ from ..utils import (
class VVVVIDIE(InfoExtractor): class VVVVIDIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/' _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
_VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
_TESTS = [{ _TESTS = [{
# video_type == 'video/vvvvid' # video_type == 'video/vvvvid'
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong', 'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
@ -22,16 +21,6 @@ class VVVVIDIE(InfoExtractor):
'id': '489048', 'id': '489048',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ping Pong', 'title': 'Ping Pong',
'duration': 239,
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
'season_id': '437',
'season_number': 1,
'episode': 'Ping Pong',
'episode_number': 1,
'episode_id': '3334',
'view_count': int,
'like_count': int,
'repost_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -48,9 +37,6 @@ class VVVVIDIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
'only_matching': True
}] }]
_conn_id = None _conn_id = None
@ -59,36 +45,20 @@ class VVVVIDIE(InfoExtractor):
'https://www.vvvvid.it/user/login', 'https://www.vvvvid.it/user/login',
None, headers=self.geo_verification_headers())['data']['conn_id'] None, headers=self.geo_verification_headers())['data']['conn_id']
def _download_info(self, show_id, path, video_id, fatal=True):
    """Request one on-demand API resource of a show and return its payload.

    The request goes to
    ``https://www.vvvvid.it/vvvvid/ondemand/<show_id>/<path>`` with the
    geo-verification headers and the stored connection id as the
    ``conn_id`` query parameter.

    :param show_id: show identifier placed in the URL.
    :param path: trailing part of the API path (e.g. ``'seasons/'``).
    :param video_id: identifier handed to ``_download_json`` unchanged.
    :param fatal: forwarded to ``_download_json``; when false, a falsy
        response makes this method return ``None`` instead of going on.
    :returns: the ``data`` member of the JSON response, or ``None`` (see
        ``fatal``).
    :raises ExtractorError: when the response's ``result`` is ``'error'``;
        the message quotes the ``message`` member of the response.
    """
    api_url = 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path)
    payload = self._download_json(
        api_url, video_id,
        headers=self.geo_verification_headers(),
        query={'conn_id': self._conn_id},
        fatal=fatal)

    # Nothing usable came back and the caller asked us not to fail.
    if not payload and not fatal:
        return

    if payload.get('result') == 'error':
        message = '%s said: %s' % (self.IE_NAME, payload['message'])
        raise ExtractorError(message, expected=True)

    return payload['data']
def _extract_common_video_info(self, video_data):
    """Pick thumbnail, episode number and episode id out of an API dict.

    :param video_data: mapping read with ``.get``; the keys ``thumbnail``,
        ``number`` and ``id`` are looked up and may be absent.
    :returns: a new dict with ``thumbnail`` (taken as is),
        ``episode_number`` (passed through ``int_or_none``) and
        ``episode_id`` (passed through ``str_or_none``).
    """
    common = {}
    common['thumbnail'] = video_data.get('thumbnail')
    common['episode_number'] = int_or_none(video_data.get('number'))
    common['episode_id'] = str_or_none(video_data.get('id'))
    return common
def _real_extract(self, url): def _real_extract(self, url):
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups() show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
response = self._download_json(
response = self._download_info( 'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
show_id, 'season/%s' % season_id, video_id) video_id, headers=self.geo_verification_headers(), query={
'conn_id': self._conn_id,
})
if response['result'] == 'error':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True)
vid = int(video_id) vid = int(video_id)
video_data = list(filter( video_data = list(filter(
lambda episode: episode.get('video_id') == vid, response))[0] lambda episode: episode.get('video_id') == vid, response['data']))[0]
title = video_data['title']
formats = [] formats = []
# vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
@ -171,67 +141,18 @@ class VVVVIDIE(InfoExtractor):
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id)) 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
self._sort_formats(formats) self._sort_formats(formats)
info = self._extract_common_video_info(video_data) return {
info.update({
'id': video_id, 'id': video_id,
'title': title, 'title': video_data['title'],
'formats': formats, 'formats': formats,
'thumbnail': video_data.get('thumbnail'),
'duration': int_or_none(video_data.get('length')), 'duration': int_or_none(video_data.get('length')),
'series': video_data.get('show_title'), 'series': video_data.get('show_title'),
'season_id': season_id, 'season_id': season_id,
'season_number': video_data.get('season_number'), 'season_number': video_data.get('season_number'),
'episode': title, 'episode_id': str_or_none(video_data.get('id')),
'episode_number': int_or_none(video_data.get('number')),
'episode_title': video_data['title'],
'view_count': int_or_none(video_data.get('views')), 'view_count': int_or_none(video_data.get('views')),
'like_count': int_or_none(video_data.get('video_likes')), 'like_count': int_or_none(video_data.get('video_likes')),
'repost_count': int_or_none(video_data.get('video_shares')), }
})
return info
class VVVVIDShowIE(VVVVIDIE):
    """Playlist extractor for a whole show: one URL entry per episode.

    Every entry is a ``url``-type result pinned to ``VVVVIDIE`` through
    ``ie_key``, so the per-episode extraction is delegated to that class.
    """

    _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.vvvvid.it/show/156/psyco-pass',
        'info_dict': {
            'id': '156',
            'title': 'Psycho-Pass',
            'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
        },
        'playlist_count': 46,
    }, {
        'url': 'https://www.vvvvid.it/show/156',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result listing every episode of the show.

        :param url: show URL matching ``_VALID_URL``.
        :returns: ``self.playlist_result(...)`` built from the collected
            entries, the show id and, when available, the show's title and
            description.
        :raises ExtractorError: propagated from ``_download_info`` for the
            season list (that request is fatal).
        """
        base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()

        seasons = self._download_info(
            show_id, 'seasons/', show_title)

        # The show info request is non-fatal and may yield None; fall back
        # to an empty dict so the .get() calls below cannot fail.
        show_info = self._download_info(
            show_id, 'info/', show_title, fatal=False) or {}

        # VVVVIDIE._VALID_URL requires a slug segment between the show id
        # and the season id. When the input URL carried no slug, add a
        # placeholder so that the generated episode URLs still match.
        if not show_title:
            base_url += '/title'

        entries = []
        for season in (seasons or []):
            season_number = int_or_none(season.get('number'))
            for episode in (season.get('episodes') or []):
                season_id = str_or_none(episode.get('season_id'))
                video_id = str_or_none(episode.get('video_id'))
                # Both ids are needed to build a resolvable episode URL.
                if not (season_id and video_id):
                    continue
                info = self._extract_common_video_info(episode)
                info.update({
                    '_type': 'url',
                    'ie_key': VVVVIDIE.ie_key(),
                    'url': '/'.join([base_url, season_id, video_id]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),
                    'season_number': season_number,
                    'season_id': season_id,
                })
                entries.append(info)

        return self.playlist_result(
            entries, show_id, show_info.get('title'), show_info.get('description'))

View File

@ -1,43 +1,23 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
mimetype2ext,
try_get, try_get,
urljoin, urlencode_postdata,
) )
class YandexDiskIE(InfoExtractor): class YandexDiskIE(InfoExtractor):
_VALID_URL = r'''(?x)https?:// _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
(?P<domain>
yadi\.sk|
disk\.yandex\.
(?:
az|
by|
co(?:m(?:\.(?:am|ge|tr))?|\.il)|
ee|
fr|
k[gz]|
l[tv]|
md|
t[jm]|
u[az]|
ru
)
)/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y', 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
'md5': 'a4a8d52958c8fddcf9845935070402ae', 'md5': '33955d7ae052f15853dc41f35f17581c',
'info_dict': { 'info_dict': {
'id': 'VdOeDou8eZs6Y', 'id': 'VdOeDou8eZs6Y',
'ext': 'mp4', 'ext': 'mp4',
@ -47,101 +27,92 @@ class YandexDiskIE(InfoExtractor):
'uploader_id': '300043621', 'uploader_id': '300043621',
'view_count': int, 'view_count': int,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, { }, {
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce', 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
domain, video_id = re.match(self._VALID_URL, url).groups() video_id = self._match_id(url)
status = self._download_webpage(
'https://disk.yandex.com/auth/status', video_id, query={
'urlOrigin': url,
'source': 'public',
'md5': 'false',
})
sk = self._search_regex(
r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
status, 'sk', group='value')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
store = self._parse_json(self._search_regex(
r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
webpage, 'store'), video_id)
resource = store['resources'][store['rootResourceId']]
title = resource['name'] models = self._parse_json(
meta = resource.get('meta') or {} self._search_regex(
r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
webpage, 'video JSON'),
video_id)
public_url = meta.get('short_url') data = next(
if public_url: model['data'] for model in models
video_id = self._match_id(public_url) if model.get('model') == 'resource')
source_url = (self._download_json( video_hash = data['id']
'https://cloud-api.yandex.net/v1/disk/public/resources/download', title = data['name']
video_id, query={'public_key': url}, fatal=False) or {}).get('href')
video_streams = resource.get('videoStreams') or {}
video_hash = resource.get('hash') or url
environment = store.get('environment') or {}
sk = environment.get('sk')
yandexuid = environment.get('yandexuid')
if sk and yandexuid and not (source_url and video_streams):
self._set_cookie(domain, 'yandexuid', yandexuid)
def call_api(action): models = self._download_json(
return (self._download_json( 'https://disk.yandex.com/models/', video_id,
urljoin(url, '/public/api/') + action, video_id, data=json.dumps({ data=urlencode_postdata({
'hash': video_hash, '_model.0': 'videoInfo',
'sk': sk, 'id.0': video_hash,
}).encode(), headers={ '_model.1': 'do-get-resource-url',
'Content-Type': 'text/plain', 'id.1': video_hash,
}, fatal=False) or {}).get('data') or {} 'version': '13.6',
if not source_url: 'sk': sk,
# TODO: figure out how to detect if download limit has }), query={'_m': 'videoInfo'})['models']
# been reached and then avoid unnecessary source format
# extraction requests videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
source_url = call_api('download-url').get('url') source_url = try_get(
if not video_streams: models, lambda x: x[1]['data']['file'], compat_str)
video_streams = call_api('get-video-streams')
formats = [] formats = []
if source_url: if source_url:
formats.append({ formats.append({
'url': source_url, 'url': source_url,
'format_id': 'source', 'format_id': 'source',
'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'), 'ext': determine_ext(title, 'mp4'),
'quality': 1, 'quality': 1,
'filesize': int_or_none(meta.get('size'))
}) })
for video in videos:
for video in (video_streams.get('videos') or []):
format_url = video.get('url') format_url = video.get('url')
if not format_url: if not format_url:
continue continue
if video.get('dimension') == 'adaptive': if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', 'm3u8_native', format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
else: else:
size = video.get('size') or {}
height = int_or_none(size.get('height'))
format_id = 'hls'
if height:
format_id += '-%dp' % height
formats.append({ formats.append({
'ext': 'mp4',
'format_id': format_id,
'height': height,
'protocol': 'm3u8_native',
'url': format_url, 'url': format_url,
'width': int_or_none(size.get('width')),
}) })
self._sort_formats(formats) self._sort_formats(formats)
uid = resource.get('uid') duration = float_or_none(try_get(
display_name = try_get(store, lambda x: x['users'][uid]['displayName']) models, lambda x: x[0]['data']['duration']), 1000)
uploader = try_get(
data, lambda x: x['user']['display_name'], compat_str)
uploader_id = try_get(
data, lambda x: x['user']['uid'], compat_str)
view_count = int_or_none(try_get(
data, lambda x: x['meta']['views_counter']))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'duration': float_or_none(video_streams.get('duration'), 1000), 'duration': duration,
'uploader': display_name, 'uploader': uploader,
'uploader_id': uid, 'uploader_id': uploader_id,
'view_count': int_or_none(meta.get('views_counter')), 'view_count': view_count,
'formats': formats, 'formats': formats,
} }

View File

@ -5,7 +5,6 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
try_get,
url_or_none, url_or_none,
) )
@ -65,7 +64,12 @@ class YandexVideoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
player = try_get((self._download_json( content = self._download_json(
# 'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
# video_id, query={
# 'stream_options': 'hires',
# 'disable_trackings': 1,
# })['content']
'https://frontend.vh.yandex.ru/graphql', video_id, data=b'''{ 'https://frontend.vh.yandex.ru/graphql', video_id, data=b'''{
player(content_id: "%s") { player(content_id: "%s") {
computed_title computed_title
@ -86,15 +90,7 @@ class YandexVideoIE(InfoExtractor):
title title
views_count views_count
} }
}''' % video_id.encode(), fatal=False)), lambda x: x['player']['content']) }''' % video_id.encode())['player']['content']['content']
if not player or player.get('error'):
player = self._download_json(
'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
video_id, query={
'stream_options': 'hires',
'disable_trackings': 1,
})
content = player['content']
title = content.get('title') or content['computed_title'] title = content.get('title') or content['computed_title']