Compare commits

..

No commits in common. "f7e95fb2a0516f90edffe72d9911222d1ed1a2bc" and "9c1e164e0cd77331ea4f0b474b32fd06f84bad71" have entirely different histories.

3 changed files with 41 additions and 82 deletions

View File

@@ -554,11 +554,6 @@ class TestUtil(unittest.TestCase):
self.assertEqual(url_or_none('http$://foo.de'), None) self.assertEqual(url_or_none('http$://foo.de'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('//foo.de'), '//foo.de') self.assertEqual(url_or_none('//foo.de'), '//foo.de')
self.assertEqual(url_or_none('s3://foo.de'), None)
self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
def test_parse_age_limit(self): def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(None), None)

View File

@@ -13,30 +13,26 @@ class YandexVideoIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=| yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
frontend\.vh\.yandex\.ru/player/ frontend\.vh\.yandex\.ru/player/
) )
(?P<id>(?:[\da-f]{32}|[\w-]{12})) (?P<id>[\da-f]+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374', 'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4', 'md5': '33955d7ae052f15853dc41f35f17581c',
'info_dict': { 'info_dict': {
'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374', 'id': '4dbb262b4fe5cf15a215de4f34eee34d',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь', 'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa', 'description': '',
'thumbnail': r're:^https?://', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1549972939, 'timestamp': 0,
'duration': 5575, 'duration': 30,
'age_limit': 18, 'age_limit': 18,
'upload_date': '20190212',
'view_count': int,
'like_count': int,
'dislike_count': int,
}, },
}, { }, {
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda', 'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d', 'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
@@ -56,85 +52,53 @@ class YandexVideoIE(InfoExtractor):
# DASH with DRM # DASH with DRM
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8', 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
content = self._download_json( content = self._download_json(
# 'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id, 'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
# video_id, query={ video_id, query={
# 'stream_options': 'hires', 'stream_options': 'hires',
# 'disable_trackings': 1, 'disable_trackings': 1,
# })['content'] })['content']
'https://frontend.vh.yandex.ru/graphql', video_id, data=b'''{
player(content_id: "%s") {
computed_title
content_url
description
dislikes
duration
likes
program_title
release_date
release_date_ut
release_year
restriction_age
season
start_time
streams
thumbnail
title
views_count
}
}''' % video_id.encode())['player']['content']['content']
title = content.get('title') or content['computed_title'] content_url = url_or_none(content.get('content_url')) or url_or_none(
content['streams'][0]['url'])
title = content.get('title') or content.get('computed_title')
formats = []
streams = content.get('streams') or []
streams.append({'url': content.get('content_url')})
for stream in streams:
content_url = url_or_none(stream.get('url'))
if not content_url:
continue
ext = determine_ext(content_url) ext = determine_ext(content_url)
if ext == 'ismc':
continue if ext == 'm3u8':
elif ext == 'm3u8': formats = self._extract_m3u8_formats(
formats.extend(self._extract_m3u8_formats( content_url, video_id, 'mp4', entry_protocol='m3u8_native',
content_url, video_id, 'mp4', m3u8_id='hls')
'm3u8_native', m3u8_id='hls', fatal=False))
elif ext == 'mpd': elif ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats = self._extract_mpd_formats(
content_url, video_id, mpd_id='dash', fatal=False)) content_url, video_id, mpd_id='dash')
else: else:
formats.append({'url': content_url}) formats = [{'url': content_url}]
self._sort_formats(formats) self._sort_formats(formats)
description = content.get('description')
thumbnail = content.get('thumbnail')
timestamp = (int_or_none(content.get('release_date')) timestamp = (int_or_none(content.get('release_date'))
or int_or_none(content.get('release_date_ut')) or int_or_none(content.get('release_date_ut'))
or int_or_none(content.get('start_time'))) or int_or_none(content.get('start_time')))
season = content.get('season') or {} duration = int_or_none(content.get('duration'))
series = content.get('program_title')
age_limit = int_or_none(content.get('restriction_age'))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': content.get('description'), 'description': description,
'thumbnail': content.get('thumbnail'), 'thumbnail': thumbnail,
'timestamp': timestamp, 'timestamp': timestamp,
'duration': int_or_none(content.get('duration')), 'duration': duration,
'series': content.get('program_title'), 'series': series,
'age_limit': int_or_none(content.get('restriction_age')), 'age_limit': age_limit,
'view_count': int_or_none(content.get('views_count')),
'like_count': int_or_none(content.get('likes')),
'dislike_count': int_or_none(content.get('dislikes')),
'season_number': int_or_none(season.get('season_number')),
'season_id': season.get('id'),
'release_year': int_or_none(content.get('release_year')),
'formats': formats, 'formats': formats,
} }

View File

@@ -3640,7 +3640,7 @@ def url_or_none(url):
if not url or not isinstance(url, compat_str): if not url or not isinstance(url, compat_str):
return None return None
url = url.strip() url = url.strip()
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
def parse_duration(s): def parse_duration(s):