Compare commits


No commits in common. "4ef1fc970727a1bbdd9f20bea54c37deb8794fb0" and "7bf5e3a84aca972fe4c34135c98394ca592211ef" have entirely different histories.

7 changed files with 178 additions and 376 deletions

youtube_dl/extractor/brightcove.py

@@ -28,7 +28,6 @@ from ..utils import (
     parse_iso8601,
     smuggle_url,
     str_or_none,
-    try_get,
     unescapeHTML,
     unsmuggle_url,
     UnsupportedError,
@@ -601,27 +600,24 @@ class BrightcoveNewIE(AdobePassIE):
         store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)

         def extract_policy_key():
-            base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
-            config = self._download_json(
-                base_url + 'config.json', video_id, fatal=False) or {}
-            policy_key = try_get(
-                config, lambda x: x['video_cloud']['policy_key'])
-            if not policy_key:
-                webpage = self._download_webpage(
-                    base_url + 'index.min.js', video_id)
+            webpage = self._download_webpage(
+                'http://players.brightcove.net/%s/%s_%s/index.min.js'
+                % (account_id, player_id, embed), video_id)

-                catalog = self._search_regex(
-                    r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
-                if catalog:
-                    catalog = self._parse_json(
-                        js_to_json(catalog), video_id, fatal=False)
-                    if catalog:
-                        policy_key = catalog.get('policyKey')
+            policy_key = None

-                if not policy_key:
-                    policy_key = self._search_regex(
-                        r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
-                        webpage, 'policy key', group='pk')
+            catalog = self._search_regex(
+                r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+            if catalog:
+                catalog = self._parse_json(
+                    js_to_json(catalog), video_id, fatal=False)
+                if catalog:
+                    policy_key = catalog.get('policyKey')
+
+            if not policy_key:
+                policy_key = self._search_regex(
+                    r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                    webpage, 'policy key', group='pk')

             store_pk(policy_key)
             return policy_key
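
The restored version goes back to scraping the Brightcove policy key from the player's index.min.js alone; the removed lines had first tried the player's config.json via try_get. Below is a rough standalone sketch of that index.min.js lookup, using plain urllib and a regex where the extractor uses its _download_webpage, _parse_json and js_to_json helpers; the account and player IDs are placeholders, not values from this diff.

    import re
    import urllib.request

    def fetch_policy_key(account_id, player_id, embed='default'):
        url = 'http://players.brightcove.net/%s/%s_%s/index.min.js' % (
            account_id, player_id, embed)
        js = urllib.request.urlopen(url).read().decode('utf-8', 'replace')

        # Try the catalog({...}) blob first; the key sits under "policyKey".
        catalog = re.search(r'catalog\(({.+?})\);', js)
        if catalog:
            m = re.search(r'["\']policyKey["\']\s*:\s*["\']([^"\']+)', catalog.group(1))
            if m:
                return m.group(1)

        # Fall back to any bare policyKey: "..." assignment in the script.
        m = re.search(r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', js)
        return m.group('pk') if m else None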

youtube_dl/extractor/cbslocal.py

@@ -11,47 +11,7 @@ from ..utils import (

 class CBSLocalIE(AnvatoIE):
-    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
-    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
-
-    _TESTS = [{
-        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
-        'info_dict': {
-            'id': '3580809',
-            'ext': 'mp4',
-            'title': 'A Very Blue Anniversary',
-            'description': 'CBS2’s Cindy Hsu has more.',
-            'thumbnail': 're:^https?://.*',
-            'timestamp': int,
-            'upload_date': r're:^\d{8}$',
-            'uploader': 'CBS',
-            'subtitles': {
-                'en': 'mincount:5',
-            },
-            'categories': [
-                'Stations\\Spoken Word\\WCBSTV',
-                'Syndication\\AOL',
-                'Syndication\\MSN',
-                'Syndication\\NDN',
-                'Syndication\\Yahoo',
-                'Content\\News',
-                'Content\\News\\Local News',
-            ],
-            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    def _real_extract(self, url):
-        mcp_id = self._match_id(url)
-        return self.url_result(
-            'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
-
-
-class CBSLocalArticleIE(AnvatoIE):
-    _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'

     _TESTS = [{
         # Anvato backend
@@ -92,6 +52,31 @@ class CBSLocalArticleIE(AnvatoIE):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2’s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
     }]

     def _real_extract(self, url):
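
This change folds CBSLocalArticleIE back into CBSLocalIE: a single _VALID_URL now accepts both /video/<slug> and /YYYY/MM/DD/<slug> article URLs. A quick check of the merged pattern (the article URL below is a made-up example, not taken from this diff):

    import re

    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'

    for url in (
            'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
            'http://newyork.cbslocal.com/2017/03/31/some-story-slug/',  # hypothetical article URL
    ):
        print(re.match(_VALID_URL, url).group('id'))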

youtube_dl/extractor/extractors.py

@@ -163,10 +163,7 @@ from .cbc import (
     CBCOlympicsIE,
 )
 from .cbs import CBSIE
-from .cbslocal import (
-    CBSLocalIE,
-    CBSLocalArticleIE,
-)
+from .cbslocal import CBSLocalIE
 from .cbsinteractive import CBSInteractiveIE
 from .cbsnews import (
     CBSNewsEmbedIE,

youtube_dl/extractor/facebook.py

@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import json
 import re
 import socket
@@ -9,7 +8,6 @@ from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
     compat_http_client,
-    compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
@@ -49,8 +47,7 @@ class FacebookIE(InfoExtractor):
                             )\?(?:.*?)(?:v|video_id|story_fbid)=|
                             [^/]+/videos/(?:[^/]+/)?|
                             [^/]+/posts/|
-                            groups/[^/]+/permalink/|
-                            watchparty/
+                            groups/[^/]+/permalink/
                         )|
                     facebook:
                 )
@@ -283,18 +280,8 @@ class FacebookIE(InfoExtractor):
         # data.video.creation_story.attachments[].media
         'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
         'only_matching': True,
-    }, {
-        'url': 'https://www.facebook.com/watchparty/211641140192478',
-        'info_dict': {
-            'id': '211641140192478',
-        },
-        'playlist_count': 1,
-        'skip': 'Requires logging in',
     }]
     _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
-    _api_config = {
-        'graphURI': '/api/graphql/'
-    }

     @staticmethod
     def _extract_urls(webpage):
@@ -418,17 +405,6 @@ class FacebookIE(InfoExtractor):
         self._sort_formats(formats)

-        def extract_relay_data(_filter):
-            return self._parse_json(self._search_regex(
-                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
-                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
-
-        def extract_relay_prefetched_data(_filter):
-            replay_data = extract_relay_data(_filter)
-            for require in (replay_data.get('require') or []):
-                if require[0] == 'RelayPrefetchedStreamCache':
-                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
-
         if not video_data:
             server_js_data = self._parse_json(self._search_regex([
                 r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
@@ -437,83 +413,87 @@ class FacebookIE(InfoExtractor):
             video_data = extract_from_jsmods_instances(server_js_data)

         if not video_data:
-            data = extract_relay_prefetched_data(
-                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
-            if data:
-                entries = []
+            graphql_data = self._parse_json(self._search_regex(
+                r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
+                webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
+            for require in (graphql_data.get('require') or []):
+                if require[0] == 'RelayPrefetchedStreamCache':
+                    entries = []

                     def parse_graphql_video(video):
                         formats = []
                         q = qualities(['sd', 'hd'])
                         for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
                             playable_url = video.get('playable_url' + suffix)
                             if not playable_url:
                                 continue
                             formats.append({
                                 'format_id': format_id,
                                 'quality': q(format_id),
                                 'url': playable_url,
                             })
                         extract_dash_manifest(video, formats)
                         process_formats(formats)
                         v_id = video.get('videoId') or video.get('id') or video_id
                         info = {
                             'id': v_id,
                             'formats': formats,
                             'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
                             'uploader_id': try_get(video, lambda x: x['owner']['id']),
                             'timestamp': int_or_none(video.get('publish_time')),
                             'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
                         }
                         description = try_get(video, lambda x: x['savable_description']['text'])
                         title = video.get('name')
                         if title:
                             info.update({
                                 'title': title,
                                 'description': description,
                             })
                         else:
                             info['title'] = description or 'Facebook video #%s' % v_id
                         entries.append(info)

                     def parse_attachment(attachment, key='media'):
                         media = attachment.get(key) or {}
                         if media.get('__typename') == 'Video':
                             return parse_graphql_video(media)

-                nodes = data.get('nodes') or []
-                node = data.get('node') or {}
-                if not nodes and node:
-                    nodes.append(node)
-                for node in nodes:
-                    story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
-                    attachments = try_get(story, [
-                        lambda x: x['attached_story']['attachments'],
-                        lambda x: x['attachments']
-                    ], list) or []
-                    for attachment in attachments:
-                        attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
-                        ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
-                        for n in ns:
-                            parse_attachment(n)
-                        parse_attachment(attachment)
-
-                edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
-                for edge in edges:
-                    parse_attachment(edge, key='node')
-
-                video = data.get('video') or {}
-                if video:
-                    attachments = try_get(video, [
-                        lambda x: x['story']['attachments'],
-                        lambda x: x['creation_story']['attachments']
-                    ], list) or []
-                    for attachment in attachments:
-                        parse_attachment(attachment)
-                    if not entries:
-                        parse_graphql_video(video)
-
-                return self.playlist_result(entries, video_id)
+                    data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
+
+                    nodes = data.get('nodes') or []
+                    node = data.get('node') or {}
+                    if not nodes and node:
+                        nodes.append(node)
+                    for node in nodes:
+                        story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
+                        attachments = try_get(story, [
+                            lambda x: x['attached_story']['attachments'],
+                            lambda x: x['attachments']
+                        ], list) or []
+                        for attachment in attachments:
+                            attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
+                            ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+                            for n in ns:
+                                parse_attachment(n)
+                            parse_attachment(attachment)
+
+                    edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
+                    for edge in edges:
+                        parse_attachment(edge, key='node')
+
+                    video = data.get('video') or {}
+                    if video:
+                        attachments = try_get(video, [
+                            lambda x: x['story']['attachments'],
+                            lambda x: x['creation_story']['attachments']
+                        ], list) or []
+                        for attachment in attachments:
+                            parse_attachment(attachment)
+                        if not entries:
+                            parse_graphql_video(video)
+
+                    return self.playlist_result(entries, video_id)

         if not video_data:
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
@@ -524,43 +504,6 @@ class FacebookIE(InfoExtractor):
             elif '>You must log in to continue' in webpage:
                 self.raise_login_required()

-        if not video_data and '/watchparty/' in url:
-            post_data = {
-                'doc_id': 3731964053542869,
-                'variables': json.dumps({
-                    'livingRoomID': video_id,
-                }),
-            }
-
-            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
-            if prefetched_data:
-                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
-                if lsd:
-                    post_data[lsd['name']] = lsd['value']
-
-            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
-            for define in (relay_data.get('define') or []):
-                if define[0] == 'RelayAPIConfigDefaults':
-                    self._api_config = define[2]
-
-            living_room = self._download_json(
-                urljoin(url, self._api_config['graphURI']), video_id,
-                data=urlencode_postdata(post_data))['data']['living_room']
-
-            entries = []
-            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
-                video = try_get(edge, lambda x: x['node']['video']) or {}
-                v_id = video.get('id')
-                if not v_id:
-                    continue
-                v_id = compat_str(v_id)
-                entries.append(self.url_result(
-                    self._VIDEO_PAGE_TEMPLATE % v_id,
-                    self.ie_key(), v_id, video.get('name')))
-
-            return self.playlist_result(entries, video_id)
-
         if not video_data:
             # Video info not in first request, do a secondary request using
             # tahoe player specific URL
             tahoe_data = self._download_webpage(
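
Both versions ultimately read the video data out of a RelayPrefetchedStreamCache entry inside the page's handleWithCustomApplyEach payload; the removed lines had merely factored that walk into extract_relay_prefetched_data and reused it for the watch-party support deleted above. A self-contained sketch of the walk itself, run against a made-up stand-in payload (assumes youtube-dl is importable for its try_get helper):

    from youtube_dl.utils import try_get

    # Made-up stand-in for the parsed handleWithCustomApplyEach payload.
    graphql_data = {
        'require': [
            ['SomeOtherModule', 'run', [], []],
            ['RelayPrefetchedStreamCache', 'next', [], [
                'adp_SomeQueryRelayPreloader_0',  # hypothetical cache key
                {'__bbox': {'result': {'data': {'video': {'id': '123'}}}}},
            ]],
        ],
    }

    data = {}
    for require in (graphql_data.get('require') or []):
        if require[0] == 'RelayPrefetchedStreamCache':
            data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
            break

    print(data)  # -> {'video': {'id': '123'}}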

youtube_dl/extractor/sonyliv.py

@@ -1,112 +1,40 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import time
-import uuid
-
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-)
+from ..utils import smuggle_url


 class SonyLIVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
+        'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
         'info_dict': {
-            'title': 'Bachelors Delight - Achaari Cheese Toast',
-            'id': '1000022678',
+            'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
+            'id': 'ref:5024612095001',
             'ext': 'mp4',
-            'upload_date': '20200411',
-            'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
-            'timestamp': 1586632091,
-            'duration': 185,
-            'season_number': 1,
-            'episode': 'Achaari Cheese Toast',
-            'episode_number': 1,
-            'release_year': 2016,
+            'upload_date': '20170923',
+            'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
+            'uploader_id': '5182475815001',
+            'timestamp': 1506200547,
         },
         'params': {
             'skip_download': True,
         },
+        'add_ie': ['BrightcoveNew'],
     }, {
-        'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
+        'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
         'only_matching': True,
     }]
-    _GEO_COUNTRIES = ['IN']
-    _TOKEN = None

-    def _call_api(self, version, path, video_id):
-        headers = {}
-        if self._TOKEN:
-            headers['security_token'] = self._TOKEN
-        try:
-            return self._download_json(
-                'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
-                video_id, headers=headers)['resultObj']
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                message = self._parse_json(
-                    e.cause.read().decode(), video_id)['message']
-                if message == 'Geoblocked Country':
-                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
-                raise ExtractorError(message)
-            raise
-
-    def _real_initialize(self):
-        self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
+    # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        content = self._call_api(
-            '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
-        if content.get('isEncrypted'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
-        dash_url = content['videoURL']
-        headers = {
-            'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
-        }
-        formats = self._extract_mpd_formats(
-            dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
-        formats.extend(self._extract_m3u8_formats(
-            dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
-            video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
-        for f in formats:
-            f.setdefault('http_headers', {}).update(headers)
-        self._sort_formats(formats)
-
-        metadata = self._call_api(
-            '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
-        title = metadata['title']
-        episode = metadata.get('episodeTitle')
-        if episode and title != episode:
-            title += ' - ' + episode
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': content.get('posterURL'),
-            'description': metadata.get('longDescription') or metadata.get('shortDescription'),
-            'timestamp': int_or_none(metadata.get('creationDate'), 1000),
-            'duration': int_or_none(metadata.get('duration')),
-            'season_number': int_or_none(metadata.get('season')),
-            'episode': episode,
-            'episode_number': int_or_none(metadata.get('episodeNumber')),
-            'release_year': int_or_none(metadata.get('year')),
-        }
+        brightcove_id = self._match_id(url)
+        return self.url_result(
+            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
+                'geo_countries': ['IN'],
+                'referrer': url,
+            }),
+            'BrightcoveNew', brightcove_id)
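
The restored SonyLIV extractor does no API work of its own: it builds a Brightcove player URL from the matched ID and smuggles geo and referrer hints onto it for BrightcoveNewIE to unpack. A short sketch of that handoff, assuming youtube-dl is installed; the referrer is a placeholder built from the test URL above.

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'

    brightcove_id = '5024612095001'
    referrer = 'http://www.sonyliv.com/details/episodes/5024612095001/some-title'  # placeholder

    url = smuggle_url(BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
        'geo_countries': ['IN'],
        'referrer': referrer,
    })

    # BrightcoveNewIE recovers the hints on its side with unsmuggle_url().
    plain_url, hints = unsmuggle_url(url, {})
    print(plain_url)
    print(hints['geo_countries'])  # -> ['IN']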

youtube_dl/extractor/streetvoice.py

@@ -2,40 +2,25 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    str_or_none,
-    strip_or_none,
-    try_get,
-    urljoin,
-)
+from ..compat import compat_str
+from ..utils import unified_strdate


 class StreetVoiceIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'https://streetvoice.com/skippylu/songs/123688/',
-        'md5': '0eb535970629a5195685355f3ed60bfd',
+        'url': 'http://streetvoice.com/skippylu/songs/94440/',
+        'md5': '15974627fc01a29e492c98593c2fd472',
         'info_dict': {
-            'id': '123688',
+            'id': '94440',
             'ext': 'mp3',
-            'title': '流浪',
-            'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 270,
-            'upload_date': '20100923',
+            'title': '輸',
+            'description': 'Crispy脆樂團 - 輸',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 260,
+            'upload_date': '20091018',
             'uploader': 'Crispy脆樂團',
             'uploader_id': '627810',
-            'uploader_url': 're:^https?://streetvoice.com/skippylu/',
-            'timestamp': 1285261661,
-            'view_count': int,
-            'like_count': int,
-            'comment_count': int,
-            'repost_count': int,
-            'track': '流浪',
-            'track_id': '123688',
-            'album': '2010',
         }
     }, {
         'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
@@ -44,57 +29,21 @@ class StreetVoiceIE(InfoExtractor):

     def _real_extract(self, url):
         song_id = self._match_id(url)
-        base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
-        song = self._download_json(base_url, song_id, query={
-            'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
-        })
+
+        song = self._download_json(
+            'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
+
         title = song['name']
-
-        formats = []
-        for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
-            f_url = (self._download_json(
-                base_url + suffix + '/', song_id,
-                'Downloading %s format URL' % format_id,
-                data=b'', fatal=False) or {}).get('file')
-            if not f_url:
-                continue
-            f = {
-                'ext': 'mp3',
-                'format_id': format_id,
-                'url': f_url,
-                'vcodec': 'none',
-            }
-            if format_id == 'hls':
-                f['protocol'] = 'm3u8_native'
-            abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
-            if abr:
-                abr = int(abr)
-                f.update({
-                    'abr': abr,
-                    'tbr': abr,
-                })
-            formats.append(f)
-
-        user = song.get('user') or {}
-        username = user.get('username')
-        get_count = lambda x: int_or_none(song.get(x + '_count'))
+        author = song['user']['nickname']

         return {
             'id': song_id,
-            'formats': formats,
+            'url': song['file'],
             'title': title,
-            'description': strip_or_none(song.get('synopsis')),
-            'thumbnail': song.get('image'),
-            'duration': int_or_none(song.get('length')),
-            'timestamp': parse_iso8601(song.get('created_at')),
-            'uploader': try_get(user, lambda x: x['profile']['nickname']),
-            'uploader_id': str_or_none(user.get('id')),
-            'uploader_url': urljoin(url, '/%s/' % username) if username else None,
-            'view_count': get_count('plays'),
-            'like_count': get_count('likes'),
-            'comment_count': get_count('comments'),
-            'repost_count': get_count('share'),
-            'track': title,
-            'track_id': song_id,
-            'album': try_get(song, lambda x: x['album']['name']),
+            'description': '%s - %s' % (author, title),
+            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
+            'duration': song.get('length'),
+            'upload_date': unified_strdate(song.get('created_at')),
+            'uploader': author,
+            'uploader_id': compat_str(song['user']['id']),
         }
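
Where the removed StreetVoice code made one metadata call plus up to three format-URL calls against the v4 API, the restored code issues a single empty POST to the old v1 endpoint and takes the direct 'file' URL from the response. A standalone sketch of that one call; the song ID comes from the test above, and the v1 endpoint may no longer respond this way.

    import json
    import urllib.request

    song_id = '94440'
    req = urllib.request.Request(
        'https://streetvoice.com/api/v1/public/song/%s/' % song_id, data=b'')
    song = json.load(urllib.request.urlopen(req))

    title = song['name']
    author = song['user']['nickname']
    print(title, song['file'])          # track title and direct MP3 URL
    print('%s - %s' % (author, title))  # the description the extractor builds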

youtube_dl/extractor/youtube.py

@@ -1322,16 +1322,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return self._parse_json(
             uppercase_escape(config), video_id, fatal=False)

-    def _get_automatic_captions(self, video_id, player_response, player_config):
+    def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
+        player_config = self._get_ytplayer_config(video_id, webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if not (player_response or player_config):
+        if not player_config:
             self._downloader.report_warning(err_msg)
             return {}
         try:
-            args = player_config.get('args') if player_config else {}
+            args = player_config['args']
             caption_url = args.get('ttsurl')
             if caption_url:
                 timestamp = args['timestamp']
@@ -1390,15 +1391,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 return captions

         # New captions format as of 22.06.2017
-        if player_response:
-            renderer = player_response['captions']['playerCaptionsTracklistRenderer']
-            base_url = renderer['captionTracks'][0]['baseUrl']
-            sub_lang_list = []
-            for lang in renderer['translationLanguages']:
-                lang_code = lang.get('languageCode')
-                if lang_code:
-                    sub_lang_list.append(lang_code)
-            return make_captions(base_url, sub_lang_list)
+        player_response = args.get('player_response')
+        if player_response and isinstance(player_response, compat_str):
+            player_response = self._parse_json(
+                player_response, video_id, fatal=False)
+            if player_response:
+                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+                base_url = renderer['captionTracks'][0]['baseUrl']
+                sub_lang_list = []
+                for lang in renderer['translationLanguages']:
+                    lang_code = lang.get('languageCode')
+                    if lang_code:
+                        sub_lang_list.append(lang_code)
+                return make_captions(base_url, sub_lang_list)

         # Some videos don't provide ttsurl but rather caption_tracks and
         # caption_translation_languages (e.g. 20LmZk1hakA)
@@ -1647,7 +1652,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Get video info
         video_info = {}
         embed_webpage = None
-        ytplayer_config = None
         if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
             age_gate = True
@@ -2272,7 +2276,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
-        automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
+        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

         video_duration = try_get(
             video_info, lambda x: int_or_none(x['length_seconds'][0]))
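
The restored caption flow re-derives ytplayer_config from the webpage and treats args['player_response'] as a JSON string that must be decoded before the caption renderer can be read. A minimal sketch of that decoding step, run against a trimmed, made-up player config:

    import json

    # Trimmed stand-in for a real ytplayer config; the URL is fake.
    player_config = {
        'args': {
            'player_response': json.dumps({
                'captions': {
                    'playerCaptionsTracklistRenderer': {
                        'captionTracks': [{'baseUrl': 'https://www.youtube.com/api/timedtext?v=example'}],
                        'translationLanguages': [{'languageCode': 'en'}, {'languageCode': 'de'}],
                    },
                },
            }),
        },
    }

    args = player_config['args']
    player_response = args.get('player_response')
    if player_response and isinstance(player_response, str):  # compat_str in youtube-dl
        player_response = json.loads(player_response)

    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
    base_url = renderer['captionTracks'][0]['baseUrl']
    sub_lang_list = [lang['languageCode']
                     for lang in renderer['translationLanguages']
                     if lang.get('languageCode')]
    print(base_url, sub_lang_list)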