mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-23 04:30:10 +09:00
Compare commits
7 Commits
7bf5e3a84a
...
4ef1fc9707
Author | SHA1 | Date | |
---|---|---|---|
|
4ef1fc9707 | ||
|
f9e6aa1dcf | ||
|
f83db9064b | ||
|
2da9a86399 | ||
|
ecaa535cf4 | ||
|
79dd92b1fe | ||
|
bd3844c9c2 |
@ -28,6 +28,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
@ -600,24 +601,27 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
config, lambda x: x['video_cloud']['policy_key'])
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
webpage = self._download_webpage(
|
||||
base_url + 'index.min.js', video_id)
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
@ -11,7 +11,47 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mcp_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||
|
||||
|
||||
class CBSLocalArticleIE(AnvatoIE):
|
||||
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -163,7 +163,10 @@ from .cbc import (
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
|
@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@ -8,6 +9,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/
|
||||
groups/[^/]+/permalink/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||
'info_dict': {
|
||||
'id': '211641140192478',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
'graphURI': '/api/graphql/'
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
replay_data = extract_relay_data(_filter)
|
||||
for require in (replay_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||
@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
graphql_data = self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
|
||||
webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
|
||||
for require in (graphql_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
entries = []
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
|
||||
data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
post_data = {
|
||||
'doc_id': 3731964053542869,
|
||||
'variables': json.dumps({
|
||||
'livingRoomID': video_id,
|
||||
}),
|
||||
}
|
||||
|
||||
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||
if prefetched_data:
|
||||
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||
if lsd:
|
||||
post_data[lsd['name']] = lsd['value']
|
||||
|
||||
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||
for define in (relay_data.get('define') or []):
|
||||
if define[0] == 'RelayAPIConfigDefaults':
|
||||
self._api_config = define[2]
|
||||
|
||||
living_room = self._download_json(
|
||||
urljoin(url, self._api_config['graphURI']), video_id,
|
||||
data=urlencode_postdata(post_data))['data']['living_room']
|
||||
|
||||
entries = []
|
||||
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||
v_id = video.get('id')
|
||||
if not v_id:
|
||||
continue
|
||||
v_id = compat_str(v_id)
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||
self.ie_key(), v_id, video.get('name')))
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
# Video info not in first request, do a secondary request using
|
||||
# tahoe player specific URL
|
||||
tahoe_data = self._download_webpage(
|
||||
|
@ -1,40 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SonyLIVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
||||
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
|
||||
'info_dict': {
|
||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
||||
'id': 'ref:5024612095001',
|
||||
'title': 'Bachelors Delight - Achaari Cheese Toast',
|
||||
'id': '1000022678',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170923',
|
||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
||||
'uploader_id': '5182475815001',
|
||||
'timestamp': 1506200547,
|
||||
'upload_date': '20200411',
|
||||
'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
|
||||
'timestamp': 1586632091,
|
||||
'duration': 185,
|
||||
'season_number': 1,
|
||||
'episode': 'Achaari Cheese Toast',
|
||||
'episode_number': 1,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
||||
'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_TOKEN = None
|
||||
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
|
||||
def _call_api(self, version, path, video_id):
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['security_token'] = self._TOKEN
|
||||
try:
|
||||
return self._download_json(
|
||||
'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
|
||||
video_id, headers=headers)['resultObj']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
if message == 'Geoblocked Country':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(message)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
||||
'geo_countries': ['IN'],
|
||||
'referrer': url,
|
||||
}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
video_id = self._match_id(url)
|
||||
content = self._call_api(
|
||||
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
|
||||
if content.get('isEncrypted'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
dash_url = content['videoURL']
|
||||
headers = {
|
||||
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
|
||||
}
|
||||
formats = self._extract_mpd_formats(
|
||||
dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
|
||||
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._call_api(
|
||||
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
|
||||
title = metadata['title']
|
||||
episode = metadata.get('episodeTitle')
|
||||
if episode and title != episode:
|
||||
title += ' - ' + episode
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('posterURL'),
|
||||
'description': metadata.get('longDescription') or metadata.get('shortDescription'),
|
||||
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'season_number': int_or_none(metadata.get('season')),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'release_year': int_or_none(metadata.get('year')),
|
||||
}
|
||||
|
@ -2,25 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class StreetVoiceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://streetvoice.com/skippylu/songs/94440/',
|
||||
'md5': '15974627fc01a29e492c98593c2fd472',
|
||||
'url': 'https://streetvoice.com/skippylu/songs/123688/',
|
||||
'md5': '0eb535970629a5195685355f3ed60bfd',
|
||||
'info_dict': {
|
||||
'id': '94440',
|
||||
'id': '123688',
|
||||
'ext': 'mp3',
|
||||
'title': '輸',
|
||||
'description': 'Crispy脆樂團 - 輸',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 260,
|
||||
'upload_date': '20091018',
|
||||
'title': '流浪',
|
||||
'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 270,
|
||||
'upload_date': '20100923',
|
||||
'uploader': 'Crispy脆樂團',
|
||||
'uploader_id': '627810',
|
||||
'uploader_url': 're:^https?://streetvoice.com/skippylu/',
|
||||
'timestamp': 1285261661,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'track': '流浪',
|
||||
'track_id': '123688',
|
||||
'album': '2010',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
||||
@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
song = self._download_json(
|
||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
||||
|
||||
base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
|
||||
song = self._download_json(base_url, song_id, query={
|
||||
'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
|
||||
})
|
||||
title = song['name']
|
||||
author = song['user']['nickname']
|
||||
|
||||
formats = []
|
||||
for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
|
||||
f_url = (self._download_json(
|
||||
base_url + suffix + '/', song_id,
|
||||
'Downloading %s format URL' % format_id,
|
||||
data=b'', fatal=False) or {}).get('file')
|
||||
if not f_url:
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp3',
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if format_id == 'hls':
|
||||
f['protocol'] = 'm3u8_native'
|
||||
abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
|
||||
if abr:
|
||||
abr = int(abr)
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
user = song.get('user') or {}
|
||||
username = user.get('username')
|
||||
get_count = lambda x: int_or_none(song.get(x + '_count'))
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song['file'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': '%s - %s' % (author, title),
|
||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
||||
'duration': song.get('length'),
|
||||
'upload_date': unified_strdate(song.get('created_at')),
|
||||
'uploader': author,
|
||||
'uploader_id': compat_str(song['user']['id']),
|
||||
'description': strip_or_none(song.get('synopsis')),
|
||||
'thumbnail': song.get('image'),
|
||||
'duration': int_or_none(song.get('length')),
|
||||
'timestamp': parse_iso8601(song.get('created_at')),
|
||||
'uploader': try_get(user, lambda x: x['profile']['nickname']),
|
||||
'uploader_id': str_or_none(user.get('id')),
|
||||
'uploader_url': urljoin(url, '/%s/' % username) if username else None,
|
||||
'view_count': get_count('plays'),
|
||||
'like_count': get_count('likes'),
|
||||
'comment_count': get_count('comments'),
|
||||
'repost_count': get_count('share'),
|
||||
'track': title,
|
||||
'track_id': song_id,
|
||||
'album': try_get(song, lambda x: x['album']['name']),
|
||||
}
|
||||
|
@ -1322,17 +1322,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return self._parse_json(
|
||||
uppercase_escape(config), video_id, fatal=False)
|
||||
|
||||
def _get_automatic_captions(self, video_id, webpage):
|
||||
def _get_automatic_captions(self, video_id, player_response, player_config):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
player_config = self._get_ytplayer_config(video_id, webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
if not player_config:
|
||||
if not (player_response or player_config):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
try:
|
||||
args = player_config['args']
|
||||
args = player_config.get('args') if player_config else {}
|
||||
caption_url = args.get('ttsurl')
|
||||
if caption_url:
|
||||
timestamp = args['timestamp']
|
||||
@ -1391,19 +1390,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return captions
|
||||
|
||||
# New captions format as of 22.06.2017
|
||||
player_response = args.get('player_response')
|
||||
if player_response and isinstance(player_response, compat_str):
|
||||
player_response = self._parse_json(
|
||||
player_response, video_id, fatal=False)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
@ -1652,6 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
ytplayer_config = None
|
||||
|
||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||
age_gate = True
|
||||
@ -2276,7 +2272,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
|
||||
|
||||
video_duration = try_get(
|
||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||
|
Loading…
Reference in New Issue
Block a user