mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-01-24 12:20:09 +09:00
Compare commits
7 Commits
7bf5e3a84a
...
4ef1fc9707
Author | SHA1 | Date | |
---|---|---|---|
|
4ef1fc9707 | ||
|
f9e6aa1dcf | ||
|
f83db9064b | ||
|
2da9a86399 | ||
|
ecaa535cf4 | ||
|
79dd92b1fe | ||
|
bd3844c9c2 |
@ -28,6 +28,7 @@ from ..utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
@ -600,11 +601,14 @@ class BrightcoveNewIE(AdobePassIE):
|
|||||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||||
|
|
||||||
def extract_policy_key():
|
def extract_policy_key():
|
||||||
|
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||||
|
config = self._download_json(
|
||||||
|
base_url + 'config.json', video_id, fatal=False) or {}
|
||||||
|
policy_key = try_get(
|
||||||
|
config, lambda x: x['video_cloud']['policy_key'])
|
||||||
|
if not policy_key:
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
base_url + 'index.min.js', video_id)
|
||||||
% (account_id, player_id, embed), video_id)
|
|
||||||
|
|
||||||
policy_key = None
|
|
||||||
|
|
||||||
catalog = self._search_regex(
|
catalog = self._search_regex(
|
||||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||||
|
@ -11,7 +11,47 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CBSLocalIE(AnvatoIE):
|
class CBSLocalIE(AnvatoIE):
|
||||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||||
|
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3580809',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'A Very Blue Anniversary',
|
||||||
|
'description': 'CBS2’s Cindy Hsu has more.',
|
||||||
|
'thumbnail': 're:^https?://.*',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': r're:^\d{8}$',
|
||||||
|
'uploader': 'CBS',
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:5',
|
||||||
|
},
|
||||||
|
'categories': [
|
||||||
|
'Stations\\Spoken Word\\WCBSTV',
|
||||||
|
'Syndication\\AOL',
|
||||||
|
'Syndication\\MSN',
|
||||||
|
'Syndication\\NDN',
|
||||||
|
'Syndication\\Yahoo',
|
||||||
|
'Content\\News',
|
||||||
|
'Content\\News\\Local News',
|
||||||
|
],
|
||||||
|
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mcp_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||||
|
|
||||||
|
|
||||||
|
class CBSLocalArticleIE(AnvatoIE):
|
||||||
|
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Anvato backend
|
# Anvato backend
|
||||||
@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '3580809',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'A Very Blue Anniversary',
|
|
||||||
'description': 'CBS2’s Cindy Hsu has more.',
|
|
||||||
'thumbnail': 're:^https?://.*',
|
|
||||||
'timestamp': int,
|
|
||||||
'upload_date': r're:^\d{8}$',
|
|
||||||
'uploader': 'CBS',
|
|
||||||
'subtitles': {
|
|
||||||
'en': 'mincount:5',
|
|
||||||
},
|
|
||||||
'categories': [
|
|
||||||
'Stations\\Spoken Word\\WCBSTV',
|
|
||||||
'Syndication\\AOL',
|
|
||||||
'Syndication\\MSN',
|
|
||||||
'Syndication\\NDN',
|
|
||||||
'Syndication\\Yahoo',
|
|
||||||
'Content\\News',
|
|
||||||
'Content\\News\\Local News',
|
|
||||||
],
|
|
||||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -163,7 +163,10 @@ from .cbc import (
|
|||||||
CBCOlympicsIE,
|
CBCOlympicsIE,
|
||||||
)
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbslocal import CBSLocalIE
|
from .cbslocal import (
|
||||||
|
CBSLocalIE,
|
||||||
|
CBSLocalArticleIE,
|
||||||
|
)
|
||||||
from .cbsinteractive import CBSInteractiveIE
|
from .cbsinteractive import CBSInteractiveIE
|
||||||
from .cbsnews import (
|
from .cbsnews import (
|
||||||
CBSNewsEmbedIE,
|
CBSNewsEmbedIE,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
@ -8,6 +9,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||||
[^/]+/videos/(?:[^/]+/)?|
|
[^/]+/videos/(?:[^/]+/)?|
|
||||||
[^/]+/posts/|
|
[^/]+/posts/|
|
||||||
groups/[^/]+/permalink/
|
groups/[^/]+/permalink/|
|
||||||
|
watchparty/
|
||||||
)|
|
)|
|
||||||
facebook:
|
facebook:
|
||||||
)
|
)
|
||||||
@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
|
|||||||
# data.video.creation_story.attachments[].media
|
# data.video.creation_story.attachments[].media
|
||||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '211641140192478',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
'skip': 'Requires logging in',
|
||||||
}]
|
}]
|
||||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||||
|
_api_config = {
|
||||||
|
'graphURI': '/api/graphql/'
|
||||||
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
def extract_relay_data(_filter):
|
||||||
|
return self._parse_json(self._search_regex(
|
||||||
|
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||||
|
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||||
|
|
||||||
|
def extract_relay_prefetched_data(_filter):
|
||||||
|
replay_data = extract_relay_data(_filter)
|
||||||
|
for require in (replay_data.get('require') or []):
|
||||||
|
if require[0] == 'RelayPrefetchedStreamCache':
|
||||||
|
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(self._search_regex([
|
server_js_data = self._parse_json(self._search_regex([
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||||
@ -413,11 +437,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_data = extract_from_jsmods_instances(server_js_data)
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
graphql_data = self._parse_json(self._search_regex(
|
data = extract_relay_prefetched_data(
|
||||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
|
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||||
webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
|
if data:
|
||||||
for require in (graphql_data.get('require') or []):
|
|
||||||
if require[0] == 'RelayPrefetchedStreamCache':
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
def parse_graphql_video(video):
|
def parse_graphql_video(video):
|
||||||
@ -459,8 +481,6 @@ class FacebookIE(InfoExtractor):
|
|||||||
if media.get('__typename') == 'Video':
|
if media.get('__typename') == 'Video':
|
||||||
return parse_graphql_video(media)
|
return parse_graphql_video(media)
|
||||||
|
|
||||||
data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
|
||||||
|
|
||||||
nodes = data.get('nodes') or []
|
nodes = data.get('nodes') or []
|
||||||
node = data.get('node') or {}
|
node = data.get('node') or {}
|
||||||
if not nodes and node:
|
if not nodes and node:
|
||||||
@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
|||||||
elif '>You must log in to continue' in webpage:
|
elif '>You must log in to continue' in webpage:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
|
||||||
|
if not video_data and '/watchparty/' in url:
|
||||||
|
post_data = {
|
||||||
|
'doc_id': 3731964053542869,
|
||||||
|
'variables': json.dumps({
|
||||||
|
'livingRoomID': video_id,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||||
|
if prefetched_data:
|
||||||
|
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||||
|
if lsd:
|
||||||
|
post_data[lsd['name']] = lsd['value']
|
||||||
|
|
||||||
|
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||||
|
for define in (relay_data.get('define') or []):
|
||||||
|
if define[0] == 'RelayAPIConfigDefaults':
|
||||||
|
self._api_config = define[2]
|
||||||
|
|
||||||
|
living_room = self._download_json(
|
||||||
|
urljoin(url, self._api_config['graphURI']), video_id,
|
||||||
|
data=urlencode_postdata(post_data))['data']['living_room']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||||
|
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||||
|
v_id = video.get('id')
|
||||||
|
if not v_id:
|
||||||
|
continue
|
||||||
|
v_id = compat_str(v_id)
|
||||||
|
entries.append(self.url_result(
|
||||||
|
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||||
|
self.ie_key(), v_id, video.get('name')))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
# Video info not in first request, do a secondary request using
|
# Video info not in first request, do a secondary request using
|
||||||
# tahoe player specific URL
|
# tahoe player specific URL
|
||||||
tahoe_data = self._download_webpage(
|
tahoe_data = self._download_webpage(
|
||||||
|
@ -1,40 +1,112 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..compat import compat_HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class SonyLIVIE(InfoExtractor):
|
class SonyLIVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
'title': 'Bachelors Delight - Achaari Cheese Toast',
|
||||||
'id': 'ref:5024612095001',
|
'id': '1000022678',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20170923',
|
'upload_date': '20200411',
|
||||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
|
||||||
'uploader_id': '5182475815001',
|
'timestamp': 1586632091,
|
||||||
'timestamp': 1506200547,
|
'duration': 185,
|
||||||
|
'season_number': 1,
|
||||||
|
'episode': 'Achaari Cheese Toast',
|
||||||
|
'episode_number': 1,
|
||||||
|
'release_year': 2016,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['BrightcoveNew'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_GEO_COUNTRIES = ['IN']
|
||||||
|
_TOKEN = None
|
||||||
|
|
||||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
def _call_api(self, version, path, video_id):
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
|
headers = {}
|
||||||
|
if self._TOKEN:
|
||||||
|
headers['security_token'] = self._TOKEN
|
||||||
|
try:
|
||||||
|
return self._download_json(
|
||||||
|
'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
|
||||||
|
video_id, headers=headers)['resultObj']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
message = self._parse_json(
|
||||||
|
e.cause.read().decode(), video_id)['message']
|
||||||
|
if message == 'Geoblocked Country':
|
||||||
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
raise ExtractorError(message)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
brightcove_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self.url_result(
|
content = self._call_api(
|
||||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
|
||||||
'geo_countries': ['IN'],
|
if content.get('isEncrypted'):
|
||||||
'referrer': url,
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
}),
|
dash_url = content['videoURL']
|
||||||
'BrightcoveNew', brightcove_id)
|
headers = {
|
||||||
|
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
|
||||||
|
}
|
||||||
|
formats = self._extract_mpd_formats(
|
||||||
|
dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
|
||||||
|
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {}).update(headers)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
metadata = self._call_api(
|
||||||
|
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
|
||||||
|
title = metadata['title']
|
||||||
|
episode = metadata.get('episodeTitle')
|
||||||
|
if episode and title != episode:
|
||||||
|
title += ' - ' + episode
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': content.get('posterURL'),
|
||||||
|
'description': metadata.get('longDescription') or metadata.get('shortDescription'),
|
||||||
|
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
|
||||||
|
'duration': int_or_none(metadata.get('duration')),
|
||||||
|
'season_number': int_or_none(metadata.get('season')),
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||||
|
'release_year': int_or_none(metadata.get('year')),
|
||||||
|
}
|
||||||
|
@ -2,25 +2,40 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..utils import (
|
||||||
from ..utils import unified_strdate
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class StreetVoiceIE(InfoExtractor):
|
class StreetVoiceIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://streetvoice.com/skippylu/songs/94440/',
|
'url': 'https://streetvoice.com/skippylu/songs/123688/',
|
||||||
'md5': '15974627fc01a29e492c98593c2fd472',
|
'md5': '0eb535970629a5195685355f3ed60bfd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '94440',
|
'id': '123688',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '輸',
|
'title': '流浪',
|
||||||
'description': 'Crispy脆樂團 - 輸',
|
'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 260,
|
'duration': 270,
|
||||||
'upload_date': '20091018',
|
'upload_date': '20100923',
|
||||||
'uploader': 'Crispy脆樂團',
|
'uploader': 'Crispy脆樂團',
|
||||||
'uploader_id': '627810',
|
'uploader_id': '627810',
|
||||||
|
'uploader_url': 're:^https?://streetvoice.com/skippylu/',
|
||||||
|
'timestamp': 1285261661,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'track': '流浪',
|
||||||
|
'track_id': '123688',
|
||||||
|
'album': '2010',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
||||||
@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
song_id = self._match_id(url)
|
song_id = self._match_id(url)
|
||||||
|
base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
|
||||||
song = self._download_json(
|
song = self._download_json(base_url, song_id, query={
|
||||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
|
||||||
|
})
|
||||||
title = song['name']
|
title = song['name']
|
||||||
author = song['user']['nickname']
|
|
||||||
|
formats = []
|
||||||
|
for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
|
||||||
|
f_url = (self._download_json(
|
||||||
|
base_url + suffix + '/', song_id,
|
||||||
|
'Downloading %s format URL' % format_id,
|
||||||
|
data=b'', fatal=False) or {}).get('file')
|
||||||
|
if not f_url:
|
||||||
|
continue
|
||||||
|
f = {
|
||||||
|
'ext': 'mp3',
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': f_url,
|
||||||
|
'vcodec': 'none',
|
||||||
|
}
|
||||||
|
if format_id == 'hls':
|
||||||
|
f['protocol'] = 'm3u8_native'
|
||||||
|
abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
|
||||||
|
if abr:
|
||||||
|
abr = int(abr)
|
||||||
|
f.update({
|
||||||
|
'abr': abr,
|
||||||
|
'tbr': abr,
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
|
||||||
|
user = song.get('user') or {}
|
||||||
|
username = user.get('username')
|
||||||
|
get_count = lambda x: int_or_none(song.get(x + '_count'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': song_id,
|
'id': song_id,
|
||||||
'url': song['file'],
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': '%s - %s' % (author, title),
|
'description': strip_or_none(song.get('synopsis')),
|
||||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
'thumbnail': song.get('image'),
|
||||||
'duration': song.get('length'),
|
'duration': int_or_none(song.get('length')),
|
||||||
'upload_date': unified_strdate(song.get('created_at')),
|
'timestamp': parse_iso8601(song.get('created_at')),
|
||||||
'uploader': author,
|
'uploader': try_get(user, lambda x: x['profile']['nickname']),
|
||||||
'uploader_id': compat_str(song['user']['id']),
|
'uploader_id': str_or_none(user.get('id')),
|
||||||
|
'uploader_url': urljoin(url, '/%s/' % username) if username else None,
|
||||||
|
'view_count': get_count('plays'),
|
||||||
|
'like_count': get_count('likes'),
|
||||||
|
'comment_count': get_count('comments'),
|
||||||
|
'repost_count': get_count('share'),
|
||||||
|
'track': title,
|
||||||
|
'track_id': song_id,
|
||||||
|
'album': try_get(song, lambda x: x['album']['name']),
|
||||||
}
|
}
|
||||||
|
@ -1322,17 +1322,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
uppercase_escape(config), video_id, fatal=False)
|
uppercase_escape(config), video_id, fatal=False)
|
||||||
|
|
||||||
def _get_automatic_captions(self, video_id, webpage):
|
def _get_automatic_captions(self, video_id, player_response, player_config):
|
||||||
"""We need the webpage for getting the captions url, pass it as an
|
"""We need the webpage for getting the captions url, pass it as an
|
||||||
argument to speed up the process."""
|
argument to speed up the process."""
|
||||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||||
player_config = self._get_ytplayer_config(video_id, webpage)
|
|
||||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||||
if not player_config:
|
if not (player_response or player_config):
|
||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
args = player_config['args']
|
args = player_config.get('args') if player_config else {}
|
||||||
caption_url = args.get('ttsurl')
|
caption_url = args.get('ttsurl')
|
||||||
if caption_url:
|
if caption_url:
|
||||||
timestamp = args['timestamp']
|
timestamp = args['timestamp']
|
||||||
@ -1391,10 +1390,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
return captions
|
return captions
|
||||||
|
|
||||||
# New captions format as of 22.06.2017
|
# New captions format as of 22.06.2017
|
||||||
player_response = args.get('player_response')
|
|
||||||
if player_response and isinstance(player_response, compat_str):
|
|
||||||
player_response = self._parse_json(
|
|
||||||
player_response, video_id, fatal=False)
|
|
||||||
if player_response:
|
if player_response:
|
||||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||||
@ -1652,6 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# Get video info
|
# Get video info
|
||||||
video_info = {}
|
video_info = {}
|
||||||
embed_webpage = None
|
embed_webpage = None
|
||||||
|
ytplayer_config = None
|
||||||
|
|
||||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||||
age_gate = True
|
age_gate = True
|
||||||
@ -2276,7 +2272,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
|
||||||
|
|
||||||
video_duration = try_get(
|
video_duration = try_get(
|
||||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||||
|
Loading…
Reference in New Issue
Block a user