mirror of
https://github.com/ytdl-org/youtube-dl
synced 2024-12-23 04:30:10 +09:00
Compare commits
5 Commits
2dbb45ae82
...
59e583f7e8
Author | SHA1 | Date | |
---|---|---|---|
|
59e583f7e8 | ||
|
daa25d4142 | ||
|
25a35cb38a | ||
|
2cf8003638 | ||
|
cf1a8668e8 |
103
youtube_dl/extractor/amara.py
Normal file
103
youtube_dl/extractor/amara.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AmaraIE(InfoExtractor):
    """Extractor for amara.org video pages.

    Metadata and subtitle tracks are read from the Amara JSON API.  When the
    first media URL listed by the API is recognised by the YouTube or Vimeo
    extractor, the result is returned as a ``url_transparent`` entry naming
    that extractor; otherwise the media URL is returned as a direct link.
    """

    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [
        {
            # Youtube
            'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
            'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
            'info_dict': {
                'id': 'h6ZuVdvYnfE',
                'ext': 'mp4',
                'title': 'Why jury trials are becoming less common',
                'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
                'thumbnail': r're:^https?://.*\.jpg$',
                'subtitles': dict,
                'upload_date': '20160813',
                'uploader': 'PBS NewsHour',
                'uploader_id': 'PBSNewsHour',
                'timestamp': 1549639570,
            },
        },
        {
            # Vimeo
            'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
            'md5': '99392c75fa05d432a8f11df03612195e',
            'info_dict': {
                'id': '18622084',
                'ext': 'mov',
                'title': 'Vimeo at CES 2011!',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'thumbnail': r're:^https?://.*\.jpg$',
                'subtitles': dict,
                'timestamp': 1294763658,
                'upload_date': '20110111',
                'uploader': 'Sam Morrill',
                'uploader_id': 'sammorrill',
            },
        },
        {
            # Direct Link
            'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
            'md5': 'd3970f08512738ee60c5807311ff5d3f',
            'info_dict': {
                'id': 's8KL7I3jLmh6',
                'ext': 'mp4',
                'title': 'The danger of a single story',
                'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
                'thumbnail': r're:^https?://.*\.jpg$',
                'subtitles': dict,
                'upload_date': '20091007',
                'timestamp': 1254942511,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the Amara video addressed by *url*.

        A missing ``title`` or an empty/missing ``all_urls`` list in the API
        response raises (KeyError / IndexError) because both are accessed
        unconditionally.
        """
        video_id = self._match_id(url)
        api_url = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(api_url, video_id, query={'format': 'json'})
        title = meta['title']
        # Only the first listed URL is used as the media source.
        video_url = meta['all_urls'][0]

        subtitles = {}
        for lang in (meta.get('languages') or []):
            uri = lang.get('subtitles_uri')
            # Keep only languages that have a subtitles URI and are published.
            if uri and lang.get('published'):
                tracks = subtitles.setdefault(lang.get('code') or 'en', [])
                # One entry per format, selected through the `format` query
                # parameter of the same subtitles URI.
                tracks.extend([
                    {'ext': ext, 'url': update_url_query(uri, {'format': ext})}
                    for ext in ('json', 'srt', 'vtt')])

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # Hand over to the first hosting-site extractor that accepts the URL,
        # checking YouTube before Vimeo; with no match the URL stays direct.
        hosting_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if hosting_ie is not None:
            info['_type'] = 'url_transparent'
            info['ie_key'] = hosting_ie.ie_key()

        return info
|
@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
|
|||||||
from .airmozilla import AirMozillaIE
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
|
from .amara import AmaraIE
|
||||||
from .amcnetworks import AMCNetworksIE
|
from .amcnetworks import AMCNetworksIE
|
||||||
from .americastestkitchen import AmericasTestKitchenIE
|
from .americastestkitchen import AmericasTestKitchenIE
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
|
@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_parse_qs
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
try_get,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
# video can't be watched anonymously due to view count limit reached,
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||||
'info_dict': {
|
|
||||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
|
||||||
'duration': 189,
|
|
||||||
},
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
video_info = compat_parse_qs(self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
'https://drive.google.com/get_video_info',
|
||||||
|
video_id, query={'docid': video_id}))
|
||||||
|
|
||||||
title = self._search_regex(
|
def get_value(key):
|
||||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
return try_get(video_info, lambda x: x[key][0])
|
||||||
default=None) or self._og_search_title(webpage)
|
|
||||||
duration = int_or_none(self._search_regex(
|
reason = get_value('reason')
|
||||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
title = get_value('title')
|
||||||
default=None))
|
if not title and reason:
|
||||||
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
fmt_stream_map = self._search_regex(
|
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||||
'fmt stream map', default='').split(',')
|
|
||||||
fmt_list = self._search_regex(
|
|
||||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
|
||||||
'fmt_list', default='').split(',')
|
|
||||||
if fmt_stream_map and fmt_list:
|
if fmt_stream_map and fmt_list:
|
||||||
resolutions = {}
|
resolutions = {}
|
||||||
for fmt in fmt_list:
|
for fmt in fmt_list:
|
||||||
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
if urlh and urlh.headers.get('Content-Disposition'):
|
if urlh and urlh.headers.get('Content-Disposition'):
|
||||||
add_source_format(urlh)
|
add_source_format(urlh)
|
||||||
|
|
||||||
if not formats:
|
if not formats and reason:
|
||||||
reason = self._search_regex(
|
raise ExtractorError(reason, expected=True)
|
||||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
|
||||||
if reason:
|
|
||||||
raise ExtractorError(reason, expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
hl = self._search_regex(
|
hl = get_value('hl')
|
||||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
|
||||||
subtitles_id = None
|
subtitles_id = None
|
||||||
ttsurl = self._search_regex(
|
ttsurl = get_value('ttsurl')
|
||||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
|
||||||
if ttsurl:
|
if ttsurl:
|
||||||
# the video Id for subtitles will be the last value in the ttsurl
|
# the video Id for subtitles will be the last value in the ttsurl
|
||||||
# query string
|
# query string
|
||||||
@ -279,8 +263,8 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||||
'duration': duration,
|
'duration': int_or_none(get_value('length_seconds')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||||
'automatic_captions': self.extract_automatic_captions(
|
'automatic_captions': self.extract_automatic_captions(
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import itertools
|
import itertools
|
||||||
@ -9,6 +10,10 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -165,19 +170,20 @@ class VikiIE(VikiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# episode
|
# episode
|
||||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '44699v',
|
'id': '44699v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Boys Over Flowers - Episode 1',
|
'title': 'Boys Over Flowers - Episode 1',
|
||||||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
||||||
'duration': 4204,
|
'duration': 4172,
|
||||||
'timestamp': 1270496524,
|
'timestamp': 1270496524,
|
||||||
'upload_date': '20100405',
|
'upload_date': '20100405',
|
||||||
'uploader': 'group8',
|
'uploader': 'group8',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
# youtube external
|
# youtube external
|
||||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||||
@ -194,14 +200,15 @@ class VikiIE(VikiBaseIE):
|
|||||||
'uploader_id': 'ad14065n',
|
'uploader_id': 'ad14065n',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
'skip': 'Page not found!',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viki.com/player/44699v',
|
'url': 'http://www.viki.com/player/44699v',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# non-English description
|
# non-English description
|
||||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '158036v',
|
'id': '158036v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -217,8 +224,11 @@ class VikiIE(VikiBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._call_api(
|
resp = self._download_json(
|
||||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
'https://www.viki.com/api/videos/' + video_id,
|
||||||
|
video_id, 'Downloading video JSON',
|
||||||
|
headers={'x-viki-app-ver': '4.0.57'})
|
||||||
|
video = resp['video']
|
||||||
|
|
||||||
self._check_errors(video)
|
self._check_errors(video)
|
||||||
|
|
||||||
@ -265,57 +275,74 @@ class VikiIE(VikiBaseIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
streams = self._call_api(
|
|
||||||
'videos/%s/streams.json' % video_id, video_id,
|
|
||||||
'Downloading video streams JSON')
|
|
||||||
|
|
||||||
if 'external' in streams:
|
|
||||||
result.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': streams['external']['url'],
|
|
||||||
})
|
|
||||||
return result
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_dict in streams.items():
|
|
||||||
height = int_or_none(self._search_regex(
|
def add_format(format_id, format_dict, protocol='http'):
|
||||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
# rtmps URLs do not seem to work
|
||||||
for protocol, format_dict in stream_dict.items():
|
if protocol == 'rtmps':
|
||||||
# rtmps URLs do not seem to work
|
return
|
||||||
if protocol == 'rtmps':
|
format_url = format_dict.get('url')
|
||||||
continue
|
if not format_url:
|
||||||
format_url = format_dict['url']
|
return
|
||||||
if format_id == 'm3u8':
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
stream = qs.get('stream', [None])[0]
|
||||||
format_url, video_id, 'mp4',
|
if stream:
|
||||||
entry_protocol='m3u8_native',
|
format_url = base64.b64decode(stream).decode()
|
||||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
if format_id in ('m3u8', 'hls'):
|
||||||
# Despite CODECS metadata in m3u8 all video-only formats
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
# are actually video+audio
|
format_url, video_id, 'mp4',
|
||||||
for f in m3u8_formats:
|
entry_protocol='m3u8_native',
|
||||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||||
f['acodec'] = None
|
# Despite CODECS metadata in m3u8 all video-only formats
|
||||||
formats.extend(m3u8_formats)
|
# are actually video+audio
|
||||||
elif format_url.startswith('rtmp'):
|
for f in m3u8_formats:
|
||||||
mobj = re.search(
|
if '_drm/index_' in f['url']:
|
||||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
|
||||||
format_url)
|
|
||||||
if not mobj:
|
|
||||||
continue
|
continue
|
||||||
formats.append({
|
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||||
'format_id': 'rtmp-%s' % format_id,
|
f['acodec'] = None
|
||||||
'ext': 'flv',
|
formats.append(f)
|
||||||
'url': mobj.group('url'),
|
elif format_id in ('mpd', 'dash'):
|
||||||
'play_path': mobj.group('playpath'),
|
formats.extend(self._extract_mpd_formats(
|
||||||
'app': mobj.group('app'),
|
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
||||||
'page_url': url,
|
elif format_url.startswith('rtmp'):
|
||||||
})
|
mobj = re.search(
|
||||||
else:
|
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||||
formats.append({
|
format_url)
|
||||||
'url': format_url,
|
if not mobj:
|
||||||
'format_id': '%s-%s' % (format_id, protocol),
|
return
|
||||||
'height': height,
|
formats.append({
|
||||||
})
|
'format_id': 'rtmp-%s' % format_id,
|
||||||
|
'ext': 'flv',
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'play_path': mobj.group('playpath'),
|
||||||
|
'app': mobj.group('app'),
|
||||||
|
'page_url': url,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%s-%s' % (format_id, protocol),
|
||||||
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
||||||
|
})
|
||||||
|
|
||||||
|
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||||
|
add_format(format_id, format_dict)
|
||||||
|
if not formats:
|
||||||
|
streams = self._call_api(
|
||||||
|
'videos/%s/streams.json' % video_id, video_id,
|
||||||
|
'Downloading video streams JSON')
|
||||||
|
|
||||||
|
if 'external' in streams:
|
||||||
|
result.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': streams['external']['url'],
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
|
||||||
|
for format_id, stream_dict in streams.items():
|
||||||
|
for protocol, format_dict in stream_dict.items():
|
||||||
|
add_format(format_id, format_dict, protocol)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
result['formats'] = formats
|
result['formats'] = formats
|
||||||
|
Loading…
Reference in New Issue
Block a user