Compare commits

..

No commits in common. "170e1c19951ce6a87ce0e9157faa32b9a2efe708" and "2c337f4e854f8bdf33f30d26b1a242b00a0eff93" have entirely different histories.

3 changed files with 58 additions and 106 deletions

View File

@ -526,10 +526,7 @@ from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE from .ketnet import KetnetIE
from .khanacademy import ( from .khanacademy import KhanAcademyIE
KhanAcademyIE,
KhanAcademyUnitIE,
)
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE

View File

@ -1,107 +1,82 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, unified_strdate,
parse_iso8601,
try_get,
) )
class KhanAcademyBaseIE(InfoExtractor): class KhanAcademyIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
IE_NAME = 'KhanAcademy'
def _parse_video(self, video): _TESTS = [{
return { 'url': 'http://www.khanacademy.org/video/one-time-pad',
'_type': 'url_transparent', 'md5': '7b391cce85e758fb94f763ddc1bbb979',
'url': video['youtubeId'],
'id': video.get('slug'),
'title': video.get('title'),
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
'duration': int_or_none(video.get('duration')),
'description': video.get('description'),
'ie_key': 'Youtube',
}
def _real_extract(self, url):
display_id = self._match_id(url)
component_props = self._parse_json(self._download_json(
'https://www.khanacademy.org/api/internal/graphql',
display_id, query={
'hash': 1604303425,
'variables': json.dumps({
'path': display_id,
'queryParams': '',
}),
})['data']['contentJson'], display_id)['componentProps']
return self._parse_component_props(component_props)
class KhanAcademyIE(KhanAcademyBaseIE):
IE_NAME = 'khanacademy'
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
_TEST = {
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
'info_dict': { 'info_dict': {
'id': 'FlIG3TvQCBQ', 'id': 'one-time-pad',
'ext': 'mp4', 'ext': 'webm',
'title': 'The one-time pad', 'title': 'The one-time pad',
'description': 'The perfect cipher', 'description': 'The perfect cipher',
'duration': 176, 'duration': 176,
'uploader': 'Brit Cruise', 'uploader': 'Brit Cruise',
'uploader_id': 'khanacademy', 'uploader_id': 'khanacademy',
'upload_date': '20120411', 'upload_date': '20120411',
'timestamp': 1334170113,
'license': 'cc-by-nc-sa',
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
} }, {
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
def _parse_component_props(self, component_props):
video = component_props['tutorialPageData']['contentModel']
info = self._parse_video(video)
author_names = video.get('authorNames')
info.update({
'uploader': ', '.join(author_names) if author_names else None,
'timestamp': parse_iso8601(video.get('dateAdded')),
'license': video.get('kaUserLicense'),
})
return info
class KhanAcademyUnitIE(KhanAcademyBaseIE):
IE_NAME = 'khanacademy:unit'
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
_TEST = {
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
'info_dict': { 'info_dict': {
'id': 'cryptography', 'id': 'cryptography',
'title': 'Cryptography', 'title': 'Journey into cryptography',
'description': 'How have humans protected their secret messages through history? What has changed today?', 'description': 'How have humans protected their secret messages through history? What has changed today?',
}, },
'playlist_mincount': 31, 'playlist_mincount': 3,
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
if m.group('key') == 'video':
data = self._download_json(
'http://api.khanacademy.org/api/v1/videos/' + video_id,
video_id, 'Downloading video info')
upload_date = unified_strdate(data['date_added'])
uploader = ', '.join(data['author_names'])
return {
'_type': 'url_transparent',
'url': data['url'],
'id': video_id,
'title': data['title'],
'thumbnail': data['image_url'],
'duration': data['duration'],
'description': data['description'],
'uploader': uploader,
'upload_date': upload_date,
} }
else:
# topic
data = self._download_json(
'http://api.khanacademy.org/api/v1/topic/' + video_id,
video_id, 'Downloading topic info')
def _parse_component_props(self, component_props): entries = [
curation = component_props['curation'] {
'_type': 'url',
entries = [] 'url': c['url'],
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or [] 'id': c['id'],
for tutorial_number, tutorial in enumerate(tutorials, 1): 'title': c['title'],
chapter_info = {
'chapter': tutorial.get('title'),
'chapter_number': tutorial_number,
'chapter_id': tutorial.get('id'),
} }
for content_item in (tutorial.get('contentItems') or []): for c in data['children'] if c['kind'] in ('Video', 'Topic')]
if content_item.get('kind') == 'Video':
info = self._parse_video(content_item)
info.update(chapter_info)
entries.append(info)
return self.playlist_result( return {
entries, curation.get('unit'), curation.get('title'), '_type': 'playlist',
curation.get('description')) 'id': video_id,
'title': data['title'],
'description': data['description'],
'entries': entries,
}

View File

@ -450,18 +450,6 @@ class PeerTubeIE(InfoExtractor):
'tags': ['framasoft', 'peertube'], 'tags': ['framasoft', 'peertube'],
'categories': ['Science & Technology'], 'categories': ['Science & Technology'],
} }
}, {
# Issue #26002
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
'info_dict': {
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
'ext': 'mp4',
'title': 'Dot matrix printer shell demo',
'uploader_id': '3',
'timestamp': 1587401293,
'upload_date': '20200420',
'uploader': 'Drew DeVault',
}
}, { }, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True, 'only_matching': True,
@ -538,15 +526,7 @@ class PeerTubeIE(InfoExtractor):
title = video['name'] title = video['name']
formats = [] formats = []
files = video.get('files') or [] for file_ in video['files']:
for playlist in (video.get('streamingPlaylists') or []):
if not isinstance(playlist, dict):
continue
playlist_files = playlist.get('files')
if not (playlist_files and isinstance(playlist_files, list)):
continue
files.extend(playlist_files)
for file_ in files:
if not isinstance(file_, dict): if not isinstance(file_, dict):
continue continue
file_url = url_or_none(file_.get('fileUrl')) file_url = url_or_none(file_.get('fileUrl'))