From 5f3cc3bbea83d196a370106da8c7f8f8db05f09b Mon Sep 17 00:00:00 2001
From: schn0sch
Date: Sat, 24 Apr 2021 13:53:43 +0200
Subject: [PATCH 1/2] [keep2share] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/keep2share.py | 85 ++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 youtube_dl/extractor/keep2share.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ac33cd996..cfd939663 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -537,6 +537,7 @@ from .kaltura import KalturaIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
+from .keep2share import Keep2ShareIE
 from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
 from .khanacademy import (
diff --git a/youtube_dl/extractor/keep2share.py b/youtube_dl/extractor/keep2share.py
new file mode 100644
index 000000000..5adc5aa66
--- /dev/null
+++ b/youtube_dl/extractor/keep2share.py
@@ -0,0 +1,85 @@
+from __future__ import unicode_literals
+
+import json
+import re
+from .common import (
+    InfoExtractor,
+    RegexNotFoundError,
+)
+from ..utils import (
+    clean_html,
+    compat_str,
+    js_to_json,
+    urljoin,
+    try_get,
+)
+
+
+class Keep2ShareIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?:k2s\.cc|keep2share\.cc|keep2share\.com)/file/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://k2s.cc/file/d6f565bcb9581/Big_Buck%20Bunny%20Trailer.mp4',
+        'md5': '0dbce91e7d1efc506d1461439eb8a4c0',
+        'info_dict': {
+            'id': 'd6f565bcb9581',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny Trailer.mp4',
+            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'filesize': 4447915,
+            'duration': 33.019,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # TODO share app_secret across calls to _real_extract
+        app_secret = None
+
+        # retrieve app_secret
+        webpage = self._download_webpage(url, video_id)
+        scripts = re.finditer(r'<script\s[^<]*?src="(?P<src>/static/[^"]*)"', webpage)
+        for mobj in scripts:
+            src = urljoin(url, clean_html(mobj.group('src')))
+            script = self._download_webpage(src, video_id)
+            secret = self._search_regex(
+                r'REACT_APP_API_CLIENT_SECRET:\s*(?P<secret>%(string)s)' % {
+                    'string': r'"(?:[^"]|\\")*"' + '|' + r"'(?:[^']|\\')*'",
+                }, script, 'app secret', group='secret', default=None)
+            if secret is not None:
+                app_secret = self._parse_json(secret, video_id, transform_source=js_to_json)
+                break
+        else:
+            raise RegexNotFoundError('Unable to extract app secret')
+
+        # retrieve access_token
+        # TODO share access_token across calls fo _real_extract
+        data = {
+            'grant_type': 'client_credentials',
+            'client_id': 'k2s_web_app',
+            'client_secret': app_secret,
+        }
+        data = json.dumps(data, separators=(',', ':')).encode('utf-8')
+        headers = {'Content-Type': 'application/json'}
+        tokens = self._download_json('https://api.k2s.cc/v1/auth/token',
+                                     video_id, data=data, headers=headers)
+        access_token = tokens['access_token']
+
+        # retrieve API page
+        headers = {'Cookie': 'accessToken=' + access_token}
+        info = self._download_json('https://api.k2s.cc/v1/files/' + video_id,
+                                   video_id, headers=headers)
+
+        return {
+            'id': video_id,
+            'title': info.get('name', 'keep2share-file'),
+            'thumbnail': try_get(info, lambda x: x['videoPreview']['cover'], compat_str),
+            'duration': try_get(info, lambda x: x['videoInfo']['duration'], (int, float)),
+            'formats': [{
+                'url': info['videoPreview']['video'],
+                'ext': 'mp4',
+                'filesize': try_get(info, lambda x: x['size'], int),
+                'width': try_get(info, lambda x: x['videoInfo']['resolution']['width'], int),
+                'height': try_get(info, lambda x: x['videoInfo']['resolution']['height'], int),
+            }],
+        }

From 14bf10c6488b74cc31ef6ae0e1b239a1505ba47a Mon Sep 17 00:00:00 2001
From: schn0sch
Date: Sat, 24 Apr 2021 14:03:17 +0200
Subject: [PATCH 2/2] [keep2share] share access token across calls to
 _real_extract

---
 youtube_dl/extractor/keep2share.py | 39 ++++++++++++++++++------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/keep2share.py b/youtube_dl/extractor/keep2share.py
index 5adc5aa66..7191420bd 100644
--- a/youtube_dl/extractor/keep2share.py
+++ b/youtube_dl/extractor/keep2share.py
@@ -30,13 +30,13 @@ class Keep2ShareIE(InfoExtractor):
         },
     }]
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
+    def _get_app_secret(self, video_id):
+        """ retrieve REACT_APP_API_CLIENT_SECRET """
 
-        # TODO share app_secret across calls to _real_extract
-        app_secret = None
+        if getattr(self, '_app_secret', None) is not None:
+            return self._app_secret
 
-        # retrieve app_secret
+        url = 'https://k2s.cc/file/' + video_id
         webpage = self._download_webpage(url, video_id)
         scripts = re.finditer(r'<script\s[^<]*?src="(?P<src>/static/[^"]*)"', webpage)
         for mobj in scripts:
@@ -47,26 +47,35 @@ class Keep2ShareIE(InfoExtractor):
                     'string': r'"(?:[^"]|\\")*"' + '|' + r"'(?:[^']|\\')*'",
                 }, script, 'app secret', group='secret', default=None)
             if secret is not None:
-                app_secret = self._parse_json(secret, video_id, transform_source=js_to_json)
-                break
-        else:
-            raise RegexNotFoundError('Unable to extract app secret')
+                self._app_secret = self._parse_json(secret, video_id,
+                                                    transform_source=js_to_json)
+                return self._app_secret
+
+        raise RegexNotFoundError('Unable to extract app secret')
+
+    def _get_access_token(self, video_id):
+        """ retrieve access_token """
+
+        if getattr(self, '_access_token', None) is not None:
+            return self._access_token
 
-        # retrieve access_token
-        # TODO share access_token across calls fo _real_extract
         data = {
             'grant_type': 'client_credentials',
             'client_id': 'k2s_web_app',
-            'client_secret': app_secret,
+            'client_secret': self._get_app_secret(video_id),
         }
         data = json.dumps(data, separators=(',', ':')).encode('utf-8')
         headers = {'Content-Type': 'application/json'}
         tokens = self._download_json('https://api.k2s.cc/v1/auth/token',
                                      video_id, data=data, headers=headers)
-        access_token = tokens['access_token']
+        self._access_token = tokens['access_token']
 
-        # retrieve API page
-        headers = {'Cookie': 'accessToken=' + access_token}
+        return self._access_token
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        headers = {'Cookie': 'accessToken=' + self._get_access_token(video_id)}
         info = self._download_json('https://api.k2s.cc/v1/files/' + video_id,
                                    video_id, headers=headers)
 