From bf78e7d8a8de218f327b08657ff007170ee87743 Mon Sep 17 00:00:00 2001 From: Aniruddh Joshi Date: Sun, 25 Mar 2018 07:20:17 +0530 Subject: [PATCH] Added extractor for torrins.com --- youtube_dl/extractor/torrins.py | 160 ++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 youtube_dl/extractor/torrins.py diff --git a/youtube_dl/extractor/torrins.py b/youtube_dl/extractor/torrins.py new file mode 100644 index 000000000..a8b247c89 --- /dev/null +++ b/youtube_dl/extractor/torrins.py @@ -0,0 +1,160 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_kwargs, + compat_str, + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( + determine_ext, + extract_attributes, + ExtractorError, + float_or_none, + int_or_none, + js_to_json, + sanitized_Request, + unescapeHTML, + urlencode_postdata, +) + + +class TorrinsIE(InfoExtractor): + IE_NAME = 'torrins' + _VALID_URL = r'''(?x) + https?:// + www\.torrins\.com/(?:guitar|piano|bass)-lessons/(?:song-lessons|style-genre)/[^/]+/(?P[^/]+)/(?P[^(\.)]+)\.html + ''' + _LOGIN_URL = 'https://www.torrins.com/services/user/sign-in' + _ORIGIN_URL = 'https://www.torrins.com' + _NETRC_MACHINE = 'torrins' + + _TESTS = [{ + 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', + 'username': 'anirudha1987@gmail.com', + 'password': 'hello123', + 'info_dict': { + 'id': 'another-brick-in-the-wall', + 'lesson_id': 'song-demo', + 'ext': 'mp4', + 'title': 'Another Brick in the Wall Guitar - Song Demo', + 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', + 'duration': 579.29, + }, + 'skip': 'Requires torrins premium account credentials', + }, { + # new URL schema + 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', + 'only_matching': True, + }, { + # no url in outputs format entry + 'url': 'https://www.torrins.com/guitar-lessons/style-genre/fingerstyle/tone-production.html', + 'only_matching': True, + }] + + def _handle_error(self, response): + if not isinstance(response, dict): + return + error = response.get('error') + if error: + error_str = 'Torrins returned error #%s: %s' % (error.get('code'), error.get('message')) + error_data = error.get('data') + if error_data: + error_str += ' - %s' % error_data.get('formErrors') + raise ExtractorError(error_str, expected=True) + + def _download_webpage(self, *args, **kwargs): + kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + return super(TorrinsIE, self)._download_webpage( + *args, **compat_kwargs(kwargs)) + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_popup = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(reason): + webpage = self._download_webpage(self._ORIGIN_URL, None, reason) + + return any(re.search(p, webpage) for p in ( + r'id=["\'](?:bt-logout)', + r'>Logout<')) + + # already logged in + if is_logged('Checking if already logged in'): + return + + #login_form = self._form_hidden_inputs('login-form', login_popup) + + login_form = { + 'email': username, + 'password': password, + } + + response = self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={ + 'Referer': self._ORIGIN_URL, + 'Origin': self._ORIGIN_URL, + }) + + if not is_logged('Post login check'): + error = self._html_search_regex( + r'(?s)]+class="form-errors[^"]*">(.+?)', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + title = self._og_search_title(webpage) + + video_json = self._html_search_regex(r"