from __future__ import unicode_literals import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, compat_kwargs, compat_str, compat_urllib_request, compat_urlparse, ) from ..utils import ( determine_ext, extract_attributes, ExtractorError, float_or_none, int_or_none, js_to_json, sanitized_Request, unescapeHTML, urlencode_postdata, ) class TorrinsIE(InfoExtractor): IE_NAME = 'torrins' _VALID_URL = r'''(?x) https?:// www\.torrins\.com/(?:guitar|piano|bass)-lessons/(?:song-lessons|style-genre)/[^/]+/(?P[^/]+)/(?P[^(\.)]+)\.html ''' _LOGIN_URL = 'https://www.torrins.com/services/user/sign-in' _ORIGIN_URL = 'https://www.torrins.com' _NETRC_MACHINE = 'torrins' _TESTS = [{ 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', 'username': 'anirudha1987@gmail.com', 'password': 'hello123', 'info_dict': { 'id': 'another-brick-in-the-wall', 'lesson_id': 'song-demo', 'ext': 'mp4', 'title': 'Another Brick in the Wall Guitar - Song Demo', 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', 'duration': 579.29, }, 'skip': 'Requires torrins premium account credentials', }, { # new URL schema 'url': 'https://www.torrins.com/guitar-lessons/song-lessons/english-songs/another-brick-in-the-wall/song-demo.html', 'only_matching': True, }, { # no url in outputs format entry 'url': 'https://www.torrins.com/guitar-lessons/style-genre/fingerstyle/tone-production.html', 'only_matching': True, }] def _handle_error(self, response): if not isinstance(response, dict): return error = response.get('error') if error: error_str = 'Torrins returned error #%s: %s' % (error.get('code'), error.get('message')) error_data = error.get('data') if error_data: error_str += ' - %s' % error_data.get('formErrors') raise ExtractorError(error_str, expected=True) def _download_webpage(self, *args, **kwargs): kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' return super(TorrinsIE, self)._download_webpage( *args, **compat_kwargs(kwargs)) def _real_initialize(self): self._login() def _login(self): (username, password) = self._get_login_info() if username is None: return login_popup = self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') def is_logged(reason): webpage = self._download_webpage(self._ORIGIN_URL, None, reason) return any(re.search(p, webpage) for p in ( r'id=["\'](?:bt-logout)', r'>Logout<')) # already logged in if is_logged('Checking if already logged in'): return #login_form = self._form_hidden_inputs('login-form', login_popup) login_form = { 'email': username, 'password': password, } response = self._download_webpage( self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), headers={ 'Referer': self._ORIGIN_URL, 'Origin': self._ORIGIN_URL, }) if not is_logged('Post login check'): error = self._html_search_regex( r'(?s)]+class="form-errors[^"]*">(.+?)', response, 'error message', default=None) if error: raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') def _real_extract(self, url): course_id = self._match_id(url) webpage = self._download_webpage(url, course_id) title = self._og_search_title(webpage) video_json = self._html_search_regex(r"