diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index c2cec9845..75370c5ba 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -6,14 +6,19 @@ import re from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( + base_url, ExtractorError, int_or_none, urlencode_postdata, + urljoin, + xpath_element, + xpath_text, + xpath_with_ns, ) class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P.+?)_(?P[0-9a-f]{24})' + _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+(?:/[^/]+)?/(?P.+?)_(?P[0-9a-f]{24})' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { @@ -39,7 +44,6 @@ class AtresPlayerIE(InfoExtractor): 'only_matching': True, }, ] - _API_BASE = 'https://api.atresplayer.com/' def _real_initialize(self): self._login() @@ -49,6 +53,8 @@ class AtresPlayerIE(InfoExtractor): error = self._parse_json(e.cause.read(), None) if error.get('error') == 'required_registered': self.raise_login_required() + if error.get('error') == 'invalid_request': + raise ExtractorError('Authentication failed', expected=True) raise ExtractorError(error['error_description'], expected=True) raise @@ -57,35 +63,65 @@ class AtresPlayerIE(InfoExtractor): if username is None: return - self._request_webpage( - self._API_BASE + 'login', None, 'Downloading login page') - try: - target_url = self._download_json( - 'https://account.atresmedia.com/api/login', None, + self._download_webpage( + 'https://account.atresplayer.com/auth/v1/login', None, 'Logging in', headers={ 'Content-Type': 'application/x-www-form-urlencoded' }, data=urlencode_postdata({ 'username': username, 'password': password, - }))['targetUrl'] + })) except ExtractorError as e: self._handle_error(e, 400) - self._request_webpage(target_url, None, 'Following Target URL') + def _get_mpd_subtitles(self, mpd_xml, mpd_url): + subs = {} + + def _add_ns(name): + return xpath_with_ns(name, { + 'mpd': 'urn:mpeg:dash:schema:mpd:2011' + }) + + def _is_mime_type(node, mime_type): + return node.attrib.get('mimeType') == mime_type + + text_nodes = mpd_xml.findall( + _add_ns('mpd:Period/mpd:AdaptationSet[@contentType="text"]')) + for adaptation_set in text_nodes: + lang = adaptation_set.attrib['lang'] + representation = xpath_element(adaptation_set, _add_ns('mpd:Representation')) + subs_url = xpath_text(representation, _add_ns('mpd:BaseURL')) + if subs_url and (_is_mime_type(adaptation_set, 'text/vtt') or _is_mime_type(representation, 'text/vtt')): + subs.update({lang: [{ + 'ext': 'vtt', + 'url': urljoin(mpd_url, subs_url), + }]}) + return subs def _real_extract(self, url): display_id, video_id = re.match(self._VALID_URL, url).groups() + page = self._download_webpage(url, video_id, 'Downloading video page') + preloaded_state_regex = r'window\.__PRELOADED_STATE__\s*=\s*(\{(.*?)\});' + preloaded_state_text = self._html_search_regex(preloaded_state_regex, page, 'preloaded state') + preloaded_state = self._parse_json(preloaded_state_text, video_id) + link_info = next(iter(preloaded_state['links'].values())) + try: - episode = self._download_json( - self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) + metadata = self._download_json(link_info['href'], video_id) + except ExtractorError as e: + self._handle_error(e, 403) + + try: + episode = self._download_json(metadata['urlVideo'], video_id) except ExtractorError as e: self._handle_error(e, 403) title = episode['titulo'] formats = [] + subtitles = {} for source in episode.get('sources', []): src = source.get('src') if not src: @@ -96,8 +132,14 @@ class AtresPlayerIE(InfoExtractor): src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif src_type == 'application/dash+xml': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id='dash', fatal=False)) + mpd = self._download_xml_handle( + src, video_id, note='Downloading MPD manifest', fatal=False) + if mpd: + mpd_doc, mpd_handle = mpd + mpd_base_url = base_url(mpd_handle.geturl()) + subtitles = self._merge_subtitles(subtitles, self._get_mpd_subtitles(mpd_doc, mpd_base_url)) + formats.extend(self._parse_mpd_formats( + mpd_doc, mpd_id='dash', mpd_base_url=mpd_base_url, mpd_url=src)) self._sort_formats(formats) heartbeat = episode.get('heartbeat') or {} @@ -115,4 +157,5 @@ class AtresPlayerIE(InfoExtractor): 'channel': get_meta('channel'), 'season': get_meta('season'), 'episode_number': int_or_none(get_meta('episodeNumber')), + 'subtitles': subtitles, }