From c3a3028f9f7fa79ee7357b65252ff2c9a062bdc8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 13 Nov 2013 11:06:53 +0100 Subject: [PATCH] [tvp] Minor improvements (#1730) --- youtube_dl/extractor/tvp.py | 47 +++++++++++-------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index 63fb57bbe..32e0f5037 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -1,23 +1,17 @@ -# encoding: utf-8 -import re import json +import re from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - RegexNotFoundError, -) + class TvpIE(InfoExtractor): IE_NAME = u'tvp.pl' _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P\d+)/(?P\d+)' - _INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' - _TEST = { u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238', - u'file': u'31.10.2013-12878238.wmv', + u'md5': u'148408967a6a468953c0a75cbdaf0d7a', + u'file': u'12878238.wmv', u'info_dict': { u'title': u'31.10.2013', u'description': u'31.10.2013', @@ -27,34 +21,21 @@ class TvpIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id, "Downloading video webpage") - json_params = self._download_webpage(self._INFO_URL % video_id, video_id, "Downloading video metadata") - - try: - params = json.loads(json_params) - except: - raise ExtractorError(u'Invalid JSON') + webpage = self._download_webpage(url, video_id) + json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id + json_params = self._download_webpage( + json_url, video_id, u"Downloading video metadata") + params = json.loads(json_params) self.report_extraction(video_id) - try: - video_url = params['video_url'] - except KeyError: - raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1]) + video_url = params['video_url'] - try: - title = self._og_search_title(webpage) - except RegexNotFoundError: - title = video_id - info = { + title = self._og_search_title(webpage, fatal=True) + return { 'id': video_id, 'title': title, 'ext': 'wmv', 'url': video_url, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), } - try: - info['description'] = self._og_search_description(webpage) - info['thumbnail'] = self._og_search_thumbnail(webpage) - except RegexNotFoundError: - pass - - return info