From db95dc13a1a8e5951d09d3fd555f7a4be590821f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 10 Mar 2014 20:55:47 +0100 Subject: [PATCH] [playvid] Simplify (#2539) --- youtube_dl/YoutubeDL.py | 5 ++- youtube_dl/extractor/playvid.py | 65 +++++++++++++++------------------ 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1db27315c..63ed08abf 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -701,8 +701,11 @@ class YoutubeDL(object): else: formats = info_dict['formats'] + if not formats: + raise ExtractorError('No video formats found!') + # We check that all the formats have the format and format_id fields - for (i, format) in enumerate(formats): + for i, format in enumerate(formats): if format.get('format_id') is None: format['format_id'] = compat_str(i) if format.get('format') is None: diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index a74f8f675..b1322f13f 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -5,17 +5,17 @@ import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, - determine_ext, ) -class PlayvidIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)' +class PlayvidIE(InfoExtractor): + _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' _TEST = { 'url': 'http://www.playvid.com/watch/agbDDi7WZTV', - 'file': 'agbDDi7WZTV.mp4', 'md5': '44930f8afa616efdf9482daf4fe53e1e', 'info_dict': { + 'id': 'agbDDi7WZTV', + 'ext': 'mp4', 'title': 'Michelle Lewin in Miami Beach', 'duration': 240, 'age_limit': 18, @@ -28,46 +28,41 @@ class PlayvidIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - video_title = None duration = None video_thumbnail = None formats = [] # most of the information is stored in the flashvars - flashvars_match = 
re.search(r'flashvars="(.+?)"',webpage) + flashvars = self._html_search_regex( + r'flashvars="(.+?)"', webpage, 'flashvars') - if flashvars_match: - infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&') - for info in infos: - videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$',info) - if videovars_match: - key = videovars_match.group(1) - val = videovars_match.group(2) + infos = compat_urllib_parse.unquote(flashvars).split(r'&') + for info in infos: + videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) + if videovars_match: + key = videovars_match.group(1) + val = videovars_match.group(2) - if key == 'title': - video_title = val.replace('+',' ') - if key == 'duration': - try: - duration = val - except ValueError: - duration = None - if key == 'big_thumb': - video_thumbnail = val + if key == 'title': + video_title = compat_urllib_parse.unquote_plus(val) + if key == 'duration': + try: + duration = int(val) + except ValueError: + pass + if key == 'big_thumb': + video_thumbnail = val - videourl_match = re.match(r'^video_urls\]\[(?P<resolution>\d+)p',key) - if videourl_match: - resolution = int(videourl_match.group('resolution')) - formats.append({ - 'resolution': resolution, # 360, 480, ... - 'ext': determine_ext(val), - 'url': val - }) - - # fatal error, if no download url is found - if len(formats) == 0: - raise ExtractorError,'no video url found' + videourl_match = re.match( + r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) + if videourl_match: + height = int(videourl_match.group('resolution')) + formats.append({ + 'height': height, + 'url': val, + }) + self._sort_formats(formats) # Extract title - should be in the flashvars; if not, look elsewhere if video_title is None: