[thisav] Simplify and use unicode literals

This commit is contained in:
Sergey M. 2014-02-05 19:13:06 +07:00
parent 8c82077619
commit fa7df757a7

View File

@@ -1,22 +1,23 @@
#coding: utf-8 #coding: utf-8
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import determine_ext
determine_ext,
)
class ThisAVIE(InfoExtractor): class ThisAVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
_TEST = { _TEST = {
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html", 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
u"file": u"47734.flv", 'md5': '0480f1ef3932d901f0e0e719f188f19b',
u"md5": u"0480f1ef3932d901f0e0e719f188f19b", 'info_dict': {
u"info_dict": { 'id': '47734',
u"title": u"高樹マリア - Just fit", 'ext': 'flv',
u"uploader": u"dj7970", 'title': '高樹マリア - Just fit',
u"uploader_id": u"dj7970" 'uploader': 'dj7970',
'uploader_id': 'dj7970'
} }
} }
@@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title') title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
video_url = self._html_search_regex( video_url = self._html_search_regex(
r"addVariable\('file','([^']+)'\);", webpage, u'video url') r"addVariable\('file','([^']+)'\);", webpage, 'video url')
uploader = self._html_search_regex( uploader = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, u'uploader name', fatal=False) webpage, 'uploader name', fatal=False)
uploader_id = self._html_search_regex( uploader_id = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, u'uploader id', fatal=False) webpage, 'uploader id', fatal=False)
ext = determine_ext(video_url) ext = determine_ext(video_url)
return { return {
'_type': 'video',
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'uploader': uploader, 'uploader': uploader,