added "bild.de" as extractor

This commit is contained in:
scheiba 2014-10-18 22:15:47 +02:00
parent 16efb3695f
commit ce519b747e
2 changed files with 47 additions and 0 deletions

View File

@ -26,6 +26,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE from .bbccouk import BBCCoUkIE
from .beeg import BeegIE from .beeg import BeegIE
from .behindkink import BehindKinkIE from .behindkink import BehindKinkIE
from .bild import BildIE
from .bilibili import BiliBiliIE from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE from .bliptv import BlipTVIE, BlipTVUserIE

View File

@ -0,0 +1,46 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class BildIE(InfoExtractor):
    """Extractor for video clips on bild.de.

    The clip metadata is read from an XML document whose URL is derived
    from the page URL by replacing the trailing ``.bild.html`` with
    ``,view=xml.bild.xml``. The attributes of that document's root element
    supply the media URL and the descriptive fields.
    """

    IE_NAME = 'bild'
    # Accept both http and https; the numeric id is the trailing digit run
    # before the optional ",auto=true" flag and the ".bild.html" suffix.
    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
    _TEST = {
        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
        'info_dict': {
            'id': '38184146',
            'title': 'BILD hat sie getestet',
            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
            'duration': 196,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip at *url*.

        Raises ``KeyError`` if the XML root lacks the mandatory ``src`` or
        ``ueberschrift`` attribute. ``text``, ``img`` and ``duration`` are
        treated as optional and reported as ``None`` when absent.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Swap the HTML suffix for the XML view of the same resource.
        xml_url = url.split('.bild.html')[0] + ',view=xml.bild.xml'
        doc = self._download_xml(xml_url, video_id)
        attrs = doc.attrib

        # The value is divided by 1000 to get seconds (presumably it is in
        # milliseconds -- confirm against the feed). Floor division keeps
        # the result an int on both Python 2 and Python 3.
        duration_raw = attrs.get('duration')
        duration = int(duration_raw) // 1000 if duration_raw else None

        return {
            'id': video_id,
            'title': attrs['ueberschrift'],
            'description': attrs.get('text'),
            'url': attrs['src'],
            'thumbnail': attrs.get('img'),
            'duration': duration,
        }