mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-09-27 20:08:36 +09:00
Compare commits
18 Commits
2014.09.04
...
2014.09.06
Author | SHA1 | Date | |
---|---|---|---|
![]() |
e154762c74 | ||
![]() |
ba92ab3d05 | ||
![]() |
a2f0cdc074 | ||
![]() |
70a1ecd2c1 | ||
![]() |
88a23aef5a | ||
![]() |
140d8d77b3 | ||
![]() |
665cd96929 | ||
![]() |
4d067a58ca | ||
![]() |
1c1cff6a52 | ||
![]() |
f063a04f07 | ||
![]() |
af8812bb9b | ||
![]() |
f2d9e3a370 | ||
![]() |
16e6f396b4 | ||
![]() |
c6ec6b2e8b | ||
![]() |
7bbc6428b6 | ||
![]() |
c1a3c9ddb2 | ||
![]() |
feec0f56f5 | ||
![]() |
8029857d27 |
@@ -74,6 +74,7 @@ __authors__ = (
|
||||
'Keith Beckman',
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -193,7 +193,8 @@ class HttpFD(FileDownloader):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
if tmpfilename != u'-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
|
@@ -67,6 +67,7 @@ from .dailymotion import (
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
|
@@ -78,7 +78,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(
|
||||
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
|
||||
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
|
||||
webpage, 'json vp url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
|
74
youtube_dl/extractor/dbtv.py
Normal file
74
youtube_dl/extractor/dbtv.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
    """Extractor for dbtv.no video pages.

    Accepted URLs have the form ``http://dbtv.no/<numeric id>#<display id>``.
    The video description is requested from the ``api.dbtv.no`` discovery
    endpoint and turned into an info dictionary.
    """

    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)'
    _TEST = {
        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
        'info_dict': {
            'id': '33100',
            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
            'ext': 'mp4',
            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
            'thumbnail': 're:https?://.*\.jpg$',
            'timestamp': 1404039863.438,
            'upload_date': '20140629',
            'duration': 69.544,
            'view_count': int,
            'categories': list,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video addressed by *url*.

        The numeric id and the display id are both read from the URL; the
        first entry of the ``playlist`` array in the API response supplies
        the metadata and the renditions.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id, display_id = url_match.group('id', 'display_id')

        api_url = 'http://api.dbtv.no/discovery/%s' % video_id
        data = self._download_json(api_url, display_id)

        video = data['playlist'][0]

        # One format per rendition; renditions without a URL are skipped.
        formats = []
        for rendition in video['renditions']:
            if 'URL' not in rendition:
                continue
            formats.append({
                'url': rendition['URL'],
                'vcodec': rendition.get('container'),
                'width': int_or_none(rendition.get('width')),
                'height': int_or_none(rendition.get('height')),
                # NOTE(review): 1000 is presumably a divisor scaling the
                # reported rate -- confirm against float_or_none.
                'vbr': float_or_none(rendition.get('rate'), 1000),
                'filesize': int_or_none(rendition.get('size')),
            })

        # No usable rendition: fall back to the URLs found directly on the
        # video object.
        if not formats:
            fallback_keys = (('URL', 'mp4'), ('HLSURL', 'hls'))
            formats.extend(
                {
                    'url': video[key],
                    'format_id': fallback_id,
                }
                for key, fallback_id in fallback_keys
                if key in video
            )

        self._sort_formats(formats)

        # 'id', 'title' and 'desc' are required keys of the API response;
        # everything else is optional.
        info = {
            'id': video['id'],
            'display_id': display_id,
            'title': video['title'],
            'description': clean_html(video['desc']),
        }
        info['thumbnail'] = video.get('splash') or video.get('thumb')
        # NOTE(review): the test values suggest publishedAt/length arrive in
        # milliseconds and are scaled to seconds here -- confirm.
        info['timestamp'] = float_or_none(video.get('publishedAt'), 1000)
        info['duration'] = float_or_none(video.get('length'), 1000)
        info['view_count'] = int_or_none(video.get('views'))
        info['categories'] = video.get('tags')
        info['formats'] = formats
        return info
|
@@ -3,18 +3,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||
_TEST = {
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap.nba',
|
||||
'ext': 'mp4',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,13 +32,18 @@ class NBAIE(InfoExtractor):
|
||||
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
|
||||
shortened_video_id = video_id.rpartition('/')[2]
|
||||
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
|
||||
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
u'playlist': [
|
||||
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
u'file': u'57758.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'file': u'57758-slides.flv',
|
||||
u'info_dict': {
|
||||
u'title': u'Learning Topic Models --- Going beyond SVD',
|
||||
'info_dict': {
|
||||
'id': '57758-slides',
|
||||
'ext': 'flv',
|
||||
'title': 'Learning Topic Models --- Going beyond SVD',
|
||||
},
|
||||
},
|
||||
],
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
talk_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, talk_id)
|
||||
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage,
|
||||
u'rtmp url')
|
||||
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, u'presenter play path')
|
||||
rtmp_url = self._search_regex(
|
||||
r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
|
||||
play_path = self._search_regex(
|
||||
r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
|
||||
webpage, 'presenter play path')
|
||||
title = clean_html(get_element_by_attribute('class', 'title', webpage))
|
||||
video_info = {
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
}
|
||||
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
|
||||
if m_slides is None:
|
||||
return video_info
|
||||
else:
|
||||
return [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': talk_id,
|
||||
'title': title,
|
||||
'entries': [
|
||||
video_info,
|
||||
# The slides video
|
||||
{
|
||||
'id': talk_id + '-slides',
|
||||
'title': title,
|
||||
'url': rtmp_url,
|
||||
'play_path': m_slides.group(1),
|
||||
'ext': 'flv',
|
||||
},
|
||||
],
|
||||
}
|
||||
|
@@ -47,7 +47,7 @@ class ToypicsUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'Mikey',
|
||||
},
|
||||
'playlist_mincount': 9917,
|
||||
'playlist_mincount': 19,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,32 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class UnistraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(\d+)'
|
||||
_VALID_URL = r'http://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
|
||||
u'file': u'154.mp4',
|
||||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/video.php?id_video=154',
|
||||
'md5': '736f605cfdc96724d55bb543ab3ced24',
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'ext': 'mp4',
|
||||
'title': 'M!ss Yella',
|
||||
'description': 'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://utv.unistra.fr/index.php?id_video=437',
|
||||
'md5': '1ddddd6cccaae76f622ce29b8779636d',
|
||||
'info_dict': {
|
||||
'id': '437',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prix Louise Weiss 2014',
|
||||
'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, id)
|
||||
file = re.search(r'file: "(.*?)",', webpage).group(1)
|
||||
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {'id': id,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
|
||||
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
|
||||
}
|
||||
files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
|
||||
|
||||
quality = qualities(['SD', 'HD'])
|
||||
formats = []
|
||||
for file_path in files:
|
||||
format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
|
||||
formats.append({
|
||||
'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id)
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>UTV - (.*?)</', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<meta name="Description" content="(.*?)"', webpage, 'description', flags=re.DOTALL)
|
||||
thumbnail = self._search_regex(
|
||||
r'image: "(.*?)"', webpage, 'thumbnail')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
}
|
||||
|
@@ -13,6 +13,9 @@ class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
'title': 'Sinkhole of bureaucracy',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'c3f4b4922ffa259243f68e928db2db8c',
|
||||
'info_dict': {
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.04.2'
|
||||
__version__ = '2014.09.06'
|
||||
|
Reference in New Issue
Block a user