mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-09-27 11:58:37 +09:00
Compare commits
12 Commits
2013.11.06
...
2013.11.07
Author | SHA1 | Date | |
---|---|---|---|
![]() |
6161d17579 | ||
![]() |
4ac5306ae7 | ||
![]() |
b1a80ec1a9 | ||
![]() |
672fe94dcb | ||
![]() |
51040b72ed | ||
![]() |
4f045eef8f | ||
![]() |
5d7b253ea0 | ||
![]() |
b0759f0c19 | ||
![]() |
065472936a | ||
![]() |
fc4a0c2aec | ||
![]() |
eeb165e674 | ||
![]() |
9ee2b5f6f2 |
@@ -31,6 +31,7 @@ from youtube_dl.utils import (
|
||||
ExtractorError,
|
||||
UnavailableVideoError,
|
||||
)
|
||||
from youtube_dl.extractor import get_info_extractor
|
||||
|
||||
RETRIES = 3
|
||||
|
||||
@@ -63,9 +64,10 @@ def generator(test_case):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||
def print_skipping(reason):
|
||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||
if not ie._WORKING:
|
||||
if not ie.working():
|
||||
print_skipping('IE marked as not _WORKING')
|
||||
return
|
||||
if 'playlist' not in test_case:
|
||||
@@ -77,6 +79,10 @@ def generator(test_case):
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
for other_ie in other_ies:
|
||||
if not other_ie.working():
|
||||
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
return
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
|
||||
|
@@ -234,8 +234,14 @@ class FileDownloader(object):
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
eta_str = self.format_eta(eta)
|
||||
percent_str = self.format_percent(percent)
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||
@@ -557,12 +563,11 @@ class FileDownloader(object):
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
self.report_progress('Unknown %', data_len_str, speed, 'Unknown ETA')
|
||||
eta = None
|
||||
eta = percent = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
|
@@ -38,6 +38,7 @@ from .defense import DefenseGouvFrIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .escapist import EscapistIE
|
||||
from .exfm import ExfmIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
@@ -115,6 +116,7 @@ from .slideshare import SlideshareIE
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||
from .southparkstudios import SouthParkStudiosIE
|
||||
from .space import SpaceIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
find_xpath_attr,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -41,6 +42,17 @@ class BrightcoveIE(InfoExtractor):
|
||||
u'uploader': u'Oracle',
|
||||
},
|
||||
},
|
||||
{
|
||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||
u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
|
||||
u'info_dict': {
|
||||
u'id': u'2750934548001',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'This Bracelet Acts as a Personal Thermostat',
|
||||
u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||
u'uploader': u'Mashable',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@@ -71,7 +83,24 @@ class BrightcoveIE(InfoExtractor):
|
||||
data = compat_urllib_parse.urlencode(params)
|
||||
return cls._FEDERATED_URL_TEMPLATE % data
|
||||
|
||||
@classmethod
def _extract_brightcove_url(cls, webpage):
    """Look for a BrightcoveExperience <object> element in the webpage.

    Returns the value of cls._build_brighcove_url() called with the full
    matched <object>...</object> markup, or None if the webpage contains
    no such element.
    """
    # The class attribute may be quoted with ' or "; \1 requires the same
    # quote character to close it.  DOTALL lets the element span lines.
    experience_re = r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>'
    found = re.search(experience_re, webpage, re.DOTALL)
    if found is None:
        return None
    return cls._build_brighcove_url(found.group())
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Change the 'videoId' and others field to '@videoPlayer'
|
||||
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
|
||||
# Change bckey (used by bcove.me urls) to playerKey
|
||||
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query_str = mobj.group('query')
|
||||
query = compat_urlparse.parse_qs(query_str)
|
||||
@@ -109,7 +138,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
info = {
|
||||
'id': video_info['id'],
|
||||
'id': compat_str(video_info['id']),
|
||||
'title': video_info['displayName'],
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
@@ -119,10 +148,11 @@ class BrightcoveIE(InfoExtractor):
|
||||
renditions = video_info.get('renditions')
|
||||
if renditions:
|
||||
renditions = sorted(renditions, key=lambda r: r['size'])
|
||||
best_format = renditions[-1]
|
||||
info.update({
|
||||
'url': best_format['defaultURL'],
|
||||
})
|
||||
info['formats'] = [{
|
||||
'url': rend['defaultURL'],
|
||||
'height': rend.get('frameHeight'),
|
||||
'width': rend.get('frameWidth'),
|
||||
} for rend in renditions]
|
||||
elif video_info.get('FLVFullLengthURL') is not None:
|
||||
info.update({
|
||||
'url': video_info['FLVFullLengthURL'],
|
||||
|
37
youtube_dl/extractor/eitb.py
Normal file
37
youtube_dl/extractor/eitb.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
    """Information extractor for eitb.tv video pages.

    The page is expected to embed a Brightcove player.  The extractor
    builds the Brightcove url from the page, appends the chapter id taken
    from the page url, and returns a url result keyed to BrightcoveIE.
    """
    IE_NAME = u'eitb.tv'
    _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'

    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
        u'md5': u'edf4436247185adee3ea18ce64c47998',
        u'info_dict': {
            u'id': u'2743577154001',
            u'ext': u'mp4',
            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
            # All videos from eitb have this description in the brightcove info
            u'description': u'.',
            u'uploader': u'Euskal Telebista',
        },
    }

    def _real_extract(self, url):
        """Resolve an eitb.tv page url to its Brightcove video url.

        Raises ExtractorError if no Brightcove url can be extracted from
        the downloaded webpage.
        """
        mobj = re.match(self._VALID_URL, url)
        chapter_id = mobj.group('chapter_id')
        webpage = self._download_webpage(url, chapter_id)
        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
        if bc_url is None:
            raise ExtractorError(u'Could not extract the Brightcove url')
        # The BrightcoveExperience object doesn't contain the video id, we set
        # it manually.  The field index is explicit ({0}) because
        # auto-numbered '{}' fields are rejected by str.format on Python 2.6.
        bc_url += '&%40videoPlayer={0}'.format(chapter_id)
        return self.url_result(bc_url, BrightcoveIE.ie_key())
|
@@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# embedded vimeo video
|
||||
{
|
||||
u'add_ie': ['Vimeo'],
|
||||
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
||||
u'file': u'22444065.mp4',
|
||||
u'md5': u'2903896e23df39722c33f015af0666e2',
|
||||
@@ -44,6 +45,7 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# bandcamp page with custom domain
|
||||
{
|
||||
u'add_ie': ['Bandcamp'],
|
||||
u'url': u'http://bronyrock.com/track/the-pony-mash',
|
||||
u'file': u'3235767654.mp3',
|
||||
u'info_dict': {
|
||||
@@ -52,6 +54,21 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
u'skip': u'There is a limit of 200 free downloads / month for the test song',
|
||||
},
|
||||
# embedded brightcove video
|
||||
{
|
||||
u'add_ie': ['Brightcove'],
|
||||
u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
|
||||
u'info_dict': {
|
||||
u'id': u'2365799484001',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Bubble Simulation',
|
||||
u'description': u'A visualization from a new computer model of foam behavior.',
|
||||
u'uploader': u'Scientific American',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -144,10 +161,9 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
# Look for BrightCove:
|
||||
m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
||||
if m_brightcove is not None:
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is not None:
|
||||
self.to_screen(u'Brightcove video detected.')
|
||||
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
|
||||
return self.url_result(bc_url, 'Brightcove')
|
||||
|
||||
# Look for embedded Vimeo player
|
||||
|
@@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
u'add_ie': ['Vevo'],
|
||||
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||
u'file': u'USCJY1331283.mp4',
|
||||
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
||||
|
@@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Ooyala'],
|
||||
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
|
||||
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
|
||||
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
|
||||
|
35
youtube_dl/extractor/space.py
Normal file
35
youtube_dl/extractor/space.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
from ..utils import RegexNotFoundError, ExtractorError
|
||||
|
||||
|
||||
class SpaceIE(InfoExtractor):
    """Information extractor for space.com video pages.

    The page's video is served through Brightcove; the extractor finds the
    Brightcove url and returns a url result keyed to BrightcoveIE.
    """
    # The dot before "html" is escaped so that only a literal '.' matches.
    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
        u'info_dict': {
            u'id': u'2780937028001',
            u'ext': u'mp4',
            u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
            u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
            u'uploader': u'TechMedia Networks',
        },
    }

    def _real_extract(self, url):
        """Resolve a space.com page url to its Brightcove video url.

        Raises ExtractorError (expected=True) when neither the og:video
        lookup nor the BrightcoveExperience object yields a url.
        """
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        try:
            # Some videos require the playerKey field, which isn't defined in
            # the BrightcoveExperience object
            brightcove_url = self._og_search_video_url(webpage)
        except RegexNotFoundError:
            # Other videos work fine with the info from the object
            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
        if brightcove_url is None:
            raise ExtractorError(u'The webpage does not contain a video', expected=True)
        return self.url_result(brightcove_url, BrightcoveIE.ie_key())
|
@@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Sina'],
|
||||
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
|
||||
u'file': u'98322879.flv',
|
||||
u'info_dict': {
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class XNXXIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
|
||||
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
||||
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
||||
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
|
||||
|
@@ -1572,7 +1572,6 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
class YoutubeChannelIE(InfoExtractor):
|
||||
IE_DESC = u'YouTube.com channels'
|
||||
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
|
||||
_TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
|
||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||
_MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||
IE_NAME = u'youtube:channel'
|
||||
@@ -1593,30 +1592,20 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
# Download channel page
|
||||
channel_id = mobj.group(1)
|
||||
video_ids = []
|
||||
pagenum = 1
|
||||
|
||||
url = self._TEMPLATE_URL % (channel_id, pagenum)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
# Download all channel pages using the json-based channel_ajax query
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
|
||||
# Extract video identifiers
|
||||
ids_in_page = self.extract_videos_from_page(page)
|
||||
video_ids.extend(ids_in_page)
|
||||
page = json.loads(page)
|
||||
|
||||
# Download any subsequent channel pages using the json-based channel_ajax query
|
||||
if self._MORE_PAGES_INDICATOR in page:
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
video_ids.extend(ids_in_page)
|
||||
|
||||
page = json.loads(page)
|
||||
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
video_ids.extend(ids_in_page)
|
||||
|
||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||
break
|
||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||
break
|
||||
|
||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.11.06'
|
||||
__version__ = '2013.11.07'
|
||||
|
Reference in New Issue
Block a user