release 2013.11.07

Fix the report progress when file_size is unknown (#1731 )
The report_progress function now accepts None for eta and percent, in which case it displays 'Unknown ETA' or 'Unknown %' respectively. Otherwise the values must be numbers.
2025-09-27 11:58:37 +09:00 · 2013-11-07 11:06:34 +01:00 · 2013-11-07 08:03:35 +01:00 · 2013-11-06 23:45:01 +01:00 · 2013-11-06 22:11:46 +01:00 · 2013-11-06 22:03:00 +01:00
13 changed files with 160 additions and 37 deletions
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -31,6 +31,7 @@ from youtube_dl.utils import (
    ExtractorError,
    UnavailableVideoError,
 )
+from youtube_dl.extractor import get_info_extractor

 RETRIES = 3

@@ -63,9 +64,10 @@ def generator(test_case):

    def test_template(self):
        ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
+        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
        def print_skipping(reason):
            print('Skipping %s: %s' % (test_case['name'], reason))
-        if not ie._WORKING:
+        if not ie.working():
            print_skipping('IE marked as not _WORKING')
            return
        if 'playlist' not in test_case:
@@ -77,6 +79,10 @@ def generator(test_case):
        if 'skip' in test_case:
            print_skipping(test_case['skip'])
            return
+        for other_ie in other_ies:
+            if not other_ie.working():
+                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                return

        params = get_params(test_case.get('params', {}))

--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -234,8 +234,14 @@ class FileDownloader(object):
        if self.params.get('noprogress', False):
            return
        clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
-        eta_str = self.format_eta(eta)
-        percent_str = self.format_percent(percent)
+        if eta is not None:
+            eta_str = self.format_eta(eta)
+        else:
+            eta_str = 'Unknown ETA'
+        if percent is not None:
+            percent_str = self.format_percent(percent)
+        else:
+            percent_str = 'Unknown %'
        speed_str = self.format_speed(speed)
        if self.params.get('progress_with_newline', False):
            self.to_screen(u'[download] %s of %s at %s ETA %s' %
@@ -557,12 +563,11 @@ class FileDownloader(object):
            # Progress message
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
-                self.report_progress('Unknown %', data_len_str, speed, 'Unknown ETA')
-                eta = None
+                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-                self.report_progress(percent, data_len_str, speed, eta)
+            self.report_progress(percent, data_len_str, speed, eta)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -38,6 +38,7 @@ from .defense import DefenseGouvFrIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
+from .eitb import EitbIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .extremetube import ExtremeTubeIE
@@ -115,6 +116,7 @@ from .slideshare import SlideshareIE
 from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import SouthParkStudiosIE
+from .space import SpaceIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -9,6 +9,7 @@ from ..utils import (
    compat_urllib_parse,
    find_xpath_attr,
    compat_urlparse,
+    compat_str,

    ExtractorError,
 )
@@ -41,6 +42,17 @@ class BrightcoveIE(InfoExtractor):
                u'uploader': u'Oracle',
            },
        },
+        {
+            # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+            u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+            u'info_dict': {
+                u'id': u'2750934548001',
+                u'ext': u'mp4',
+                u'title': u'This Bracelet Acts as a Personal Thermostat',
+                u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
+                u'uploader': u'Mashable',
+            },
+        },
    ]

    @classmethod
@@ -71,7 +83,24 @@ class BrightcoveIE(InfoExtractor):
        data = compat_urllib_parse.urlencode(params)
        return cls._FEDERATED_URL_TEMPLATE % data

+    @classmethod
+    def _extract_brightcove_url(cls, webpage):
+        """Try to extract the brightcove url from the wepbage, returns None
+        if it can't be found
+        """
+        m_brightcove = re.search(
+            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
+            webpage, re.DOTALL)
+        if m_brightcove is not None:
+            return cls._build_brighcove_url(m_brightcove.group())
+        else:
+            return None
+
    def _real_extract(self, url):
+        # Change the 'videoId' and others field to '@videoPlayer'
+        url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+        # Change bckey (used by bcove.me urls) to playerKey
+        url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
        mobj = re.match(self._VALID_URL, url)
        query_str = mobj.group('query')
        query = compat_urlparse.parse_qs(query_str)
@@ -109,7 +138,7 @@ class BrightcoveIE(InfoExtractor):

    def _extract_video_info(self, video_info):
        info = {
-            'id': video_info['id'],
+            'id': compat_str(video_info['id']),
            'title': video_info['displayName'],
            'description': video_info.get('shortDescription'),
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
@@ -119,10 +148,11 @@ class BrightcoveIE(InfoExtractor):
        renditions = video_info.get('renditions')
        if renditions:
            renditions = sorted(renditions, key=lambda r: r['size'])
-            best_format = renditions[-1]
-            info.update({
-                'url': best_format['defaultURL'],
-            })
+            info['formats'] = [{
+                'url': rend['defaultURL'],
+                'height': rend.get('frameHeight'),
+                'width': rend.get('frameWidth'),
+            } for rend in renditions]
        elif video_info.get('FLVFullLengthURL') is not None:
            info.update({
                'url': video_info['FLVFullLengthURL'],
--- a/youtube_dl/extractor/eitb.py
+++ b/youtube_dl/extractor/eitb.py
@@ -0,0 +1,37 @@
+# encoding: utf-8
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import ExtractorError
+
+
+class EitbIE(InfoExtractor):
+    IE_NAME = u'eitb.tv'
+    _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
+
+    _TEST = {
+        u'add_ie': ['Brightcove'],
+        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
+        u'md5': u'edf4436247185adee3ea18ce64c47998',
+        u'info_dict': {
+            u'id': u'2743577154001',
+            u'ext': u'mp4',
+            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
+            # All videos from eitb has this description in the brightcove info
+            u'description': u'.',
+            u'uploader': u'Euskal Telebista',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        chapter_id = mobj.group('chapter_id')
+        webpage = self._download_webpage(url, chapter_id)
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is None:
+            raise ExtractorError(u'Could not extract the Brightcove url')
+        # The BrightcoveExperience object doesn't contain the video id, we set
+        # it manually
+        bc_url += '&%40videoPlayer={}'.format(chapter_id)
+        return self.url_result(bc_url, BrightcoveIE.ie_key())
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
        },
        # embedded vimeo video
        {
+            u'add_ie': ['Vimeo'],
            u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
            u'file': u'22444065.mp4',
            u'md5': u'2903896e23df39722c33f015af0666e2',
@@ -44,6 +45,7 @@ class GenericIE(InfoExtractor):
        },
        # bandcamp page with custom domain
        {
+            u'add_ie': ['Bandcamp'],
            u'url': u'http://bronyrock.com/track/the-pony-mash',
            u'file': u'3235767654.mp3',
            u'info_dict': {
@@ -52,6 +54,21 @@ class GenericIE(InfoExtractor):
            },
            u'skip': u'There is a limit of 200 free downloads / month for the test song',
        },
+        # embedded brightcove video
+        {
+            u'add_ie': ['Brightcove'],
+            u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
+            u'info_dict': {
+                u'id': u'2365799484001',
+                u'ext': u'mp4',
+                u'title': u'Bubble Simulation',
+                u'description': u'A visualization from a new computer model of foam behavior.',
+                u'uploader': u'Scientific American',
+            },
+            u'params': {
+                u'skip_download': True,
+            },
+        },
    ]

    def report_download_webpage(self, video_id):
@@ -144,10 +161,9 @@ class GenericIE(InfoExtractor):

        self.report_extraction(video_id)
        # Look for BrightCove:
-        m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
-        if m_brightcove is not None:
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is not None:
            self.to_screen(u'Brightcove video detected.')
-            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
            return self.url_result(bc_url, 'Brightcove')

        # Look for embedded Vimeo player
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
            },
        },
        {
+            u'add_ie': ['Vevo'],
            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
            u'file': u'USCJY1331283.mp4',
            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
    _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
+        u'add_ie': ['Ooyala'],
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
--- a/youtube_dl/extractor/space.py
+++ b/youtube_dl/extractor/space.py
@@ -0,0 +1,35 @@
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import RegexNotFoundError, ExtractorError
+
+
+class SpaceIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
+    _TEST = {
+        u'add_ie': ['Brightcove'],
+        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
+        u'info_dict': {
+            u'id': u'2780937028001',
+            u'ext': u'mp4',
+            u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
+            u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
+            u'uploader': u'TechMedia Networks',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        try:
+            # Some videos require the playerKey field, which isn't define in
+            # the BrightcoveExperience object
+            brightcove_url = self._og_search_video_url(webpage)
+        except RegexNotFoundError:
+            # Other videos works fine with the info from the object
+            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if brightcove_url is None:
+            raise ExtractorError(u'The webpage does not contain a video', expected=True)
+        return self.url_result(brightcove_url, BrightcoveIE.ie_key())
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor):
    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'

    _TEST = {
+        u'add_ie': ['Sina'],
        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
        u'file': u'98322879.flv',
        u'info_dict': {
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -9,7 +9,7 @@ from ..utils import (


 class XNXXIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
+    _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
    VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
    VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
    VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1572,7 +1572,6 @@ class YoutubePlaylistIE(InfoExtractor):
 class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
-    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'
@@ -1593,30 +1592,20 @@ class YoutubeChannelIE(InfoExtractor):
        # Download channel page
        channel_id = mobj.group(1)
        video_ids = []
-        pagenum = 1

-        url = self._TEMPLATE_URL % (channel_id, pagenum)
-        page = self._download_webpage(url, channel_id,
-                                      u'Downloading page #%s' % pagenum)
+        # Download all channel pages using the json-based channel_ajax query
+        for pagenum in itertools.count(1):
+            url = self._MORE_PAGES_URL % (pagenum, channel_id)
+            page = self._download_webpage(url, channel_id,
+                                          u'Downloading page #%s' % pagenum)

-        # Extract video identifiers
-        ids_in_page = self.extract_videos_from_page(page)
-        video_ids.extend(ids_in_page)
+            page = json.loads(page)

-        # Download any subsequent channel pages using the json-based channel_ajax query
-        if self._MORE_PAGES_INDICATOR in page:
-            for pagenum in itertools.count(1):
-                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
+            ids_in_page = self.extract_videos_from_page(page['content_html'])
+            video_ids.extend(ids_in_page)

-                page = json.loads(page)
-
-                ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
-
-                if self._MORE_PAGES_INDICATOR  not in page['load_more_widget_html']:
-                    break
+            if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+                break

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.11.06'
+__version__ = '2013.11.07'
Author	SHA1	Message	Date
Philipp Hagemeister	6161d17579	release 2013.11.07	2013-11-07 11:06:34 +01:00
Jaime Marquínez Ferrándiz	4ac5306ae7	Fix the report progress when file_size is unknown (#1731 ) The report_progress function will accept eta and percent with None value and will set the message to 'Unknown ETA' or 'Unknown %'. Otherwise the values must be numbers.	2013-11-07 08:03:35 +01:00
Jaime Marquínez Ferrándiz	b1a80ec1a9	[xnxx] Accept urls that start with 'www' (fixes #1734 )	2013-11-06 23:45:01 +01:00
Philipp Hagemeister	672fe94dcb	release 2013.11.06.1	2013-11-06 22:11:46 +01:00
Jaime Marquínez Ferrándiz	51040b72ed	[brightcove] Support redirected urls from bcove.me (fixes #1732 ) 'bctid' needs to be changed to '@videoPlayer', and 'bckey' to 'playerKey'.	2013-11-06 22:03:00 +01:00
Jaime Marquínez Ferrándiz	4f045eef8f	[youtube:channel] Fix the extraction The page doesn't include the 'load more' button anymore, now we directly get the 'c4_browse_ajax' pages.	2013-11-06 21:42:33 +01:00
Jaime Marquínez Ferrándiz	5d7b253ea0	Add an extractor for eitb.tv (fixes #1608 ) The BrightcoveExperience object doesn't contain the video id, the extractor adds it and passes the url to BrightcoveIE.	2013-11-06 20:06:14 +01:00
Jaime Marquínez Ferrándiz	b0759f0c19	[brightcove] Extract all the available formats	2013-11-06 19:05:41 +01:00
Jaime Marquínez Ferrándiz	065472936a	Add an extractor for space.com (fixes #1718 ) It uses Brightcove, but requires some special process for getting a url with the playerKey field in some videos	2013-11-06 17:37:39 +01:00
Jaime Marquínez Ferrándiz	fc4a0c2aec	[brightcove] Change the 'videoId' or 'videoID' field to '@videoPlayer' (fixes #1697 ) It seems to be needed when using the htmlFederated page	2013-11-06 17:31:47 +01:00
Jaime Marquínez Ferrándiz	eeb165e674	[brightcove] Add the extraction of the url from generic	2013-11-06 16:58:03 +01:00
Jaime Marquínez Ferrándiz	9ee2b5f6f2	tests: don't run the test if any of the extractors listed in the 'add_ie' field is marked as not working	2013-11-06 16:43:26 +01:00