release 2013.07.11

YoutubeIE: a new algo for length 83
GametrailersIE: support multipart videos
2025-09-28 04:18:36 +09:00 · 2013-07-11 21:04:59 +02:00 · 2013-07-11 20:21:45 +02:00 · 2013-07-11 18:24:53 +02:00 · 2013-07-11 16:31:29 +02:00 · 2013-07-11 16:16:02 +02:00
22 changed files with 385 additions and 113 deletions
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -20,9 +20,9 @@ tests = [
    # 84
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
-    # 83
+    # 83 - vflcaqGO8 2013/07/11
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
-     "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"),
+     "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
    # 82
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
     "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -4,6 +4,7 @@

 import sys
 import unittest
+import xml.etree.ElementTree

 # Allow direct execution
 import os
@@ -16,6 +17,7 @@ from youtube_dl.utils import unescapeHTML
 from youtube_dl.utils import orderedSet
 from youtube_dl.utils import DateRange
 from youtube_dl.utils import unified_strdate
+from youtube_dl.utils import find_xpath_attr

 if sys.version_info < (3, 0):
    _compat_str = lambda b: b.decode('unicode-escape')
@@ -112,5 +114,18 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')

+    def test_find_xpath_attr(self):
+        testxml = u'''<root>
+            <node/>
+            <node x="a"/>
+            <node x="a" y="c" />
+            <node x="b" y="d" />
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+
+        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
+        self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
+        self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_sig.py
+++ b/test/test_youtube_sig.py
@@ -45,7 +45,7 @@ class TestYoutubeSig(unittest.TestCase):

    def test_83(self):
        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
-        right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"
+        right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
        self.assertEqual(sig(wrong), right)

    def test_82(self):
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -348,6 +348,7 @@ class YoutubeDL(object):

        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
+            ie_result.update(extra_info)
            if 'playlist' not in ie_result:
                # It isn't part of a playlist
                ie_result['playlist'] = None
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,4 +1,3 @@
-
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import ArteTvIE
@@ -12,7 +11,9 @@ from .comedycentral import ComedyCentralIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE
 from .depositfiles import DepositFilesIE
+from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .facebook import FacebookIE
@@ -58,6 +59,7 @@ from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
+from .veoh import VeohIE
 from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vine import VineIE
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -48,6 +48,7 @@ class ArchiveOrgIE(InfoExtractor):
        formats.sort(key=lambda fdata: fdata['file_size'])

        info = {
+            '_type': 'video',
            'id': video_id,
            'title': title,
            'formats': formats,
@@ -63,4 +64,4 @@ class ArchiveOrgIE(InfoExtractor):
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

-        return self.video_result(info)
+        return info
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -32,7 +32,7 @@ class ARDIE(InfoExtractor):
        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)
        title = re.search(self._TITLE, html).group('title')
-        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
+        streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
        if not streams:
            assert '"fsk"' in html
            raise ExtractorError(u'This video is only available after 8:00 pm')
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -4,10 +4,8 @@ import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    # This is used by the not implemented extractLiveStream method
-    compat_urllib_parse,
-
    ExtractorError,
+    find_xpath_attr,
    unified_strdate,
 )

@@ -28,6 +26,7 @@ class ArteTvIE(InfoExtractor):
        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))

    # TODO implement Live Stream
+    # from ..utils import compat_urllib_parse
    # def extractLiveStream(self, url):
    #     video_lang = url.split('/')[-4]
    #     info = self.grep_webpage(
@@ -57,7 +56,6 @@ class ArteTvIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._EMISSION_URL, url)
        if mobj is not None:
-            name = mobj.group('name')
            lang = mobj.group('lang')
            # This is not a real id, it can be for example AJT for the news
            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
@@ -122,7 +120,7 @@ class ArteTvIE(InfoExtractor):
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
        ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
        ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
-        config_node = ref_xml_doc.find('.//video[@lang="%s"]' % lang)
+        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
        config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')

--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -189,5 +189,5 @@ class BlipTVUserIE(InfoExtractor):
            pagenum += 1

        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
-        url_entries = [self.url_result(url, 'BlipTV') for url in urls]
+        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
        return [self.playlist_result(url_entries, playlist_title = username)]
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -1,28 +1,81 @@
 import re
 import json
+import xml.etree.ElementTree

 from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    find_xpath_attr,
+)

 class BrightcoveIE(InfoExtractor):
-    _VALID_URL = r'http://.*brightcove\.com/.*\?(?P<query>.*videoPlayer=(?P<id>\d*).*)'
+    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
+    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
+    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
+    
+    # There is a test for Brightcove in GenericIE, that way we test both the download
+    # and the detection of videos, and we don't have to find a URL that is always valid
+
+    @classmethod
+    def _build_brighcove_url(cls, object_str):
+        """
+        Build a Brightcove url from a xml string containing
+        <object class="BrightcoveExperience">{params}</object>
+        """
+        object_doc = xml.etree.ElementTree.fromstring(object_str)
+        assert u'BrightcoveExperience' in object_doc.attrib['class']
+        params = {'flashID': object_doc.attrib['id'],
+                  'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
+                  }
+        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
+        # Not all pages define this value
+        if playerKey is not None:
+            params['playerKey'] = playerKey.attrib['value']
+        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
+        if videoPlayer is not None:
+            params['@videoPlayer'] = videoPlayer.attrib['value']
+        data = compat_urllib_parse.urlencode(params)
+        return cls._FEDERATED_URL_TEMPLATE % data

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = mobj.group('query')
-        video_id = mobj.group('id')

-        request_url = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' % query
+        m_video_id = re.search(r'videoPlayer=(\d+)', query)
+        if m_video_id is not None:
+            video_id = m_video_id.group(1)
+            return self._get_video_info(video_id, query)
+        else:
+            player_key = self._search_regex(r'playerKey=(.+?)(&|$)', query, 'playlist_id')
+            return self._get_playlist_info(player_key)
+
+    def _get_video_info(self, video_id, query):
+        request_url = self._FEDERATED_URL_TEMPLATE % query
        webpage = self._download_webpage(request_url, video_id)

        self.report_extraction(video_id)
        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
        info = json.loads(info)['data']
        video_info = info['programmedContent']['videoPlayer']['mediaDTO']
+
+        return self._extract_video_info(video_info)
+
+    def _get_playlist_info(self, player_key):
+        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
+                                               player_key, u'Downloading playlist information')
+
+        playlist_info = json.loads(playlist_info)['videoList']
+        videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
+
+        return self.playlist_result(videos, playlist_id=playlist_info['id'],
+                                    playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
+
+    def _extract_video_info(self, video_info):
        renditions = video_info['renditions']
        renditions = sorted(renditions, key=lambda r: r['size'])
        best_format = renditions[-1]
-        
-        return {'id': video_id,
+
+        return {'id': video_info['id'],
                'title': video_info['displayName'],
                'url': best_format['defaultURL'], 
                'ext': 'mp4',
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -169,11 +169,6 @@ class InfoExtractor(object):
        self.to_screen(u'Logging in')

    #Methods for following #608
-    #They set the correct value of the '_type' key
-    def video_result(self, video_info):
-        """Returns a video"""
-        video_info['_type'] = 'video'
-        return video_info
    def url_result(self, url, ie=None):
        """Returns a url that points to a page that should be processed"""
        #TODO: ie should be the class used for getting the info
--- a/youtube_dl/extractor/dotsub.py
+++ b/youtube_dl/extractor/dotsub.py
@@ -0,0 +1,41 @@
+import re
+import json
+import time
+
+from .common import InfoExtractor
+
+
+class DotsubIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?dotsub\.com/view/([^/]+)'
+    _TEST = {
+        u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
+        u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv',
+        u'md5': u'0914d4d69605090f623b7ac329fea66e',
+        u'info_dict': {
+            u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary",
+            u"uploader": u"4v4l0n42",
+            u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism  and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
+            u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+            u'upload_date': u'20101213',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        info_url = "https://dotsub.com/api/media/%s/metadata" %(video_id)
+        webpage = self._download_webpage(info_url, video_id)
+        info = json.loads(webpage)
+        date = time.gmtime(info['dateCreated']/1000) # The timestamp is in milliseconds
+
+        return [{
+            'id':          video_id,
+            'url':         info['mediaURI'],
+            'ext':         'flv',
+            'title':       info['title'],
+            'thumbnail':   info['screenshotURI'],
+            'description': info['description'],
+            'uploader':    info['user'],
+            'view_count':  info['numberOfViews'],
+            'upload_date': u'%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
+        }]
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -6,7 +6,6 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
-    ExtractorError,
    unified_strdate,
 )

@@ -68,6 +67,7 @@ class DreiSatIE(InfoExtractor):
        formats.sort(key=_sortkey)

        info = {
+            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
@@ -82,4 +82,4 @@ class DreiSatIE(InfoExtractor):
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

-        return self.video_result(info)
+        return info
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@@ -0,0 +1,51 @@
+import re
+
+from ..utils import (
+    compat_urllib_parse,
+    determine_ext
+)
+from .common import InfoExtractor
+
+
+class EHowIE(InfoExtractor):
+    IE_NAME = u'eHow'
+    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
+    _TEST = {
+        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
+        u'file': u'12245069.flv',
+        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
+        u'info_dict': {
+            u"title": u"Hardwood Flooring Basics",
+            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
+   			u"uploader": u"Erick Nathan"
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
+            webpage, u'video URL')
+        final_url = compat_urllib_parse.unquote(video_url)        
+        thumbnail_url = self._search_regex(r'<meta property="og:image" content="(.+?)" />',
+            webpage, u'thumbnail URL')
+        uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
+            webpage, u'uploader')
+        title = self._search_regex(r'<meta property="og:title" content="(.+?)" />',
+            webpage, u'Video title').replace(' | eHow', '')
+        description = self._search_regex(r'<meta property="og:description" content="(.+?)" />',
+            webpage, u'video description')
+        ext = determine_ext(final_url)
+
+        return {
+            '_type':       'video',
+            'id':          video_id,
+            'url':         final_url,
+            'ext':         ext,
+            'title':       title,
+            'thumbnail':   thumbnail_url,
+            'description': description,
+            'uploader':    uploader,
+        }
+
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -4,14 +4,15 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    unified_strdate,
+    compat_urllib_parse,
 )

 class GameSpotIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
+    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
-        u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
+        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"upload_date": u"20130627", 
@@ -21,13 +22,22 @@ class GameSpotIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(3).split("-")[-1]
-        info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id)
+        page_id = mobj.group('page_id')
+        webpage = self._download_webpage(url, page_id)
+        video_id = self._html_search_regex([r'"og:video" content=".*?\?id=(\d+)"',
+                                            r'http://www\.gamespot\.com/videoembed/(\d+)'],
+                                           webpage, 'video id')
+        data = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'})
+        info_url = 'http://www.gamespot.com/pages/video_player/xml.php?' + data
        info_xml = self._download_webpage(info_url, video_id)
        doc = xml.etree.ElementTree.fromstring(info_xml)
        clip_el = doc.find('./playList/clip')

-        video_url = clip_el.find('./URI').text
+        http_urls = [{'url': node.find('filePath').text,
+                      'rate': int(node.find('rate').text)}
+            for node in clip_el.find('./httpURI')]
+        best_quality = sorted(http_urls, key=lambda f: f['rate'])[-1]
+        video_url = best_quality['url']
        title = clip_el.find('./title').text
        ext = video_url.rpartition('.')[2]
        thumbnail_url = clip_el.find('./screenGrabURI').text
--- a/youtube_dl/extractor/gametrailers.py
+++ b/youtube_dl/extractor/gametrailers.py
@@ -1,4 +1,5 @@
 import re
+import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
@@ -11,7 +12,7 @@ class GametrailersIE(InfoExtractor):
    _VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
    _TEST = {
        u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
-        u'file': u'zbvr8i.flv',
+        u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.flv',
        u'md5': u'c3edbc995ab4081976e16779bd96a878',
        u'info_dict': {
            u"title": u"E3 2013: Debut Trailer"
@@ -24,45 +25,39 @@ class GametrailersIE(InfoExtractor):
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('id')
-        video_type = mobj.group('type')
        webpage = self._download_webpage(url, video_id)
-        if video_type == 'full-episodes':
-            mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
-        else:
-            mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
-        mgid = self._search_regex(mgid_re, webpage, u'mgid')
-        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
+        mgid = self._search_regex([r'data-video="(?P<mgid>mgid:.*?)"',
+                                   r'data-contentId=\'(?P<mgid>mgid:.*?)\''],
+                                  webpage, u'mgid')

+        data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
        info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
                                           video_id, u'Downloading video info')
-        links_webpage = self._download_webpage('http://www.gametrailers.com/feeds/mediagen/?' + data,
+        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
+        default_thumb = doc.find('./channel/image/url').text
+
+        media_namespace = {'media': 'http://search.yahoo.com/mrss/'}
+        parts = [{
+            'title': video_doc.find('title').text,
+            'ext': 'flv',
+            'id': video_doc.find('guid').text.rpartition(':')[2],
+            # Videos are actually flv not mp4
+            'url': self._get_video_url(video_doc.find('media:group/media:content', media_namespace).attrib['url'], video_id),
+            # The thumbnail may not be defined, it would be ''
+            'thumbnail': video_doc.find('media:group/media:thumbnail', media_namespace).attrib['url'] or default_thumb,
+            'description': video_doc.find('description').text,
+        } for video_doc in doc.findall('./channel/item')]
+        return parts
+
+    def _get_video_url(self, mediagen_url, video_id):
+        if 'acceptMethods' not in mediagen_url:
+            mediagen_url += '&acceptMethods=fms'
+        links_webpage = self._download_webpage(mediagen_url,
                                               video_id, u'Downloading video urls info')
-
-        self.report_extraction(video_id)
-        info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
-                      <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
-                      <image>.*
-                        <url>(?P<thumb>.*?)</url>.*
-                      </image>'''
-
-        m_info = re.search(info_re, info_page, re.VERBOSE|re.DOTALL)
-        if m_info is None:
-            raise ExtractorError(u'Unable to extract video info')
-        video_title = m_info.group('title')
-        video_description = m_info.group('description')
-        video_thumb = m_info.group('thumb')
-
-        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
-        if m_urls is None or len(m_urls) == 0:
+        doc = xml.etree.ElementTree.fromstring(links_webpage)
+        urls = list(doc.iter('src'))
+        if len(urls) == 0:
            raise ExtractorError(u'Unable to extract video url')
        # They are sorted from worst to best quality
-        video_url = m_urls[-1].group('url')
+        return urls[-1].text

-        return {'url':         video_url,
-                'id':          video_id,
-                'title':       video_title,
-                # Videos are actually flv not mp4
-                'ext':         'flv',
-                'thumbnail':   video_thumb,
-                'description': video_description,
-                }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1,3 +1,5 @@
+# encoding: utf-8
+
 import os
 import re

@@ -9,20 +11,34 @@ from ..utils import (

    ExtractorError,
 )
+from .brightcove import BrightcoveIE

 class GenericIE(InfoExtractor):
    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = u'generic'
-    _TEST = {
-        u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
-        u'file': u'13601338388002.mp4',
-        u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
-        u'info_dict': {
-            u"uploader": u"www.hodiho.fr", 
-            u"title": u"R\u00e9gis plante sa Jeep"
-        }
-    }
+    _TESTS = [
+        {
+            u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
+            u'file': u'13601338388002.mp4',
+            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
+            u'info_dict': {
+                u"uploader": u"www.hodiho.fr", 
+                u"title": u"R\u00e9gis plante sa Jeep"
+            }
+        },
+        {
+            u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
+            u'file': u'2371591881001.mp4',
+            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
+            u'note': u'Test Brightcove downloads and detection in GenericIE',
+            u'info_dict': {
+                u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
+                u'uploader': u'8TV',
+                u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
+            }
+        },
+    ]

    def report_download_webpage(self, video_id):
        """Report webpage download."""
@@ -103,6 +119,13 @@ class GenericIE(InfoExtractor):
            raise ExtractorError(u'Invalid URL: %s' % url)

        self.report_extraction(video_id)
+        # Look for Brightcove:
+        m_brightcove = re.search(r'<object.+?class=".*?BrightcoveExperience.*?".+?</object>', webpage, re.DOTALL)
+        if m_brightcove is not None:
+            self.to_screen(u'Brightcove video detected.')
+            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
+            return self.url_result(bc_url, 'Brightcove')
+
        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
--- a/youtube_dl/extractor/steam.py
+++ b/youtube_dl/extractor/steam.py
@@ -23,14 +23,16 @@ class SteamIE(InfoExtractor):
                u"file": u"81300.flv",
                u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
                u"info_dict": {
-                        u"title": u"Terraria 1.1 Trailer"
+                        u"title": u"Terraria 1.1 Trailer",
+                        u'playlist_index': 1,
                }
            },
            {
                u"file": u"80859.flv",
                u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
                u"info_dict": {
-                    u"title": u"Terraria Trailer"
+                    u"title": u"Terraria Trailer",
+                    u'playlist_index': 2,
                }
            }
        ]
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@@ -0,0 +1,47 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+class VeohIE(InfoExtractor):
+    _VALID_URL = r'http://www\.veoh\.com/watch/v(?P<id>\d*)'
+
+    _TEST = {
+        u'url': u'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+        u'file': u'56314296.mp4',
+        u'md5': u'620e68e6a3cff80086df3348426c9ca3',
+        u'info_dict': {
+            u'title': u'Straight Backs Are Stronger',
+            u'uploader': u'LUMOback',
+            u'description': u'At LUMOback, we believe straight backs are stronger.  The LUMOback Posture & Movement Sensor:  It gently vibrates when you slouch, inspiring improved posture and mobility.  Use the app to track your data and improve your posture over time. ',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
+        if m_youtube is not None:
+            youtube_id = m_youtube.group(1)
+            self.to_screen(u'%s: detected Youtube video.' % video_id)
+            return self.url_result(youtube_id, 'Youtube')
+
+        self.report_extraction(video_id)
+        info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
+        info = json.loads(info)
+        video_url =  info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
+
+        return {'id': info['videoId'], 
+                'title': info['title'],
+                'ext': determine_ext(video_url),
+                'url': video_url,
+                'uploader': info['username'],
+                'thumbnail': info.get('highResImage') or info.get('medResImage'),
+                'description': info['description'],
+                'view_count': info['views'],
+                }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -117,7 +117,19 @@ class YoutubeIE(InfoExtractor):
                u"uploader": u"IconaPop",
                u"uploader_id": u"IconaPop"
            }
-        }
+        },
+        {
+            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
+            u"file":  u"07FYdnEawAQ.mp4",
+            u"note": u"Test VEVO video with age protection (#956)",
+            u"info_dict": {
+                u"upload_date": u"20130703",
+                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
+                u"description": u"md5:64249768eec3bc4276236606ea996373",
+                u"uploader": u"justintimberlakeVEVO",
+                u"uploader_id": u"justintimberlakeVEVO"
+            }
+        },
    ]


@@ -131,10 +143,6 @@ class YoutubeIE(InfoExtractor):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

-    def report_login(self):
-        """Report attempt to log in."""
-        self.to_screen(u'Logging in')
-
    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -182,7 +190,7 @@ class YoutubeIE(InfoExtractor):
        elif len(s) == 84:
            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
        elif len(s) == 83:
-            return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36]
+            return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
        elif len(s) == 82:
            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]

@@ -296,26 +304,6 @@ class YoutubeIE(InfoExtractor):
        if self._downloader is None:
            return

-        username = None
-        password = None
-        downloader_params = self._downloader.params
-
-        # Attempt to use provided username and password or .netrc data
-        if downloader_params.get('username', None) is not None:
-            username = downloader_params['username']
-            password = downloader_params['password']
-        elif downloader_params.get('usenetrc', False):
-            try:
-                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
-                if info is not None:
-                    username = info[0]
-                    password = info[2]
-                else:
-                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
-            except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
-                return
-
        # Set language
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
@@ -325,6 +313,8 @@ class YoutubeIE(InfoExtractor):
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return

+        (username, password) = self._get_login_info()
+
        # No authentication to be performed
        if username is None:
            return
@@ -432,15 +422,35 @@ class YoutubeIE(InfoExtractor):

        # Get video info
        self.report_video_info_webpage_download(video_id)
-        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
-                    % (video_id, el_type))
+        if re.search(r'player-age-gate-content">', video_webpage) is not None:
+            self.report_age_confirmation()
+            age_gate = True
+            # We simulate the access to the video from www.youtube.com/v/{video_id}
+            # this can be viewed without login into Youtube
+            data = compat_urllib_parse.urlencode({'video_id': video_id,
+                                                  'el': 'embedded',
+                                                  'gl': 'US',
+                                                  'hl': 'en',
+                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+                                                  'asv': 3,
+                                                  'sts':'1588',
+                                                  })
+            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
-            if 'token' in video_info:
-                break
+        else:
+            age_gate = False
+            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
+                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                        % (video_id, el_type))
+                video_info_webpage = self._download_webpage(video_info_url, video_id,
+                                        note=False,
+                                        errnote='unable to download video info webpage')
+                video_info = compat_parse_qs(video_info_webpage)
+                if 'token' in video_info:
+                    break
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
@@ -557,6 +567,8 @@ class YoutubeIE(InfoExtractor):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
+            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
+                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            url_map = {}
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
@@ -567,9 +579,15 @@ class YoutubeIE(InfoExtractor):
                    elif 's' in url_data:
                        if self._downloader.params.get('verbose'):
                            s = url_data['s'][0]
-                            player = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
-                                'html5 player', fatal=False)
-                            self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' %
+                            if age_gate:
+                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
+                                    video_info['ad3_module'][0], 'flash player',
+                                    fatal=False)
+                                player = 'flash player %s' % player_version
+                            else:
+                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+                                    'html5 player', fatal=False)
+                            self.to_screen('encrypted signature length %d (%d.%d), itag %s, %s' %
                                (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player))
                        signature = self._decrypt_signature(url_data['s'][0])
                        url += '&signature=' + signature
@@ -697,7 +715,7 @@ class YoutubePlaylistIE(InfoExtractor):

        videos = [v[1] for v in sorted(videos)]

-        url_results = [self.url_result(url, 'Youtube') for url in videos]
+        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]


@@ -755,7 +773,7 @@ class YoutubeChannelIE(InfoExtractor):
        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
-        url_entries = [self.url_result(url, 'Youtube') for url in urls]
+        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]


@@ -812,7 +830,7 @@ class YoutubeUserIE(InfoExtractor):
            pagenum += 1

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
-        url_results = [self.url_result(url, 'Youtube') for url in urls]
+        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]

 class YoutubeSearchIE(SearchInfoExtractor):
@@ -887,6 +905,12 @@ class YoutubeSubscriptionsIE(YoutubeIE):
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None

+    def _real_initialize(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
+        super(YoutubeSubscriptionsIE, self)._real_initialize()
+
    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -198,6 +198,20 @@ else:
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)

+if sys.version_info >= (2,7):
+    def find_xpath_attr(node, xpath, key, val):
+        """ Find the xpath xpath[@key=val] """
+        assert re.match(r'^[a-zA-Z]+$', key)
+        assert re.match(r'^[a-zA-Z@]*$', val)
+        expr = xpath + u"[@%s='%s']" % (key, val)
+        return node.find(expr)
+else:
+    def find_xpath_attr(node, xpath, key, val):
+        for f in node.findall(xpath):
+            if f.attrib.get(key) == val:
+                return f
+        return None
+
 def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.07.08.1'
+__version__ = '2013.07.11'
Author	SHA1	Message	Date
Philipp Hagemeister	fd87ff26b9	release 2013.07.11	2013-07-11 21:04:59 +02:00
Jaime Marquínez Ferrándiz	85347e1cb6	YoutubeIE: a new algo for length 83	2013-07-11 20:21:45 +02:00
Jaime Marquínez Ferrándiz	41897817cc	GametrailersIE: support multipart videos Use xml.etree.ElementTree instead of re when possible	2013-07-11 18:24:53 +02:00
Philipp Hagemeister	45ff2d51d0	[brightcove] add import	2013-07-11 16:31:29 +02:00
Philipp Hagemeister	5de3ece225	[brightcove] fix on Python 2.6	2013-07-11 16:16:02 +02:00
Philipp Hagemeister	df50a41289	[arte] Fix on 2.6	2013-07-11 16:12:16 +02:00
Philipp Hagemeister	59ae56fad5	Add helper function find_xpath_attr	2013-07-11 16:12:08 +02:00
Philipp Hagemeister	690e872c51	Remove video_result helper method Calling it was more complex than actually including the type in the video info	2013-07-11 12:12:30 +02:00
Philipp Hagemeister	81082e046e	[ehow] improve minor bits	2013-07-11 12:11:00 +02:00
Philipp Hagemeister	3fa9550837	Merge remote-tracking branch 'yasoob/master'	2013-07-11 12:02:16 +02:00
M.Yasoob Khalid	b1082f01a6	added test for ehow	2013-07-11 14:30:25 +05:00
M.Yasoob Khalid	f35b84c807	added an IE for Ehow videos	2013-07-11 14:25:14 +05:00
Jaime Marquínez Ferrándiz	117adb0f0f	GenericIE: detect more Brightcove videos In some sites "class" contains more than BrightcoveExperience	2013-07-11 00:25:38 +02:00
Jaime Marquínez Ferrándiz	abb285fb1b	BrightcoveIE: add support for playlists	2013-07-11 00:04:33 +02:00
Jaime Marquínez Ferrándiz	a431154706	Set the playlist_index and playlist fields for already resolved video results.	2013-07-10 23:36:30 +02:00
Jaime Marquínez Ferrándiz	cfe50f04ed	GenericIE: Detect videos from Brightcove Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.	2013-07-10 17:49:11 +02:00
Jaime Marquínez Ferrándiz	a7055eb956	YoutubeIE: show a more meaningful error when it finds an rtmpe download (related #343 )	2013-07-10 14:35:11 +02:00
Philipp Hagemeister	0a1be1e997	release 2013.07.10	2013-07-10 11:36:11 +02:00
Jaime Marquínez Ferrándiz	c93898dae9	YoutubeIE: new algo for length 83 (closes #1017 and closes #1016 )	2013-07-10 10:44:04 +02:00
Jaime Marquínez Ferrándiz	ebdf2af727	GameSpotIE: support more urls and download videos in the best quality	2013-07-09 20:07:52 +02:00
Jaime Marquínez Ferrándiz	c108eb73cc	YoutubeIE: Fix vevo explicit videos (closes #956 ) When an age restricted video is detected it simulates accessing the video from www.youtube.com/v/{video_id}	2013-07-09 15:43:44 +02:00
Jaime Marquínez Ferrándiz	3a1375dacf	VeohIE: remove debug logging	2013-07-09 11:11:55 +02:00
Jaime Marquínez Ferrándiz	41bece30b4	DotsubIE: simplify and extract the upload date Do not declare variables for fields in the info dictionary.	2013-07-08 22:40:42 +02:00
Jaime Marquínez Ferrándiz	16ea58cbda	Merge pull request #1009 from yasoob/master Added an IE and test for dotsub.com videos. ( closes #1008 )	2013-07-08 22:21:06 +02:00
Jaime Marquínez Ferrándiz	99e350d902	Add VeohIE (closes #1006 )	2013-07-08 22:02:23 +02:00
M.Yasoob Khalid	13e06d298c	added an IE and test for dotsub.	2013-07-09 00:05:52 +05:00
Jaime Marquínez Ferrándiz	81f0259b9e	YoutubeSubscriptionsIE: raise an error if there's no login information.	2013-07-08 11:24:11 +02:00
Jaime Marquínez Ferrándiz	fefcb5d314	YoutubeIE: use the new method in the base IE for getting the login info	2013-07-08 11:24:11 +02:00
Philipp Hagemeister	345b0c9b46	Remove dead code	2013-07-08 02:13:50 +02:00
Philipp Hagemeister	20c3893f0e	Do not redefine variables in list comprehensions	2013-07-08 02:12:20 +02:00