release 2014.03.21

Add --prefer-insecure option (Fixes #2364 )
[ninegag] Modernize and remove unused import
2025-09-28 20:38:37 +09:00 · 2014-03-21 00:37:23 +01:00 · 2014-03-21 00:37:10 +01:00 · 2014-03-21 00:37:10 +01:00 · 2014-03-21 00:37:10 +01:00 · 2014-03-20 23:05:57 +07:00
13 changed files with 104 additions and 42 deletions
--- a/README.md
+++ b/README.md
@@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
                                     an empty string (--proxy "") for direct
                                     connection
    --no-check-certificate           Suppress HTTPS certificate validation.
+    --prefer-insecure                Use an unencrypted connection to retrieve
+                                     information about the video. (Currently
+                                     supported only for YouTube)
    --cache-dir DIR                  Location in the filesystem where youtube-dl
                                     can store some downloaded information
                                     permanently. By default $XDG_CACHE_HOME
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 git checkout HEAD -- youtube-dl youtube-dl.exe

 /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
-for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
+for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
 ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -148,6 +148,8 @@ class YoutubeDL(object):
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
+    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
+                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
@@ -533,7 +535,7 @@ class YoutubeDL(object):
                else:
                    raise
        else:
-            self.report_error('no suitable InfoExtractor: %s' % url)
+            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@@ -237,6 +237,9 @@ def parseOpts(overrideArguments=None):
        '--proxy', dest='proxy', default=None, metavar='URL',
        help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
    general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+    general.add_option(
+        '--prefer-insecure', action='store_true', dest='prefer_insecure',
+        help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
    general.add_option(
        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +260,6 @@ def parseOpts(overrideArguments=None):
        action='store_true',
        help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')

-
    selection.add_option(
        '--playlist-start',
        dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -756,6 +758,7 @@ def _real_main(argv=None):
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
+        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -10,6 +10,7 @@ from .arte import (
    ArteTvIE,
    ArteTVPlus7IE,
    ArteTVCreativeIE,
+    ArteTVConcertIE,
    ArteTVFutureIE,
    ArteTVDDCIE,
 )
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -131,7 +131,7 @@ class ArteTvIE(InfoExtractor):

 class ArteTVPlus7IE(InfoExtractor):
    IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'

    @classmethod
    def _extract_url_info(cls, url):
@@ -202,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
                    re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
                    # The version with sourds/mal subtitles has also lower relevance
                    re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+                    # Prefer http downloads over m3u8
+                    0 if f['url'].endswith('m3u8') else 1,
                )
        formats = sorted(formats, key=sort_key)
        def _format(format_info):
@@ -242,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):

    _TEST = {
        'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
-        'file': '050489-002.mp4',
        'info_dict': {
+            'id': '050489-002',
+            'ext': 'mp4',
            'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
        },
    }
@@ -255,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):

    _TEST = {
        'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
-        'file': '050940-003.mp4',
        'info_dict': {
+            'id': '050940-003',
+            'ext': 'mp4',
            'title': 'Les champignons au secours de la planète',
        },
    }
@@ -270,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):

 class ArteTVDDCIE(ArteTVPlus7IE):
    IE_NAME = 'arte.tv:ddc'
-    _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
+    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'

    def _real_extract(self, url):
        video_id, lang = self._extract_url_info(url)
@@ -284,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE):
        javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
        json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
        return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVConcertIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:concert'
+    _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
+
+    _TEST = {
+        'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
+        'md5': '9ea035b7bd69696b67aa2ccaaa218161',
+        'info_dict': {
+            'id': '186',
+            'ext': 'mp4',
+            'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
+            'upload_date': '20140128',
+        },
+    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -159,6 +159,18 @@ class GenericIE(InfoExtractor):
                # m3u8 download
                'skip_download': True,
            },
+        },
+        # Embedded TED video
+        {
+            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+            'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
+            'info_dict': {
+                'id': '981',
+                'ext': 'mp4',
+                'title': 'My web playroom',
+                'uploader': 'Ze Frank',
+                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
+            }
        }
    ]

@@ -487,6 +499,13 @@ class GenericIE(InfoExtractor):
        if mobj is None:
            # Broaden the search a little bit: JWPlayer JS loader
            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
+
+        # Look for embedded TED player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'TED')
+
        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import int_or_none


 class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):

        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
-        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
-            'video title')
+        title = self._html_search_regex(
+            r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
        description = self._html_search_meta('description', webpage, 'video description')

-        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
-            webpage)
+        mobj = re.search(
+            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

-        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
-            'view count', fatal=False)
-        view_count = int(view_count) if view_count is not None else None
+        view_count = self._html_search_regex(
+            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)

        comment_count = None
-        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
-            fatal=False)
+        comment_str = self._html_search_regex(
+            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
        if comment_str.startswith('комментариев нет'):
            comment_count = 0
        else:
            mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
            if mobj:
-                comment_count = int(mobj.group('total'))
+                comment_count = mobj.group('total')

        return {
            'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
            'title': title,
            'description': description,
            'duration': duration,
-            'view_count': view_count,
-            'comment_count': comment_count,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
        }
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):

    _TEST = {
        "url": "http://9gag.tv/v/1912",
-        "file": "1912.mp4",
        "info_dict": {
+            "id": "1912",
+            "ext": "mp4",
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
            "view_count": int,
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -11,7 +11,9 @@ from ..utils import (


 class TEDIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'''(?x)http://www\.ted\.com/
+    _VALID_URL = r'''(?x)
+        (?P<proto>https?://)
+        (?P<type>www|embed)(?P<urlmain>\.ted\.com/
        (
            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
            |
@@ -19,6 +21,7 @@ class TEDIE(SubtitlesInfoExtractor):
        )
        (/lang/(.*?))? # The url may contain the language
        /(?P<name>\w+) # Here goes the name and then ".html"
+        .*)$
        '''
    _TEST = {
        'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
@@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor):

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url, re.VERBOSE)
+        if m.group('type') == 'embed':
+            desktop_url = m.group('proto') + 'www' + m.group('urlmain')
+            return self.url_result(desktop_url, 'TED')
        name = m.group('name')
        if m.group('type_talk'):
            return self._talk_info(url, name)
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -1,3 +1,6 @@
+from __future__ import unicode_literals
+
+
 import json
 import re
 import sys
@@ -17,24 +20,25 @@ from ..aes import (


 class YouPornIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
+    _VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
    _TEST = {
-        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
-        u'file': u'505835.mp4',
-        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
-        u'info_dict': {
-            u"upload_date": u"20101221",
-            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
-            u"uploader": u"Ask Dan And Jennifer",
-            u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
-            u"age_limit": 18,
+        'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+        'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
+        'info_dict': {
+            'id': '505835',
+            'ext': 'mp4',
+            'upload_date': '20101221',
+            'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+            'uploader': 'Ask Dan And Jennifer',
+            'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        url = mobj.group('proto') + 'www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
        age_limit = self._rta_search(webpage)

        # Get JSON parameters
-        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
+        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
        try:
            params = json.loads(json_params)
        except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
        # Get all of the links from the page
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
-            webpage, u'download list').strip()
+            webpage, 'download list').strip()
        LINK_RE = r'<a href="([^"]+)">'
        links = re.findall(LINK_RE, download_list_html)

@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
            resolution = format_parts[0]
            height = int(resolution[:-len('p')])
            bitrate = int(format_parts[1][:-len('k')])
-            format = u'-'.join(format_parts) + u'-' + dn
+            format = '-'.join(format_parts) + '-' + dn

            formats.append({
                'url': video_url,
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')

    def _real_extract(self, url):
+        proto = (
+            u'http' if self._downloader.params.get('prefer_insecure', False)
+            else u'https')
+
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
        if mobj:
-            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self.extract_id(url)

        # Get video webpage
-        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        video_webpage = self._download_webpage(url, video_id)

        # Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                                  'asv': 3,
                                                  'sts':'1588',
                                                  })
-            video_info_url = 'https://www.youtube.com/get_video_info?' + data
+            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    note=False,
                                    errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        else:
            age_gate = False
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                        note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'duration':     video_duration,
            'age_limit':    18 if age_gate else 0,
            'annotations':  video_annotations,
-            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
            'view_count':   view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.03.18.1'
+__version__ = '2014.03.21'
Author	SHA1	Message	Date
Philipp Hagemeister	23c322a531	release 2014.03.21	2014-03-21 00:37:23 +01:00
Philipp Hagemeister	7e8c0af004	Add --prefer-insecure option (Fixes #2364 )	2014-03-21 00:37:10 +01:00
Philipp Hagemeister	d2983ccb25	[ninegag] Modernize and remove unused import	2014-03-21 00:37:10 +01:00
Philipp Hagemeister	f24e9833dc	[youporn] Modernize	2014-03-21 00:37:10 +01:00
Sergey M․	bc2bdf5709	[kontrtube] Modernize	2014-03-20 23:05:57 +07:00
Philipp Hagemeister	627a209f74	release 2014.03.20	2014-03-20 16:35:54 +01:00
Philipp Hagemeister	1a4895453a	[YoutubeDL] Improve error message	2014-03-20 16:33:46 +01:00
Philipp Hagemeister	aab74fa106	[ted] Simplify embed code (#2587 )	2014-03-20 16:33:23 +01:00
Philipp Hagemeister	2bd9efd4c2	Merge remote-tracking branch 'anovicecodemonkey/TEDIEimprovements'	2014-03-20 16:24:34 +01:00
Jaime Marquínez Ferrándiz	39a743fb9b	[arte] Modernize tests and fix _VALID_REGEX	2014-03-20 09:14:43 +01:00
Jaime Marquínez Ferrándiz	4966a0b22d	[arte] Add extractor for concert.arte.tv (closes #2588 )	2014-03-20 09:11:47 +01:00
anovicecodemonkey	fc26023120	[TEDIE] Add support for embedded TED video URLs	2014-03-20 01:04:21 +10:30
anovicecodemonkey	8d7c0cca13	[generic] Add support for embedded TED videos	2014-03-20 00:56:32 +10:30
Sergey M․	f66ede4328	[arte.tv:+7] Fix _VALID_URL	2014-03-19 21:23:55 +07:00
Philipp Hagemeister	cc88b90ec8	[devscripts/release] Bump the number of password tries to accommodate stubby-fingered @phihag	2014-03-18 15:02:37 +01:00