Compare commits

..

15 Commits

Author SHA1 Message Date
Philipp Hagemeister
23c322a531 release 2014.03.21 2014-03-21 00:37:23 +01:00
Philipp Hagemeister
7e8c0af004 Add --prefer-insecure option (Fixes #2364) 2014-03-21 00:37:10 +01:00
Philipp Hagemeister
d2983ccb25 [ninegag] Modernize and remove unused import 2014-03-21 00:37:10 +01:00
Philipp Hagemeister
f24e9833dc [youporn] Modernize 2014-03-21 00:37:10 +01:00
Sergey M․
bc2bdf5709 [kontrtube] Modernize 2014-03-20 23:05:57 +07:00
Philipp Hagemeister
627a209f74 release 2014.03.20 2014-03-20 16:35:54 +01:00
Philipp Hagemeister
1a4895453a [YoutubeDL] Improve error message 2014-03-20 16:33:46 +01:00
Philipp Hagemeister
aab74fa106 [ted] Simplify embed code (#2587) 2014-03-20 16:33:23 +01:00
Philipp Hagemeister
2bd9efd4c2 Merge remote-tracking branch 'anovicecodemonkey/TEDIEimprovements' 2014-03-20 16:24:34 +01:00
Jaime Marquínez Ferrándiz
39a743fb9b [arte] Modernize tests and fix _VALID_REGEX 2014-03-20 09:14:43 +01:00
Jaime Marquínez Ferrándiz
4966a0b22d [arte] Add extractor for concert.arte.tv (closes #2588) 2014-03-20 09:11:47 +01:00
anovicecodemonkey
fc26023120 [TEDIE] Add support for embeded TED video URLs 2014-03-20 01:04:21 +10:30
anovicecodemonkey
8d7c0cca13 [generic] Add support for embeded TED videos 2014-03-20 00:56:32 +10:30
Sergey M․
f66ede4328 [arte.tv:+7] Fix _VALID_URL 2014-03-19 21:23:55 +07:00
Philipp Hagemeister
cc88b90ec8 [desvscripts/release] Bump the number of password tries to accomodate stubby-fingered @phihag 2014-03-18 15:02:37 +01:00
13 changed files with 104 additions and 42 deletions

View File

@@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
an empty string (--proxy "") for direct
connection
--no-check-certificate Suppress HTTPS certificate validation.
--prefer-insecure Use an unencrypted connection to retrieve
information about the video. (Currently
supported only for YouTube)
--cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information
permanently. By default $XDG_CACHE_HOME

View File

@@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
git checkout HEAD -- youtube-dl youtube-dl.exe
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"

View File

@@ -148,6 +148,8 @@ class YoutubeDL(object):
again.
cookiefile: File name where cookies should be read from and dumped to.
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
@@ -533,7 +535,7 @@ class YoutubeDL(object):
else:
raise
else:
self.report_error('no suitable InfoExtractor: %s' % url)
self.report_error('no suitable InfoExtractor for URL %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info={}):
"""

View File

@@ -237,6 +237,9 @@ def parseOpts(overrideArguments=None):
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--prefer-insecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
@@ -257,7 +260,6 @@ def parseOpts(overrideArguments=None):
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
@@ -756,6 +758,7 @@ def _real_main(argv=None):
'download_archive': download_archive_fn,
'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround,

View File

@@ -10,6 +10,7 @@ from .arte import (
ArteTvIE,
ArteTVPlus7IE,
ArteTVCreativeIE,
ArteTVConcertIE,
ArteTVFutureIE,
ArteTVDDCIE,
)

View File

@@ -131,7 +131,7 @@ class ArteTvIE(InfoExtractor):
class ArteTVPlus7IE(InfoExtractor):
IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod
def _extract_url_info(cls, url):
@@ -202,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
# Prefer http downloads over m3u8
0 if f['url'].endswith('m3u8') else 1,
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
@@ -242,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
_TEST = {
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
'file': '050489-002.mp4',
'info_dict': {
'id': '050489-002',
'ext': 'mp4',
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
},
}
@@ -255,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
_TEST = {
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
'file': '050940-003.mp4',
'info_dict': {
'id': '050940-003',
'ext': 'mp4',
'title': 'Les champignons au secours de la planète',
},
}
@@ -270,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
class ArteTVDDCIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
@@ -284,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE):
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)
class ArteTVConcertIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:concert'
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
_TEST = {
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
'info_dict': {
'id': '186',
'ext': 'mp4',
'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
'upload_date': '20140128',
},
}

View File

@@ -159,6 +159,18 @@ class GenericIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
},
# Embedded TED video
{
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
'info_dict': {
'id': '981',
'ext': 'mp4',
'title': 'My web playroom',
'uploader': 'Ze Frank',
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
}
]
@@ -487,6 +499,13 @@ class GenericIE(InfoExtractor):
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
# Look for embedded TED player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class KontrTubeIE(InfoExtractor):
@@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
'video title')
title = self._html_search_regex(
r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
description = self._html_search_meta('description', webpage, 'video description')
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
webpage)
mobj = re.search(
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
'view count', fatal=False)
view_count = int(view_count) if view_count is not None else None
view_count = self._html_search_regex(
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
comment_count = None
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
fatal=False)
comment_str = self._html_search_regex(
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
if comment_str.startswith('комментариев нет'):
comment_count = 0
else:
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
if mobj:
comment_count = int(mobj.group('total'))
comment_count = mobj.group('total')
return {
'id': video_id,
@@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
}

View File

@@ -1,6 +1,5 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
@@ -12,8 +11,9 @@ class NineGagIE(InfoExtractor):
_TEST = {
"url": "http://9gag.tv/v/1912",
"file": "1912.mp4",
"info_dict": {
"id": "1912",
"ext": "mp4",
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
"view_count": int,

View File

@@ -11,7 +11,9 @@ from ..utils import (
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x)http://www\.ted\.com/
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed)(?P<urlmain>\.ted\.com/
(
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
@@ -19,6 +21,7 @@ class TEDIE(SubtitlesInfoExtractor):
)
(/lang/(.*?))? # The url may contain the language
/(?P<name>\w+) # Here goes the name and then ".html"
.*)$
'''
_TEST = {
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
@@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
if m.group('type') == 'embed':
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
return self.url_result(desktop_url, 'TED')
name = m.group('name')
if m.group('type_talk'):
return self._talk_info(url, name)

View File

@@ -1,3 +1,6 @@
from __future__ import unicode_literals
import json
import re
import sys
@@ -17,24 +20,25 @@ from ..aes import (
class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
_TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4',
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
u'info_dict': {
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
u"uploader": u"Ask Dan And Jennifer",
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
u"age_limit": 18,
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
'info_dict': {
'id': '505835',
'ext': 'mp4',
'upload_date': '20101221',
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
'uploader': 'Ask Dan And Jennifer',
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
url = mobj.group('proto') + 'www.' + mobj.group('url')
req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1')
@@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
age_limit = self._rta_search(webpage)
# Get JSON parameters
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
try:
params = json.loads(json_params)
except:
@@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
webpage, u'download list').strip()
webpage, 'download list').strip()
LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html)
@@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
resolution = format_parts[0]
height = int(resolution[:-len('p')])
bitrate = int(format_parts[1][:-len('k')])
format = u'-'.join(format_parts) + u'-' + dn
format = '-'.join(format_parts) + '-' + dn
formats.append({
'url': video_url,

View File

@@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
def _real_extract(self, url):
proto = (
u'http' if self._downloader.params.get('prefer_insecure', False)
else u'https')
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
video_id = self.extract_id(url)
# Get video webpage
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
video_webpage = self._download_webpage(url, video_id)
# Attempt to extract SWF player URL
@@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'asv': 3,
'sts':'1588',
})
video_info_url = 'https://www.youtube.com/get_video_info?' + data
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
errnote='unable to download video info webpage')
@@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
age_gate = False
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
@@ -1445,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,

View File

@@ -1,2 +1,2 @@
__version__ = '2014.03.18.1'
__version__ = '2014.03.21'