Compare commits

..

6 Commits

Author SHA1 Message Date
Philipp Hagemeister
6086d121cb release 2013.12.17.2 2013-12-17 12:35:57 +01:00
Philipp Hagemeister
7de6e075b4 [radiofrance] remove unused imports 2013-12-17 12:35:16 +01:00
Philipp Hagemeister
946135aa2a [academicearth] remove unused imports 2013-12-17 12:34:30 +01:00
Philipp Hagemeister
42393ce234 Add support for direct links to a video (#1973) 2013-12-17 12:33:55 +01:00
Philipp Hagemeister
d6c7a367e8 [utils] Fix url_basename 2013-12-17 12:32:58 +01:00
Philipp Hagemeister
cecaaf3f58 [generic] Do not use compatibility result fallback 2013-12-17 12:04:33 +01:00
6 changed files with 55 additions and 27 deletions

View File

@@ -188,6 +188,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
self.assertEqual(
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
u'trailer.mp4')
if __name__ == '__main__':
unittest.main()

View File

@@ -1,11 +1,6 @@
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class AcademicEarthCourseIE(InfoExtractor):

View File

@@ -13,6 +13,8 @@ from ..utils import (
ExtractorError,
smuggle_url,
unescapeHTML,
unified_strdate,
url_basename,
)
from .brightcove import BrightcoveIE
@@ -71,6 +73,17 @@ class GenericIE(InfoExtractor):
u'skip_download': True,
},
},
# Direct link to a video
{
u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4',
u'file': u'trailer.mp4',
u'md5': u'67d406c2bcb6af27fa886f31aa934bbe',
u'info_dict': {
u'id': u'trailer',
u'title': u'trailer',
u'upload_date': u'20100513',
}
}
]
def report_download_webpage(self, video_id):
@@ -83,7 +96,7 @@ class GenericIE(InfoExtractor):
"""Report information extraction."""
self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
def _test_redirect(self, url):
def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url."""
class HeadRequest(compat_urllib_request.Request):
def get_method(self):
@@ -131,29 +144,46 @@ class GenericIE(InfoExtractor):
response = opener.open(HeadRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
new_url = response.geturl()
if url == new_url:
return False
self.report_following_redirect(new_url)
return new_url
return response
def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
video_id = os.path.splitext(url.split('/')[-1])[0]
try:
new_url = self._test_redirect(url)
if new_url:
return [self.url_result(new_url)]
response = self._send_head(url)
# Check for redirect
new_url = response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
return self.url_result(new_url)
# Check for direct link to a video
content_type = response.headers.get('Content-Type', '')
m = re.match(r'^(?:audio|video)/(?P<format_id>.+)$', content_type)
if m:
upload_date = response.headers.get('Last-Modified')
if upload_date:
upload_date = unified_strdate(upload_date)
assert (url_basename(url) == 'trailer.mp4')
return {
'id': video_id,
'title': os.path.splitext(url_basename(url))[0],
'formats': [{
'format_id': m.group('format_id'),
'url': url,
}],
'upload_date': upload_date,
}
except compat_urllib_error.HTTPError:
# This may be a stupid server that doesn't like HEAD, our UA, or so
pass
video_id = url.split('/')[-1]
try:
webpage = self._download_webpage(url, video_id)
except ValueError:

View File

@@ -1,12 +1,7 @@
# coding: utf-8
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class RadioFranceIE(InfoExtractor):
@@ -42,11 +37,11 @@ class RadioFranceIE(InfoExtractor):
webpage, u'audio URLs')
formats = [
{
'format_id': m[0],
'url': m[1],
'format_id': fm[0],
'url': fm[1],
'vcodec': 'none',
}
for m in
for fm in
re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
]
# No sorting, we don't know any more about these formats

View File

@@ -761,12 +761,17 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S',
]
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except:
pass
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
@@ -1087,7 +1092,7 @@ def remove_start(s, start):
def url_basename(url):
    """Return the last non-empty path segment of *url*.

    The query string, the fragment and a single trailing slash are ignored.
    Only ``http://``, ``https://`` and protocol-relative (``//host/...``)
    URLs are matched; any other input, or a URL whose path is empty,
    yields an empty string.
    """
    # The optional middle group is ``[^?#]+/`` (slashes allowed) so it can
    # consume any number of intermediate directories, not just one. The
    # older single-directory pattern is dropped: its result was overwritten
    # immediately and never used.
    m = re.match(
        r'(?:https?:|)//[^/]+/(?:[^?#]+/)?([^/?#]+)/?(?:[?#]|$)', url)
    if not m:
        return u''
    return m.group(1)

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.17.1'
__version__ = '2013.12.17.2'