mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-09-28 12:28:37 +09:00
Compare commits
39 Commits
2014.09.14
...
2014.09.16
Author | SHA1 | Date | |
---|---|---|---|
![]() |
6b6096d0b7 | ||
![]() |
d0246d07f1 | ||
![]() |
727a98c3ee | ||
![]() |
997987d568 | ||
![]() |
c001f939e4 | ||
![]() |
e825c38082 | ||
![]() |
a04aa7a9e6 | ||
![]() |
7cdd5339b3 | ||
![]() |
38349518f1 | ||
![]() |
64892c0b79 | ||
![]() |
dc9f356846 | ||
![]() |
ed86ee3b4a | ||
![]() |
7bb5df1cda | ||
![]() |
37a81dff04 | ||
![]() |
fc96eb4e21 | ||
![]() |
ae369738b0 | ||
![]() |
e2037b3f7d | ||
![]() |
5419033935 | ||
![]() |
2eebf060af | ||
![]() |
acd9db5902 | ||
![]() |
d0e8b3d59b | ||
![]() |
c15dd15388 | ||
![]() |
0003a5c416 | ||
![]() |
21f2927f70 | ||
![]() |
e5a79071a5 | ||
![]() |
ca0e7a2b17 | ||
![]() |
b523bb71ab | ||
![]() |
a020a0dc20 | ||
![]() |
6d1f2431bd | ||
![]() |
fdea3abdf8 | ||
![]() |
59d284c316 | ||
![]() |
98703c7fbf | ||
![]() |
b04c8f7358 | ||
![]() |
56d1912f1d | ||
![]() |
eb3bd7ba8d | ||
![]() |
2bca84e345 | ||
![]() |
984e8e14ea | ||
![]() |
d05cfe0600 | ||
![]() |
37419b4f99 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,6 +11,7 @@ MANIFEST
|
||||
README.txt
|
||||
youtube-dl.1
|
||||
youtube-dl.bash-completion
|
||||
youtube-dl.fish
|
||||
youtube-dl
|
||||
youtube-dl.exe
|
||||
youtube-dl.tar.gz
|
||||
|
@@ -2,5 +2,6 @@ include README.md
|
||||
include test/*.py
|
||||
include test/*.json
|
||||
include youtube-dl.bash-completion
|
||||
include youtube-dl.fish
|
||||
include youtube-dl.1
|
||||
recursive-include docs Makefile conf.py *.rst
|
||||
|
20
Makefile
20
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
@@ -29,6 +29,8 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||
install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
|
||||
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
|
||||
install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
|
||||
install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
|
||||
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
|
||||
|
||||
test:
|
||||
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
||||
@@ -36,9 +38,9 @@ test:
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
.PHONY: all clean install test tar bash-completion pypi-files
|
||||
.PHONY: all clean install test tar bash-completion pypi-files fish-completion
|
||||
|
||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
|
||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
||||
|
||||
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
|
||||
@@ -64,7 +66,12 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co
|
||||
|
||||
bash-completion: youtube-dl.bash-completion
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
|
||||
python devscripts/fish-completion.py
|
||||
|
||||
fish-completion: youtube-dl.fish
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
@@ -78,5 +85,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||
youtube-dl.fish setup.py \
|
||||
youtube-dl
|
||||
|
5
devscripts/fish-completion.in
Normal file
5
devscripts/fish-completion.in
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
{{commands}}
|
||||
|
||||
|
||||
complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
48
devscripts/fish-completion.py
Executable file
48
devscripts/fish-completion.py
Executable file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import optparse
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
from youtube_dl.utils import shell_quote
|
||||
|
||||
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
||||
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
||||
|
||||
EXTRA_ARGS = {
|
||||
'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
|
||||
|
||||
# Options that need a file parameter
|
||||
'download-archive': ['--require-parameter'],
|
||||
'cookies': ['--require-parameter'],
|
||||
'load-info': ['--require-parameter'],
|
||||
'batch-file': ['--require-parameter'],
|
||||
}
|
||||
|
||||
def build_completion(opt_parser):
|
||||
commands = []
|
||||
|
||||
for group in opt_parser.option_groups:
|
||||
for option in group.option_list:
|
||||
long_option = option.get_opt_string().strip('-')
|
||||
help_msg = shell_quote([option.help])
|
||||
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
||||
if option._short_opts:
|
||||
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
||||
if option.help != optparse.SUPPRESS_HELP:
|
||||
complete_cmd += ['--description', option.help]
|
||||
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
||||
commands.append(shell_quote(complete_cmd))
|
||||
|
||||
with open(FISH_COMPLETION_TEMPLATE) as f:
|
||||
template = f.read()
|
||||
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
||||
with open(FISH_COMPLETION_FILE, 'w') as f:
|
||||
f.write(filled_template)
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
1
setup.py
1
setup.py
@@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
else:
|
||||
files_spec = [
|
||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||
('etc/fish/completions', ['youtube-dl.fish']),
|
||||
('share/doc/youtube_dl', ['README.txt']),
|
||||
('share/man/man1', ['youtube-dl.1'])
|
||||
]
|
||||
|
@@ -40,6 +40,9 @@ from youtube_dl.utils import (
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
uppercase_escape,
|
||||
limit_length,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -286,5 +289,41 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
||||
def test_limit_length(self):
|
||||
self.assertEqual(limit_length(None, 12), None)
|
||||
self.assertEqual(limit_length('foo', 12), 'foo')
|
||||
self.assertTrue(
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
self.assertEqual(escape_rfc3986(reserved), reserved)
|
||||
self.assertEqual(escape_rfc3986(unreserved), unreserved)
|
||||
self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||
self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
|
||||
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
|
||||
|
||||
def test_escape_url(self):
|
||||
self.assertEqual(
|
||||
escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
|
||||
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
|
||||
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/фрагмент'),
|
||||
'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/абв?абв=абв#абв'),
|
||||
'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||
)
|
||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -28,6 +28,7 @@ from .utils import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
escape_url,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
@@ -1241,6 +1242,25 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
|
||||
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||
# To work around aforementioned issue we will replace request's original URL with
|
||||
# percent-encoded one
|
||||
url = req if isinstance(req, compat_str) else req.get_full_url()
|
||||
url_escaped = escape_url(url)
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
if isinstance(req, compat_str):
|
||||
req = url_escaped
|
||||
else:
|
||||
req = compat_urllib_request.Request(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
|
@@ -75,6 +75,8 @@ __authors__ = (
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
'Hari Padmanaban',
|
||||
'Carlos Ramos',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -6,6 +6,7 @@ from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allmyvideos import AllmyvideosIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
@@ -25,6 +26,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
@@ -83,6 +85,7 @@ from .dropbox import DropboxIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .ellentv import (
|
||||
EllenTVIE,
|
||||
@@ -365,6 +368,7 @@ from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
|
59
youtube_dl/extractor/allmyvideos.py
Normal file
59
youtube_dl/extractor/allmyvideos.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class AllmyvideosIE(InfoExtractor):
|
||||
IE_NAME = 'allmyvideos.net'
|
||||
_VALID_URL = r'https?://allmyvideos\.net/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://allmyvideos.net/jih3nce3x6wn',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
'info_dict': {
|
||||
'id': 'jih3nce3x6wn',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
|
||||
#Could be several links with different quality
|
||||
links = re.findall(r'"file" : "?(.+?)",', webpage)
|
||||
# Assume the links are ordered in quality
|
||||
formats = [{
|
||||
'url': l,
|
||||
'quality': i,
|
||||
} for i, l in enumerate(links)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
@@ -51,9 +51,6 @@ class ARDMediathekIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
|
53
youtube_dl/extractor/behindkink.py
Normal file
53
youtube_dl/extractor/behindkink.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
|
||||
'md5': '41ad01222b8442089a55528fec43ec01',
|
||||
'info_dict': {
|
||||
'id': '36370',
|
||||
'ext': 'mp4',
|
||||
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
|
||||
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
|
||||
'upload_date': '20140814',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
year = mobj.group('year')
|
||||
month = mobj.group('month')
|
||||
day = mobj.group('day')
|
||||
upload_date = year + month + day
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"'file':\s*'([^']+)'",
|
||||
webpage, 'URL base')
|
||||
|
||||
video_id = url_basename(video_url)
|
||||
video_id = video_id.split('_')[0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'display_id': display_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
}
|
@@ -9,6 +9,8 @@ from ..utils import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
remove_end,
|
||||
HEADRequest,
|
||||
compat_HTTPError,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,6 +23,7 @@ class CloudyIE(InfoExtractor):
|
||||
'''
|
||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||
_MAX_TRIES = 2
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
@@ -42,24 +45,30 @@ class CloudyIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
|
||||
file_key = self._search_regex(
|
||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({
|
||||
form = {
|
||||
'file': video_id,
|
||||
'key': file_key,
|
||||
}))
|
||||
}
|
||||
|
||||
if error_url:
|
||||
form.update({
|
||||
'numOfErrors': try_num,
|
||||
'errorCode': '404',
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
|
||||
player_data = self._download_webpage(
|
||||
data_url, video_id, 'Downloading player data')
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
|
||||
if 'error' in data:
|
||||
raise ExtractorError(
|
||||
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
|
||||
@@ -69,16 +78,31 @@ class CloudyIE(InfoExtractor):
|
||||
if title:
|
||||
title = remove_end(title, '&asdasdas').strip()
|
||||
|
||||
formats = []
|
||||
video_url = data.get('url', [None])[0]
|
||||
|
||||
if video_url:
|
||||
formats.append({
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
})
|
||||
try:
|
||||
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
|
@@ -11,10 +11,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class DaumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
|
||||
IE_NAME = 'daum.net'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||
'info_dict': {
|
||||
'id': '52554690',
|
||||
@@ -24,11 +24,17 @@ class DaumIE(InfoExtractor):
|
||||
'upload_date': '20130831',
|
||||
'duration': 3868,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||
webpage = self._download_webpage(canonical_url, video_id)
|
||||
full_id = self._search_regex(
|
||||
@@ -42,7 +48,6 @@ class DaumIE(InfoExtractor):
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||
formats = []
|
||||
for format_el in urls.findall('result/output_list/output_list'):
|
||||
profile = format_el.attrib['profile']
|
||||
@@ -52,7 +57,7 @@ class DaumIE(InfoExtractor):
|
||||
})
|
||||
url_doc = self._download_xml(
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||
video_id, note=False)
|
||||
video_id, note='Downloading video data for %s format' % profile)
|
||||
format_url = url_doc.find('result/url').text
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
61
youtube_dl/extractor/einthusan.py
Normal file
61
youtube_dl/extractor/einthusan.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ek Villain',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
|
||||
webpage, 'video url')
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
||||
webpage, "thumbnail url", fatal=False)
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
@@ -12,8 +12,8 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
|
||||
ExtractorError,
|
||||
limit_length,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,6 +37,14 @@ class FacebookIE(InfoExtractor):
|
||||
'duration': 38,
|
||||
'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...',
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -131,8 +139,7 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
if len(video_title) > 80 + 3:
|
||||
video_title = video_title[:80] + '...'
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
|
||||
|
@@ -8,45 +8,68 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
compat_urllib_parse_urlparse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_video(self, video_id):
|
||||
info = self._download_xml(
|
||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||
'getInfosOeuvre.php?id-diffusion='
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
def _extract_video(self, video_id, catalogue):
|
||||
info = self._download_json(
|
||||
'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
|
||||
% (video_id, catalogue),
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
||||
|
||||
if manifest_url.startswith('rtmp'):
|
||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
||||
else:
|
||||
formats = []
|
||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
||||
for index, format_descr in enumerate(available_formats.split(',')):
|
||||
format_info = {
|
||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
||||
if m_resolution is not None:
|
||||
format_info.update({
|
||||
'width': int(m_resolution.group('width')),
|
||||
'height': int(m_resolution.group('height')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||
|
||||
thumbnail_path = info.find('image').text
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
continue
|
||||
video_url = video['url']
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = video['format']
|
||||
if video_url.endswith('.f4m'):
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
for f4m_format in f4m_formats:
|
||||
f4m_format['preference'] = 1
|
||||
formats.extend(f4m_formats)
|
||||
elif video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'preference': 1,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'preference': 2,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('titre').text,
|
||||
'title': info['titre'],
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
|
||||
|
||||
@@ -61,7 +84,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion="(\d+)"', webpage, 'ID')
|
||||
return self._extract_video(video_id)
|
||||
return self._extract_video(video_id, 'Pluzz')
|
||||
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
@@ -70,13 +93,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
'md5': '9cecf35f99c4079c199e9817882a9a1c',
|
||||
'info_dict': {
|
||||
'id': '84981923',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Soir 3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'upload_date': '20130826',
|
||||
'timestamp': 1377548400,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
@@ -88,15 +111,17 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
}
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
@@ -112,91 +137,77 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
# france2
|
||||
{
|
||||
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
'file': '75540104.mp4',
|
||||
'md5': 'c03fc87cb85429ffd55df32b9fc05523',
|
||||
'info_dict': {
|
||||
'title': '13h15, le samedi...',
|
||||
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': '109169362',
|
||||
'ext': 'flv',
|
||||
'title': '13h15, le dimanche...',
|
||||
'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
|
||||
'upload_date': '20140914',
|
||||
'timestamp': 1410693600,
|
||||
},
|
||||
},
|
||||
# france3
|
||||
{
|
||||
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||
'md5': '679bb8f8921f8623bd658fa2f8364da0',
|
||||
'info_dict': {
|
||||
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le scandale du prix des médicaments',
|
||||
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'upload_date': '20131113',
|
||||
'timestamp': 1384380000,
|
||||
},
|
||||
},
|
||||
# france4
|
||||
{
|
||||
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
|
||||
'info_dict': {
|
||||
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hero Corp Making of - Extrait 1',
|
||||
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'upload_date': '20131106',
|
||||
'timestamp': 1383766500,
|
||||
},
|
||||
},
|
||||
# france5
|
||||
{
|
||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||
'md5': '78f0f4064f9074438e660785bbf2c5d9',
|
||||
'info_dict': {
|
||||
'id': '92837968',
|
||||
'ext': 'mp4',
|
||||
'id': '108961659',
|
||||
'ext': 'flv',
|
||||
'title': 'C à dire ?!',
|
||||
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410795000,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
||||
'info_dict': {
|
||||
'id': '92327925',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infô-Afrique',
|
||||
'id': '108634970',
|
||||
'ext': 'flv',
|
||||
'title': 'Infô Afrique',
|
||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410822000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The id changes frequently',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('key'):
|
||||
webpage = self._download_webpage(url, mobj.group('key'))
|
||||
id_res = [
|
||||
(r'''(?x)<div\s+class="video-player">\s*
|
||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
||||
class="francetv-video-player">'''),
|
||||
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
||||
'\.fr/\?id-video=([^"/&]+)'),
|
||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||
]
|
||||
video_id = self._html_search_regex(id_res, webpage, 'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_video(video_id)
|
||||
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class GenerationQuoiIE(InfoExtractor):
|
||||
@@ -232,16 +243,15 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
||||
'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
|
||||
'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
|
||||
'info_dict': {
|
||||
'id': 'EV_6785',
|
||||
'ext': 'mp4',
|
||||
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
||||
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': 'EV_22853',
|
||||
'ext': 'flv',
|
||||
'title': 'Dans les jardins de William Christie - Le Camus',
|
||||
'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
|
||||
'upload_date': '20140829',
|
||||
'timestamp': 1409317200,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -249,5 +259,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
|
||||
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
@@ -877,7 +877,7 @@ class GenericIE(InfoExtractor):
|
||||
if not found:
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||
webpage)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
|
@@ -46,9 +46,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
'info_dict': {
|
||||
'id': '453614',
|
||||
@@ -58,7 +58,10 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'duration': 18,
|
||||
'upload_date': '20131006',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -8,11 +8,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
|
||||
_find = lambda el, p: el.find(_x(p)).text.strip()
|
||||
|
||||
|
||||
class NosVideoIE(InfoExtractor):
|
||||
@@ -53,9 +53,15 @@ class NosVideoIE(InfoExtractor):
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(_x('.//xspf:track'))
|
||||
title = _find(track, './xspf:title')
|
||||
url = _find(track, './xspf:file')
|
||||
thumbnail = _find(track, './xspf:image')
|
||||
if track is None:
|
||||
raise ExtractorError(
|
||||
'XML playlist is missing the \'track\' element',
|
||||
expected=True)
|
||||
title = xpath_text(track, _x('./xspf:title'), 'title')
|
||||
url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
|
||||
thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
|
||||
if title is not None:
|
||||
title = title.strip()
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
67
youtube_dl/extractor/turbo.py
Normal file
67
youtube_dl/extractor/turbo.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class TurboIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
|
||||
_API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
|
||||
_TEST = {
|
||||
'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
|
||||
'md5': '33f4b91099b36b5d5a91f84b5bcba600',
|
||||
'info_dict': {
|
||||
'id': '454443',
|
||||
'ext': 'mp4',
|
||||
'duration': 3715,
|
||||
'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
|
||||
'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist = self._download_xml(self._API_URL.format(video_id), video_id)
|
||||
item = playlist.find('./channel/item')
|
||||
if item is None:
|
||||
raise ExtractorError('Playlist item was not found', expected=True)
|
||||
|
||||
title = xpath_text(item, './title', 'title')
|
||||
duration = int_or_none(xpath_text(item, './durate', 'duration'))
|
||||
thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
formats = []
|
||||
get_quality = qualities(['3g', 'sd', 'hq'])
|
||||
for child in item:
|
||||
m = re.search(r'url_video_(?P<quality>.+)', child.tag)
|
||||
if m:
|
||||
quality = m.group('quality')
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'url': child.text,
|
||||
'quality': get_quality(quality),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
@@ -11,22 +11,48 @@ from ..utils import (
|
||||
|
||||
class VpornIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
|
||||
'md5': 'facf37c1b86546fa0208058546842c55',
|
||||
'info_dict': {
|
||||
'id': '497944',
|
||||
'display_id': 'violet-on-her-th-birthday',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violet on her 19th birthday',
|
||||
'description': 'Violet dances in front of the camera which is sure to get you horny.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'kileyGrope',
|
||||
'categories': ['Masturbation', 'Teen'],
|
||||
'duration': 393,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
|
||||
'md5': 'facf37c1b86546fa0208058546842c55',
|
||||
'info_dict': {
|
||||
'id': '497944',
|
||||
'display_id': 'violet-on-her-th-birthday',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violet on her 19th birthday',
|
||||
'description': 'Violet dances in front of the camera which is sure to get you horny.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'kileyGrope',
|
||||
'categories': ['Masturbation', 'Teen'],
|
||||
'duration': 393,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vporn.com/female/hana-shower/523564/',
|
||||
'md5': 'ced35a4656198a1664cf2cda1575a25f',
|
||||
'info_dict': {
|
||||
'id': '523564',
|
||||
'display_id': 'hana-shower',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hana Shower',
|
||||
'description': 'Hana showers at the bathroom.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Hmmmmm',
|
||||
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
|
||||
'duration': 588,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -64,7 +90,7 @@ class VpornIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
|
||||
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
|
||||
video_url = video[1]
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
|
@@ -1437,6 +1437,24 @@ def uppercase_escape(s):
|
||||
lambda m: unicode_escape(m.group(0))[0],
|
||||
s)
|
||||
|
||||
|
||||
def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0) and isinstance(s, unicode):
|
||||
s = s.encode('utf-8')
|
||||
return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
|
||||
|
||||
|
||||
def escape_url(url):
|
||||
"""Escape URL as suggested by RFC 3986"""
|
||||
url_parsed = compat_urllib_parse_urlparse(url)
|
||||
return url_parsed._replace(
|
||||
path=escape_rfc3986(url_parsed.path),
|
||||
params=escape_rfc3986(url_parsed.params),
|
||||
query=escape_rfc3986(url_parsed.query),
|
||||
fragment=escape_rfc3986(url_parsed.fragment)
|
||||
).geturl()
|
||||
|
||||
try:
|
||||
struct.pack(u'!I', 0)
|
||||
except TypeError:
|
||||
@@ -1571,3 +1589,13 @@ except AttributeError:
|
||||
if ret:
|
||||
raise subprocess.CalledProcessError(ret, p.args, output=output)
|
||||
return output
|
||||
|
||||
|
||||
def limit_length(s, length):
|
||||
""" Add ellipses to overly long strings """
|
||||
if s is None:
|
||||
return None
|
||||
ELLIPSES = '...'
|
||||
if len(s) > length:
|
||||
return s[:length - len(ELLIPSES)] + ELLIPSES
|
||||
return s
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.14.2'
|
||||
__version__ = '2014.09.16.1'
|
||||
|
Reference in New Issue
Block a user