mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-19 22:58:37 +09:00
Compare commits
19 Commits
2014.08.25
...
2014.08.27
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3f514a353e | ||
![]() |
da9ec3b932 | ||
![]() |
191b7cbba9 | ||
![]() |
e8c59b9642 | ||
![]() |
6abb066128 | ||
![]() |
8f1ea7cbb6 | ||
![]() |
a204c85408 | ||
![]() |
15a1f4b8fe | ||
![]() |
c7bee2a725 | ||
![]() |
dbc1366b50 | ||
![]() |
704df56da7 | ||
![]() |
33ac271ba7 | ||
![]() |
0963f92f23 | ||
![]() |
9a66c1079c | ||
![]() |
f971dcbba0 | ||
![]() |
0990305d2a | ||
![]() |
bcc069a937 | ||
![]() |
34708e1bb6 | ||
![]() |
829476b80a |
@@ -102,7 +102,10 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
match_rex = re.compile(match_str)
|
||||
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str) and match_rex.match(got),
|
||||
isinstance(got, compat_str),
|
||||
'Expected a %r object, but got %r' % (compat_str, type(got)))
|
||||
self.assertTrue(
|
||||
match_rex.match(got),
|
||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
|
@@ -7,6 +7,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
assertGreaterEqual,
|
||||
get_params,
|
||||
gettestcases,
|
||||
expect_info_dict,
|
||||
@@ -136,12 +137,18 @@ def generator(test_case):
|
||||
self.assertEqual(res_dict['_type'], 'playlist')
|
||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||
if 'playlist_mincount' in test_case:
|
||||
self.assertGreaterEqual(
|
||||
assertGreaterEqual(
|
||||
self,
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_mincount'],
|
||||
'Expected at least %d in playlist %s, but got only %d' % (
|
||||
test_case['playlist_mincount'], test_case['url'],
|
||||
len(res_dict['entries'])))
|
||||
if 'playlist_count' in test_case:
|
||||
self.assertEqual(
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_count'],
|
||||
'Expected at %d in playlist %s, but got %d.')
|
||||
|
||||
for tc in test_cases:
|
||||
tc_filename = get_tc_filename(tc)
|
||||
|
@@ -310,24 +310,6 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
||||
self.assertEqual(len(result['entries']), 3)
|
||||
|
||||
def test_GoogleSearch(self):
|
||||
dl = FakeYDL()
|
||||
ie = GoogleSearchIE(dl)
|
||||
result = ie.extract('gvsearch15:python language')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'python language')
|
||||
self.assertEqual(result['title'], 'python language')
|
||||
self.assertEqual(len(result['entries']), 15)
|
||||
|
||||
def test_generic_rss_feed(self):
|
||||
dl = FakeYDL()
|
||||
ie = GenericIE(dl)
|
||||
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
|
||||
self.assertEqual(result['title'], 'Zero Punctuation')
|
||||
self.assertTrue(len(result['entries']) > 10)
|
||||
|
||||
def test_ted_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = TEDIE(dl)
|
||||
|
@@ -209,6 +209,7 @@ from .mtv import (
|
||||
MTVIggyIE,
|
||||
)
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspace import MySpaceIE
|
||||
from .myspass import MySpassIE
|
||||
@@ -315,6 +316,7 @@ from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_TESTS]+)))'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
|
@@ -114,7 +114,7 @@ class InfoExtractor(object):
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location of the video.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
duration: Length of the video in seconds, as an integer.
|
||||
@@ -636,6 +636,55 @@ class InfoExtractor(object):
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
    """Build a list of formats from an HLS (m3u8) playlist.

    The playlist at m3u8_url is downloaded and scanned line by line.
    Every URI line becomes one format, annotated with the attributes
    (BANDWIDTH, CODECS, RESOLUTION) of the #EXT-X-STREAM-INF tag that
    precedes it. The playlist URL itself is always included as the
    'm3u8-meta' format.

    m3u8_url -- URL of the playlist to download
    video_id -- identifier handed to _download_webpage
    ext      -- value stored in the 'ext' field of every format

    Returns the list of format dictionaries after it has been passed
    to _sort_formats.
    """
    formats = [{
        'format_id': 'm3u8-meta',
        'url': m3u8_url,
        'ext': ext,
        'protocol': 'm3u8',
        'preference': -1,
        'resolution': 'multiple',
        'format_note': 'Quality selection URL',
    }]

    m3u8_doc = self._download_webpage(m3u8_url, video_id)
    # Start with an empty attribute set (not None) so that a URI line
    # which is not preceded by an #EXT-X-STREAM-INF tag is still handled.
    last_info = {}
    kv_rex = re.compile(
        r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
    for line in m3u8_doc.splitlines():
        if line.startswith('#EXT-X-STREAM-INF:'):
            # Collect KEY=value pairs; quoted values lose their quotes.
            last_info = {}
            for m in kv_rex.finditer(line):
                v = m.group('val')
                if v.startswith('"'):
                    v = v[1:-1]
                last_info[m.group('key')] = v
        elif line.startswith('#') or not line.strip():
            # Other tags, comments and blank lines carry no format.
            continue
        else:
            tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)

            f = {
                # Fall back to a running index when no bitrate is known.
                'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
                'url': line.strip(),
                'tbr': tbr,
                'ext': ext,
            }
            codecs = last_info.get('CODECS')
            if codecs:
                codec_list = [c.strip() for c in codecs.split(',')]
                # Only a two-entry list is read as "video,audio". Any
                # other count (audio-only, video-only, extra tracks)
                # leaves the codec fields unset instead of raising.
                if len(codec_list) == 2:
                    video, audio = codec_list
                    f['vcodec'] = video.partition('.')[0]
                    f['acodec'] = audio.partition('.')[0]
            resolution = last_info.get('RESOLUTION')
            if resolution:
                width_str, height_str = resolution.split('x')
                f['width'] = int(width_str)
                f['height'] = int(height_str)
            formats.append(f)
            # Attributes apply to the next URI line only.
            last_info = {}
    self._sort_formats(formats)
    return formats
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -1,10 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@@ -12,86 +15,98 @@ class EightTracksIE(InfoExtractor):
|
||||
IE_NAME = '8tracks'
|
||||
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||
_TEST = {
|
||||
u"name": u"EightTracks",
|
||||
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||
u"playlist": [
|
||||
"name": "EightTracks",
|
||||
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||
"info_dict": {
|
||||
'id': '1336550',
|
||||
'display_id': 'youtube-dl-test-tracks-a',
|
||||
"description": "test chars: \"'/\\ä↭",
|
||||
"title": "youtube-dl test tracks \"'/\\ä↭<>",
|
||||
},
|
||||
"playlist": [
|
||||
{
|
||||
u"file": u"11885610.m4a",
|
||||
u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
|
||||
u"info_dict": {
|
||||
u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
|
||||
"info_dict": {
|
||||
"id": "11885610",
|
||||
"ext": "m4a",
|
||||
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885608.m4a",
|
||||
u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
|
||||
u"info_dict": {
|
||||
u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "4ab26f05c1f7291ea460a3920be8021f",
|
||||
"info_dict": {
|
||||
"id": "11885608",
|
||||
"ext": "m4a",
|
||||
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885679.m4a",
|
||||
u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
|
||||
u"info_dict": {
|
||||
u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "d30b5b5f74217410f4689605c35d1fd7",
|
||||
"info_dict": {
|
||||
"id": "11885679",
|
||||
"ext": "m4a",
|
||||
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885680.m4a",
|
||||
u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
|
||||
u"info_dict": {
|
||||
u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "4eb0a669317cd725f6bbd336a29f923a",
|
||||
"info_dict": {
|
||||
"id": "11885680",
|
||||
"ext": "m4a",
|
||||
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885682.m4a",
|
||||
u"md5": u"1893e872e263a2705558d1d319ad19e8",
|
||||
u"info_dict": {
|
||||
u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "1893e872e263a2705558d1d319ad19e8",
|
||||
"info_dict": {
|
||||
"id": "11885682",
|
||||
"ext": "m4a",
|
||||
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885683.m4a",
|
||||
u"md5": u"b673c46f47a216ab1741ae8836af5899",
|
||||
u"info_dict": {
|
||||
u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "b673c46f47a216ab1741ae8836af5899",
|
||||
"info_dict": {
|
||||
"id": "11885683",
|
||||
"ext": "m4a",
|
||||
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885684.m4a",
|
||||
u"md5": u"1d74534e95df54986da7f5abf7d842b7",
|
||||
u"info_dict": {
|
||||
u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "1d74534e95df54986da7f5abf7d842b7",
|
||||
"info_dict": {
|
||||
"id": "11885684",
|
||||
"ext": "m4a",
|
||||
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file": u"11885685.m4a",
|
||||
u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
|
||||
u"info_dict": {
|
||||
u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
|
||||
u"uploader_id": u"ytdl"
|
||||
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
|
||||
"info_dict": {
|
||||
"id": "11885685",
|
||||
"ext": "m4a",
|
||||
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
|
||||
json_like = self._search_regex(
|
||||
r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
|
||||
data = json.loads(json_like)
|
||||
|
||||
session = str(random.randint(0, 1000000000))
|
||||
@@ -99,21 +114,30 @@ class EightTracksIE(InfoExtractor):
|
||||
track_count = data['tracks_count']
|
||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||
next_url = first_url
|
||||
res = []
|
||||
entries = []
|
||||
for i in range(track_count):
|
||||
api_json = self._download_webpage(next_url, playlist_id,
|
||||
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
|
||||
errnote=u'Failed to download song information')
|
||||
api_json = self._download_webpage(
|
||||
next_url, playlist_id,
|
||||
note='Downloading song information %d/%d' % (i + 1, track_count),
|
||||
errnote='Failed to download song information')
|
||||
api_data = json.loads(api_json)
|
||||
track_data = api_data[u'set']['track']
|
||||
track_data = api_data['set']['track']
|
||||
info = {
|
||||
'id': track_data['id'],
|
||||
'id': compat_str(track_data['id']),
|
||||
'url': track_data['track_file_stream_url'],
|
||||
'title': track_data['performer'] + u' - ' + track_data['name'],
|
||||
'raw_title': track_data['name'],
|
||||
'uploader_id': data['user']['login'],
|
||||
'ext': 'm4a',
|
||||
}
|
||||
res.append(info)
|
||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
|
||||
return res
|
||||
entries.append(info)
|
||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
|
||||
session, mix_id, track_data['id'])
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'id': compat_str(mix_id),
|
||||
'display_id': playlist_id,
|
||||
'title': data.get('name'),
|
||||
'description': data.get('description'),
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import fix_xml_ampersands
|
||||
|
||||
|
||||
class EmpflixIE(InfoExtractor):
|
||||
@@ -36,7 +37,8 @@ class EmpflixIE(InfoExtractor):
|
||||
webpage, 'flashvars.config')
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, video_id, note='Downloading metadata')
|
||||
cfg_url, video_id, note='Downloading metadata',
|
||||
transform_source=fix_xml_ampersands)
|
||||
|
||||
formats = [
|
||||
{
|
||||
@@ -44,11 +46,13 @@ class EmpflixIE(InfoExtractor):
|
||||
'format_id': item.find('res').text,
|
||||
} for item in cfg_xml.findall('./quality/item')
|
||||
]
|
||||
thumbnail = cfg_xml.find('./startThumb').text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -341,6 +341,16 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'www.handjobhub.com',
|
||||
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
|
||||
}
|
||||
},
|
||||
# RSS feed
|
||||
{
|
||||
'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||
'info_dict': {
|
||||
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||
'title': 'Zero Punctuation',
|
||||
'description': 're:'
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -809,7 +819,6 @@ class GenericIE(InfoExtractor):
|
||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||
''', webpage)
|
||||
assert found
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
|
@@ -14,6 +14,14 @@ class GoogleSearchIE(SearchInfoExtractor):
|
||||
_MAX_RESULTS = 1000
|
||||
IE_NAME = 'video.google:search'
|
||||
_SEARCH_KEY = 'gvsearch'
|
||||
_TEST = {
|
||||
'url': 'gvsearch15:python language',
|
||||
'info_dict': {
|
||||
'id': 'python language',
|
||||
'title': 'python language',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
}
|
||||
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
|
@@ -18,6 +18,7 @@ class IGNIE(InfoExtractor):
|
||||
_DESCRIPTION_RE = [
|
||||
r'<span class="page-object-description">(.+?)</span>',
|
||||
r'id="my_show_video">.*?<p>(.*?)</p>',
|
||||
r'<meta name="description" content="(.*?)"',
|
||||
]
|
||||
|
||||
_TESTS = [
|
||||
@@ -55,6 +56,17 @@ class IGNIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
|
||||
'info_dict': {
|
||||
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
'description': 'Giant skeletons, bloody hunts, and captivating'
|
||||
' natural beauty take our breath away.',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _find_video_id(self, webpage):
|
||||
@@ -62,6 +74,7 @@ class IGNIE(InfoExtractor):
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
|
||||
]
|
||||
return self._search_regex(res_id, webpage, 'video id')
|
||||
|
||||
@@ -70,10 +83,7 @@ class IGNIE(InfoExtractor):
|
||||
name_or_id = mobj.group('name_or_id')
|
||||
page_type = mobj.group('type')
|
||||
webpage = self._download_webpage(url, name_or_id)
|
||||
if page_type == 'articles':
|
||||
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
|
||||
return self.url_result(video_url, ie='IGN')
|
||||
elif page_type != 'video':
|
||||
if page_type != 'video':
|
||||
multiple_urls = re.findall(
|
||||
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
webpage)
|
||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
@@ -38,7 +39,7 @@ class MixcloudIE(InfoExtractor):
|
||||
try:
|
||||
# We only want to know if the request succeeds
|
||||
# don't download the whole file
|
||||
self._request_webpage(url, None, False)
|
||||
self._request_webpage(HEADRequest(url), None, False)
|
||||
return url
|
||||
except ExtractorError:
|
||||
url = None
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
@@ -8,15 +10,17 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class MofosexIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
|
||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||
u'file': u'5018.mp4',
|
||||
u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
|
||||
u'info_dict': {
|
||||
u"title": u"Japanese Teen Music Video",
|
||||
u"age_limit": 18,
|
||||
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||
'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
|
||||
'info_dict': {
|
||||
'id': '5018',
|
||||
'ext': 'mp4',
|
||||
'title': 'Japanese Teen Music Video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,8 +33,8 @@ class MofosexIE(InfoExtractor):
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
|
78
youtube_dl/extractor/musicvault.py
Normal file
78
youtube_dl/extractor/musicvault.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
strip_jsonp,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MusicVaultIE(InfoExtractor):
    """Information extractor for video pages on www.musicvault.com."""

    _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
        'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
        'info_dict': {
            'id': '1010863',
            'ext': 'mp4',
            'uploader_id': 'the-allman-brothers-band',
            'title': 'Straight from the Heart',
            'duration': 244,
            'uploader': 'The Allman Brothers Band',
            'thumbnail': 're:^https?://.*/thumbnail/.*',
            'upload_date': '19811216',
            'location': 'Capitol Theatre (Passaic, NJ)',
            'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
        }
    }

    def _real_extract(self, url):
        """Scrape the video page at url and return its info dictionary."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)

        thumbnail = self._search_regex(
            r'<meta itemprop="thumbnail" content="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        # The <div class="data"> block holds the metadata as headings:
        # h1 = uploader, h2 = title, h3 = date, h4 = location.
        data_div = self._search_regex(
            r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
        uploader = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
        title = self._html_search_regex(
            r'<h2.*?>(.*?)</h2>', data_div, 'title')
        upload_date_str = self._html_search_regex(
            r'<h3.*?>(.*?)</h3>', data_div, 'upload date', fatal=False)
        # The lookup above is non-fatal; presumably it yields None on a
        # miss (TODO confirm), so only parse a value that is present.
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None else None)
        location = self._html_search_regex(
            r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)

        duration = parse_duration(self._html_search_meta('duration', webpage))

        # The media URL is assembled from three ids found in the page.
        VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
        kaltura_id = self._search_regex(
            r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
            webpage, 'kaltura ID')
        video_url = VIDEO_URL_TEMPLATE % {
            'entry_id': kaltura_id,
            'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
            'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
        }

        return {
            'id': mobj.group('id'),
            'url': video_url,
            'ext': 'mp4',
            'display_id': display_id,
            'uploader_id': mobj.group('uploader_id'),
            'thumbnail': thumbnail,
            'description': self._html_search_meta('description', webpage),
            'upload_date': upload_date,
            'location': location,
            'title': title,
            'uploader': uploader,
            'duration': duration,
        }
|
@@ -61,7 +61,10 @@ class SockshareIE(InfoExtractor):
|
||||
r'<a href="([^"]*)".+class="download_file_link"',
|
||||
webpage, 'file url')
|
||||
video_url = "http://www.sockshare.com" + video_url
|
||||
title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
|
||||
title = self._html_search_regex((
|
||||
r'<h1>(.+)<strong>',
|
||||
r'var name = "([^"]+)";'),
|
||||
webpage, 'title', default=None)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<img\s+src="([^"]*)".+?name="bg"',
|
||||
webpage, 'thumbnail')
|
||||
|
77
youtube_dl/extractor/sportdeutschland.py
Normal file
77
youtube_dl/extractor/sportdeutschland.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class SportDeutschlandIE(InfoExtractor):
    """Information extractor for video pages on sportdeutschland.tv."""

    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
    _TEST = {
        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
        'info_dict': {
            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
            'ext': 'mp4',
            'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
            'categories': ['Badminton'],
            'view_count': int,
            # Raw string: same value, but no invalid "\." escape sequence.
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
            'timestamp': 1409043600,
            'upload_date': '20140826',
        },
        'params': {
            'skip_download': 'Live stream',
        },
    }

    def _real_extract(self, url):
        """Query the splink.tv API for url and return the info dictionary."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        sport_id = mobj.group('sport')

        # The metadata comes from a JSON API addressed by sport and video
        # slug; the request carries a vendor Accept type and the page URL
        # as Referer.
        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
            sport_id, video_id)
        req = compat_urllib_request.Request(api_url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',
            'Referer': url,
        })
        data = self._download_json(req, video_id)

        categories = list(data.get('section', {}).get('tags', {}).values())
        asset = data['asset']

        # The HLS playlist URL is derived from the SMIL URL by swapping
        # the file extension.
        smil_url = asset['video']
        m3u8_url = smil_url.replace('.smil', '.m3u8')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')

        # The SMIL document additionally lists RTMP streams: the base URL
        # sits in the head, one play path per <video> node in the body.
        smil_doc = self._download_xml(
            smil_url, video_id, note='Downloading SMIL metadata')
        base_url = smil_doc.find('./head/meta').attrib['base']
        formats.extend([{
            'format_id': 'rtmp',
            'url': base_url,
            'play_path': n.attrib['src'],
            'ext': 'flv',
            'preference': -100,
            'format_note': 'Seems to fail at example stream',
        } for n in smil_doc.findall('./body/video')])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': asset['title'],
            'thumbnail': asset.get('image'),
            'description': asset.get('teaser'),
            'categories': categories,
            'view_count': asset.get('views'),
            'rtmp_live': asset['live'],
            'timestamp': parse_iso8601(asset.get('date')),
        }
|
||||
|
@@ -766,10 +766,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
return ret
|
||||
|
||||
def http_request(self, req):
|
||||
for h,v in std_headers.items():
|
||||
if h in req.headers:
|
||||
del req.headers[h]
|
||||
req.add_header(h, v)
|
||||
for h, v in std_headers.items():
|
||||
if h not in req.headers:
|
||||
req.add_header(h, v)
|
||||
if 'Youtubedl-no-compression' in req.headers:
|
||||
if 'Accept-encoding' in req.headers:
|
||||
del req.headers['Accept-encoding']
|
||||
@@ -1458,6 +1457,12 @@ def urlencode_postdata(*args, **kargs):
|
||||
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||
|
||||
|
||||
# etree_iter(node) walks node and everything below it in document order.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        """Return n followed by all of its descendants.

        Element.iter() also yields the element itself, so the fallback
        has to include n; findall('.//*') alone returns only descendants.
        """
        return [n] + n.findall('.//*')
|
||||
|
||||
|
||||
def parse_xml(s):
|
||||
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
@@ -1465,7 +1470,14 @@ def parse_xml(s):
|
||||
|
||||
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
|
||||
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
|
||||
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
||||
tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
||||
# Fix up XML parser in Python 2.x
|
||||
if sys.version_info < (3, 0):
|
||||
for n in etree_iter(tree):
|
||||
if n.text is not None:
|
||||
if not isinstance(n.text, compat_str):
|
||||
n.text = n.text.decode('utf-8')
|
||||
return tree
|
||||
|
||||
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.08.25.2'
|
||||
__version__ = '2014.08.27'
|
||||
|
Reference in New Issue
Block a user