mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-19 22:58:37 +09:00
Compare commits
94 Commits
2015.07.21
...
2015.07.28
Author | SHA1 | Date | |
---|---|---|---|
![]() |
289bbb350e | ||
![]() |
d247a2c8bf | ||
![]() |
88ed52aec9 | ||
![]() |
4c6bd5b5b6 | ||
![]() |
aeb7b41d44 | ||
![]() |
5bdec59de1 | ||
![]() |
7a89681722 | ||
![]() |
51da40e621 | ||
![]() |
1af330f29f | ||
![]() |
9afa1770d1 | ||
![]() |
3ebbcce1c7 | ||
![]() |
2c7c721933 | ||
![]() |
7523647391 | ||
![]() |
9700cd9097 | ||
![]() |
eab7faa0c1 | ||
![]() |
a56c1e38c7 | ||
![]() |
40a2d17052 | ||
![]() |
b14fa8e687 | ||
![]() |
678e436f2e | ||
![]() |
ff81c4c99c | ||
![]() |
420658e6cb | ||
![]() |
593ddd851b | ||
![]() |
1243402657 | ||
![]() |
2b2ee140c3 | ||
![]() |
d97f5cd795 | ||
![]() |
f3f0b8e403 | ||
![]() |
660f9459da | ||
![]() |
10952eb2cf | ||
![]() |
cdad742700 | ||
![]() |
a9e8f60ef6 | ||
![]() |
a8b7b26068 | ||
![]() |
ba911137fa | ||
![]() |
d3f007af18 | ||
![]() |
2929fa0e79 | ||
![]() |
297a564bee | ||
![]() |
53b8247cb5 | ||
![]() |
59db9f8018 | ||
![]() |
b73b14f72c | ||
![]() |
41597d9bed | ||
![]() |
b37317d8b0 | ||
![]() |
87dc451108 | ||
![]() |
ca4456eda8 | ||
![]() |
993df6bc22 | ||
![]() |
61be92e26a | ||
![]() |
c59b61c0da | ||
![]() |
3e214851a4 | ||
![]() |
a47b602b08 | ||
![]() |
a083b859e4 | ||
![]() |
948199deac | ||
![]() |
c356620ec1 | ||
![]() |
f79ebf09a2 | ||
![]() |
c7620992d2 | ||
![]() |
ce1bafdce9 | ||
![]() |
9872e588c8 | ||
![]() |
d609edf4f1 | ||
![]() |
3a99d321a8 | ||
![]() |
4bb3d999ac | ||
![]() |
40101dc311 | ||
![]() |
e9c6deffee | ||
![]() |
9c29bc69f7 | ||
![]() |
1e12429564 | ||
![]() |
795704f0f1 | ||
![]() |
981b9cdc8c | ||
![]() |
3f724339db | ||
![]() |
70c857b728 | ||
![]() |
c84683c88b | ||
![]() |
b68a2613f8 | ||
![]() |
28afa6e77a | ||
![]() |
496ce6b349 | ||
![]() |
7c80519cbf | ||
![]() |
8250c32f49 | ||
![]() |
0eacd2aaae | ||
![]() |
a5dd9a0c5d | ||
![]() |
726adc43ec | ||
![]() |
3c6ae8b59e | ||
![]() |
e89d7e3029 | ||
![]() |
a3bfddfa5e | ||
![]() |
36da48798a | ||
![]() |
aa5740fb61 | ||
![]() |
da92eeae42 | ||
![]() |
a9dcf4a860 | ||
![]() |
2a282a3b5f | ||
![]() |
7bb23aeca4 | ||
![]() |
de939d89eb | ||
![]() |
77c975f536 | ||
![]() |
75ab0ebcf5 | ||
![]() |
10273d6e08 | ||
![]() |
d5552a3477 | ||
![]() |
a8b081a052 | ||
![]() |
9e96dc8b35 | ||
![]() |
a650110ba7 | ||
![]() |
54b31d149e | ||
![]() |
a745475808 | ||
![]() |
f11554092b |
2
AUTHORS
2
AUTHORS
@@ -133,3 +133,5 @@ Remita Amine
|
||||
Aurélio A. Heckert
|
||||
Bernhard Minks
|
||||
sceext
|
||||
Zach Bruggeman
|
||||
Tjark Saul
|
||||
|
@@ -51,6 +51,7 @@
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
@@ -224,6 +225,7 @@
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **Izlesene**
|
||||
@@ -252,6 +254,7 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Lecture2Go**
|
||||
- **Letv**: 乐视网
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
|
@@ -324,6 +324,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
|
@@ -1104,7 +1104,8 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted']):
|
||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||
not info_dict.get('is_live')):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
|
@@ -37,7 +37,7 @@ class DashSegmentsFD(FileDownloader):
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s/%s' % (base_url, target_url)
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
append_url_to_file(
|
||||
|
@@ -43,7 +43,10 @@ from .azubu import AzubuIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
@@ -243,6 +246,7 @@ from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@@ -275,6 +279,7 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .letv import (
|
||||
LetvIE,
|
||||
LetvTvIE,
|
||||
|
722
youtube_dl/extractor/bbc.py
Normal file
722
youtube_dl/extractor/bbc.py
Normal file
@@ -0,0 +1,722 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||
'duration': 5100,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
'id': 'b04v209v',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, The Essential New Tune Special',
|
||||
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
'id': 'p02n76xf',
|
||||
'ext': 'flv',
|
||||
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
||||
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||
'info_dict': {
|
||||
'id': 'b05zmgw1',
|
||||
'ext': 'flv',
|
||||
'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
|
||||
'title': 'Royal Academy Summer Exhibition',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Skip DASH until supported
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
if error is not None:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
|
||||
formats = []
|
||||
vbr = int_or_none(media.get('bitrate'))
|
||||
vcodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_audio(self, media, programme_id):
|
||||
formats = []
|
||||
abr = int_or_none(media.get('bitrate'))
|
||||
acodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'abr': abr,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
return self._download_media_selector_url(
|
||||
self._MEDIASELECTOR_URL % programme_id, programme_id)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
def _process_media_selector(self, media_selection, programme_id):
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _download_playlist(self, playlist_id):
|
||||
try:
|
||||
playlist = self._download_json(
|
||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||
playlist_id, 'Downloading playlist JSON')
|
||||
|
||||
version = playlist.get('defaultAvailableVersion')
|
||||
if version:
|
||||
smp_config = version['smpConfig']
|
||||
title = smp_config['title']
|
||||
description = smp_config['summary']
|
||||
for item in smp_config['items']:
|
||||
kind = item['kind']
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
programme_id = item.get('vpid')
|
||||
duration = int_or_none(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
return self._process_legacy_playlist(playlist_id)
|
||||
|
||||
def _process_legacy_playlist_url(self, url, display_id):
|
||||
playlist = self._download_legacy_playlist_url(url, display_id)
|
||||
return self._extract_from_legacy_playlist(playlist, display_id)
|
||||
|
||||
def _process_legacy_playlist(self, playlist_id):
|
||||
return self._process_legacy_playlist_url(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
|
||||
|
||||
def _download_legacy_playlist_url(self, url, playlist_id=None):
|
||||
return self._download_xml(
|
||||
url, playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % playlist_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % playlist_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % playlist_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
|
||||
def get_programme_id(item):
|
||||
def get_from_attributes(item):
|
||||
for p in('identifier', 'group'):
|
||||
value = item.get(p)
|
||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||
return value
|
||||
get_from_attributes(item)
|
||||
mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator')
|
||||
if mediator is not None:
|
||||
return get_from_attributes(mediator)
|
||||
|
||||
programme_id = get_programme_id(item)
|
||||
duration = int_or_none(item.get('duration'))
|
||||
# TODO: programme_id can be None and media items can be incorporated right inside
|
||||
# playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
# as f4m and m3u8
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = None
|
||||
|
||||
tviplayer = self._search_regex(
|
||||
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
||||
webpage, 'player', default=None)
|
||||
|
||||
if tviplayer:
|
||||
player = self._parse_json(tviplayer, group_id).get('player', {})
|
||||
duration = int_or_none(player.get('duration'))
|
||||
programme_id = player.get('vpid')
|
||||
|
||||
if not programme_id:
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._search_regex(
|
||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class BBCIE(BBCCoUkIE):
|
||||
IE_NAME = 'bbc'
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
# fails with notukerror for some videos
|
||||
# _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
|
||||
_MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'
|
||||
|
||||
_TESTS = [{
|
||||
# article with multiple videos embedded with data-media-meta containing
|
||||
# playlist.sxml, externalId and no direct video links
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# article with multiple videos embedded with data-media-meta (more videos)
|
||||
'url': 'http://www.bbc.com/news/business-28299555',
|
||||
'info_dict': {
|
||||
'id': 'business-28299555',
|
||||
'title': 'Farnborough Airshow: Video highlights',
|
||||
'description': 'BBC reports and video highlights at the Farnborough Airshow.',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'skip': 'Save time',
|
||||
}, {
|
||||
# article with multiple videos embedded with `new SMP()`
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
|
||||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BBC Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
# single video embedded with mediaAssetPage.init()
|
||||
'url': 'http://www.bbc.com/news/world-europe-32041533',
|
||||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'flv',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with single video embedded with data-media-meta containing
|
||||
# direct video links (for now these are extracted) and playlist.xml (with
|
||||
# media items as f4m and m3u8 - currently unsupported)
|
||||
'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'duration': 47,
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video embedded with mediaAssetPage.init() (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'duration': 87,
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
'info_dict': {
|
||||
'id': 'p018zqqg',
|
||||
'ext': 'flv',
|
||||
'title': 'Hyundai Santa Fe Sport: Rock star',
|
||||
'description': 'md5:b042a26142c4154a6e472933cf20793d',
|
||||
'timestamp': 1368473503,
|
||||
'upload_date': '20130513',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video with playlist.sxml URL
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'flv',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# custom redirection to www.bbc.com
|
||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BBCCoUkIE.suitable(url) else super(BBCIE, cls).suitable(url)
|
||||
|
||||
def _extract_from_media_meta(self, media_meta, video_id):
|
||||
# Direct links to media in media metadata (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
# TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
|
||||
source_files = media_meta.get('sourceFiles')
|
||||
if source_files:
|
||||
return [{
|
||||
'url': f['url'],
|
||||
'format_id': format_id,
|
||||
'ext': f.get('encoding'),
|
||||
'tbr': float_or_none(f.get('bitrate'), 1000),
|
||||
'filesize': int_or_none(f.get('filesize')),
|
||||
} for format_id, f in source_files.items() if f.get('url')], []
|
||||
|
||||
programme_id = media_meta.get('externalId')
|
||||
if programme_id:
|
||||
return self._download_media_selector(programme_id)
|
||||
|
||||
# Process playlist.sxml as legacy playlist
|
||||
href = media_meta.get('href')
|
||||
if href:
|
||||
playlist = self._download_legacy_playlist_url(href)
|
||||
_, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
|
||||
return formats, subtitles
|
||||
|
||||
return [], []
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
[r'"datePublished":\s*"([^"]+)',
|
||||
r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
|
||||
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
|
||||
webpage, 'date', default=None))
|
||||
|
||||
# single video with playlist.sxml URL (e.g. http://www.bbc.com/sport/0/football/3365340ng)
|
||||
playlist = self._search_regex(
|
||||
r'<param[^>]+name="playlist"[^>]+value="([^"]+)"',
|
||||
webpage, 'playlist', default=None)
|
||||
if playlist:
|
||||
programme_id, title, description, duration, formats, subtitles = \
|
||||
self._process_legacy_playlist_url(playlist, playlist_id)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-video-player-vpid="([\da-z]{8})"',
|
||||
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
|
||||
webpage, 'vpid', default=None)
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
# digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
|
||||
digital_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
|
||||
programme_id, fatal=False)
|
||||
page_info = digital_data.get('page', {}).get('pageInfo', {})
|
||||
title = page_info.get('pageName') or self._og_search_title(webpage)
|
||||
description = page_info.get('description') or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
|
||||
entries = []
|
||||
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
|
||||
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
|
||||
if embed_url and re.match(EMBED_URL, embed_url):
|
||||
entries.append(embed_url)
|
||||
entries.extend(re.findall(
|
||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
||||
playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||
medias = extract_all(r"data-media-meta='({[^']+})'")
|
||||
|
||||
if not medias:
|
||||
# Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
|
||||
media_asset_page = self._parse_json(
|
||||
self._search_regex(
|
||||
r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
|
||||
playlist_id)
|
||||
medias = []
|
||||
for video in media_asset_page.get('videos', {}).values():
|
||||
medias.extend(video.values())
|
||||
|
||||
entries = []
|
||||
for num, media_meta in enumerate(medias, start=1):
|
||||
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = media_meta.get('externalId')
|
||||
if not video_id:
|
||||
video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
|
||||
|
||||
title = media_meta.get('caption')
|
||||
if not title:
|
||||
title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
|
||||
|
||||
duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
|
||||
|
||||
images = []
|
||||
for image in media_meta.get('images', {}).values():
|
||||
images.extend(image.values())
|
||||
if 'image' in media_meta:
|
||||
images.append(media_meta['image'])
|
||||
|
||||
thumbnails = [{
|
||||
'url': image.get('href'),
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in images]
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
@@ -1,379 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||
'duration': 5100,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
'id': 'b04v209v',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, The Essential New Tune Special',
|
||||
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
'id': 'p02n76xf',
|
||||
'ext': 'flv',
|
||||
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
||||
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||
'info_dict': {
|
||||
'id': 'b05zmgw1',
|
||||
'ext': 'flv',
|
||||
'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
|
||||
'title': 'Royal Academy Summer Exhibition',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
if error is not None:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
|
||||
formats = []
|
||||
vbr = int(media.get('bitrate'))
|
||||
vcodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int(media.get('width'))
|
||||
height = int(media.get('height'))
|
||||
file_size = int(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_audio(self, media, programme_id):
|
||||
formats = []
|
||||
abr = int(media.get('bitrate'))
|
||||
acodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'abr': abr,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _download_playlist(self, playlist_id):
|
||||
try:
|
||||
playlist = self._download_json(
|
||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||
playlist_id, 'Downloading playlist JSON')
|
||||
|
||||
version = playlist.get('defaultAvailableVersion')
|
||||
if version:
|
||||
smp_config = version['smpConfig']
|
||||
title = smp_config['title']
|
||||
description = smp_config['summary']
|
||||
for item in smp_config['items']:
|
||||
kind = item['kind']
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
programme_id = item.get('vpid')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
|
||||
playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % playlist_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % playlist_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % playlist_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = None
|
||||
|
||||
tviplayer = self._search_regex(
|
||||
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
||||
webpage, 'player', default=None)
|
||||
|
||||
if tviplayer:
|
||||
player = self._parse_json(tviplayer, group_id).get('player', {})
|
||||
duration = int_or_none(player.get('duration'))
|
||||
programme_id = player.get('vpid')
|
||||
|
||||
if not programme_id:
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._search_regex(
|
||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -106,15 +106,11 @@ class CanalplusIE(InfoExtractor):
|
||||
continue
|
||||
format_id = fmt.tag
|
||||
if format_id == 'HLS':
|
||||
hls_formats = self._extract_m3u8_formats(format_url, video_id, 'flv')
|
||||
for fmt in hls_formats:
|
||||
fmt['preference'] = preference(format_id)
|
||||
formats.extend(hls_formats)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', preference=preference(format_id)))
|
||||
elif format_id == 'HDS':
|
||||
hds_formats = self._extract_f4m_formats(format_url + '?hdcore=2.11.3', video_id)
|
||||
for fmt in hds_formats:
|
||||
fmt['preference'] = preference(format_id)
|
||||
formats.extend(hds_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id)))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@@ -36,7 +36,7 @@ class ComCarCoffIE(InfoExtractor):
|
||||
webpage, 'full data json'))
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data['videos'][video_id]
|
||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
|
@@ -65,7 +65,7 @@ class InfoExtractor(object):
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* ext Will be calculated from url if missing
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
Calculated from the format_id, width, height.
|
||||
@@ -155,7 +155,7 @@ class InfoExtractor(object):
|
||||
lower to higher preference, each element is a dictionary
|
||||
with the "ext" entry and one of:
|
||||
* "data": The subtitles file contents
|
||||
* "url": A url pointing to the subtitles file
|
||||
* "url": A URL pointing to the subtitles file
|
||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||
automatically generated captions
|
||||
duration: Length of the video in seconds, as an integer.
|
||||
@@ -176,13 +176,17 @@ class InfoExtractor(object):
|
||||
Set to "root" to indicate that this is a
|
||||
comment to the original video.
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
webpage_url: The URL to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
is_live: True, False, or None (=unknown). Whether this video is a
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
start_time: Time in seconds where the reproduction should start, as
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@@ -501,7 +505,7 @@ class InfoExtractor(object):
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
"""Returns a URL that points to a page that should be processed"""
|
||||
# TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
'url': url,
|
||||
@@ -635,7 +639,7 @@ class InfoExtractor(object):
|
||||
return unescapeHTML(escaped)
|
||||
|
||||
def _og_search_thumbnail(self, html, **kargs):
|
||||
return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)
|
||||
return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
|
||||
|
||||
def _og_search_description(self, html, **kargs):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
@@ -1116,7 +1120,7 @@ class InfoExtractor(object):
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
Base class for paged search queries extractors.
|
||||
They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||
They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||
Instances should define _SEARCH_KEY and _MAX_RESULTS.
|
||||
"""
|
||||
|
||||
|
@@ -13,8 +13,10 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -28,10 +30,16 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
|
||||
request = self._build_request(url)
|
||||
return self._download_webpage_handle(request, *args, **kwargs)
|
||||
|
||||
def _download_webpage_no_ff(self, url, *args, **kwargs):
|
||||
request = self._build_request(url)
|
||||
return self._download_webpage(request, *args, **kwargs)
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
@@ -50,10 +58,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'x2iuewm',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
'upload_date': '20150306',
|
||||
'description': 'Several come bundled with the Steam Controller.',
|
||||
'thumbnail': 're:^https?:.*\.(?:jpg|png)$',
|
||||
'duration': 74,
|
||||
'timestamp': 1425657362,
|
||||
'upload_date': '20150306',
|
||||
'uploader': 'IGN',
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@@ -87,38 +102,106 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.dailymotion.com/video/%s' % video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = self._build_request(url)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# It may just embed a vevo video:
|
||||
m_vevo = re.search(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage)
|
||||
if m_vevo is not None:
|
||||
vevo_id = m_vevo.group('id')
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
webpage = self._download_webpage_no_ff(
|
||||
'https://www.dailymotion.com/video/%s' % video_id, video_id)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)'],
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
player_v5 = self._search_regex(
|
||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
formats = []
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for media in media_list:
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': quality,
|
||||
}
|
||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = metadata['title']
|
||||
duration = int_or_none(metadata.get('duration'))
|
||||
timestamp = int_or_none(metadata.get('created_time'))
|
||||
thumbnail = metadata.get('poster_url')
|
||||
uploader = metadata.get('owner', {}).get('screenname')
|
||||
uploader_id = metadata.get('owner', {}).get('id')
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': determine_ext(subtitle_url),
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# vevo embed
|
||||
vevo_id = self._search_regex(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage, 'vevo embed', default=None)
|
||||
if vevo_id:
|
||||
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
||||
|
||||
# fallback old player
|
||||
embed_page = self._download_webpage_no_ff(
|
||||
'https://www.dailymotion.com/embed/video/%s' % video_id,
|
||||
video_id, 'Downloading embed page')
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'video:release_date', webpage, 'upload date'))
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE),
|
||||
video_id)
|
||||
|
||||
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_request = self._build_request(embed_url)
|
||||
embed_page = self._download_webpage(
|
||||
embed_request, video_id, 'Downloading embed page')
|
||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE)
|
||||
info = json.loads(info)
|
||||
if info.get('error') is not None:
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
raise ExtractorError(msg, expected=True)
|
||||
@@ -139,16 +222,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError('Unable to extract video URL')
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if title is None:
|
||||
title = self._html_search_regex(
|
||||
@@ -159,8 +237,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': info['owner.screenname'],
|
||||
'upload_date': video_upload_date,
|
||||
'timestamp': timestamp,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'age_limit': age_limit,
|
||||
@@ -200,10 +279,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = []
|
||||
processed_urls = set()
|
||||
for pagenum in itertools.count(1):
|
||||
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
|
||||
webpage = self._download_webpage(request,
|
||||
id, 'Downloading page %s' % pagenum)
|
||||
page_url = self._PAGE_TEMPLATE % (id, pagenum)
|
||||
webpage, urlh = self._download_webpage_handle_no_ff(
|
||||
page_url, id, 'Downloading page %s' % pagenum)
|
||||
if urlh.geturl() in processed_urls:
|
||||
self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
|
||||
page_url, urlh.geturl()), id)
|
||||
break
|
||||
|
||||
processed_urls.add(urlh.geturl())
|
||||
|
||||
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
|
||||
|
||||
@@ -227,7 +313,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P<user>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
@@ -236,6 +322,17 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -286,8 +383,7 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = self._build_request(url)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
webpage = self._download_webpage_no_ff(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
|
@@ -276,14 +276,6 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
},
|
||||
# BBC iPlayer embeds
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
||||
'info_dict': {
|
||||
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||
@@ -1702,7 +1694,7 @@ class GenericIE(InfoExtractor):
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = compat_urlparse.urljoin(url, found.group(1))
|
||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
|
42
youtube_dl/extractor/ir90tv.py
Normal file
42
youtube_dl/extractor/ir90tv.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
|
||||
|
||||
class Ir90TvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
|
||||
_TESTS = [{
|
||||
'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
|
||||
'md5': '411dbd94891381960cb9e13daa47a869',
|
||||
'info_dict': {
|
||||
'id': '95719',
|
||||
'ext': 'mp4',
|
||||
'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_start(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), '90tv.ir :: ')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', webpage, 'video url')
|
||||
|
||||
thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
|
||||
|
||||
return {
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'video_url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
62
youtube_dl/extractor/lecture2go.py
Normal file
62
youtube_dl/extractor/lecture2go.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Lecture2GoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
|
||||
'md5': 'ac02b570883020d208d405d5a3fd2f7f',
|
||||
'info_dict': {
|
||||
'id': '17473',
|
||||
'ext': 'flv',
|
||||
'title': '2 - Endliche Automaten und reguläre Sprachen',
|
||||
'creator': 'Frank Heitmann',
|
||||
'duration': 5220,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
|
||||
|
||||
formats = []
|
||||
for url in set(re.findall(r'"src","([^"]+)"', webpage)):
|
||||
ext = determine_ext(url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(url, video_id))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
creator = self._html_search_regex(
|
||||
r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'creator': creator,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
}
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -134,7 +135,7 @@ class LetvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
url_info_dict['height'] = format_id[:-1]
|
||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
||||
|
||||
urls.append(url_info_dict)
|
||||
|
||||
|
@@ -8,18 +8,30 @@ from ..utils import (
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
|
||||
'info_dict': {
|
||||
'id': '_JeBD_D7PlS5',
|
||||
'ext': 'flv',
|
||||
'title': 'The Real Jaws',
|
||||
'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
@@ -37,5 +49,6 @@ class NationalGeographicIE(InfoExtractor):
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force the use of f4m
|
||||
# For some reason, the normal links don't work and we must force
|
||||
# the use of f4m
|
||||
{'force_smil_url': True}))
|
||||
|
@@ -32,7 +32,7 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'A More Perfect Union',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
},
|
||||
@@ -46,7 +46,7 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'Losing Iraq',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'duration': 5050,
|
||||
},
|
||||
@@ -60,7 +60,7 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||
'duration': 801,
|
||||
},
|
||||
@@ -72,7 +72,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297708',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -88,7 +88,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'killer-typhoon',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:c741d14e979fc53228c575894094f157',
|
||||
'title': 'Killer Typhoon',
|
||||
'title': 'NOVA - Killer Typhoon',
|
||||
'duration': 3172,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140122',
|
||||
@@ -110,7 +110,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2280706814',
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'Death and the Civil War',
|
||||
'title': 'American Experience - Death and the Civil War',
|
||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
||||
'duration': 6705,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
@@ -118,6 +118,21 @@ class PBSIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.pbs.org/video/2365367186/',
|
||||
'info_dict': {
|
||||
'id': '2365367186',
|
||||
'display_id': '2365367186',
|
||||
'ext': 'mp4',
|
||||
'title': 'To Catch A Comet - Full Episode',
|
||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
||||
'duration': 3342,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -232,6 +247,12 @@ class PBSIE(InfoExtractor):
|
||||
'url': closed_captions_url,
|
||||
}]
|
||||
|
||||
# info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc)
|
||||
# Try turning it to 'program - title' naming scheme if possible
|
||||
alt_title = info.get('program', {}).get('title')
|
||||
if alt_title:
|
||||
info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@@ -9,7 +9,9 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -224,10 +226,13 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'ids': clip_id,
|
||||
})
|
||||
|
||||
videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')
|
||||
video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
|
||||
|
||||
duration = float(videos[0]['duration'])
|
||||
source_ids = [source['id'] for source in videos[0]['sources']]
|
||||
if video.get('is_protected') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
duration = float_or_none(video.get('duration'))
|
||||
source_ids = [source['id'] for source in video['sources']]
|
||||
source_ids_str = ','.join(map(str, source_ids))
|
||||
|
||||
g = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
|
@@ -82,16 +82,21 @@ class RtlNlIE(InfoExtractor):
|
||||
|
||||
meta = info.get('meta', {})
|
||||
|
||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||
# NB: nowadays, recent ffmpeg and avconv can handle these encrypted streams, so
|
||||
# this adaptive -> flash workaround is not required in general, but it also
|
||||
# allows bypassing georestriction therefore is retained for now.
|
||||
videopath = material['videopath'].replace('/adaptive/', '/flash/')
|
||||
# m3u8 streams are encrypted and may not be handled properly by older ffmpeg/avconv.
|
||||
# To workaround this previously adaptive -> flash trick was used to obtain
|
||||
# unencrypted m3u8 streams (see https://github.com/rg3/youtube-dl/issues/4118)
|
||||
# and bypass georestrictions as well.
|
||||
# Currently, unencrypted m3u8 playlists are (intentionally?) invalid and therefore
|
||||
# unusable albeit can be fixed by simple string replacement (see
|
||||
# https://github.com/rg3/youtube-dl/pull/6337)
|
||||
# Since recent ffmpeg and avconv handle encrypted streams just fine encrypted
|
||||
# streams are used now.
|
||||
videopath = material['videopath']
|
||||
m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4')
|
||||
|
||||
video_urlpart = videopath.split('/flash/')[1][:-5]
|
||||
video_urlpart = videopath.split('/adaptive/')[1][:-5]
|
||||
PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
|
||||
|
||||
formats.extend([
|
||||
|
@@ -19,7 +19,16 @@ from ..utils import (
|
||||
|
||||
class RTSIE(InfoExtractor):
|
||||
IE_DESC = 'RTS.ch'
|
||||
_VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
rts:(?P<rts_id>\d+)|
|
||||
https?://
|
||||
(?:www\.)?rts\.ch/
|
||||
(?:
|
||||
(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|
|
||||
play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+)
|
||||
)
|
||||
)'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -122,6 +131,15 @@ class RTSIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
# article with videos on rhs
|
||||
'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html',
|
||||
'info_dict': {
|
||||
'id': '6693917',
|
||||
'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
|
||||
'only_matching': True,
|
||||
@@ -130,7 +148,7 @@ class RTSIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id') or m.group('id_new')
|
||||
video_id = m.group('rts_id') or m.group('id') or m.group('id_new')
|
||||
display_id = m.group('display_id') or m.group('display_id_new')
|
||||
|
||||
def download_json(internal_id):
|
||||
@@ -143,6 +161,15 @@ class RTSIE(InfoExtractor):
|
||||
# video_id extracted out of URL is not always a real id
|
||||
if 'video' not in all_info and 'audio' not in all_info:
|
||||
page = self._download_webpage(url, display_id)
|
||||
|
||||
# article with videos on rhs
|
||||
videos = re.findall(
|
||||
r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:rts:video:(\d+)"',
|
||||
page)
|
||||
if videos:
|
||||
entries = [self.url_result('rts:%s' % video_urn, 'RTS') for video_urn in videos]
|
||||
return self.playlist_result(entries, video_id, self._og_search_title(page))
|
||||
|
||||
internal_id = self._html_search_regex(
|
||||
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
||||
'internal video id')
|
||||
|
@@ -23,6 +23,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '#whilewewatch',
|
||||
}
|
||||
}, {
|
||||
# invalid labels, 360p is better that 480p
|
||||
'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
|
||||
'md5': '882fca19b9eb27ef865efeeaed376a48',
|
||||
'info_dict': {
|
||||
'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
|
||||
'ext': 'mp4',
|
||||
'title': 'Life in Limbo',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
|
||||
'only_matching': True,
|
||||
@@ -52,14 +61,15 @@ class SnagFilmsEmbedIE(InfoExtractor):
|
||||
if not file_:
|
||||
continue
|
||||
type_ = source.get('type')
|
||||
format_id = source.get('label')
|
||||
ext = determine_ext(file_)
|
||||
if any(_ == 'm3u8' for _ in (type_, ext)):
|
||||
format_id = source.get('label') or ext
|
||||
if all(v == 'm3u8' for v in (type_, ext)):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)kbps', file_, 'bitrate', default=None))
|
||||
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
|
||||
file_, 'bitrate', default=None))
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
|
@@ -282,9 +282,11 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
msgs = (compat_str(err['error_message']) for err in info['errors'])
|
||||
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
|
||||
|
||||
entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||
'entries': entries,
|
||||
'id': '%s' % info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
@@ -379,9 +381,7 @@ class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
data = self._download_json(
|
||||
base_url + data, playlist_id, 'Downloading playlist')
|
||||
|
||||
entries = [
|
||||
self._extract_info_dict(t, quiet=True, secret_token=token)
|
||||
for t in data['tracks']]
|
||||
entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@@ -9,7 +9,7 @@ from .spiegeltv import SpiegeltvIE
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||
@@ -39,6 +39,9 @@ class SpiegelIE(InfoExtractor):
|
||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import parse_filesize
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
||||
@@ -18,7 +18,7 @@ class TagesschauIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
||||
'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||
@@ -28,8 +28,39 @@ class TagesschauIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
}
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
|
||||
'md5': 'aef45de271c4bf0a5db834aa40bf774c',
|
||||
'info_dict': {
|
||||
'id': '18407',
|
||||
'ext': 'mp3',
|
||||
'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
|
||||
'thumbnail': 're:^https?:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -49,19 +80,26 @@ class TagesschauIE(InfoExtractor):
|
||||
playerpage = self._download_webpage(
|
||||
player_url, display_id, 'Downloading player page')
|
||||
|
||||
medias = re.findall(
|
||||
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
|
||||
playerpage)
|
||||
formats = []
|
||||
for url, ext, res in medias:
|
||||
for media in re.finditer(
|
||||
r'''(?x)
|
||||
(?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
|
||||
,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
|
||||
(?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
|
||||
''', playerpage):
|
||||
url = media.group('url')
|
||||
type_ = media.group('type')
|
||||
ext = media.group('ext')
|
||||
res = media.group('quality')
|
||||
f = {
|
||||
'format_id': res + '_' + ext,
|
||||
'format_id': '%s_%s' % (res, ext) if res else ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if type_ == 'audio' else None,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
|
||||
thumbnail = self._og_search_thumbnail(playerpage)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = self._og_search_description(webpage).strip()
|
||||
else:
|
||||
@@ -99,17 +137,14 @@ class TagesschauIE(InfoExtractor):
|
||||
'filesize_approx': parse_filesize(m.group('filesize_approx')),
|
||||
})
|
||||
formats.append(format)
|
||||
thumbnail_fn = self._search_regex(
|
||||
r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="teasertext">(.*?)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
webpage, 'description', default=None)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
|
||||
|
||||
self._sort_formats(formats)
|
||||
thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
|
@@ -77,7 +77,11 @@ class UdemyIE(InfoExtractor):
|
||||
login_popup = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
|
||||
def is_logged(webpage):
|
||||
return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
|
||||
|
||||
# already logged in
|
||||
if is_logged(login_popup):
|
||||
return
|
||||
|
||||
login_form = self._form_hidden_inputs('login-form', login_popup)
|
||||
@@ -95,8 +99,7 @@ class UdemyIE(InfoExtractor):
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
if all(logout_pattern not in response
|
||||
for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']):
|
||||
if not is_logged(response):
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
|
||||
response, 'error message', default=None)
|
||||
|
@@ -1,129 +1,137 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ViewsterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
|
||||
_VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
|
||||
_TESTS = [{
|
||||
# movielink, paymethod=fre
|
||||
'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
|
||||
'playlist': [{
|
||||
'md5': '8f9d94b282d80c42b378dffdbb11caf3',
|
||||
'info_dict': {
|
||||
'id': '1293-19341-000-movie',
|
||||
'ext': 'flv',
|
||||
'title': "'Hout' (Wood) - Movie",
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': '1293-19341-000',
|
||||
'title': "'Hout' (Wood)",
|
||||
'description': 'md5:925733185a9242ef96f436937683f33b',
|
||||
}
|
||||
}, {
|
||||
# movielink, paymethod=adv
|
||||
# movie, Type=Movie
|
||||
'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
|
||||
'playlist': [{
|
||||
'md5': '77a005453ca7396cbe3d35c9bea30aef',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000-movie',
|
||||
'ext': 'flv',
|
||||
'title': "THE LISTENING PROJECT - Movie",
|
||||
},
|
||||
}],
|
||||
'md5': '14d3cfffe66d57b41ae2d9c873416f01',
|
||||
'info_dict': {
|
||||
'id': '1140-11855-000',
|
||||
'title': "THE LISTENING PROJECT",
|
||||
'description': 'md5:714421ae9957e112e672551094bf3b08',
|
||||
}
|
||||
'ext': 'flv',
|
||||
'title': 'The listening Project',
|
||||
'description': 'md5:bac720244afd1a8ea279864e67baa071',
|
||||
'timestamp': 1214870400,
|
||||
'upload_date': '20080701',
|
||||
'duration': 4680,
|
||||
},
|
||||
}, {
|
||||
# direct links, no movielink
|
||||
'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
|
||||
'playlist': [{
|
||||
'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000-trailer',
|
||||
'ext': 'mp4',
|
||||
'title': "Sinister - Trailer",
|
||||
},
|
||||
}, {
|
||||
'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000-behind-scenes',
|
||||
'ext': 'mp4',
|
||||
'title': "Sinister - Behind Scenes",
|
||||
},
|
||||
}, {
|
||||
'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000-scene-from-movie',
|
||||
'ext': 'mp4',
|
||||
'title': "Sinister - Scene from movie",
|
||||
},
|
||||
}],
|
||||
# series episode, Type=Episode
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
|
||||
'md5': 'd5434c80fcfdb61651cc2199a88d6ba3',
|
||||
'info_dict': {
|
||||
'id': '1198-56411-000',
|
||||
'title': "Sinister",
|
||||
'description': 'md5:014c40b0488848de9683566a42e33372',
|
||||
}
|
||||
'id': '1284-19427-001',
|
||||
'ext': 'flv',
|
||||
'title': 'The World and a Wall',
|
||||
'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
|
||||
'timestamp': 1428192000,
|
||||
'upload_date': '20150405',
|
||||
'duration': 1500,
|
||||
},
|
||||
}, {
|
||||
# serie, Type=Serie
|
||||
'url': 'http://www.viewster.com/serie/1303-19426-000/',
|
||||
'info_dict': {
|
||||
'id': '1303-19426-000',
|
||||
'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
|
||||
'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# unfinished serie, no Type
|
||||
'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
|
||||
'info_dict': {
|
||||
'id': '1284-19427-000',
|
||||
'title': 'Baby Steps—Season 2',
|
||||
'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}]
|
||||
|
||||
_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
|
||||
_AUTH_TOKEN = '/YqhSYsx8EaU9Bsta3ojlA=='
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
|
||||
request = compat_urllib_request.Request(url)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
request.add_header('Auth-token', self._AUTH_TOKEN)
|
||||
return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
info = self._download_json(
|
||||
'https://public-api.viewster.com/search/%s' % video_id,
|
||||
video_id, 'Downloading entry JSON')
|
||||
|
||||
movie = self._download_json(
|
||||
request, video_id, 'Downloading movie metadata JSON')
|
||||
entry_id = info.get('Id') or info['id']
|
||||
|
||||
title = movie.get('title') or movie['original_title']
|
||||
description = movie.get('synopsis')
|
||||
thumbnail = movie.get('large_artwork') or movie.get('artwork')
|
||||
# unfinished serie has no Type
|
||||
if info.get('Type') in ['Serie', None]:
|
||||
episodes = self._download_json(
|
||||
'https://public-api.viewster.com/series/%s/episodes' % entry_id,
|
||||
video_id, 'Downloading series JSON')
|
||||
entries = [
|
||||
self.url_result(
|
||||
'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
|
||||
for episode in episodes]
|
||||
title = (info.get('Title') or info['Synopsis']['Title']).strip()
|
||||
description = info.get('Synopsis', {}).get('Detailed')
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
entries = []
|
||||
for clip in movie['play_list']:
|
||||
entry = None
|
||||
|
||||
# movielink api
|
||||
link_request = clip.get('link_request')
|
||||
if link_request:
|
||||
request = compat_urllib_request.Request(
|
||||
'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s¤cy=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
|
||||
% link_request)
|
||||
request.add_header('Accept', self._ACCEPT_HEADER)
|
||||
|
||||
movie_link = self._download_json(
|
||||
request, video_id, 'Downloading movie link JSON', fatal=False)
|
||||
|
||||
if movie_link:
|
||||
formats = self._extract_f4m_formats(
|
||||
movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
|
||||
self._sort_formats(formats)
|
||||
entry = {
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
# direct link
|
||||
clip_url = clip.get('clip_data', {}).get('url')
|
||||
if clip_url:
|
||||
entry = {
|
||||
'url': clip_url,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
|
||||
if entry:
|
||||
entry.update({
|
||||
'id': '%s-%s' % (video_id, clip['canonical_title']),
|
||||
'title': '%s - %s' % (title, clip['title']),
|
||||
formats = []
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video?mediaType=%s'
|
||||
% (entry_id, compat_urllib_parse.quote(media_type)),
|
||||
video_id, 'Downloading %s JSON' % media_type, fatal=False)
|
||||
if not media:
|
||||
continue
|
||||
video_url = media.get('Uri')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
video_url += '&' if '?' in video_url else '?'
|
||||
video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False # m3u8 sometimes fail
|
||||
))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
})
|
||||
entries.append(entry)
|
||||
self._sort_formats(formats)
|
||||
|
||||
playlist = self.playlist_result(entries, video_id, title, description)
|
||||
playlist['thumbnail'] = thumbnail
|
||||
return playlist
|
||||
synopsis = info.get('Synopsis', {})
|
||||
# Prefer title outside synopsis since it's less messy
|
||||
title = (info.get('Title') or synopsis['Title']).strip()
|
||||
description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
|
||||
duration = int_or_none(info.get('Duration'))
|
||||
timestamp = parse_iso8601(info.get('ReleaseDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -88,6 +88,14 @@ class VikiBaseIE(InfoExtractor):
|
||||
if not self._token:
|
||||
self.report_warning('Unable to get session token, login has probably failed')
|
||||
|
||||
@staticmethod
|
||||
def dict_selection(dict_obj, preferred_key):
|
||||
if preferred_key in dict_obj:
|
||||
return dict_obj.get(preferred_key)
|
||||
|
||||
filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
|
||||
return filtered_dict[0] if filtered_dict else None
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
IE_NAME = 'viki'
|
||||
@@ -173,6 +181,19 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.viki.com/player/44699v',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'I Planet Entertainment',
|
||||
'upload_date': '20111122',
|
||||
'timestamp': 1321985454,
|
||||
'description': 'md5:44b1e46619df3a072294645c770cef36',
|
||||
'title': 'Love In Magic',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -181,19 +202,14 @@ class VikiIE(VikiBaseIE):
|
||||
video = self._call_api(
|
||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
title = None
|
||||
titles = video.get('titles')
|
||||
if titles:
|
||||
title = titles.get('en') or titles[titles.keys()[0]]
|
||||
title = self.dict_selection(video.get('titles', {}), 'en')
|
||||
if not title:
|
||||
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = video.get('container', {}).get('titles')
|
||||
if container_titles:
|
||||
container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
|
||||
title = '%s - %s' % (container_title, title)
|
||||
container_titles = video.get('container', {}).get('titles', {})
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = '%s - %s' % (container_title, title)
|
||||
|
||||
descriptions = video.get('descriptions')
|
||||
description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None
|
||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||
|
||||
duration = int_or_none(video.get('duration'))
|
||||
timestamp = parse_iso8601(video.get('created_at'))
|
||||
@@ -242,8 +258,8 @@ class VikiIE(VikiBaseIE):
|
||||
|
||||
formats = []
|
||||
for format_id, stream_dict in streams.items():
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
if format_id == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
@@ -299,11 +315,9 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'containers/%s.json' % channel_id, channel_id,
|
||||
'Downloading channel JSON')
|
||||
|
||||
titles = channel['titles']
|
||||
title = titles.get('en') or titles[titles.keys()[0]]
|
||||
title = self.dict_selection(channel['titles'], 'en')
|
||||
|
||||
descriptions = channel['descriptions']
|
||||
description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
|
||||
description = self.dict_selection(channel['descriptions'], 'en')
|
||||
|
||||
entries = []
|
||||
for video_type in ('episodes', 'clips', 'movies'):
|
||||
|
@@ -19,6 +19,7 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
@@ -31,6 +32,7 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -279,13 +281,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
@@ -295,11 +297,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||
@@ -317,7 +319,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_NAME = 'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
'ext': 'mp4',
|
||||
@@ -329,6 +331,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'categories': ['Science & Technology'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'start_time': 1,
|
||||
'end_time': 9,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -889,6 +893,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'http' if self._downloader.params.get('prefer_insecure', False)
|
||||
else 'https')
|
||||
|
||||
start_time = None
|
||||
end_time = None
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
for component in [parsed_url.fragment, parsed_url.query]:
|
||||
query = compat_parse_qs(component)
|
||||
if start_time is None and 't' in query:
|
||||
start_time = parse_duration(query['t'][0])
|
||||
if start_time is None and 'start' in query:
|
||||
start_time = parse_duration(query['start'][0])
|
||||
if end_time is None and 'end' in query:
|
||||
end_time = parse_duration(query['end'][0])
|
||||
|
||||
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
|
||||
mobj = re.search(self._NEXT_URL_RE, url)
|
||||
if mobj:
|
||||
@@ -967,7 +983,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_id, note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
get_video_info = compat_parse_qs(video_info_webpage)
|
||||
add_dash_mpd(get_video_info)
|
||||
if get_video_info.get('use_cipher_signature') != ['True']:
|
||||
add_dash_mpd(get_video_info)
|
||||
if not video_info:
|
||||
video_info = get_video_info
|
||||
if 'token' in get_video_info:
|
||||
@@ -976,7 +993,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if 'reason' in video_info:
|
||||
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
||||
regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
|
||||
if regions_allowed is not None:
|
||||
if regions_allowed:
|
||||
raise ExtractorError('YouTube said: This video is available in %s only' % (
|
||||
', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
|
||||
expected=True)
|
||||
@@ -1255,6 +1272,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1309,10 +1309,10 @@ def parse_duration(s):
|
||||
m = re.match(
|
||||
r'''(?ix)(?:P?T)?
|
||||
(?:
|
||||
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
|
||||
(?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
|
||||
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
||||
|
||||
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
|
||||
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
|
||||
(?:
|
||||
(?:
|
||||
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.07.21'
|
||||
__version__ = '2015.07.28'
|
||||
|
Reference in New Issue
Block a user