Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport

This commit is contained in:
remitamine 2015-12-21 11:18:32 +01:00
commit 2c28da8e05
5 changed files with 218 additions and 118 deletions

View File

@ -61,6 +61,10 @@ from .beatportpro import BeatportProIE
from .bet import BetIE from .bet import BetIE
from .bild import BildIE from .bild import BildIE
from .bilibili import BiliBiliIE from .bilibili import BiliBiliIE
from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blinkx import BlinkxIE from .blinkx import BlinkxIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bpb import BpbIE from .bpb import BpbIE

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class AMPIE(InfoExtractor):
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
item = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')['channel']['item']
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
thumbnails = []
media_thumbnail = get_media_node('thumbnail')
if media_thumbnail:
if isinstance(media_thumbnail, dict):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data['@attributes']
thumbnails.append({
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
subtitles = {}
media_subtitle = get_media_node('subTitle')
if media_subtitle:
if isinstance(media_subtitle, dict):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data['@attributes']
lang = subtitle.get('lang') or 'en'
subtitles[lang] = [{'url': subtitle['href']}]
formats = []
media_content = get_media_node('content')
if isinstance(media_content, dict):
media_content = [media_content]
for media_data in media_content:
media = media_data['@attributes']
media_type = media['type']
if media_type == 'video/f4m':
f4m_formats = self._extract_f4m_formats(
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif media_type == 'application/x-mpegURL':
m3u8_formats = self._extract_m3u8_formats(
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
formats.append({
'format_id': media_data['media-category']['@attributes']['label'],
'url': media['url'],
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': get_media_node('title'),
'description': get_media_node('description'),
'thumbnails': thumbnails,
'timestamp': parse_iso8601(item.get('pubDate'), ' '),
'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
'formats': formats,
}

View File

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .amp import AMPIE
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
)
class BleacherReportIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
'info_dict': {
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341,
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
'info_dict': {
'id': '2586817',
'ext': 'mp4',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
'uploader_id': 6466954,
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
if primary_photo:
thumbnails = [{
'url': primary_photo['url'],
'width': primary_photo.get('width'),
'height': primary_photo.get('height'),
}]
info = {
'_type': 'url_transparent',
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
'view_count': int_or_none(article_data.get('hitCount')),
}
video = article_data.get('video')
if video:
video_type = video['type']
if video_type == 'cms.bleacherreport.com':
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
else:
info['url'] = video_type + video['id']
return info
else:
raise ExtractorError('no video in the article', expected=True)
class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'md5': 'f0ca220af012d4df857b54f792c586bb',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
info['id'] = video_id
return info

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools import itertools
from .common import InfoExtractor from .amp import AMPIE
from ..compat import ( from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_urllib_parse, compat_urllib_parse,
@ -12,14 +12,11 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
determine_ext,
int_or_none,
parse_iso8601,
sanitized_Request, sanitized_Request,
) )
class DramaFeverBaseIE(InfoExtractor): class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever' _NETRC_MACHINE = 'dramafever'
@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058, 'timestamp': 1404336058,
'upload_date': '20140702', 'upload_date': '20140702',
'duration': 343, 'duration': 343,
} },
'params': {
# m3u8 download
'skip_download': True,
},
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.') video_id = self._match_id(url).replace('/', '.')
try: try:
feed = self._download_json( info = self._extract_feed_info(
'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
video_id, 'Downloading episode JSON')['channel']['item']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError): if isinstance(e.cause, compat_HTTPError):
raise ExtractorError( raise ExtractorError(
'Currently unavailable in your country.', expected=True) 'Currently unavailable in your country.', expected=True)
raise raise
media_group = feed.get('media-group', {})
formats = []
for media_content in media_group['media-content']:
src = media_content.get('@attributes', {}).get('url')
if not src:
continue
ext = determine_ext(src)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id='hds'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', m3u8_id='hls'))
else:
formats.append({
'url': src,
})
self._sort_formats(formats)
title = media_group.get('media-title')
description = media_group.get('media-description')
duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
thumbnail = self._proto_relative_url(
media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
timestamp = parse_iso8601(feed.get('pubDate'), ' ')
subtitles = {}
for media_subtitle in media_group.get('media-subTitle', []):
lang = media_subtitle.get('@attributes', {}).get('lang')
href = media_subtitle.get('@attributes', {}).get('href')
if not lang or not href:
continue
subtitles[lang] = [{
'ext': 'ttml',
'url': href,
}]
series_id, episode_number = video_id.split('.') series_id, episode_number = video_id.split('.')
episode_info = self._download_json( episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode # We only need a single episode info, so restricting page size to one episode
@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value: if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile') subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/': if subfile and subfile != 'http://www.dramafever.com/st/':
subtitles.setdefault('English', []).append({ info['subtitiles'].setdefault('English', []).append({
'ext': 'srt', 'ext': 'srt',
'url': subfile, 'url': subfile,
}) })
return { return info
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
class DramaFeverSeriesIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE):

View File

@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .amp import AMPIE
from ..utils import (
parse_iso8601,
int_or_none,
)
class FoxNewsIE(InfoExtractor): class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video' IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [ _TESTS = [
@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480', 'id': '3937480',
'ext': 'flv', 'ext': 'flv',
'title': 'Frozen in Time', 'title': 'Frozen in Time',
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', 'description': '16-year-old girl is size of toddler',
'duration': 265, 'duration': 265,
'timestamp': 1304411491, # 'timestamp': 1304411491,
'upload_date': '20110503', # 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
}, },
@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001', 'id': '3922535568001',
'ext': 'mp4', 'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
'description': "Congressman discusses the president's executive action", 'description': "Congressman discusses president's plan",
'duration': 292, 'duration': 292,
'timestamp': 1417662047, # 'timestamp': 1417662047,
'upload_date': '20141204', # 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
}, },
@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) host, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
host = mobj.group('host')
video = self._download_json( info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
info['id'] = video_id
item = video['channel']['item'] return info
title = item['title']
description = item['description']
timestamp = parse_iso8601(item['dc-date'])
media_group = item['media-group']
duration = None
formats = []
for media in media_group['media-content']:
attributes = media['@attributes']
video_url = attributes['url']
if video_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
elif video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
elif not video_url.endswith('.smil'):
duration = int_or_none(attributes.get('duration'))
formats.append({
'url': video_url,
'format_id': media['media-category']['@attributes']['label'],
'preference': 1,
'vbr': int_or_none(attributes.get('bitrate')),
'filesize': int_or_none(attributes.get('fileSize'))
})
self._sort_formats(formats)
media_thumbnail = media_group['media-thumbnail']['@attributes']
thumbnails = [{
'url': media_thumbnail['url'],
'width': int_or_none(media_thumbnail.get('width')),
'height': int_or_none(media_thumbnail.get('height')),
}] if media_thumbnail else []
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}