Compare commits

..

No commits in common. "c669554ef5491302eb20fc2bcb52339ea1a4ac1a" and "d18f4419a72a01abc2cb45ef23f2400cd3eb5f43" have entirely different histories.

5 changed files with 288 additions and 227 deletions

View File

@ -1,15 +1,14 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import calendar
import re import re
import time
from .amp import AMPIE from .amp import AMPIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from .youtube import YoutubeIE
parse_duration, from ..compat import compat_urlparse
parse_iso8601,
try_get,
)
class AbcNewsVideoIE(AMPIE): class AbcNewsVideoIE(AMPIE):
@ -19,8 +18,8 @@ class AbcNewsVideoIE(AMPIE):
(?: (?:
abcnews\.go\.com/ abcnews\.go\.com/
(?: (?:
(?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-| [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
video/(?:embed|itemfeed)\?.*?\bid= video/embed\?.*?\bid=
)| )|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
) )
@ -50,12 +49,6 @@ class AbcNewsVideoIE(AMPIE):
}, { }, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
'only_matching': True,
}, {
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -76,23 +69,28 @@ class AbcNewsIE(InfoExtractor):
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# Youtube Embeds 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
'info_dict': { 'info_dict': {
'id': '51286501', 'id': '10505354',
'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player", 'ext': 'flv',
'description': 'Billingsley went from a child actor to Hollywood power player.', 'display_id': 'dramatic-video-rare-death-job-america',
'title': 'Occupational Hazards',
'description': 'Nightline investigates the dangers that lurk at various jobs.',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20100428',
'timestamp': 1272412800,
}, },
'playlist_count': 5, 'add_ie': ['AbcNewsVideo'],
}, { }, {
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
'info_dict': { 'info_dict': {
'id': '38897857', 'id': '38897857',
'ext': 'mp4', 'ext': 'mp4',
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
'title': 'Justin Timberlake Drops Hints For Secret Single', 'title': 'Justin Timberlake Drops Hints For Secret Single',
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
'upload_date': '20160505', 'upload_date': '20160515',
'timestamp': 1462442280, 'timestamp': 1463329500,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -104,55 +102,49 @@ class AbcNewsIE(InfoExtractor):
}, { }, {
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
'only_matching': True, 'only_matching': True,
}, {
# inline.type == 'video'
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
story_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, story_id) display_id = mobj.group('display_id')
story = self._parse_json(self._search_regex( video_id = mobj.group('id')
r"window\['__abcnews__'\]\s*=\s*({.+?});",
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
article_contents = story.get('articleContents') or {}
def entries(): webpage = self._download_webpage(url, video_id)
featured_video = story.get('featuredVideo') or {} video_url = self._search_regex(
feed = try_get(featured_video, lambda x: x['video']['feed']) r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
if feed: full_video_url = compat_urlparse.urljoin(url, video_url)
yield {
'_type': 'url', youtube_url = YoutubeIE._extract_url(webpage)
'id': featured_video.get('id'),
'title': featured_video.get('name'), timestamp = None
'url': feed, date_str = self._html_search_regex(
'thumbnail': featured_video.get('images'), r'<span[^>]+class="timestamp">([^<]+)</span>',
'description': featured_video.get('description'), webpage, 'timestamp', fatal=False)
'timestamp': parse_iso8601(featured_video.get('uploadDate')), if date_str:
'duration': parse_duration(featured_video.get('duration')), tz_offset = 0
if date_str.endswith(' ET'): # Eastern Time
tz_offset = -5
date_str = date_str[:-3]
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
for date_format in date_formats:
try:
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
except ValueError:
continue
if timestamp is not None:
timestamp -= tz_offset * 3600
entry = {
'_type': 'url_transparent',
'ie_key': AbcNewsVideoIE.ie_key(), 'ie_key': AbcNewsVideoIE.ie_key(),
} 'url': full_video_url,
for inline in (article_contents.get('inlines') or []):
inline_type = inline.get('type')
if inline_type == 'iframe':
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
if iframe_url:
yield self.url_result(iframe_url)
elif inline_type == 'video':
video_id = inline.get('id')
if video_id:
yield {
'_type': 'url',
'id': video_id, 'id': video_id,
'url': 'http://abcnews.go.com/video/embed?id=' + video_id, 'display_id': display_id,
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'), 'timestamp': timestamp,
'description': inline.get('description'),
'duration': parse_duration(inline.get('duration')),
'ie_key': AbcNewsVideoIE.ie_key(),
} }
return self.playlist_result( if youtube_url:
entries(), story_id, article_contents.get('headline'), entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
article_contents.get('subHead')) return self.playlist_result(entries)
return entry

View File

@ -1459,7 +1459,6 @@ from .vrv import (
VRVSeriesIE, VRVSeriesIE,
) )
from .vshare import VShareIE from .vshare import VShareIE
from .vtm import VTMIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE

View File

@ -128,7 +128,6 @@ from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2224,20 +2223,6 @@ class GenericIE(InfoExtractor):
'duration': 1581, 'duration': 1581,
}, },
}, },
{
# MyChannels SDK embed
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
'md5': '90c0699c37006ef18e198c032d81739c',
'info_dict': {
'id': '194165',
'ext': 'mp4',
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
'timestamp': 1611740340,
'upload_date': '20210127',
'duration': 159,
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2477,9 +2462,6 @@ class GenericIE(InfoExtractor):
webpage = self._webpage_read_content( webpage = self._webpage_read_content(
full_response, url, video_id, prefix=first_bytes) full_response, url, video_id, prefix=first_bytes)
if '<title>DPG Media Privacy Gate</title>' in webpage:
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id) self.report_extraction(video_id)
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest? # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@ -2611,11 +2593,6 @@ class GenericIE(InfoExtractor):
if arc_urls: if arc_urls:
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key()) return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
mychannels_urls = MedialaanIE._extract_urls(webpage)
if mychannels_urls:
return self.playlist_from_matches(
mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',

View File

@ -2,113 +2,268 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .gigya import GigyaBaseIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
extract_attributes,
int_or_none, int_or_none,
mimetype2ext, parse_duration,
parse_iso8601, try_get,
unified_timestamp,
) )
class MedialaanIE(InfoExtractor): class MedialaanIE(GigyaBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.|nieuws\.)?
(?: (?:
(?:embed\.)?mychannels.video/embed/| (?P<site_id>vtm|q2|vtmkzoom)\.be/
embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
(?:www\.)?(?:
(?: (?:
7sur7| video(?:/[^/]+/id/|/?\?.*?\baid=)|
demorgen| (?:[^/]+/)*
hln|
joe|
qmusic
)\.be|
(?:
[abe]d|
bndestem|
destentor|
gelderlander|
pzc|
tubantia|
volkskrant
)\.nl
)/video/(?:[^/]+/)*[^/?&#]+~p
) )
(?P<id>\d+) )
(?P<id>[^/?#&]+)
''' '''
_NETRC_MACHINE = 'medialaan'
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
_SITE_TO_APP_ID = {
'vtm': 'vtm_watch',
'q2': 'q2',
'vtmkzoom': 'vtmkzoom',
}
_TESTS = [{ _TESTS = [{
'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993', # vod
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
'info_dict': { 'info_dict': {
'id': '193993', 'id': 'vtm_20170219_VM0678361_vtmwatch',
'ext': 'mp4', 'ext': 'mp4',
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?', 'title': 'Allemaal Chris afl. 6',
'timestamp': 1611663540, 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
'upload_date': '20210126', 'timestamp': 1487533280,
'duration': 238, 'upload_date': '20170219',
'duration': 2562,
'series': 'Allemaal Chris',
'season': 'Allemaal Chris',
'season_number': 1,
'season_id': '256936078124527',
'episode': 'Allemaal Chris afl. 6',
'episode_number': 6,
'episode_id': '256936078591527',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Requires account credentials',
}, { }, {
'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093', # clip
'url': 'http://vtm.be/video?aid=168332',
'info_dict': {
'id': '168332',
'ext': 'mp4',
'title': '"Veronique liegt!"',
'description': 'md5:1385e2b743923afe54ba4adc38476155',
'timestamp': 1489002029,
'upload_date': '20170308',
'duration': 96,
},
}, {
# vod
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default', # vod
'url': 'http://vtm.be/video?aid=163157',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://embed.mychannels.video/script/production/193993', # vod
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://embed.mychannels.video/production/193993', # clip
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://mychannels.video/embed/193993', # http/s redirect
'only_matching': True, 'url': 'https://vtmkzoom.be/video?aid=45724',
'info_dict': {
'id': '257136373657000',
'ext': 'mp4',
'title': 'K3 Dansstudio Ushuaia afl.6',
},
'params': {
'skip_download': True,
},
'skip': 'Requires account credentials',
}, { }, {
'url': 'https://embed.mychannels.video/embed/193993', # nieuws.vtm.be
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod def _real_initialize(self):
def _extract_urls(webpage): self._logged_in = False
entries = []
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage): def _login(self):
mychannels_id = extract_attributes(element).get('data-mychannels-id') username, password = self._get_login_info()
if mychannels_id: if username is None:
entries.append('https://mychannels.video/embed/' + mychannels_id) self.raise_login_required()
return entries
auth_data = {
'APIKey': self._APIKEY,
'sdk': 'js_6.1',
'format': 'json',
'loginID': username,
'password': password,
}
auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']
self._signature_timestamp = auth_info['signatureTimestamp']
self._logged_in = True
def _real_extract(self, url): def _real_extract(self, url):
production_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
production = self._download_json( video_id, site_id = mobj.group('id', 'site_id')
'https://embed.mychannels.video/sdk/production/' + production_id,
production_id, query={'options': 'UUUU_default'})['productions'][0]
title = production['title']
formats = [] webpage = self._download_webpage(url, video_id)
for source in (production.get('sources') or []):
src = source.get('src') config = self._parse_json(
if not src: self._search_regex(
continue r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
ext = mimetype2ext(source.get('type')) webpage, 'config', default='{}'), video_id,
if ext == 'm3u8': transform_source=lambda s: s.replace(
formats.extend(self._extract_m3u8_formats( '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
src, production_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) vod_id = config.get('vodId') or self._search_regex(
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
r'"vodId"\s*:\s*"(.+?)"',
r'<[^>]+id=["\']vod-(\d+)'),
webpage, 'video_id', default=None)
# clip, no authentication required
if not vod_id:
player = self._parse_json(
self._search_regex(
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
default=''),
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
if player:
video = player[-1]
if video['videoUrl'] in ('http', 'https'):
return self.url_result(video['url'], MedialaanIE.ie_key())
info = {
'id': video_id,
'url': video['videoUrl'],
'title': video['title'],
'thumbnail': video.get('imageUrl'),
'timestamp': int_or_none(video.get('createdDate')),
'duration': int_or_none(video.get('duration')),
}
else: else:
formats.append({ info = self._parse_html5_media_entries(
'ext': ext, url, webpage, video_id, m3u8_id='hls')[0]
'url': src, info.update({
'id': video_id,
'title': self._html_search_meta('description', webpage),
'duration': parse_duration(self._html_search_meta('duration', webpage)),
}) })
# vod, authentication required
else:
if not self._logged_in:
self._login()
settings = self._parse_json(
self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings', default='{}'),
video_id)
def get(container, item):
return try_get(
settings, lambda x: x[container][item],
compat_str) or self._search_regex(
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
default=None)
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
data = self._download_json(
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
video_id, query={
'app_id': app_id,
'user_network': sso,
'UID': self._uid,
'UIDSignature': self._uid_signature,
'signatureTimestamp': self._signature_timestamp,
})
formats = self._extract_m3u8_formats(
data['response']['uri'], video_id, entry_protocol='m3u8_native',
ext='mp4', m3u8_id='hls')
self._sort_formats(formats) self._sort_formats(formats)
return { info = {
'id': production_id, 'id': vod_id,
'title': title,
'formats': formats, 'formats': formats,
'thumbnail': production.get('posterUrl'),
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
'duration': int_or_none(production.get('duration')) or None,
} }
api_key = get('vod', 'apiKey')
channel = get('medialaanGigya', 'channel')
if api_key:
videos = self._download_json(
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
query={
'channels': channel,
'ids': vod_id,
'limit': 1,
'apikey': api_key,
})
if videos:
video = try_get(
videos, lambda x: x['response']['videos'][0], dict)
if video:
def get(container, item, expected_type=None):
return try_get(
video, lambda x: x[container][item], expected_type)
def get_string(container, item):
return get(container, item, compat_str)
info.update({
'series': get_string('program', 'title'),
'season': get_string('season', 'title'),
'season_number': int_or_none(get('season', 'number')),
'season_id': get_string('season', 'id'),
'episode': get_string('episode', 'title'),
'episode_number': int_or_none(get('episode', 'number')),
'episode_id': get_string('episode', 'id'),
'duration': int_or_none(
video.get('duration')) or int_or_none(
video.get('durationMillis'), scale=1000),
'title': get_string('episode', 'title'),
'description': get_string('episode', 'text'),
'timestamp': unified_timestamp(get_string(
'publication', 'begin')),
})
if not info.get('title'):
info['title'] = try_get(
config, lambda x: x['videoConfig']['title'],
compat_str) or self._html_search_regex(
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
default=None) or self._og_search_title(webpage)
if not info.get('description'):
info['description'] = self._html_search_regex(
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
webpage, 'description', default=None)
return info

View File

@ -1,62 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
try_get,
)
class VTMIE(InfoExtractor):
    """Extractor for vtm.be video pages identified by a ``~v<uuid>`` suffix.

    The UUID is resolved through a GraphQL endpoint; the answer carries a
    MyChannels video number, and the actual media extraction is delegated
    to the extractor registered under the ``Medialaan`` key.
    """

    _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
    _TEST = {
        'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
        'md5': '37dca85fbc3a33f2de28ceb834b071f8',
        'info_dict': {
            'id': '192445',
            'ext': 'mp4',
            'title': 'Gast vernielt Genkse hotelkamer',
            'timestamp': 1611060180,
            'upload_date': '20210119',
            'duration': 74,
            # TODO: fix url _type result processing
            # 'series': 'Op Interventie',
        }
    }

    def _real_extract(self, url):
        """Resolve *url* to a ``url``-type result for the MyChannels embed.

        Raises ``KeyError`` when the response lacks ``data``,
        ``getComponent`` or ``myChannelsVideo``.
        """
        uuid = self._match_id(url)

        # The query text is kept flush-left so the string sent to the
        # server stays exactly as written.
        graphql_query = '''{
getComponent(type: Video, uuid: "%s") {
... on Video {
description
duration
myChannelsVideo
program {
title
}
publishedAt
title
}
}
}''' % uuid

        response = self._download_json(
            'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
            uuid,
            query={'query': graphql_query},
            headers={'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e'})
        component = response['data']['getComponent']

        # '%d' requires a numeric myChannelsVideo value.
        embed_url = 'http://mychannels.video/embed/%d' % component['myChannelsVideo']
        series_title = try_get(component, lambda x: x['program']['title'])

        result = {
            '_type': 'url',
            'ie_key': 'Medialaan',
            'id': uuid,
            'url': embed_url,
        }
        # Optional metadata: each of these is None when the field is absent.
        result['title'] = component.get('title')
        result['description'] = component.get('description')
        result['timestamp'] = parse_iso8601(component.get('publishedAt'))
        result['duration'] = int_or_none(component.get('duration'))
        result['series'] = series_title
        return result