Compare commits

..

No commits in common. "1631fca1ee1c3312027c702854d741bbb8025dcd" and "d81421af4b4c3f8f6e197ad4a06fcdb948484c24" have entirely different histories.

3 changed files with 143 additions and 180 deletions

View File

@ -1,87 +1,92 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..compat import compat_str
int_or_none,
parse_iso8601,
try_get,
)
class TF1IE(InfoExtractor): class TF1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tf1\.fr/[^/]+/(?P<program_slug>[^/]+)/videos/(?P<id>[^/?&#]+)\.html' """TF1 uses the wat.tv player."""
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
'id': '10635995',
'ext': 'mp4',
'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
},
'params': {
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
'expected_warnings': ['HTTP Error 404'],
}, {
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
'info_dict': {
'id': 'le-grand-mysterioso-chuggington-7085291-739',
'ext': 'mp4',
'title': 'Le grand Mystérioso - Chuggington',
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
'upload_date': '20150103',
},
'params': {
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
'skip': 'HTTP Error 410: Gone',
}, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True,
}, {
'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html',
'only_matching': True,
}, {
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
'only_matching': True,
}, {
'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html', 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html',
'info_dict': { 'info_dict': {
'id': '13641379', 'id': '13641379',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:f392bc52245dc5ad43771650c96fb620', 'title': 'md5:f392bc52245dc5ad43771650c96fb620',
'description': 'md5:a02cdb217141fb2d469d6216339b052f', 'description': 'md5:44bc54f0a21322f5b91d68e76a544eae',
'upload_date': '20190611', 'upload_date': '20190611',
'timestamp': 1560273989,
'duration': 1738,
'series': 'Quotidien avec Yann Barthès',
'tags': ['intégrale', 'quotidien', 'Replay'],
}, },
'params': { 'params': {
# Sometimes wat serves the whole file with the --test option # Sometimes wat serves the whole file with the --test option
'skip_download': True, 'skip_download': True,
'format': 'bestvideo',
}, },
}, {
'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html',
'only_matching': True,
}, {
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
program_slug, slug = re.match(self._VALID_URL, url).groups() video_id = self._match_id(url)
video = self._download_json(
'https://www.tf1.fr/graphql/web', slug, query={
'id': '9b80783950b85247541dd1d851f9cc7fa36574af015621f853ab111a679ce26f',
'variables': json.dumps({
'programSlug': program_slug,
'slug': slug,
})
})['data']['videoBySlug']
wat_id = video['streamId']
tags = [] webpage = self._download_webpage(url, video_id)
for tag in (video.get('tags') or []):
label = tag.get('label')
if not label:
continue
tags.append(label)
decoration = video.get('decoration') or {} wat_id = None
thumbnails = [] data = self._parse_json(
for source in (try_get(decoration, lambda x: x['image']['sources'], list) or []): self._search_regex(
source_url = source.get('url') r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|</script>)', webpage,
if not source_url: 'data', default='{}'), video_id, fatal=False)
continue
thumbnails.append({
'url': source_url,
'width': int_or_none(source.get('width')),
})
return { if data:
'_type': 'url_transparent', try:
'id': wat_id, wat_id = next(
'url': 'wat:' + wat_id, video.get('streamId')
'title': video.get('title'), for key, video in data.items()
'thumbnails': thumbnails, if isinstance(video, dict)
'description': decoration.get('description'), and video.get('slug') == video_id)
'timestamp': parse_iso8601(video.get('date')), if not isinstance(wat_id, compat_str) or not wat_id.isdigit():
'duration': int_or_none(try_get(video, lambda x: x['publicPlayingInfos']['duration'])), wat_id = None
'tags': tags, except StopIteration:
'series': decoration.get('programLabel'), pass
'season_number': int_or_none(video.get('season')),
'episode_number': int_or_none(video.get('episode')), if not wat_id:
} wat_id = self._html_search_regex(
(r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
r'(["\']?)streamId\1\s*:\s*(["\']?)(?P<id>\d+)\2'),
webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat')

View File

@ -2,110 +2,55 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from .kaltura import KalturaIE
from ..utils import (
int_or_none,
unified_timestamp,
)
class TMZIE(InfoExtractor): class TMZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.tmz.com/videos/0-cegprt2p/',
'md5': '31f9223e20eef55954973359afa61a20',
'info_dict': {
'id': 'P6YjLBLk',
'ext': 'mp4',
'title': "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet",
'description': 'md5:b714359fc18607715ebccbd2da8ff488',
'timestamp': 1467831837,
'upload_date': '20160706',
},
'add_ie': [JWPlatformIE.ie_key()],
}, {
'url': 'http://www.tmz.com/videos/0_okj015ty/', 'url': 'http://www.tmz.com/videos/0_okj015ty/',
'only_matching': True, 'md5': '4d22a51ef205b6c06395d8394f72d560',
'info_dict': {
'id': '0_okj015ty',
'ext': 'mp4',
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
'timestamp': 1394747163,
'uploader_id': 'batchUser',
'upload_date': '20140313',
}
}, { }, {
'url': 'https://www.tmz.com/videos/071119-chris-morgan-women-4590005-0-zcsejvcr/', 'url': 'http://www.tmz.com/videos/0-cegprt2p/',
'only_matching': True,
}, {
'url': 'https://www.tmz.com/videos/2021-02-19-021921-floyd-mayweather-1043872/',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).replace('-', '_') video_id = self._match_id(url).replace('-', '_')
return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id)
webpage = self._download_webpage(url, video_id, fatal=False)
if webpage:
tmz_video_id = self._search_regex(
r'nodeRef\s*:\s*["\']tmz:video:([\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12})',
webpage, 'video id', default=None)
video = self._download_json(
'https://www.tmz.com/_/video/%s' % tmz_video_id, video_id,
fatal=False)
if video:
message = video['message']
info = {
'_type': 'url_transparent',
'title': message.get('title'),
'description': message.get('description'),
'timestamp': unified_timestamp(message.get('published_at')),
'duration': int_or_none(message.get('duration')),
}
jwplatform_id = message.get('jwplayer_media_id')
if jwplatform_id:
info.update({
'url': 'jwplatform:%s' % jwplatform_id,
'ie_key': JWPlatformIE.ie_key(),
})
else:
kaltura_entry_id = message.get('kaltura_entry_id') or video_id
kaltura_partner_id = message.get('kaltura_partner_id') or '591531'
info.update({
'url': 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id),
'ie_key': KalturaIE.ie_key(),
})
return info
return self.url_result(
'kaltura:591531:%s' % video_id, KalturaIE.ie_key(), video_id)
class TMZArticleIE(InfoExtractor): class TMZArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
_TEST = { _TEST = {
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
'md5': '3316ff838ae5bb7f642537825e1e90d2',
'info_dict': { 'info_dict': {
'id': 'PAKZa97W', 'id': '0_6snoelag',
'ext': 'mp4', 'ext': 'mov',
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
'timestamp': 1429466400, 'timestamp': 1429467813,
'upload_date': '20150419', 'upload_date': '20150419',
}, 'uploader_id': 'batchUser',
'params': { }
'skip_download': True,
},
'add_ie': [JWPlatformIE.ie_key()],
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
tmz_url = self._search_regex(
r'clickLink\s*\(\s*["\'](?P<url>%s)' % TMZIE._VALID_URL, webpage,
'video id', default=None, group='url')
if tmz_url:
return self.url_result(tmz_url, ie=TMZIE.ie_key())
embedded_video_info = self._parse_json(self._html_search_regex( embedded_video_info = self._parse_json(self._html_search_regex(
r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'), r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
video_id) video_id)
return self.url_result( return self.url_result(
'http://www.tmz.com/videos/%s/' % embedded_video_info['id'], 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])
ie=TMZIE.ie_key())

View File

@ -4,10 +4,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none,
try_get,
unified_strdate, unified_strdate,
HEADRequest,
int_or_none,
) )
@ -30,7 +29,6 @@ class WatIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['HTTP Error 404'], 'expected_warnings': ['HTTP Error 404'],
'skip': 'This content is no longer available',
}, },
{ {
'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html', 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
@ -42,10 +40,8 @@ class WatIE(InfoExtractor):
'upload_date': '20140816', 'upload_date': '20140816',
}, },
'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."], 'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."],
'skip': 'This content is no longer available',
}, },
] ]
_GEO_BYPASS = False
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -53,54 +49,71 @@ class WatIE(InfoExtractor):
# 'contentv4' is used in the website, but it also returns the related # 'contentv4' is used in the website, but it also returns the related
# videos, we don't need them # videos, we don't need them
# video_data = self._download_json(
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
video_data = self._download_json( video_data = self._download_json(
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id, 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
video_id, query={'context': 'MYTF1'})
video_info = video_data['media'] video_info = video_data['media']
error_desc = video_info.get('error_desc') error_desc = video_info.get('error_desc')
if error_desc: if error_desc:
if video_info.get('error_code') == 'GEOBLOCKED': self.report_warning(
self.raise_geo_restricted(error_desc, video_info.get('geoList')) '%s returned error: %s' % (self.IE_NAME, error_desc))
raise ExtractorError(error_desc, expected=True)
title = video_info['title'] chapters = video_info['chapters']
if chapters:
first_chapter = chapters[0]
def video_id_for_chapter(chapter):
return chapter['tc_start'].split('-')[0]
if video_id_for_chapter(first_chapter) != video_id:
self.to_screen('Multipart video detected')
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
return self.playlist_result(entries, video_id, video_info['title'])
# Otherwise we can continue and extract just one part, we have to use
# the video id for getting the video url
else:
first_chapter = video_info
title = first_chapter['title']
def extract_url(path_template, url_type):
req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False)
if head:
red_url = head.geturl()
if req_url != red_url:
return red_url
return None
formats = [] formats = []
def extract_formats(manifest_urls):
for f, f_url in manifest_urls.items():
if not f_url:
continue
if f in ('dash', 'mpd'):
formats.extend(self._extract_mpd_formats(
f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
video_id, mpd_id='dash', fatal=False))
elif f == 'hls':
formats.extend(self._extract_m3u8_formats(
f_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
delivery = video_data.get('delivery') or {}
extract_formats({delivery.get('format'): delivery.get('url')})
if not formats:
if delivery.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
manifest_urls = self._download_json( manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False) 'http://www.wat.tv/get/webhtml/' + video_id, video_id)
if manifest_urls: m3u8_url = manifest_urls.get('hls')
extract_formats(manifest_urls) if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
mpd_url = manifest_urls.get('mpd')
if mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
duration = None
files = video_info['files']
if files:
duration = int_or_none(files[0].get('duration'))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': video_info.get('preview'), 'thumbnail': first_chapter.get('preview'),
'upload_date': unified_strdate(try_get( 'description': first_chapter.get('description'),
video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])), 'view_count': int_or_none(video_info.get('views')),
'duration': int_or_none(video_info.get('duration')), 'upload_date': upload_date,
'duration': duration,
'formats': formats, 'formats': formats,
} }