2016-10-02 20:39:18 +09:00
# coding: utf-8
2014-01-30 14:13:57 +09:00
from __future__ import unicode_literals
2013-09-09 04:55:11 +09:00
import re
from . common import InfoExtractor
2018-02-11 22:03:31 +09:00
from . . compat import (
compat_urlparse ,
)
2014-12-13 20:24:42 +09:00
from . . utils import (
2014-09-17 01:47:59 +09:00
clean_html ,
2018-02-11 23:58:04 +09:00
determine_ext ,
2014-12-13 20:24:42 +09:00
ExtractorError ,
2014-09-17 01:47:59 +09:00
int_or_none ,
2018-02-11 23:58:04 +09:00
try_get ,
2018-07-21 21:08:28 +09:00
url_or_none ,
2013-09-09 04:55:11 +09:00
)
2017-12-06 18:54:20 +09:00
from . dailymotion import DailymotionIE
2021-09-19 12:03:31 +09:00
from . . downloader import PROTOCOL_MAP
2013-09-09 04:55:11 +09:00
2013-09-10 22:50:34 +09:00
class FranceTVBaseInfoExtractor(InfoExtractor):
    """Shared base for the site extractors that delegate to FranceTVIE."""

    def _make_url_result(self, video_or_full_id, catalog=None):
        """Build a url_result that hands the video over to FranceTVIE.

        video_or_full_id is either a bare video id or an 'id@catalog' pair.
        catalog is only appended when it is truthy and the id does not
        already contain an '@'.
        """
        target = 'francetv:%s' % video_or_full_id
        has_catalog_suffix = '@' in video_or_full_id
        if not has_catalog_suffix and catalog:
            target = '%s@%s' % (target, catalog)
        # The reported video id is always the part before the first '@'.
        bare_id = video_or_full_id.split('@')[0]
        return self.url_result(
            target, ie=FranceTVIE.ie_key(), video_id=bare_id)
2018-02-11 23:14:05 +09:00
class FranceTVIE(InfoExtractor):
    """Extract a France Televisions video from its id.

    Accepts the 'getInfosOeuvre' web-service URL (id taken from the
    'idDiffusion' query parameter), 'videos.francetv.fr/video/<id>[@catalog]'
    URLs and the internal 'francetv:<id>[@catalog]' scheme.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        https?://
                            sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
                            .*?\bidDiffusion=[^&]+|
                        (?:
                            https?://videos\.francetv\.fr/video/|
                            francetv:
                        )
                        (?P<id>[^@]+)(?:@(?P<catalog>.+))?
                    )
                    '''

    _TESTS = [{
        # without catalog
        'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
        'md5': '283491d723a14db7c4e10b887c4b475a',
        'info_dict': {
            'id': '162311093',
            'ext': 'mp4',
            'title': '13h15, le dimanche... - Les mystères de Jésus',
        },
    }, {
        # with catalog
        'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
        'only_matching': True,
    }, {
        'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
        'only_matching': True,
    }, {
        'url': 'francetv:162311093',
        'only_matching': True,
    }, {
        'url': 'francetv:NI_1004933@Zouzous',
        'only_matching': True,
    }, {
        'url': 'francetv:NI_983319@Info-web',
        'only_matching': True,
    }, {
        'url': 'francetv:NI_983319',
        'only_matching': True,
    }, {
        'url': 'francetv:NI_657393@Regions',
        'only_matching': True,
    }, {
        # france-3 live
        'url': 'francetv:SIM_France3',
        'only_matching': True,
    }]

    def _extract_video(self, video_id, catalogue=None):
        """Query the player web service and build the info dict for video_id.

        The player API is queried once per device type ('desktop', 'mobile');
        the first non-empty title, subtitle, image and duration win, and every
        non-DRM video source is collected and turned into formats.

        catalogue is accepted for interface compatibility but is not sent with
        any request made here.

        Raises ExtractorError when no usable video source is returned or when
        the geo service reports a country outside the allowed list.
        """
        info = {
            'title': None,
            'subtitle': None,
            'image': None,
            'subtitles': {},
            'duration': None,
            'videos': [],
            'formats': [],
        }

        def update_info(name, value):
            # First truthy value wins; later responses never overwrite it.
            if (info[name] is None) and value:
                info[name] = value

        for device_type in ['desktop', 'mobile']:
            linfo = self._download_json(
                'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
                video_id, 'Downloading %s video JSON' % device_type, query={
                    'device_type': device_type,
                    'browser': 'chrome',
                }, fatal=False)

            if linfo and linfo.get('video'):
                if linfo.get('meta'):
                    update_info('title', linfo['meta'].get('title'))
                    update_info('subtitle', linfo['meta'].get('additional_title'))
                    update_info('image', linfo['meta'].get('image_url'))

                if linfo['video'].get('url'):
                    if linfo['video'].get('drm'):
                        self._downloader.to_screen('This video source is DRM protected. Skipping')
                    else:
                        info['videos'].append(linfo['video'])

                update_info('duration', linfo['video'].get('duration'))

        if not info['videos']:
            raise ExtractorError(
                'No video source has been found',
                expected=True,
                video_id=video_id)

        # Only the first collected source is consulted for geo restrictions.
        allowed_countries = info['videos'][0].get('geoblocage')
        if allowed_countries:
            georestricted = True
            geo_info = self._download_json(
                'http://geo.francetv.fr/ws/edgescape.json', video_id,
                'Downloading geo restriction info')
            country = geo_info['reponse']['geo_info']['country_code']
            if country not in allowed_countries:
                raise ExtractorError(
                    'The video is not available from your location',
                    expected=True)
        else:
            georestricted = False

        def sign(manifest_url, manifest_id):
            # Try each signing host in turn; fall back to the unsigned URL
            # when none of them answers with something that looks like a URL.
            for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
                signed_url = url_or_none(self._download_webpage(
                    'https://%s/esi/TA' % host, video_id,
                    'Downloading signed %s manifest URL' % manifest_id,
                    fatal=False, query={
                        'url': manifest_url,
                    }))
                if signed_url:
                    return signed_url
            return manifest_url

        is_live = None
        for video in info['videos']:
            video_url = video.get('url')
            if not video_url:
                continue
            if is_live is None:
                # Decided once, from the first source that has a URL.
                is_live = (try_get(
                    video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
                    or video.get('is_live') is True
                    or '/live.francetv.fr/' in video_url)
            format_id = video.get('format')
            ext = determine_ext(video_url)
            if ext == 'f4m':
                if georestricted:
                    # See https://github.com/ytdl-org/youtube-dl/issues/3963
                    # m3u8 urls work fine
                    continue
                info['formats'].extend(self._extract_f4m_formats(
                    sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                    video_id, f4m_id=format_id, fatal=False))
            elif ext == 'm3u8':
                # NOTE(review): the unpacking relies on include_subtitles=True
                # yielding a (formats, subtitles) pair even when fatal=False
                # and the download fails - confirm against the helper.
                m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats(
                    sign(video_url, format_id), video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id=format_id,
                    fatal=False, include_subtitles=True)
                info['formats'].extend(m3u8_formats)
                for lang, lang_subs in m3u8_subtitles.items():
                    if lang in info['subtitles']:
                        info['subtitles'][lang].extend(lang_subs)
                    else:
                        info['subtitles'][lang] = lang_subs
            elif ext == 'mpd':
                info['formats'].extend(self._extract_mpd_formats(
                    sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
            elif video_url.startswith('rtmp'):
                info['formats'].append({
                    'url': video_url,
                    'format_id': 'rtmp-%s' % format_id,
                    'ext': 'flv',
                })
            else:
                if self._is_valid_url(video_url, video_id, format_id):
                    info['formats'].append({
                        'url': video_url,
                        'format_id': format_id,
                    })

        self._sort_formats(info['formats'])

        # The API may return no meta title at all, so never assume a string
        # here: use the subtitle alone, or the video id as a last resort.
        title = info['title']
        if info['subtitle']:
            if title:
                title = '%s - %s' % (title, info['subtitle'])
            else:
                title = '%s' % info['subtitle']
        title = (title or video_id).strip()

        def make_subtitle_downloader(sub):
            # Bind each subtitle entry through a factory; a lambda written
            # directly in the loop would see only the last loop value.
            def download(ydl, filename):
                return PROTOCOL_MAP['m3u8_native'](ydl, ydl.params).download(filename, sub)
            return download

        for sub_entries in info['subtitles'].values():
            for sub in sub_entries:
                sub['downloader'] = make_subtitle_downloader(sub)

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            # NOTE(review): 'synopsis' and 'diffusion' are never stored in
            # info above, so description and timestamp are always None here.
            'description': clean_html(info.get('synopsis')),
            'thumbnail': info.get('image'),
            'duration': int_or_none(info.get('duration')),
            'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
            'is_live': is_live,
            'formats': info['formats'],
            'subtitles': info['subtitles'],
        }

    def _real_extract(self, url):
        """Resolve the video id (and optional catalog) from url and extract it.

        Raises ExtractorError('Invalid URL') when no id can be determined.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        catalog = mobj.group('catalog')

        if not video_id:
            # Web-service URL form: the id lives in the query string.
            qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            video_id = qs.get('idDiffusion', [None])[0]
            catalog = qs.get('catalogue', [None])[0]
            if not video_id:
                raise ExtractorError('Invalid URL', expected=True)

        return self._extract_video(video_id, catalog)
2013-09-10 22:50:34 +09:00
2018-02-11 23:14:05 +09:00
class FranceTVSiteIE(FranceTVBaseInfoExtractor):
    """Extractor for france.tv / mobile.france.tv '.html' pages.

    Finds the embedded video id in the page and delegates to FranceTVIE.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'

    _TESTS = [{
        'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
        'info_dict': {
            'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
            'ext': 'mp4',
            'title': '13h15, le dimanche... - Les mystères de Jésus',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [FranceTVIE.ie_key()],
    }, {
        # france3
        'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
        'only_matching': True,
    }, {
        # france4
        'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
        'only_matching': True,
    }, {
        # france5
        'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
        'only_matching': True,
    }, {
        # franceo
        'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
        'only_matching': True,
    }, {
        # france2 live
        'url': 'https://www.france.tv/france-2/direct.html',
        'only_matching': True,
    }, {
        'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
        'only_matching': True,
    }, {
        'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
        'only_matching': True,
    }, {
        'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
        'only_matching': True,
    }, {
        'url': 'https://www.france.tv/142749-rouge-sang.html',
        'only_matching': True,
    }, {
        # france-3 live
        'url': 'https://www.france.tv/france-3/direct.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the video id in the page and hand it to FranceTVIE."""
        page = self._download_webpage(url, self._match_id(url))

        # Preferred source: a data-main-video attribute or a videoId value.
        main_video_pattern = r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1'
        main_id = self._search_regex(
            main_video_pattern, page, 'video id', default=None, group='id')
        if main_id:
            return self._make_url_result(main_id, None)

        # Fallback: an 'id@catalogue' pair taken from a videos.francetv.fr link.
        link_pattern = r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"'
        id_and_catalogue = self._html_search_regex(
            link_pattern, page, 'video ID')
        video_id, catalogue = id_and_catalogue.split('@')
        return self._make_url_result(video_id, catalogue)
2017-05-10 01:07:01 +09:00
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for francetvinfo.fr article and replay pages.

    Pages embedding Dailymotion players are returned as a playlist of
    Dailymotion results; otherwise the France Televisions video id found in
    the page is delegated to FranceTVIE.
    """

    IE_NAME = 'francetvinfo.fr'
    _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'

    _TESTS = [{
        'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2019_3569073.html',
        'info_dict': {
            'id': 'e49f9ff0-2177-458e-830f-a28eccf19dd1',
            'ext': 'mp4',
            'title': 'Soir 3',
            'subtitles': {
                'fr': 'mincount:1',
            },
        },
        'params': {
            'skip_download': True,
            'format': 'dash-video=118000+dash-audio_fre=192000',
        },
        'add_ie': [FranceTVIE.ie_key()],
        # Must be a list of regexes: a bare string would be iterated
        # character by character.
        'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
    }, {
        'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
        'only_matching': True,
    }, {
        'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
        'only_matching': True,
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
        'only_matching': True,
    }, {
        # Dailymotion embed
        'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
        'md5': 'ee7f1828f25a648addc90cb2687b1f12',
        'info_dict': {
            'id': 'x4iiko0',
            'ext': 'mp4',
            'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
            'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
            'timestamp': 1467011958,
            'upload_date': '20160627',
            'uploader': 'France Inter',
            'uploader_id': 'x2q2ez',
        },
        'add_ie': ['Dailymotion'],
        'params': {
            # TODO: the download currently fails (FORBIDDEN) - fix and complete the test
            'skip_download': True,
        },
    }, {
        'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
        'only_matching': True,
    }, {
        # "<figure id=" pattern (#28792)
        'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return Dailymotion embeds as a playlist, else delegate to FranceTVIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Dailymotion embeds take precedence over native players.
        dailymotion_urls = DailymotionIE._extract_urls(webpage)
        if dailymotion_urls:
            return self.playlist_result([
                self.url_result(dailymotion_url, DailymotionIE.ie_key())
                for dailymotion_url in dailymotion_urls])

        # Patterns are tried in order; the last one matches a bare UUID taken
        # from a data-id attribute or a <figure id=...> element.
        video_id = self._search_regex(
            (r'player\.load[^;]+src:\s*["\']([^"\']+)',
             r'id-video=([^@]+@[^"]+)',
             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
             r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
            webpage, 'video id')

        return self._make_url_result(video_id)