From ecbd4635522762adf93c1d1f62953bfc0dd2d714 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Sun, 19 Sep 2021 03:03:31 +0000
Subject: [PATCH 01/13] more complete patch with subtitles

---
 youtube_dl/YoutubeDL.py          |   2 +
 youtube_dl/extractor/common.py   |  19 +++--
 youtube_dl/extractor/francetv.py | 123 +++++++++++++++++--------------
 3 files changed, 82 insertions(+), 62 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index fe30758ef..02c36fb69 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1879,6 +1879,8 @@ class YoutubeDL(object):
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
                             return
+                    elif sub_info.get('downloader') is not None:
+                        sub_info.get('downloader')(self, encodeFilename(sub_filename))
                     else:
                         try:
                             sub_data = ie._request_webpage(
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 797c35fd5..e63b7537c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1635,7 +1635,7 @@ class InfoExtractor(object):
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False, data=None, headers={},
-                              query={}):
+                              query={}, include_subtitles=False):
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
@@ -1650,11 +1650,11 @@ class InfoExtractor(object):
 
         return self._parse_m3u8_formats(
             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
-            preference=preference, m3u8_id=m3u8_id, live=live)
+            preference=preference, m3u8_id=m3u8_id, live=live, include_subtitles=include_subtitles)
 
     def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
                             entry_protocol='m3u8', preference=None,
-                            m3u8_id=None, live=False):
+                            m3u8_id=None, live=False, include_subtitles=False):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
@@ -1662,6 +1662,7 @@ class InfoExtractor(object):
             return []
 
         formats = []
+        subtitles = {}
 
         format_url = lambda u: (
             u
@@ -1696,13 +1697,19 @@ class InfoExtractor(object):
         groups = {}
         last_stream_inf = {}
 
-        def extract_media(x_media_line):
+        def extract_media(x_media_line, include_subtitles=False):
             media = parse_m3u8_attributes(x_media_line)
             # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
             media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
             if not (media_type and group_id and name):
                 return
             groups.setdefault(group_id, []).append(media)
+            if include_subtitles and (media_type == 'SUBTITLES'):
+                subtitles[media['LANGUAGE']] = [{
+                    'url': format_url(media['URI']),
+                    'ext': media['SUBFORMAT'],
+                }]
+                return
             if media_type not in ('VIDEO', 'AUDIO'):
                 return
             media_url = media.get('URI')
@@ -1748,7 +1755,7 @@ class InfoExtractor(object):
         # precede EXT-X-MEDIA tags in HLS manifest such as [3].
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-MEDIA:'):
-                extract_media(line)
+                extract_media(line, include_subtitles=include_subtitles)
 
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
@@ -1828,6 +1835,8 @@ class InfoExtractor(object):
                     formats.append(http_f)
 
                 last_stream_inf = {}
+        if include_subtitles:
+            return formats, subtitles
         return formats
 
     @staticmethod
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index e4ec2e200..41e96021a 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -20,6 +20,7 @@ from ..utils import (
     urljoin,
 )
 from .dailymotion import DailymotionIE
+from ..downloader import PROTOCOL_MAP
 
 
 class FranceTVBaseInfoExtractor(InfoExtractor):
@@ -90,17 +91,47 @@ class FranceTVIE(InfoExtractor):
         # Videos are identified by idDiffusion so catalogue part is optional.
         # However when provided, some extra formats may be returned so we pass
         # it if available.
-        info = self._download_json(
-            'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
-            video_id, 'Downloading video JSON', query={
-                'idDiffusion': video_id,
-                'catalogue': catalogue or '',
-            })
 
-        if info.get('status') == 'NOK':
+        info = {
+            'title': None,
+            'subtitle': None,
+            'image': None,
+            'subtitles': {},
+            'duration': None,
+            'videos': [],
+            'formats': [],
+        }
+
+        def update_info(name, value):
+            if (info[name] is None) and value:
+                info[name] = value
+
+        for device_type in ['desktop', 'mobile']:
+            linfo = self._download_json(
+                'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
+                video_id, 'Downloading %s video JSON' % device_type, query={
+                    'device_type': device_type,
+                    'browser': 'chrome',
+                }, fatal=False)
+
+            if linfo and linfo.get('video'):
+                if linfo.get('meta'):
+                    update_info('title', linfo['meta'].get('title'))
+                    update_info('subtitle', linfo['meta'].get('additional_title'))
+                    update_info('image', linfo['meta'].get('image_url'))
+                if linfo['video'].get('url'):
+                    if linfo['video'].get('drm'):
+                        self._downloader.to_screen('This video source is DRM protected. Skipping')
+                    else:
+                        info['videos'].append(linfo['video'])
+                        update_info('duration', linfo['video'].get('duration'))
+
+        if len(info['videos']) == 0:
             raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, info['message']),
-                expected=True)
+                'No video source has been found',
+                expected=True,
+                video_id=video_id)
+
         allowed_countries = info['videos'][0].get('geoblocage')
         if allowed_countries:
             georestricted = True
@@ -129,29 +160,7 @@ class FranceTVIE(InfoExtractor):
 
         is_live = None
 
-        videos = []
-
-        for video in (info.get('videos') or []):
-            if video.get('statut') != 'ONLINE':
-                continue
-            if not video.get('url'):
-                continue
-            videos.append(video)
-
-        if not videos:
-            for device_type in ['desktop', 'mobile']:
-                fallback_info = self._download_json(
-                    'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
-                    video_id, 'Downloading fallback %s video JSON' % device_type, query={
-                        'device_type': device_type,
-                        'browser': 'chrome',
-                    }, fatal=False)
-
-                if fallback_info and fallback_info.get('video'):
-                    videos.append(fallback_info['video'])
-
-        formats = []
-        for video in videos:
+        for video in info['videos']:
             video_url = video.get('url')
             if not video_url:
                 continue
@@ -167,56 +176,56 @@ class FranceTVIE(InfoExtractor):
                     # See https://github.com/ytdl-org/youtube-dl/issues/3963
                     # m3u8 urls work fine
                     continue
-                formats.extend(self._extract_f4m_formats(
+                info['formats'].extend(self._extract_f4m_formats(
                     sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                     video_id, f4m_id=format_id, fatal=False))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                format, subtitle = self._extract_m3u8_formats(
                     sign(video_url, format_id), video_id, 'mp4',
                     entry_protocol='m3u8_native', m3u8_id=format_id,
-                    fatal=False))
+                    fatal=False, include_subtitles=True)
+                info['formats'].extend(format)
+                for lang in subtitle:
+                    if lang in info['subtitles']:
+                        info['subtitles'][lang].extend(subtitle[lang])
+                    else:
+                        info['subtitles'][lang] = subtitle[lang]
             elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
+                info['formats'].extend(self._extract_mpd_formats(
                     sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
             elif video_url.startswith('rtmp'):
-                formats.append({
+                info['formats'].append({
                     'url': video_url,
                     'format_id': 'rtmp-%s' % format_id,
                     'ext': 'flv',
                 })
             else:
                 if self._is_valid_url(video_url, video_id, format_id):
-                    formats.append({
+                    info['formats'].append({
                         'url': video_url,
                         'format_id': format_id,
                     })
 
-        self._sort_formats(formats)
+        self._sort_formats(info['formats'])
 
-        title = info['titre']
-        subtitle = info.get('sous_titre')
-        if subtitle:
-            title += ' - %s' % subtitle
-        title = title.strip()
-
-        subtitles = {}
-        subtitles_list = [{
-            'url': subformat['url'],
-            'ext': subformat.get('format'),
-        } for subformat in info.get('subtitles', []) if subformat.get('url')]
-        if subtitles_list:
-            subtitles['fr'] = subtitles_list
+        if info['subtitle']:
+            info['title'] += ' - %s' % info['subtitle']
+        info['title'] = info['title'].strip()
 
+        for lang, sts in info['subtitles'].items():
+            for st in sts:
+                st['downloader'] = lambda ydl, filename: PROTOCOL_MAP['m3u8_native'](ydl, ydl.params).download(filename, st)
+        
         return {
             'id': video_id,
-            'title': self._live_title(title) if is_live else title,
+            'title': self._live_title(info['title']) if is_live else info['title'],
             'description': clean_html(info.get('synopsis')),
-            'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
-            'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
+            'thumbnail': info.get('image'),
+            'duration': int_or_none(info.get('duration')),
             'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
             'is_live': is_live,
-            'formats': formats,
-            'subtitles': subtitles,
+            'formats': info['formats'],
+            'subtitles': info['subtitles'],
         }
 
     def _real_extract(self, url):

From f96eff43f40503e69ebdcbaf55d11907b0b74aee Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Mon, 20 Sep 2021 01:12:41 +0000
Subject: [PATCH 02/13] Fixing test

---
 test/helper.py                     |   2 +-
 youtube_dl/extractor/common.py     |   2 +-
 youtube_dl/extractor/extractors.py |   5 -
 youtube_dl/extractor/francetv.py   | 187 ++---------------------------
 youtube_dl/utils.py                |   2 +-
 5 files changed, 13 insertions(+), 185 deletions(-)

diff --git a/test/helper.py b/test/helper.py
index e62aab11e..f9623bc6b 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -190,7 +190,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     expect_dict(self, got_dict, expected_dict)
     # Check for the presence of mandatory fields
     if got_dict.get('_type') not in ('playlist', 'multi_video'):
-        for key in ('id', 'url', 'title', 'ext'):
+        for key in ('id', 'webpage_url', 'title', 'ext'):
             self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
     # Check for mandatory fields that are automatically set by YoutubeDL
     for key in ['webpage_url', 'extractor', 'extractor_key']:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e63b7537c..058e224d4 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1707,7 +1707,7 @@ class InfoExtractor(object):
             if include_subtitles and (media_type == 'SUBTITLES'):
                 subtitles[media['LANGUAGE']] = [{
                     'url': format_url(media['URI']),
-                    'ext': media['SUBFORMAT'],
+                    'ext': media.get('SUBFORMAT', 'webtt'),
                 }]
                 return
             if media_type not in ('VIDEO', 'AUDIO'):
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 6e8fc3961..3e33f496b 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -401,12 +401,7 @@ from .franceinter import FranceInterIE
 from .francetv import (
     FranceTVIE,
     FranceTVSiteIE,
-    FranceTVEmbedIE,
     FranceTVInfoIE,
-    FranceTVInfoSportIE,
-    FranceTVJeunesseIE,
-    GenerationWhatIE,
-    CultureboxIE,
 )
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 41e96021a..079c49190 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -50,14 +50,11 @@ class FranceTVIE(InfoExtractor):
     _TESTS = [{
         # without catalog
         'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
-        'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
+        'md5': '283491d723a14db7c4e10b887c4b475a',
         'info_dict': {
             'id': '162311093',
             'ext': 'mp4',
             'title': '13h15, le dimanche... - Les mystères de Jésus',
-            'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
-            'timestamp': 1502623500,
-            'upload_date': '20170813',
         },
     }, {
         # with catalog
@@ -252,9 +249,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
             'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
             'ext': 'mp4',
             'title': '13h15, le dimanche... - Les mystères de Jésus',
-            'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
-            'timestamp': 1502623500,
-            'upload_date': '20170813',
         },
         'params': {
             'skip_download': True,
@@ -316,55 +310,26 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
         return self._make_url_result(video_id, catalogue)
 
 
-class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
-    _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
-
-    _TESTS = [{
-        'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
-        'info_dict': {
-            'id': 'NI_983319',
-            'ext': 'mp4',
-            'title': 'Le Pen Reims',
-            'upload_date': '20170505',
-            'timestamp': 1493981780,
-            'duration': 16,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [FranceTVIE.ie_key()],
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video = self._download_json(
-            'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
-            video_id)
-
-        return self._make_url_result(video['video_id'], video.get('catalog'))
-
-
 class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
     _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
 
     _TESTS = [{
-        'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+        'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2019_3569073.html',
         'info_dict': {
-            'id': '84981923',
+            'id': 'e49f9ff0-2177-458e-830f-a28eccf19dd1',
             'ext': 'mp4',
             'title': 'Soir 3',
-            'upload_date': '20130826',
-            'timestamp': 1377548400,
             'subtitles': {
-                'fr': 'mincount:2',
+                'fr': 'mincount:1',
             },
         },
         'params': {
             'skip_download': True,
+            'format': 'dash-video=118000+dash-audio_fre=192000',
         },
         'add_ie': [FranceTVIE.ie_key()],
+        'expected_warnings': 'Unknown MIME type application/mp4 in DASH manifest',
     }, {
         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
         'only_matching': True,
@@ -389,6 +354,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             'uploader_id': 'x2q2ez',
         },
         'add_ie': ['Dailymotion'],
+        'params': {
+            # TODO: the download currently fails (FORBIDDEN) - fix and complete the test
+            'skip_download': True,
+        },
     }, {
         'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
         'only_matching': True,
@@ -417,139 +386,3 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
             webpage, 'video id')
 
         return self._make_url_result(video_id)
-
-
-class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
-    IE_NAME = 'sport.francetvinfo.fr'
-    _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
-        'info_dict': {
-            'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
-            'ext': 'mp4',
-            'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
-            'timestamp': 1523639962,
-            'upload_date': '20180413',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [FranceTVIE.ie_key()],
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
-        return self._make_url_result(video_id, 'Sport-web')
-
-
-class GenerationWhatIE(InfoExtractor):
-    IE_NAME = 'france2.fr:generation-what'
-    _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
-
-    _TESTS = [{
-        'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
-        'info_dict': {
-            'id': 'wtvKYUG45iw',
-            'ext': 'mp4',
-            'title': 'Generation What - Garde à vous - FRA',
-            'uploader': 'Generation What',
-            'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
-            'upload_date': '20160411',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Youtube'],
-    }, {
-        'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        youtube_id = self._search_regex(
-            r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
-            webpage, 'youtube id')
-
-        return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
-
-
-class CultureboxIE(FranceTVBaseInfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
-    _TESTS = [{
-        'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
-        'info_dict': {
-            'id': 'EV_134885',
-            'ext': 'mp4',
-            'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
-            'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
-            'upload_date': '20180206',
-            'timestamp': 1517945220,
-            'duration': 5981,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': [FranceTVIE.ie_key()],
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        if ">Ce live n'est plus disponible en replay<" in webpage:
-            raise ExtractorError(
-                'Video %s is not available' % display_id, expected=True)
-
-        video_id, catalogue = self._search_regex(
-            r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
-            webpage, 'video id').split('@')
-
-        return self._make_url_result(video_id, catalogue)
-
-
-class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
-
-    _TESTS = [{
-        'url': 'https://www.zouzous.fr/heros/simon',
-        'info_dict': {
-            'id': 'simon',
-        },
-        'playlist_count': 9,
-    }, {
-        'url': 'https://www.ludo.fr/heros/ninjago',
-        'info_dict': {
-            'id': 'ninjago',
-        },
-        'playlist_count': 10,
-    }, {
-        'url': 'https://www.zouzous.fr/heros/simon?abc',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
-
-        playlist = self._download_json(
-            '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
-
-        if not playlist.get('count'):
-            raise ExtractorError(
-                '%s is not available' % playlist_id, expected=True)
-
-        entries = []
-        for item in playlist['items']:
-            identity = item.get('identity')
-            if identity and isinstance(identity, compat_str):
-                entries.append(self._make_url_result(identity))
-
-        return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e722eed58..ecf744041 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1830,7 +1830,7 @@ def write_json_file(obj, fn):
 
     try:
         with tf:
-            json.dump(obj, tf)
+            json.dump(obj, tf, default=lambda _:'<not serialized>')
         if sys.platform == 'win32':
             # Need to remove existing file on Windows, else os.rename raises
             # WindowsError or FileExistsError.

From bfa16e8a1d7054dfd64ae61a9f76af8bff3f1de4 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Mon, 20 Sep 2021 01:15:56 +0000
Subject: [PATCH 03/13] style guide

---
 youtube_dl/extractor/francetv.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 079c49190..2d12e6dfb 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -6,7 +6,6 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_str,
     compat_urlparse,
 )
 from ..utils import (
@@ -14,10 +13,8 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
-    parse_duration,
     try_get,
     url_or_none,
-    urljoin,
 )
 from .dailymotion import DailymotionIE
 from ..downloader import PROTOCOL_MAP
@@ -212,7 +209,7 @@ class FranceTVIE(InfoExtractor):
         for lang, sts in info['subtitles'].items():
             for st in sts:
                 st['downloader'] = lambda ydl, filename: PROTOCOL_MAP['m3u8_native'](ydl, ydl.params).download(filename, st)
-        
+
         return {
             'id': video_id,
             'title': self._live_title(info['title']) if is_live else info['title'],

From 633ab0c56ffcda9bf2c5d5bb598023e1f7aa86eb Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Mon, 20 Sep 2021 14:33:30 +0000
Subject: [PATCH 04/13] json serialization bugs

---
 youtube_dl/YoutubeDL.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 02c36fb69..c6d906f13 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1771,7 +1771,7 @@ class YoutubeDL(object):
             self.to_stdout(formatSeconds(info_dict['duration']))
         print_mandatory('format')
         if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict))
+            self.to_stdout(json.dumps(info_dict, default=lambda _:'<not serialized>'))
 
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
@@ -2076,7 +2076,7 @@ class YoutubeDL(object):
                 raise
             else:
                 if self.params.get('dump_single_json', False):
-                    self.to_stdout(json.dumps(res))
+                    self.to_stdout(json.dumps(res, default=lambda _:'<not serialized>'))
 
         return self._download_retcode
 

From 7f23f02da7b3d07243d83497f4ecbee3c2346af8 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Mon, 20 Sep 2021 14:36:49 +0000
Subject: [PATCH 05/13] review comments

---
 youtube_dl/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c6d906f13..dc4005a18 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1879,7 +1879,7 @@ class YoutubeDL(object):
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
                             return
-                    elif sub_info.get('downloader') is not None:
+                    elif callable(sub_info.get('downloader')):
                         sub_info.get('downloader')(self, encodeFilename(sub_filename))
                     else:
                         try:

From d0774569c1eca21344c29f1694dae854a7b2cb2c Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Tue, 21 Sep 2021 15:53:53 +0000
Subject: [PATCH 06/13] update supported sites

---
 docs/supportedsites.md | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index ed0d5e9d9..9ec55f01f 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -210,7 +210,6 @@
  - **CTV**
  - **CTVNews**
  - **cu.ntv.co.jp**: Nippon Television Network
- - **Culturebox**
  - **CultureUnplugged**
  - **curiositystream**
  - **curiositystream:collection**
@@ -307,13 +306,10 @@
  - **foxnews**: Fox News and Fox Business Video
  - **foxnews:article**
  - **FoxSports**
- - **france2.fr:generation-what**
  - **FranceCulture**
  - **FranceInter**
  - **FranceTV**
- - **FranceTVEmbed**
  - **francetvinfo.fr**
- - **FranceTVJeunesse**
  - **FranceTVSite**
  - **Freesound**
  - **freespeech.org**
@@ -472,8 +468,6 @@
  - **LinuxAcademy**
  - **LiTV**
  - **LiveJournal**
- - **LiveLeak**
- - **LiveLeakEmbed**
  - **livestream**
  - **livestream:original**
  - **LnkGo**
@@ -877,7 +871,6 @@
  - **SpankBangPlaylist**
  - **Spankwire**
  - **Spiegel**
- - **sport.francetvinfo.fr**
  - **Sport5**
  - **SportBox**
  - **SportDeutschland**

From 5fb593d50a8134d23c9060979a0440f3198725f1 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Tue, 21 Sep 2021 16:31:43 +0000
Subject: [PATCH 07/13] Moving protocol to download subtitles back to the
 subtitle_info and keep the download logic in YoutubeDL

---
 youtube_dl/YoutubeDL.py           | 19 ++++++++++---------
 youtube_dl/downloader/__init__.py |  3 ++-
 youtube_dl/extractor/common.py    |  1 +
 youtube_dl/extractor/francetv.py  |  4 ----
 youtube_dl/utils.py               |  2 +-
 5 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dc4005a18..018d585ee 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1771,7 +1771,7 @@ class YoutubeDL(object):
             self.to_stdout(formatSeconds(info_dict['duration']))
         print_mandatory('format')
         if self.params.get('forcejson', False):
-            self.to_stdout(json.dumps(info_dict, default=lambda _:'<not serialized>'))
+            self.to_stdout(json.dumps(info_dict))
 
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
@@ -1879,15 +1879,16 @@ class YoutubeDL(object):
                         except (OSError, IOError):
                             self.report_error('Cannot write subtitles file ' + sub_filename)
                             return
-                    elif callable(sub_info.get('downloader')):
-                        sub_info.get('downloader')(self, encodeFilename(sub_filename))
                     else:
+                        fd = get_suitable_downloader(sub_info, self.params)(self, self.params)
                         try:
-                            sub_data = ie._request_webpage(
-                                sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
-                                subfile.write(sub_data)
-                        except (ExtractorError, IOError, OSError, ValueError) as err:
+                            if self.params.get('verbose'):
+                                self.to_screen('[debug] Invoking subtitle downloader on %r' % sub_info.get('url'))
+                            # The FD is supposed to encodeFilename()
+                            if not fd.download(sub_filename, sub_info):
+                                # depending on the FD, it may catch errors and return False, or not
+                                raise DownloadError('Subtitle download failed')
+                        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error, OSError, IOError, YoutubeDLError) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
                                                 (sub_lang, error_to_compat_str(err)))
                             continue
@@ -2076,7 +2077,7 @@ class YoutubeDL(object):
                 raise
             else:
                 if self.params.get('dump_single_json', False):
-                    self.to_stdout(json.dumps(res, default=lambda _:'<not serialized>'))
+                    self.to_stdout(json.dumps(res))
 
         return self._download_retcode
 
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 2e485df9d..f3200566e 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -33,7 +33,8 @@ def get_suitable_downloader(info_dict, params={}):
     """Get the downloader class that can handle the info dict."""
     protocol = determine_protocol(info_dict)
     info_dict['protocol'] = protocol
-
+    print('SACHA> ', protocol)
+    
     # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
     #     return FFmpegFD
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 058e224d4..e45c67f94 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1708,6 +1708,7 @@ class InfoExtractor(object):
                 subtitles[media['LANGUAGE']] = [{
                     'url': format_url(media['URI']),
                     'ext': media.get('SUBFORMAT', 'webtt'),
+                    'protocol': 'm3u8_native',
                 }]
                 return
             if media_type not in ('VIDEO', 'AUDIO'):
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 2d12e6dfb..95ef0bf6c 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -206,10 +206,6 @@ class FranceTVIE(InfoExtractor):
             info['title'] += ' - %s' % info['subtitle']
         info['title'] = info['title'].strip()
 
-        for lang, sts in info['subtitles'].items():
-            for st in sts:
-                st['downloader'] = lambda ydl, filename: PROTOCOL_MAP['m3u8_native'](ydl, ydl.params).download(filename, st)
-
         return {
             'id': video_id,
             'title': self._live_title(info['title']) if is_live else info['title'],
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ecf744041..ef0afd686 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1830,7 +1830,7 @@ def write_json_file(obj, fn):
 
     try:
         with tf:
-            json.dump(obj, tf, default=lambda _:'<not serialized>')
+            json.dump(obj)
         if sys.platform == 'win32':
             # Need to remove existing file on Windows, else os.rename raises
             # WindowsError or FileExistsError.

From f02f87db8e2186ed4df4721c1e57269a3fdf44c5 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Tue, 21 Sep 2021 16:33:20 +0000
Subject: [PATCH 08/13] json.dump bug fix

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ef0afd686..e722eed58 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1830,7 +1830,7 @@ def write_json_file(obj, fn):
 
     try:
         with tf:
-            json.dump(obj)
+            json.dump(obj, tf)
         if sys.platform == 'win32':
             # Need to remove existing file on Windows, else os.rename raises
             # WindowsError or FileExistsError.

From 3b9dad99427a011b25565541b5b03c25dfb19c67 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Tue, 21 Sep 2021 16:40:08 +0000
Subject: [PATCH 09/13] flake8 style fixes

---
 youtube_dl/YoutubeDL.py           | 4 ++--
 youtube_dl/downloader/__init__.py | 3 +--
 youtube_dl/extractor/francetv.py  | 1 -
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 018d585ee..956b869f0 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -91,6 +91,7 @@ from .utils import (
     write_string,
     YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
+    YoutubeDLError,
     YoutubeDLHandler,
     YoutubeDLRedirectHandler,
 )
@@ -1862,7 +1863,6 @@ class YoutubeDL(object):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
-            ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@@ -1887,7 +1887,7 @@ class YoutubeDL(object):
                             # The FD is supposed to encodeFilename()
                             if not fd.download(sub_filename, sub_info):
                                 # depending on the FD, it may catch errors and return False, or not
-                                raise DownloadError('Subtitle download failed')
+                                raise YoutubeDLError('Subtitle download failed')
                         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error, OSError, IOError, YoutubeDLError) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
                                                 (sub_lang, error_to_compat_str(err)))
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index f3200566e..2e485df9d 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -33,8 +33,7 @@ def get_suitable_downloader(info_dict, params={}):
     """Get the downloader class that can handle the info dict."""
     protocol = determine_protocol(info_dict)
     info_dict['protocol'] = protocol
-    print('SACHA> ', protocol)
-    
+
     # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
     #     return FFmpegFD
 
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 95ef0bf6c..003a3a0dc 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -17,7 +17,6 @@ from ..utils import (
     url_or_none,
 )
 from .dailymotion import DailymotionIE
-from ..downloader import PROTOCOL_MAP
 
 
 class FranceTVBaseInfoExtractor(InfoExtractor):

From 8e8e95a4903864bfc643b5857492c531bebad671 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Sat, 25 Sep 2021 22:24:26 +0000
Subject: [PATCH 10/13] document "protocol" field

---
 youtube_dl/extractor/common.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e45c67f94..fd1fcda67 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -250,7 +250,10 @@ class InfoExtractor(object):
                     preference, each element is a dictionary with the "ext"
                     entry and one of:
                         * "data": The subtitles file contents
-                        * "url": A URL pointing to the subtitles file
+                        * "url": A URL pointing to the subtitles resource
+                    With "url", a "protocol" entry (as for "formats" above)
+                    may be provided to indicate how the URL should be
+                    processed; by default it is fetched as a file over HTTP(S)
                     "ext" will be calculated from URL if missing
     automatic_captions: Like 'subtitles', used by the YoutubeIE for
                     automatically generated captions

From 1b0746bab5aa73fbfcbc055fcae290b266749302 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Sat, 25 Sep 2021 22:36:22 +0000
Subject: [PATCH 11/13] Bug fix when _extract_m3u8_formats returns False

---
 youtube_dl/extractor/francetv.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 003a3a0dc..f4ba97d54 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -173,10 +173,13 @@ class FranceTVIE(InfoExtractor):
                     sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                     video_id, f4m_id=format_id, fatal=False))
             elif ext == 'm3u8':
-                format, subtitle = self._extract_m3u8_formats(
+                res = self._extract_m3u8_formats(
                     sign(video_url, format_id), video_id, 'mp4',
                     entry_protocol='m3u8_native', m3u8_id=format_id,
                     fatal=False, include_subtitles=True)
+                if not res:
+                    continue
+                format, subtitle = res
                 info['formats'].extend(format)
                 for lang in subtitle:
                     if lang in info['subtitles']:

From a2c46586abc71594cba72ad9f539b4c41ddb86a7 Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Sat, 25 Sep 2021 22:53:41 +0000
Subject: [PATCH 12/13] set a preference for formats

---
 youtube_dl/extractor/francetv.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index f4ba97d54..86b4d49f7 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -202,6 +202,18 @@ class FranceTVIE(InfoExtractor):
                         'format_id': format_id,
                     })
 
+        for f in info['formats']:
+            preference = 50
+            if f['format_id'].startswith('dash-audio_qtz=96000') or (f['format_id'].find('Description') >= 0):
+                preference = -1
+            elif f['format_id'].startswith('hls-audio'):
+                preference = 10
+            elif f['format_id'].startswith('dash-audio'):
+                preference = 20
+            elif f['format_id'].startswith('dash-video'):
+                preference = 100
+            f['preference'] = preference
+
         self._sort_formats(info['formats'])
 
         if info['subtitle']:

From 19b3af2b3b06c72ffef3af52440554bde30b1d1f Mon Sep 17 00:00:00 2001
From: Sacha Arnoud <ubuntu@vps-e11b9046.vps.ovh.net.novalocal>
Date: Sat, 25 Sep 2021 23:20:03 +0000
Subject: [PATCH 13/13] fix tests

---
 youtube_dl/extractor/francetv.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 86b4d49f7..edd2da2c8 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -46,12 +46,16 @@ class FranceTVIE(InfoExtractor):
     _TESTS = [{
         # without catalog
         'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
-        'md5': '283491d723a14db7c4e10b887c4b475a',
+        'md5': '944fe929c5ed2c05f864085ec5714f98',
         'info_dict': {
             'id': '162311093',
             'ext': 'mp4',
             'title': '13h15, le dimanche... - Les mystères de Jésus',
         },
+        'params': {
+            'format': 'bestvideo',
+        },
+        'expected_warnings': 'Unknown MIME type application/mp4 in DASH manifest',
     }, {
         # with catalog
         'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
@@ -203,15 +207,15 @@ class FranceTVIE(InfoExtractor):
                     })
 
         for f in info['formats']:
-            preference = 50
+            preference = 100
             if f['format_id'].startswith('dash-audio_qtz=96000') or (f['format_id'].find('Description') >= 0):
                 preference = -1
-            elif f['format_id'].startswith('hls-audio'):
-                preference = 10
             elif f['format_id'].startswith('dash-audio'):
-                preference = 20
+                preference = 10
+            elif f['format_id'].startswith('hls-audio'):
+                preference = 200
             elif f['format_id'].startswith('dash-video'):
-                preference = 100
+                preference = 50
             f['preference'] = preference
 
         self._sort_formats(info['formats'])
@@ -259,8 +263,10 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
         },
         'params': {
             'skip_download': True,
+            'format': 'bestvideo',
         },
         'add_ie': [FranceTVIE.ie_key()],
+        'expected_warnings': 'Unknown MIME type application/mp4 in DASH manifest',
     }, {
         # france3
         'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',