Publicise FIFA extractor

Add FIFA extractor
2025-10-23 16:48:37 +09:00 · 2022-11-29 18:51:06 +00:00 · 2022-11-29 18:49:05 +00:00
3 changed files with 137 additions and 25 deletions
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -87,7 +87,6 @@ from .utils import (
    subtitles_filename,
    UnavailableVideoError,
    url_basename,
-    variadic,
    version_tuple,
    write_json_file,
    write_string,
@@ -1299,46 +1298,57 @@ class YoutubeDL(object):
                format_spec = selector.selector

                def selector_function(ctx):
-
-                    def best_worst(fmts, fmt_spec='best'):
-                        format_idx = 0 if fmt_spec == 'worst' else -1
-                        audiovideo_formats = [
-                            f for f in fmts
-                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
-                        if audiovideo_formats:
-                            return audiovideo_formats[format_idx]
-                        # for extractors with incomplete formats (audio only (soundcloud)
-                        # or video only (imgur)) we will fallback to best/worst
-                        # {video,audio}-only format
-                        elif ctx['incomplete_formats']:
-                            return fmts[format_idx]
-
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
-                        pass
-                    elif format_spec in ('best', 'worst', None):
-                        formats = best_worst(formats, format_spec)
-                    elif format_spec in ('bestaudio', 'worstaudio'):
+                        for f in formats:
+                            yield f
+                    elif format_spec in ['best', 'worst', None]:
+                        format_idx = 0 if format_spec == 'worst' else -1
+                        audiovideo_formats = [
+                            f for f in formats
+                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+                        if audiovideo_formats:
+                            yield audiovideo_formats[format_idx]
+                        # for extractors with incomplete formats (audio only (soundcloud)
+                        # or video only (imgur)) we will fallback to best/worst
+                        # {video,audio}-only format
+                        elif ctx['incomplete_formats']:
+                            yield formats[format_idx]
+                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
-                        formats = audio_formats[:1] if format_spec == 'worstaudio' else audio_formats[-1:]
-                    elif format_spec in ('bestvideo', 'worstvideo'):
+                        if audio_formats:
+                            yield audio_formats[-1]
+                    elif format_spec == 'worstaudio':
+                        audio_formats = [
+                            f for f in formats
+                            if f.get('vcodec') == 'none']
+                        if audio_formats:
+                            yield audio_formats[0]
+                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
-                        formats = video_formats[:1] if format_spec == 'worstvideo' else video_formats[-1:]
+                        if video_formats:
+                            yield video_formats[-1]
+                    elif format_spec == 'worstvideo':
+                        video_formats = [
+                            f for f in formats
+                            if f.get('acodec') == 'none']
+                        if video_formats:
+                            yield video_formats[0]
                    else:
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
-                        formats = best_worst(list(filter(filter_f, formats)))
-                    for f in variadic(formats or []):
-                        yield f
+                        matches = list(filter(filter_f, formats))
+                        if matches:
+                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -374,6 +374,7 @@ from .fc2 import (
    FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .fifa import FifaIE
 from .filmon import (
    FilmOnIE,
    FilmOnChannelIE,
--- a/youtube_dl/extractor/fifa.py
+++ b/youtube_dl/extractor/fifa.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    int_or_none,
+    traverse_obj,
+    unified_timestamp,
+)
+
+if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
+    # Shim: the imported InfoExtractor has no callable _match_valid_url(),
+    # so subclass it and supply one that matches _VALID_URL with `re`.
+    import re
+    BaseInfoExtractor = InfoExtractor
+
+    class InfoExtractor(BaseInfoExtractor):
+
+        @classmethod
+        def _match_valid_url(cls, url):
+            return re.compile(cls._VALID_URL).match(url)
+
+
+class FifaIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
+        'info_dict': {
+            'id': '7on10qPcnyLajDDU3ntg6y',
+            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
+            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments'],
+            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
+            'duration': 8165,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
+        'info_dict': {
+            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
+            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
+            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments', 'Highlights'],
+            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
+            'duration': 902,
+            'release_timestamp': 1404777600,
+            'release_date': '20140708',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
+        'info_dict': {
+            'id': '3C6gQH9C2DLwzNx7BMRQdp',
+            'title': 'Josimar goal against Northern Ireland | Classic Goals',
+            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
+            'ext': 'mp4',
+            'categories': ['FIFA Tournaments', 'Goal'],
+            'duration': 28,
+            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict (metadata and m3u8 formats) for one fifa.com/fifaplus watch URL."""
+        video_id = self._match_valid_url(url).group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        preconnect_link = self._search_regex(
+            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
+
+        # metadata is best-effort: with fatal=False a failed request does not give a dict, so use {}
+        video_details = self._download_json(
+            '%s/sections/videoDetails/%s' % (preconnect_link, video_id), video_id, 'Downloading Video Details', fatal=False) or {}
+
+        preplay_parameters = self._download_json(
+            '%s/videoPlayerData/%s' % (preconnect_link, video_id), video_id, 'Downloading Preplay Parameters')['preplayParameters']
+
+        content_data = self._download_json(
+            # 1. query string is expected to be sent as-is
+            # 2. `sig` must be appended
+            # 3. if absent, the call appears to work but the manifest is bad (404)
+            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
+            video_id, 'Downloading Content Data')
+
+        formats = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_details.get('title') or self._og_search_title(webpage),  # og:title if the API gave none
+            'description': video_details.get('description'),
+            'duration': int_or_none(video_details.get('duration')),
+            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
+            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
+            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
+            'formats': formats,
+        }
Author	SHA1	Message	Date
dirkf	acb86ebd50	Publicise FIFA extractor	2022-11-29 18:51:06 +00:00
dirkf	2a926aef87	Add FIFA extractor	2022-11-29 18:49:05 +00:00