[franceinter] flake8

[franceinter] add thumbnail url (#27153 )
Co-authored-by: remitamine <remitamine@gmail.com>
2025-07-13 06:54:15 +09:00 · 2020-11-22 19:38:45 +01:00 · 2020-11-22 19:35:53 +01:00 · 2020-11-22 17:39:41 +01:00
3 changed files with 102 additions and 0 deletions
--- a/youtube_dl/extractor/box.py
+++ b/youtube_dl/extractor/box.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    # try_get,
+    update_url_query,
+)
+
+
+class BoxIE(InfoExtractor):  # extractor for single files reached through app.box.com shared links
+    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
+        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
+        'info_dict': {
+            'id': '510727257538',
+            'ext': 'mp4',
+            'title': 'Garber   St. Louis will be 28th MLS team  +scarving.mp4',
+            'uploader': 'MLS Video',
+            'timestamp': 1566320259,
+            'upload_date': '20190820',
+            'uploader_id': '235196876',
+        }
+    }
+
+    def _real_extract(self, url):  # returns the info dict (id, title, formats, uploader data) for one shared file
+        shared_name, file_id = re.match(self._VALID_URL, url).groups()  # group order follows _VALID_URL: shared_name, then id
+        webpage = self._download_webpage(url, file_id)
+        request_token = self._parse_json(self._search_regex(
+            r'Box\.config\s*=\s*({.+?});', webpage,
+            'Box config'), file_id)['requestToken']  # token is read from the Box.config object embedded in the page
+        access_token = self._download_json(
+            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
+            'Downloading token JSON metadata',
+            data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
+                'Content-Type': 'application/json',
+                'X-Request-Token': request_token,
+                'X-Box-EndUser-API': 'sharedName=' + shared_name,
+            })[file_id]['read']  # response is indexed by file id, then by the 'read' key
+        shared_link = 'https://app.box.com/s/' + shared_name  # rebuilt without any subdomain; reused in the BoxApi header and the download query
+        f = self._download_json(
+            'https://api.box.com/2.0/files/' + file_id, file_id,
+            'Downloading file JSON metadata', headers={
+                'Authorization': 'Bearer ' + access_token,
+                'BoxApi': 'shared_link=' + shared_link,
+                'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
+            }, query={
+                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
+            })
+        title = f['name']  # the only metadata field read with [] here: a missing 'name' raises KeyError
+
+        query = {  # appended to the authenticated download URL below
+            'access_token': access_token,
+            'shared_link': shared_link
+        }
+
+        formats = []
+
+        # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
+        #     entry_url_template = try_get(
+        #         entry, lambda x: x['content']['url_template'])
+        #     if not entry_url_template:
+        #         continue
+        #     representation = entry.get('representation')
+        #     if representation == 'dash':
+        #         TODO: append query to every fragment URL
+        #         formats.extend(self._extract_mpd_formats(
+        #             entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
+        #             file_id, query=query))
+
+        authenticated_download_url = f.get('authenticated_download_url')
+        if authenticated_download_url and f.get('is_download_available'):  # both the URL and the availability flag must be truthy
+            formats.append({
+                'ext': f.get('extension') or determine_ext(title),  # fall back to determine_ext(title) when 'extension' is empty
+                'filesize': f.get('size'),
+                'format_id': 'download',
+                'url': update_url_query(authenticated_download_url, query),
+            })
+
+        self._sort_formats(formats)
+
+        creator = f.get('created_by') or {}  # a missing or null creator becomes an empty dict so .get() below is safe
+
+        return {
+            'id': file_id,
+            'title': title,
+            'formats': formats,
+            'description': f.get('description') or None,  # falsy values such as '' are reported as None
+            'uploader': creator.get('name'),
+            'timestamp': parse_iso8601(f.get('created_at')),
+            'uploader_id': creator.get('id'),
+        }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -122,6 +122,7 @@ from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bostonglobe import BostonGlobeIE
+from .box import BoxIE
 from .bpb import BpbIE
 from .br import (
    BRIE,
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dl/extractor/franceinter.py
@@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
+            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }
@@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
+        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

        upload_date_str = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
@@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
            'id': video_id,
            'title': title,
            'description': description,
+            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': [{
                'url': video_url,
Author	SHA1	Message	Date
Remita Amine	2cd43a00d1	[franceinter] flake8	2020-11-22 19:38:45 +01:00
renalid	dd0f524c69	[franceinter] add thumbnail url (#27153 ) Co-authored-by: remitamine <remitamine@gmail.com>	2020-11-22 19:35:53 +01:00
Remita Amine	c84f9475b8	[box] Add new extractor(#5949 )	2020-11-22 17:39:41 +01:00