WIP: arte config api v2

This commit is contained in:
Kaspar Vollenweider 2021-07-26 07:56:29 +02:00
parent a803582717
commit 66eec4e640
No known key found for this signature in database
GPG Key ID: 9039A456BC9C0F78

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -13,14 +14,50 @@ from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
try_get, try_get,
unified_strdate,
url_or_none, url_or_none,
) )
class ArteTVBaseIE(InfoExtractor): class ArteTVBaseIE(InfoExtractor):
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl' _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
_API_BASE = 'https://api.arte.tv/api/player/v1' _API_BASE_V1 = 'https://api.arte.tv/api/player/v1'
_API_BASE_V2 = 'https://api.arte.tv/api/player/v2'
def _get_api_authorization_header(self, url):
    """Build the HTTP headers required for api.arte.tv/api/player/v2.

    Two pages are downloaded: the page at ``url`` and the guide's
    ``manifest.js``.  The bearer token is read from ``manifest.js``; the
    page at ``url`` is additionally scanned for its
    ``window.__INITIAL_STATE__`` JSON blob.

    :param url: URL of the arte.tv page; also sent as the ``Referer``.
    :return: dict with two keys:
        ``'page_metadata'`` -- the decoded ``window.__INITIAL_STATE__``
        object, or an empty dict when the blob is absent or is not valid
        JSON;
        ``'headers'`` -- the request headers, including the
        ``Authorization: Bearer <token>`` entry.

    A missing token is not handled here; it is left to the failure
    handling of ``_search_regex``.
    """
    # According to the original author this request is only made to fulfil
    # the authorization requirements of api/player/v2; since the page has
    # to be requested anyway, its embedded metadata is collected as well.
    html_page = self._download_webpage(url, 'dummy_auth_request_with_some_meta')

    # The page metadata is optional, so a page without the blob must not
    # abort extraction: default=None makes the lookup non-fatal.
    page_metadata_json = self._search_regex(
        r'window\.__INITIAL_STATE__ = (\{.*\});\n', html_page, 'initial_state',
        default=None)
    page_metadata = {}
    if page_metadata_json:
        try:
            page_metadata = json.loads(page_metadata_json)
        except ValueError:
            # Malformed JSON: fall back to "no metadata" rather than crash.
            page_metadata = {}

    manifest_js = self._download_webpage(
        'https://static-cdn.arte.tv/guide/manifest.js', 'arte_api_token')
    # Require at least one character so an empty token is reported as
    # "not found" instead of producing a useless 'Bearer ' header.
    token = self._search_regex(
        r'"default":\{"token":"([a-zA-Z0-9_-]+)"\}', manifest_js, 'token')

    return {
        'page_metadata': page_metadata,
        'headers': {
            'Authorization': 'Bearer %s' % (token),
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'en-GB,en;q=0.8,de-DE;q=0.5,de;q=0.3',
            'Referer': url,
            'Origin': 'https://www.arte.tv',
            'Connection': 'keep-alive',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'TE': 'trailers',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:90.0) Gecko/20100101 Firefox/90.0',
        },
    }
class ArteTVIE(ArteTVBaseIE): class ArteTVIE(ArteTVBaseIE):
@ -53,28 +90,34 @@ class ArteTVIE(ArteTVBaseIE):
video_id = mobj.group('id') video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2') lang = mobj.group('lang') or mobj.group('lang_2')
info = self._download_json( # legacy for debugging only
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id) legacy_info = self._download_json(
player_info = info['videoJsonPlayer'] '%s/config/%s/%s' % (self._API_BASE_V1, lang, video_id), video_id)
player_info = legacy_info.get('data')
vsr = try_get(player_info, lambda x: x['VSR'], dict) vsr = try_get(player_info, lambda x: x['VSR'], dict)
if not vsr:
error = None
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
error = try_get(
player_info, lambda x: x['custom_msg']['msg'], compat_str)
if not error:
error = 'Video %s is not available' % player_info.get('VID') or video_id
raise ExtractorError(error, expected=True)
upload_date_str = player_info.get('shootingDate') # v2 api stuff
if not upload_date_str: auth_data = self._get_api_authorization_header(url)
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] info = self._download_json(
'%s/config/%s/%s' % (self._API_BASE_V2, lang, video_id), video_id, headers=auth_data.get('headers'))
attributes = info.get('data').get('attributes')
metadata = attributes.get('metadata')
streams = attributes.get('streams')
title = (player_info.get('VTI') or player_info['VID']).strip() if not streams or not metadata:
subtitle = player_info.get('VSU', '').strip() raise ExtractorError('Required metadata could not be fetched', expected=True)
if subtitle:
title += ' - %s' % subtitle info_dict = {
'id': video_id,
'title': self._get_full_title(metadata),
'description': metadata.get('description'),
'upload_date': self._get_upload_date(attributes.get('rights')),
'thumbnail': self._get_thumbnail_url(metadata)
}
import pdb
pdb.set_trace()
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
@ -170,14 +213,36 @@ class ArteTVIE(ArteTVBaseIE):
self._sort_formats(formats) self._sort_formats(formats)
return { return info_dict
'id': player_info.get('VID') or video_id, # return {
'title': title, # 'id': player_info.get('VID') or video_id,
'description': player_info.get('VDE'), # 'title': title,
'upload_date': unified_strdate(upload_date_str), # 'description': player_info.get('VDE'),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), # 'upload_date': unified_strdate(upload_date_str),
'formats': formats, # 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
} # 'formats': formats,
# }
def _get_full_title(self, metadata):
if metadata.get('subtitle'):
return '%s - %s' % (metadata.get('title'), metadata.get('subtitle'))
return metadata.get('title')
def _get_upload_date(self, rights):
begin = rights.get('begin')
if not begin:
return None
date_part = begin.split('T')[0]
if not date_part:
return None
start_year, start_month, start_day = date_part.split('-')
return '%s%s%s' % (start_year, start_month, start_day)
def _get_thumbnail_url(self, metadata):
images = metadata.get('images')
if not images or not images[0] or not images[0].get('url'):
return None
return images[0].get('url')
class ArteTVEmbedIE(InfoExtractor): class ArteTVEmbedIE(InfoExtractor):
@ -229,7 +294,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
lang, playlist_id = re.match(self._VALID_URL, url).groups() lang, playlist_id = re.match(self._VALID_URL, url).groups()
collection = self._download_json( collection = self._download_json(
'%s/collectionData/%s/%s?source=videos' '%s/collectionData/%s/%s?source=videos'
% (self._API_BASE, lang, playlist_id), playlist_id) % (self._API_BASE_V1, lang, playlist_id), playlist_id)
entries = [] entries = []
for video in collection['videos']: for video in collection['videos']:
if not isinstance(video, dict): if not isinstance(video, dict):