From e17d20829b03d1ec7448d0a2425cfb0109d2a5c5 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Fri, 19 Feb 2021 15:47:37 +0100 Subject: [PATCH 01/12] [mediathekviewweb] Add new extractor --- youtube_dl/extractor/mediathekviewweb.py | 194 +++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 youtube_dl/extractor/mediathekviewweb.py diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py new file mode 100644 index 000000000..e96e5eb4d --- /dev/null +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -0,0 +1,194 @@ +import datetime +import itertools +import json +import re + +from .common import InfoExtractor, SearchInfoExtractor +from ..compat import compat_urllib_parse_unquote +from ..utils import ExtractorError, int_or_none + +class MediathekViewWebSearchIE(SearchInfoExtractor): + IE_NAME = 'mediathekviewweb:search' + IE_DESC = 'MediathekViewWeb search' + _SEARCH_KEY = 'mvwsearch' + _MAX_RESULTS = float('inf') + _MAX_RESULTS_PER_PAGE = 50 + # _GEO_COUNTRIES = ['DE'] + + # _TESTS = [{ + # 'url': 'mvwsearch:tagesschau', + # 'info_dict': { + # 'title': 'post-avant jazzcore', + # }, + # 'playlist_count': 15, + # }] + + # Map of title affixes indicating video variants. + _variants = { + 'audio_description': '(Audiodeskription)', + 'sign_language': '(mit Gebärdensprache)', + } + + def _build_conditions(self, search): + # @note So far, there is no API endpoint to convert a query string into + # a complete query object, as required by the /api/query endpoint. + # @see https://github.com/mediathekview/mediathekviewweb/blob/master/client/index.ts#L144 + # for parsing the search string into properties. + # @see https://github.com/mediathekview/mediathekviewweb/blob/master/client/index.ts#L389 + # for converting properties into field queries. + filters = {} + extra = {} + for component in search.lower().split(): + if len(component) == 0: + continue + + field = None + operator = component[0:1] + value = component[1:] + # Extra, non-field settings. + if operator == '>': + value = int(value.split(',')[0]) * 60 + extra['duration_min'] = max(extra.get('duration_min', 0), value) + continue + elif operator == '<': + value = int(value.split(',')[0]) * 60 + extra['duration_max'] = min(extra.get('duration_max', float('inf')), value) + continue + + # Field query operators. + elif operator == '!': + field = 'channel' + elif operator == '#': + field = 'topic' + elif operator == '+': + field = 'title' + elif operator == '*': + field = 'description' + else: + field = 'topic,title' + operator = '' + value = component + + if field: + # @todo In theory, comma-joined values are for AND queries. + # But so far, each is an AND component, even without comma. + filters.setdefault(field, []).append(' '.join(value.split(','))) + + conditions = [] + for field, keys in filters.items(): + for query in keys: + conditions.append({ + 'fields': field.split(','), + 'query': query, + }) + + return conditions, extra + + def _extract_playlist_entries(self, results): + entries = [] + for item in results: + variant = None + for key, value in self._variants.items(): + if item['title'].find(value) != -1: + variant = key + + formats = [] + formats.append({ + 'url': item['url_video'], + 'format': ('medium ' + self._variants[variant]) if variant else None, + 'format_id': ('medium-' + variant) if variant else 'medium', + 'language_preference': -10 if variant else 10, + 'quality': -2, + 'filesize': item['size'], + }) + if len(item.get('url_video_low', '')) > 0: + formats.append({ + 'url': item['url_video_low'], + 'format': ('low ' + self._variants[variant]) if variant else None, + 'format_id': ('low-' + variant) if variant else 'low', + 'language_preference': -10 if variant else 10, + 'quality': -3, + }) + if len(item.get('url_video_hd', '')) > 0: + formats.append({ + 'url': item['url_video_hd'], + 'format': ('high ' + self._variants[variant]) if variant else None, + 'format_id': ('high-' + variant) if variant else 'high', + 'language_preference': -10 if variant else 10, + 'quality': -1, + }) + self._sort_formats(formats) + + video = { + '_type': 'video', + 'formats': formats, + 'id': item['id'], + 'title': item['title'], + 'description': item['description'], + 'series': item['topic'], + 'channel': item['channel'], + 'uploader': item['channel'], + 'duration': int_or_none(item['duration']), + 'webpage_url': item['url_website'], + } + + upload_date = datetime.datetime.utcfromtimestamp(item['timestamp']) + video['upload_date'] = upload_date.strftime('%Y%m%d') + if item['url_subtitle']: + video.setdefault('subtitles', {}).setdefault('de', []).append({ + 'url': item['url_subtitle'], + }) + entries.append(video) + + return entries + + def _get_n_results(self, query, n): + # @todo Add support for everywhere/future options. + queries, extra = self._build_conditions(query) + queryObject = { + 'queries': queries, + 'sortBy': 'timestamp', + 'sortOrder': 'desc', + 'future': True, + 'duration_min': extra.get('duration_min'), + 'duration_max': extra.get('duration_max'), + 'offset': 0, + 'size': min(n, self._MAX_RESULTS_PER_PAGE), + } + + entries = [] + for page_num in itertools.count(1): + queryObject.update({'offset': (page_num - 1) * queryObject['size']}) + results = self._download_json('https://mediathekviewweb.de/api/query', query, + note='Fetching page %d' % page_num, + data=json.dumps(queryObject).encode('utf-8'), + headers={'Content-Type': 'text/plain'}) + if results['err'] is not None: + raise ExtractorError('API returned an error: %s' % results['err'][0]) + + meta = results['result']['queryInfo'] + print(json.dumps(meta)) + + entries.extend(self._extract_playlist_entries(results['result']['results'])) + + # @todo This returns full pages: 100 results if 51 are requested. + if meta['resultCount'] == 0 or meta['resultCount'] + queryObject['offset'] >= n: + break + + return self.playlist_result(entries, playlist_title=query) + +class MediathekViewWebIE(InfoExtractor): + # @see https://github.com/mediathekview/mediathekviewweb + IE_NAME = 'mediathekviewweb' + _VALID_URL = r'https?://mediathekviewweb\.de/\#query=(?P.+)' + + # @todo Specify test cases. + + def _real_extract(self, url): + query = self._match_id(url) + search = compat_urllib_parse_unquote(query) + return { + '_type': 'url', + 'url': 'mvwsearchall:' + search, + 'ie_key': 'MediathekViewWebSearch', + } From 4c91c4f146558684636c4f8121bccae6d813526b Mon Sep 17 00:00:00 2001 From: ckaotik Date: Fri, 19 Feb 2021 20:35:19 +0100 Subject: [PATCH 02/12] [mediathekviewweb] Register extractor --- youtube_dl/extractor/extractors.py | 4 ++++ youtube_dl/extractor/mediathekviewweb.py | 8 ++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1a39c25c5..395aace8b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -642,6 +642,10 @@ from .mediasite import ( MediasiteCatalogIE, MediasiteNamedCatalogIE, ) +from .mediathekviewweb import ( + MediathekViewWebSearchIE, + MediathekViewWebIE, +) from .medici import MediciIE from .megaphone import MegaphoneIE from .meipai import MeipaiIE diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index e96e5eb4d..b8a8fb8d8 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -165,14 +165,11 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): headers={'Content-Type': 'text/plain'}) if results['err'] is not None: raise ExtractorError('API returned an error: %s' % results['err'][0]) - - meta = results['result']['queryInfo'] - print(json.dumps(meta)) - entries.extend(self._extract_playlist_entries(results['result']['results'])) + meta = results['result']['queryInfo'] # @todo This returns full pages: 100 results if 51 are requested. - if meta['resultCount'] == 0 or meta['resultCount'] + queryObject['offset'] >= n: + if len(entries) >= n or meta['resultCount'] == 0: break return self.playlist_result(entries, playlist_title=query) @@ -183,7 +180,6 @@ class MediathekViewWebIE(InfoExtractor): _VALID_URL = r'https?://mediathekviewweb\.de/\#query=(?P.+)' # @todo Specify test cases. - def _real_extract(self, url): query = self._match_id(url) search = compat_urllib_parse_unquote(query) From a482e8fba0ecefd8fcf8aad96227f358052366b8 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 08:51:48 +0100 Subject: [PATCH 03/12] [mediathekviewweb] flake8 --- youtube_dl/extractor/mediathekviewweb.py | 48 ++++++++++++------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index b8a8fb8d8..c57552792 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -1,12 +1,12 @@ import datetime import itertools import json -import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import ExtractorError, int_or_none + class MediathekViewWebSearchIE(SearchInfoExtractor): IE_NAME = 'mediathekviewweb:search' IE_DESC = 'MediathekViewWeb search' @@ -32,10 +32,6 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): def _build_conditions(self, search): # @note So far, there is no API endpoint to convert a query string into # a complete query object, as required by the /api/query endpoint. - # @see https://github.com/mediathekview/mediathekviewweb/blob/master/client/index.ts#L144 - # for parsing the search string into properties. - # @see https://github.com/mediathekview/mediathekviewweb/blob/master/client/index.ts#L389 - # for converting properties into field queries. filters = {} extra = {} for component in search.lower().split(): @@ -89,7 +85,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): for item in results: variant = None for key, value in self._variants.items(): - if item['title'].find(value) != -1: + if item.setdefault('title', '').find(value) != -1: variant = key formats = [] @@ -99,7 +95,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): 'format_id': ('medium-' + variant) if variant else 'medium', 'language_preference': -10 if variant else 10, 'quality': -2, - 'filesize': item['size'], + 'filesize': item.get('size'), }) if len(item.get('url_video_low', '')) > 0: formats.append({ @@ -122,21 +118,22 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): video = { '_type': 'video', 'formats': formats, - 'id': item['id'], - 'title': item['title'], - 'description': item['description'], - 'series': item['topic'], - 'channel': item['channel'], - 'uploader': item['channel'], - 'duration': int_or_none(item['duration']), - 'webpage_url': item['url_website'], + 'id': item.get('id'), + 'title': item.get('title'), + 'description': item.get('description'), + 'series': item.get('topic'), + 'channel': item.get('channel'), + 'uploader': item.get('channel'), + 'duration': int_or_none(item.get('duration')), + 'webpage_url': item.get('url_website'), } - upload_date = datetime.datetime.utcfromtimestamp(item['timestamp']) - video['upload_date'] = upload_date.strftime('%Y%m%d') - if item['url_subtitle']: + if item.get('timestamp'): + upload_date = datetime.datetime.utcfromtimestamp(item['timestamp']) + video['upload_date'] = upload_date.strftime('%Y%m%d') + if item.get('url_subtitle'): video.setdefault('subtitles', {}).setdefault('de', []).append({ - 'url': item['url_subtitle'], + 'url': item.get('url_subtitle'), }) entries.append(video) @@ -160,20 +157,23 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): for page_num in itertools.count(1): queryObject.update({'offset': (page_num - 1) * queryObject['size']}) results = self._download_json('https://mediathekviewweb.de/api/query', query, - note='Fetching page %d' % page_num, - data=json.dumps(queryObject).encode('utf-8'), - headers={'Content-Type': 'text/plain'}) + note='Fetching page %d' % page_num, + data=json.dumps(queryObject).encode('utf-8'), + headers={'Content-Type': 'text/plain'}) if results['err'] is not None: raise ExtractorError('API returned an error: %s' % results['err'][0]) entries.extend(self._extract_playlist_entries(results['result']['results'])) meta = results['result']['queryInfo'] - # @todo This returns full pages: 100 results if 51 are requested. - if len(entries) >= n or meta['resultCount'] == 0: + if len(entries) >= n: + entries = entries[0:n] + break + elif meta['resultCount'] == 0: break return self.playlist_result(entries, playlist_title=query) + class MediathekViewWebIE(InfoExtractor): # @see https://github.com/mediathekview/mediathekviewweb IE_NAME = 'mediathekviewweb' From 8a6fd68b92aca7b9de94a99d57fce3eb2cc30fe7 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 10:38:42 +0100 Subject: [PATCH 04/12] [mediathekviewweb] Support future/everywhere filters. --- youtube_dl/extractor/mediathekviewweb.py | 67 +++++++++++++++++------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index c57552792..7da225832 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -3,7 +3,7 @@ import itertools import json from .common import InfoExtractor, SearchInfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import compat_parse_qs, compat_urlparse from ..utils import ExtractorError, int_or_none @@ -28,19 +28,27 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): 'audio_description': '(Audiodeskription)', 'sign_language': '(mit Gebärdensprache)', } + _future = True + _everywhere = False def _build_conditions(self, search): # @note So far, there is no API endpoint to convert a query string into # a complete query object, as required by the /api/query endpoint. filters = {} extra = {} + for component in search.lower().split(): if len(component) == 0: continue - field = None operator = component[0:1] value = component[1:] + if len(value) == 0: + # Treat single character query as such. + # @note This differs from MVW's implementation. + operator = '' + value = component + # Extra, non-field settings. if operator == '>': value = int(value.split(',')[0]) * 60 @@ -52,7 +60,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): continue # Field query operators. - elif operator == '!': + if operator == '!': field = 'channel' elif operator == '#': field = 'topic' @@ -61,14 +69,23 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): elif operator == '*': field = 'description' else: - field = 'topic,title' - operator = '' + # No known operator specified. + field = 'generic' value = component - if field: - # @todo In theory, comma-joined values are for AND queries. - # But so far, each is an AND component, even without comma. - filters.setdefault(field, []).append(' '.join(value.split(','))) + # @note In theory, comma-joined values are for AND queries. However + # so far, each condition is AND joined, even without comma. + filters.setdefault(field, []).append(' '.join(value.split(','))) + + # Generic filters can apply to different fields, based on the query. + if 'generic' in filters: + if self._everywhere: + filters['channel,topic,title,description'] = filters['generic'] + elif 'topic' in filters: + filters['title'] = filters['generic'] + else: + filters['topic,title'] = filters['generic'] + filters.pop('generic') conditions = [] for field, keys in filters.items(): @@ -140,13 +157,12 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): return entries def _get_n_results(self, query, n): - # @todo Add support for everywhere/future options. queries, extra = self._build_conditions(query) queryObject = { 'queries': queries, 'sortBy': 'timestamp', 'sortOrder': 'desc', - 'future': True, + 'future': self._future, 'duration_min': extra.get('duration_min'), 'duration_max': extra.get('duration_max'), 'offset': 0, @@ -166,7 +182,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): meta = results['result']['queryInfo'] if len(entries) >= n: - entries = entries[0:n] + entries = entries[:n] break elif meta['resultCount'] == 0: break @@ -180,11 +196,24 @@ class MediathekViewWebIE(InfoExtractor): _VALID_URL = r'https?://mediathekviewweb\.de/\#query=(?P.+)' # @todo Specify test cases. + # https://mediathekviewweb.de/#query=%23tagesschau%20%3E5&everywhere=true&future=false + # & und ! #: https://mediathekviewweb.de/#query=%26%20und%20!%20%23 + def _real_extract(self, url): - query = self._match_id(url) - search = compat_urllib_parse_unquote(query) - return { - '_type': 'url', - 'url': 'mvwsearchall:' + search, - 'ie_key': 'MediathekViewWebSearch', - } + query_hash = self._match_id(url) + + url_stub = '?query=' + query_hash + query = compat_parse_qs(compat_urlparse.urlparse(url_stub).query) + search = query['query'][0] + query.pop('query') + + if len(query) > 0: + # Detect global flags, MVW is very strict about accepted values. + extractor = MediathekViewWebSearchIE(self._downloader) + if query.get('everywhere', [])[0] == 'true': + extractor._everywhere = True + if query.get('future', [])[0] == 'false': + extractor._future = False + return extractor._real_extract('mvwsearchall:' + search) + + return self.url_result('mvwsearchall:' + search, ie=MediathekViewWebSearchIE.ie_key()) From 14384d4a6f817e57d9db0cc9f0ca718fca10578a Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 12:52:47 +0100 Subject: [PATCH 05/12] [mediathekviewweb] Tweaked detection & naming for sign language/audio description --- youtube_dl/extractor/mediathekviewweb.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index 7da225832..ae69c5d07 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -25,8 +25,8 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): # Map of title affixes indicating video variants. _variants = { - 'audio_description': '(Audiodeskription)', - 'sign_language': '(mit Gebärdensprache)', + 'audio_description': 'Audiodeskription', + 'sign_language': 'mit Gebärdensprache', } _future = True _everywhere = False @@ -108,7 +108,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): formats = [] formats.append({ 'url': item['url_video'], - 'format': ('medium ' + self._variants[variant]) if variant else None, + 'format': ('medium (' + self._variants[variant] + ')') if variant else None, 'format_id': ('medium-' + variant) if variant else 'medium', 'language_preference': -10 if variant else 10, 'quality': -2, @@ -117,7 +117,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): if len(item.get('url_video_low', '')) > 0: formats.append({ 'url': item['url_video_low'], - 'format': ('low ' + self._variants[variant]) if variant else None, + 'format': ('low (' + self._variants[variant] + ')') if variant else None, 'format_id': ('low-' + variant) if variant else 'low', 'language_preference': -10 if variant else 10, 'quality': -3, @@ -125,7 +125,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): if len(item.get('url_video_hd', '')) > 0: formats.append({ 'url': item['url_video_hd'], - 'format': ('high ' + self._variants[variant]) if variant else None, + 'format': ('high (' + self._variants[variant] + ')') if variant else None, 'format_id': ('high-' + variant) if variant else 'high', 'language_preference': -10 if variant else 10, 'quality': -1, From 3e3b11c80b648b423294c75a106a1211e6570e88 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 12:53:21 +0100 Subject: [PATCH 06/12] [mediathekviewweb] Fixed future and everywhere detection --- youtube_dl/extractor/mediathekviewweb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index ae69c5d07..abef1f115 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -210,9 +210,9 @@ class MediathekViewWebIE(InfoExtractor): if len(query) > 0: # Detect global flags, MVW is very strict about accepted values. extractor = MediathekViewWebSearchIE(self._downloader) - if query.get('everywhere', [])[0] == 'true': + if query.get('everywhere', []) == ['true']: extractor._everywhere = True - if query.get('future', [])[0] == 'false': + if query.get('future', []) == ['false']: extractor._future = False return extractor._real_extract('mvwsearchall:' + search) From 57f070e5acb7eca206a6d07569be2f8391bcf1a7 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 12:54:09 +0100 Subject: [PATCH 07/12] [mediathekviewweb] Added pretty playlist name if topic is common for all results --- youtube_dl/extractor/mediathekviewweb.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index abef1f115..59033ee2b 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -187,7 +187,13 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): elif meta['resultCount'] == 0: break - return self.playlist_result(entries, playlist_title=query) + common_topic = None + if entries: + common_topic = entries[0]['series'] + for entry in entries: + common_topic = common_topic if entry['series'] == common_topic else None + + return self.playlist_result(entries, playlist_title=common_topic or query) class MediathekViewWebIE(InfoExtractor): From 230fb7caa1da6fa2b93c6d368a071c6e6e49b59d Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 12:54:23 +0100 Subject: [PATCH 08/12] [mediathekviewweb] Added tests --- youtube_dl/extractor/mediathekviewweb.py | 73 ++++++++++++++++++++---- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index 59033ee2b..e63ccf94f 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -13,15 +13,43 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): _SEARCH_KEY = 'mvwsearch' _MAX_RESULTS = float('inf') _MAX_RESULTS_PER_PAGE = 50 - # _GEO_COUNTRIES = ['DE'] - # _TESTS = [{ - # 'url': 'mvwsearch:tagesschau', - # 'info_dict': { - # 'title': 'post-avant jazzcore', - # }, - # 'playlist_count': 15, - # }] + _TESTS = [ + { + 'url': 'mvwsearchall:sandmännchen !kika', + 'info_dict': { + 'title': 'Unser Sandmännchen', + }, + 'playlist': [], + 'playlist_count': 7, + }, + { + # Audio description & common topic. + 'url': 'mvwsearch:#Sendung,Maus Audiodeskription', + 'info_dict' : { + 'title': 'Die Sendung mit der Maus', + }, + 'playlist': [], + 'playlist_count': 1, + 'params': { + 'format': 'medium-audio_description', + 'skip_download': True, + } + }, + { + # Sign language. + 'url': 'mvwsearchall:!ard #Tagesschau Gebärdensprache', + 'info_dict': { + 'title': '!ard #Tagesschau Gebärdensprache', + }, + 'playlist': [], + 'playlist_mincount': 365, + 'params': { + 'format': 'medium-sign_language', + 'skip_download': True, + }, + }, + ] # Map of title affixes indicating video variants. _variants = { @@ -201,9 +229,32 @@ class MediathekViewWebIE(InfoExtractor): IE_NAME = 'mediathekviewweb' _VALID_URL = r'https?://mediathekviewweb\.de/\#query=(?P.+)' - # @todo Specify test cases. - # https://mediathekviewweb.de/#query=%23tagesschau%20%3E5&everywhere=true&future=false - # & und ! #: https://mediathekviewweb.de/#query=%26%20und%20!%20%23 + _TESTS = [ + { + # Test for everywhere. + 'url': 'https://mediathekviewweb.de/#query=!ard%20%23Tagesschau%2020%2CUhr&everywhere=true', + 'info_dict': { + 'title': '!ard #Tagesschau 20,Uhr', + }, + # Without everywhere, there are <100 results. + 'playlist_mincount': 365, + 'params': { + 'skip_download': True, + }, + }, + { + # Test for non-future videos. + 'url': 'https://mediathekviewweb.de/#query=%23sport%2Cim%2Costen%20biathlon&future=false', + 'info_dict': { + 'title': 'Sport im Osten', + }, + # Future yields 4 results instead. + 'playlist_maxcount': 2, + 'params': { + 'skip_download': True, + }, + }, + ] def _real_extract(self, url): query_hash = self._match_id(url) From c8d69b4d33c4b543fddfbca674a03b1f99627972 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 13:08:32 +0100 Subject: [PATCH 09/12] [mediathekviewweb] Stray whitespace --- youtube_dl/extractor/mediathekviewweb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index e63ccf94f..76f9410b2 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -26,7 +26,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): { # Audio description & common topic. 'url': 'mvwsearch:#Sendung,Maus Audiodeskription', - 'info_dict' : { + 'info_dict': { 'title': 'Die Sendung mit der Maus', }, 'playlist': [], From e585d7bcf531d0bc60b68bff5a21cac32d1b9d74 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 14:26:31 +0100 Subject: [PATCH 10/12] Specify utf-8 encoding --- youtube_dl/extractor/mediathekviewweb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index 76f9410b2..bd0c8e758 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -1,3 +1,4 @@ +# coding=utf-8 import datetime import itertools import json From 338e4b85279650f32da4906b6f972eeae7edd130 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sun, 21 Feb 2021 14:31:04 +0100 Subject: [PATCH 11/12] Import __future__.unicode_literals --- youtube_dl/extractor/mediathekviewweb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index bd0c8e758..303655438 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -1,4 +1,6 @@ # coding=utf-8 +from __future__ import unicode_literals + import datetime import itertools import json From bbfd415058dfa8a867b6c6fafc051259faa1aef3 Mon Sep 17 00:00:00 2001 From: ckaotik Date: Sat, 13 Mar 2021 11:27:11 +0100 Subject: [PATCH 12/12] Specify subtitle file extension --- youtube_dl/extractor/mediathekviewweb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/mediathekviewweb.py b/youtube_dl/extractor/mediathekviewweb.py index 303655438..76cf258a1 100644 --- a/youtube_dl/extractor/mediathekviewweb.py +++ b/youtube_dl/extractor/mediathekviewweb.py @@ -182,6 +182,7 @@ class MediathekViewWebSearchIE(SearchInfoExtractor): if item.get('url_subtitle'): video.setdefault('subtitles', {}).setdefault('de', []).append({ 'url': item.get('url_subtitle'), + 'ext': 'ttml', }) entries.append(video)