From 562ab2cd47d8c0a36c3346d06fcd603aa592f705 Mon Sep 17 00:00:00 2001 From: Ishab Ahmed Date: Sat, 1 Apr 2023 15:49:05 +0100 Subject: [PATCH 1/5] [infomaniak] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/infomaniak.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/infomaniak.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3a87f9e33..ef3a0a801 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -513,6 +513,7 @@ from .imgur import ( from .ina import InaIE from .inc import IncIE from .indavideo import IndavideoEmbedIE +from .infomaniak import InfomaniakVOD2IE from .infoq import InfoQIE from .instagram import ( InstagramIE, diff --git a/youtube_dl/extractor/infomaniak.py b/youtube_dl/extractor/infomaniak.py new file mode 100644 index 000000000..2a7686be4 --- /dev/null +++ b/youtube_dl/extractor/infomaniak.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import json + + +class InfomaniakVOD2IE(InfoExtractor): + _VALID_URL = r'https?://player\.vod2\.infomaniak\.com/embed/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://player.vod2.infomaniak.com/embed/1jhvl2uq7kr4y', + 'md5': 'd06fb3fc5a8d7cb4d6e4a0f4e7c5a76a', + 'info_dict': { + 'id': '1jhvl2uq7kr4y', + 'ext': 'mp4', + 'title': 'RolandCarey2016-05-04.mp4', + 'description': '', + 'thumbnail': 'https://res.vod2.infomaniak.com/1/vod/thumbnail/1jhvl2uq8yqqv.jpg', + 'duration': 221, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + # no useful data in given url, formatted url below reveals json data + data_url = "https://res.vod2.infomaniak.com/{}/vod/share/{}".format(video_id[0], video_id) + webpage = self._download_webpage(data_url, video_id) + + data = json.loads(webpage)['data']['media'][0] + + url = data['source']['url'] + title = data['title'] + 
description = '' + thumbnail = data['image']['url'] + duration = data['duration'] + + video_mimetype = data['source']['mimetype'] + + info_dict = { + 'id': video_id, + 'url': url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + } + + # if file is m3u8 + if video_mimetype == 'application/x-mpegurl': + info_dict['protocol'] = 'm3u8_native' + info_dict['manifest_url'] = url + + return info_dict From c9e13179275e7643915ec2b4d70237e2a1f5610e Mon Sep 17 00:00:00 2001 From: Ishab Ahmed Date: Sat, 1 Apr 2023 16:11:23 +0100 Subject: [PATCH 2/5] [infomaniak] allow optional fields --- youtube_dl/extractor/infomaniak.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/infomaniak.py b/youtube_dl/extractor/infomaniak.py index 2a7686be4..1632e4076 100644 --- a/youtube_dl/extractor/infomaniak.py +++ b/youtube_dl/extractor/infomaniak.py @@ -14,7 +14,6 @@ class InfomaniakVOD2IE(InfoExtractor): 'id': '1jhvl2uq7kr4y', 'ext': 'mp4', 'title': 'RolandCarey2016-05-04.mp4', - 'description': '', 'thumbnail': 'https://res.vod2.infomaniak.com/1/vod/thumbnail/1jhvl2uq8yqqv.jpg', 'duration': 221, } @@ -30,17 +29,15 @@ class InfomaniakVOD2IE(InfoExtractor): url = data['source']['url'] title = data['title'] - description = '' - thumbnail = data['image']['url'] - duration = data['duration'] + thumbnail = data.get('image').get('url') + duration = data.get('duration') - video_mimetype = data['source']['mimetype'] + video_mimetype = data.get('source').get('mimetype') info_dict = { 'id': video_id, 'url': url, 'title': title, - 'description': description, 'thumbnail': thumbnail, 'duration': duration, } From 25e5bcfb1d8c00e633e4a1b87e39d4af0d3fdde9 Mon Sep 17 00:00:00 2001 From: Ishab Ahmed Date: Sun, 2 Apr 2023 13:12:23 +0100 Subject: [PATCH 3/5] [infomaniak] Follow project convention and helpers --- youtube_dl/extractor/infomaniak.py | 58 ++++++++++++++++++------------ 1 file changed, 36 insertions(+), 
22 deletions(-) diff --git a/youtube_dl/extractor/infomaniak.py b/youtube_dl/extractor/infomaniak.py index 1632e4076..0d6ad2c1a 100644 --- a/youtube_dl/extractor/infomaniak.py +++ b/youtube_dl/extractor/infomaniak.py @@ -2,37 +2,50 @@ from __future__ import unicode_literals from .common import InfoExtractor -import json +from ..utils import traverse_obj, int_or_none, url_or_none class InfomaniakVOD2IE(InfoExtractor): _VALID_URL = r'https?://player\.vod2\.infomaniak\.com/embed/(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'https://player.vod2.infomaniak.com/embed/1jhvl2uq7kr4y', - 'md5': 'd06fb3fc5a8d7cb4d6e4a0f4e7c5a76a', - 'info_dict': { - 'id': '1jhvl2uq7kr4y', - 'ext': 'mp4', - 'title': 'RolandCarey2016-05-04.mp4', - 'thumbnail': 'https://res.vod2.infomaniak.com/1/vod/thumbnail/1jhvl2uq8yqqv.jpg', - 'duration': 221, + _TESTS = [ + # m3u8 test + { + 'url': 'https://player.vod2.infomaniak.com/embed/1jhvl2uqg6ywp', + 'md5': 'b45c718f1d59869aac4caa82f4a7c386', + 'info_dict': { + 'id': '1jhvl2uqg6ywp', + 'ext': 'm3u8', + 'title': 'Conférence à Dyo, octobre 2022', + 'thumbnail': 'https://res.vod2.infomaniak.com/1/vod/thumbnail/1jhvl2uqg6xjc.jpg', + 'duration': 8012, + }, + }, + # mp4 test + { + 'url': 'https://player.vod2.infomaniak.com/embed/1jhvl2uq7kr4y', + 'md5': 'd06fb3fc5a8d7cb4d6e4a0f4e7c5a76a', + 'info_dict': { + 'id': '1jhvl2uq7kr4y', + 'ext': 'mp4', + 'title': 'RolandCarey2016-05-04.mp4', + 'thumbnail': 'https://res.vod2.infomaniak.com/1/vod/thumbnail/1jhvl2uq8yqqv.jpg', + 'duration': 221, + } } - } + ] def _real_extract(self, url): video_id = self._match_id(url) # no useful data in given url, formatted url below reveals json data - data_url = "https://res.vod2.infomaniak.com/{}/vod/share/{}".format(video_id[0], video_id) - webpage = self._download_webpage(data_url, video_id) + data_url = 'https://res.vod2.infomaniak.com/{0}/vod/share/{1}'.format(video_id[0], video_id) + webpage = self._download_json(data_url, video_id)['data']['media'][0] - data = 
json.loads(webpage)['data']['media'][0] + url = webpage['source']['url'] + title = webpage['title'] + thumbnail = traverse_obj(webpage, ('image', 'url'), expected_type=url_or_none) + duration = traverse_obj(webpage, 'duration', expected_type=int_or_none) - url = data['source']['url'] - title = data['title'] - thumbnail = data.get('image').get('url') - duration = data.get('duration') - - video_mimetype = data.get('source').get('mimetype') + video_mimetype = traverse_obj(webpage, ('source', 'mimetype'), expected_type=lambda x: x.strip() or None) info_dict = { 'id': video_id, @@ -44,7 +57,8 @@ class InfomaniakVOD2IE(InfoExtractor): # if file is m3u8 if video_mimetype == 'application/x-mpegurl': - info_dict['protocol'] = 'm3u8_native' - info_dict['manifest_url'] = url + info_dict['formats'] = self._extract_m3u8_formats( + info_dict.pop('url'), video_id, ext='m3u8', entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(info_dict['formats']) return info_dict From 6b07fba61bc2f2fb07ca3b19b838810ce1c22e5e Mon Sep 17 00:00:00 2001 From: Ishab Ahmed Date: Sun, 2 Apr 2023 13:19:47 +0100 Subject: [PATCH 4/5] [infomaniak] More maintainable imports --- youtube_dl/extractor/infomaniak.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/infomaniak.py b/youtube_dl/extractor/infomaniak.py index 0d6ad2c1a..18f1769b9 100644 --- a/youtube_dl/extractor/infomaniak.py +++ b/youtube_dl/extractor/infomaniak.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import traverse_obj, int_or_none, url_or_none +from ..utils import ( + traverse_obj, + int_or_none, + url_or_none, +) class InfomaniakVOD2IE(InfoExtractor): From 2c5e02a57ff8d37be2288fb78298903c6667d7bc Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 9 Apr 2023 02:29:00 +0100 Subject: [PATCH 5/5] Rename webpage appropriately, trigger valid tests --- youtube_dl/extractor/infomaniak.py | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/infomaniak.py b/youtube_dl/extractor/infomaniak.py index 18f1769b9..d737b0d1d 100644 --- a/youtube_dl/extractor/infomaniak.py +++ b/youtube_dl/extractor/infomaniak.py @@ -42,14 +42,14 @@ class InfomaniakVOD2IE(InfoExtractor): video_id = self._match_id(url) # no useful data in given url, formatted url below reveals json data data_url = 'https://res.vod2.infomaniak.com/{0}/vod/share/{1}'.format(video_id[0], video_id) - webpage = self._download_json(data_url, video_id)['data']['media'][0] + media = self._download_json(data_url, video_id)['data']['media'][0] - url = webpage['source']['url'] - title = webpage['title'] - thumbnail = traverse_obj(webpage, ('image', 'url'), expected_type=url_or_none) - duration = traverse_obj(webpage, 'duration', expected_type=int_or_none) + url = media['source']['url'] + title = media['title'] + thumbnail = traverse_obj(media, ('image', 'url'), expected_type=url_or_none) + duration = traverse_obj(media, 'duration', expected_type=int_or_none) - video_mimetype = traverse_obj(webpage, ('source', 'mimetype'), expected_type=lambda x: x.strip() or None) + video_mimetype = traverse_obj(media, ('source', 'mimetype'), expected_type=lambda x: x.strip() or None) info_dict = { 'id': video_id,