# coding: utf-8
#
# Provenance: reconstructed from the patch "[ultimedia] Add extractor"
# (commit 3073a6d5e9036b0b613f57bc08099862a2af87f8, author Sergey M․,
# Wed, 18 Mar 2015 23:08:18 +0600, "Sponsored by thankyoumotion.com"),
# which creates youtube_dl/extractor/ultimedia.py.
#
# The same patch registers this extractor in
# youtube_dl/extractor/__init__.py, directly after ``from .ubu import UbuIE``:
#
#     from .ultimedia import UltimediaIE
#
# NOTE(review): the copy of the patch this file was rebuilt from had its line
# breaks collapsed and every HTML-like token (``<...>``) stripped from the
# regular expressions. The affected patterns are marked below; confirm them
# against the upstream repository before relying on them.
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    qualities,
    unified_strdate,
    clean_html,
)


class UltimediaIE(InfoExtractor):
    """Information extractor for video pages hosted on ultimedia.com.

    Handles URLs of the form
    ``https://www.ultimedia.com/default/index/video<kind>/id/<id>``.
    The video page embeds an iframe served from ``ultimedia.com/deliver/``;
    that iframe page carries a ``jwplayer(...).setup({...})`` call whose JSON
    argument lists the available media under ``modes``.
    """

    # NOTE(review): the group name ``<id>`` was missing in the received copy
    # (which made the pattern invalid); ``_match_id`` presumably reads the
    # ``id`` group - confirm.
    _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
    _TESTS = [{
        # news
        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
        'md5': '276a0e49de58c7e85d32b057837952a2',
        'info_dict': {
            'id': 's8uk0r',
            'ext': 'mp4',
            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
            'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
            # Raw literal: ``\.`` is not a valid string escape, so the
            # non-raw form only worked by accident (same resulting value).
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150317',
        },
    }, {
        # music
        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
        'info_dict': {
            'id': 'xvpfp8',
            'ext': 'mp4',
            'title': "Two - C'est la vie (Clip)",
            'description': 'Two',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20150224',
        },
    }]

    def _real_extract(self, url):
        """Extract metadata and formats for the video at *url*.

        Downloads the video page, follows the embedded ``deliver`` iframe,
        parses the jwplayer setup JSON found there and returns a dict with
        the keys ``id``, ``title``, ``description``, ``thumbnail``,
        ``upload_date`` and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # NOTE(review): the leading ``<iframe[^>`` was stripped in the
        # received copy; restored to match the "iframe page" fetched next.
        deliver_url = self._search_regex(
            r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
            webpage, 'deliver URL')

        deliver_page = self._download_webpage(
            deliver_url, video_id, 'Downloading iframe page')

        # The player configuration is the object literal passed to
        # jwplayer('player').setup(...) / jwplayer('player_temp').setup(...).
        player = self._parse_json(
            self._search_regex(
                r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
                deliver_page, 'player'),
            video_id)

        # Presumably ranks 'html5' above 'flash' (later entry = better);
        # verify against ``utils.qualities``.
        quality = qualities(['flash', 'html5'])

        formats = [{
            'url': mode['config']['file'],
            'format_id': mode.get('type'),
            'quality': quality(mode.get('type')),
        } for mode in player['modes']]
        self._sort_formats(formats)

        thumbnail = player.get('image')

        # Prefer the heading on the video page; when that lookup yields
        # nothing, fall back to the ``nameVideo`` JS variable of the iframe.
        # NOTE(review): the HTML tags of the first pattern were stripped in
        # the received copy (it read ``(?s).+?(.+?)``); the tags below are a
        # reconstruction - confirm against upstream.
        title = clean_html((
            self._html_search_regex(
                r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
                webpage, 'title', default=None) or
            self._search_regex(
                r"var\s+nameVideo\s*=\s*'([^']+)'",
                deliver_page, 'title')))

        # NOTE(review): ``<span>``/``</span>``/``</p>`` are reconstructed;
        # the received copy read ``(?s)Description(.+?)`` followed by a
        # paragraph break.
        description = clean_html(self._html_search_regex(
            r'(?s)<span>Description</span>(.+?)</p>', webpage,
            'description', fatal=False))

        # NOTE(review): ``<span>`` is reconstructed; the received copy read
        # ``Ajouté le\s*([^<]+)``.
        upload_date = unified_strdate(self._search_regex(
            r'Ajouté le\s*<span>([^<]+)', webpage,
            'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }