From b8bc7a696ba15cbc6d45b20320d928340246b508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 31 Dec 2014 19:40:35 +0600 Subject: [PATCH] [openfilm] Add extractor (Closes #4538) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/openfilm.py | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 youtube_dl/extractor/openfilm.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 45b50792e..6c5827f88 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -299,6 +299,7 @@ from .nytimes import NYTimesIE from .nuvid import NuvidIE from .oktoberfesttv import OktoberfestTVIE from .ooyala import OoyalaIE +from .openfilm import OpenFilmIE from .orf import ( ORFTVthekIE, ORFOE1IE, diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py new file mode 100644 index 000000000..2249657eb --- /dev/null +++ b/youtube_dl/extractor/openfilm.py @@ -0,0 +1,70 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + compat_urllib_parse, + parse_age_limit, + int_or_none, +) + + +class OpenFilmIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P.+)' + _TEST = { + 'url': 'http://www.openfilm.com/videos/human-resources-remastered', + 'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37', + 'info_dict': { + 'id': '32736', + 'display_id': 'human-resources-remastered', + 'ext': 'mp4', + 'title': 'Human Resources (Remastered)', + 'description': 'Social Engineering in the 20th Century.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 7164, + 'timestamp': 1334756988, + 'upload_date': '20120418', + 'uploader_id': '41117', + 'view_count': int, + 'age_limit': 0, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + player = compat_urllib_parse.unquote_plus( + self._og_search_video_url(webpage)) + + video = json.loads(self._search_regex( + r'\bp=({.+?})(?:&|$)', player, 'video JSON')) + + video_url = '%s1.mp4' % video['location'] + video_id = video.get('video_id') + display_id = video.get('alias') or display_id + title = video.get('title') + description = video.get('description') + thumbnail = video.get('main_thumb') + duration = int_or_none(video.get('duration')) + timestamp = parse_iso8601(video.get('dt_published'), ' ') + uploader_id = video.get('user_id') + view_count = int_or_none(video.get('views_count')) + age_limit = parse_age_limit(video.get('age_limit')) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'age_limit': age_limit, + }