youtube-dl/youtube_dl/extractor/ninegag.py

54 lines
1.8 KiB
Python
Raw Normal View History

2014-01-29 02:55:06 +09:00
from __future__ import unicode_literals
2013-12-05 22:29:08 +09:00
import json
import re
from .common import InfoExtractor
class NineGagIE(InfoExtractor):
    """Extractor for 9gag.tv video pages.

    The page itself is only scraped for metadata; the result is a
    ``url_transparent`` entry that hands the embedded external video ID
    over to the ``Youtube`` extractor.
    """

    IE_NAME = '9gag'
    # The numeric path component after /v/ is captured as the video id.
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
    _TEST = {
        "url": "http://9gag.tv/v/1912",
        "file": "1912.mp4",
        "info_dict": {
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
            "view_count": int,
            "thumbnail": "re:^https?://",
        },
        'add_ie': ['Youtube']
    }

    def _real_extract(self, url):
        """Scrape the 9gag.tv page at *url* and return a url_transparent result.

        The external video ID is mandatory (the search is fatal by default);
        the description and the view count are looked up with ``fatal=False``
        and may therefore end up as ``None`` in the returned dict.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # ID of the embedded video, read from the post container markup.
        external_id = self._html_search_regex(
            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
            page, 'video ID')

        caption = self._html_search_regex(
            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', page,
            'description', fatal=False)

        # The counter is rendered with thousands separators ("1,234,567");
        # strip the commas before converting it to an integer.
        raw_views = self._html_search_regex(
            r'<p><b>([0-9][0-9,]*)</b> views</p>', page, 'view count',
            fatal=False)
        if raw_views is None:
            views = None
        else:
            views = int(raw_views.replace(',', ''))

        # Title and thumbnail come from the page's OpenGraph metadata.
        og_title = self._og_search_title(page)
        og_thumbnail = self._og_search_thumbnail(page)

        return {
            '_type': 'url_transparent',
            'url': external_id,
            'ie_key': 'Youtube',
            'id': video_id,
            'title': og_title,
            'description': caption,
            'view_count': views,
            'thumbnail': og_thumbnail,
        }