Compare commits

...

3 Commits

Author SHA1 Message Date
fynnkroeger
ddaff36026
Merge 80bc41f018ccc14c1753abe50f350793b5f3cf01 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:18:52 +08:00
Fynn
80bc41f018 [snapchat] Improve robustness 2021-09-10 18:45:31 +02:00
Fynn
082dfa495e [snapchat] Add new extractor 2021-09-06 14:10:01 +02:00
2 changed files with 60 additions and 0 deletions

View File

@ -1153,6 +1153,7 @@ from .sky import (
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .snapchat import SnapchatIE
from .snotr import SnotrIE
from .sohu import SohuIE
from .sonyliv import SonyLIVIE

View File

@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import unified_timestamp, parse_duration, try_get, str_or_none, ExtractorError
from .common import InfoExtractor
class SnapchatIE(InfoExtractor):
    """Extractor for single Snapchat story pages (story.snapchat.com/o/<id>).

    The media URL and all metadata are read from the JSON-LD
    (``application/ld+json``) object embedded in the story web page.
    """

    # Every dot of the host name is escaped so it only matches a literal '.'.
    # NOTE(review): ids look like URL-safe base64, which may contain '-' as
    # well as '_'; '-' is therefore accepted so such ids are not truncated.
    _VALID_URL = r'https?://(?:www\.)?story\.snapchat\.com/o/(?P<id>[\w-]+)'
    _TEST = {
        'url': 'https://story.snapchat.com/o/W7_EDlXWTBiXAEEniNoMPwAAYuz9_1mcdex8MAXndPyIOAXndPyFyAO1OAA',
        'md5': 'ab1900981cadcd955aae32a526096cbd',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYuz9_1mcdex8MAXndPyIOAXndPyFyAO1OAA',
            'ext': 'mp4',
            'title': 'W7_EDlXWTBiXAEEniNoMPwAAYuz9_1mcdex8MAXndPyIOAXndPyFyAO1OAA',
            'thumbnail': r're:https://s\.sc-cdn\.net/.+\.jpg',
            'description': '#spotlight',
            'timestamp': 1622914559,
            'upload_date': '20210605',
            'view_count': 72100,
            'uploader_id': None
        }
    }

    def _real_extract(self, url):
        """Download the story page and build the info dict from its JSON-LD.

        Raises ExtractorError when the JSON-LD object has no usable
        'contentUrl' (missing key, empty value, or a non-mapping object).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        schema_video_object_raw = self._html_search_regex(
            r'<script\s[^>]*?data-react-helmet\s*=\s*"true"\s[^>]*?type\s*=\s*"application/ld\+json">(.+?)</script>',
            webpage, 'schema_video_object')
        schema_video_object = self._parse_json(schema_video_object_raw, video_id, fatal=True)

        try:
            video_url = str_or_none(schema_video_object['contentUrl'])
            if not video_url:
                raise ValueError('video_url must be non-empty string')
        except (KeyError, TypeError, ValueError) as e:
            # KeyError: 'contentUrl' is absent from the object.
            # TypeError: the parsed JSON is not subscriptable by a string key.
            # ValueError: the key is present but holds an empty value.
            raise ExtractorError('Unexpected format for schema_video_object', cause=e, video_id=video_id)

        # Fall back to a title derived from the URL when the page gives none.
        title = schema_video_object.get('name') or self._generic_title(url)

        # Both values live in optional nested objects; try_get is used so a
        # missing or malformed parent does not abort the extraction.
        views = try_get(schema_video_object.get('interactionStatistic'), lambda x: x['userInteractionCount'])
        uploader_id = try_get(schema_video_object.get('creator'), lambda x: x['alternateName'])

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'thumbnail': schema_video_object.get('thumbnailUrl'),
            'timestamp': unified_timestamp(schema_video_object.get('uploadDate')),
            'description': schema_video_object.get('description'),
            'duration': parse_duration(schema_video_object.get('duration')),
            'view_count': views,
            'uploader_id': uploader_id
        }