2025-09-18 07:36:28 +09:00
4 changed files with 93 additions and 208 deletions
--- a/youtube_dl/extractor/amara.py
+++ b/youtube_dl/extractor/amara.py
@ -1,103 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-from .vimeo import VimeoIE
-from ..utils import (
-    int_or_none,
-    parse_iso8601,
-    update_url_query,
-)
-
-
-class AmaraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
-    _TESTS = [{
-        # Youtube
-        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
-        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
-        'info_dict': {
-            'id': 'h6ZuVdvYnfE',
-            'ext': 'mp4',
-            'title': 'Why jury trials are becoming less common',
-            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'subtitles': dict,
-            'upload_date': '20160813',
-            'uploader': 'PBS NewsHour',
-            'uploader_id': 'PBSNewsHour',
-            'timestamp': 1549639570,
-        }
-    }, {
-        # Vimeo
-        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
-        'md5': '99392c75fa05d432a8f11df03612195e',
-        'info_dict': {
-            'id': '18622084',
-            'ext': 'mov',
-            'title': 'Vimeo at CES 2011!',
-            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'subtitles': dict,
-            'timestamp': 1294763658,
-            'upload_date': '20110111',
-            'uploader': 'Sam Morrill',
-            'uploader_id': 'sammorrill'
-        }
-    }, {
-        # Direct Link
-        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
-        'md5': 'd3970f08512738ee60c5807311ff5d3f',
-        'info_dict': {
-            'id': 's8KL7I3jLmh6',
-            'ext': 'mp4',
-            'title': 'The danger of a single story',
-            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'subtitles': dict,
-            'upload_date': '20091007',
-            'timestamp': 1254942511,
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        meta = self._download_json(
-            'https://amara.org/api/videos/%s/' % video_id,
-            video_id, query={'format': 'json'})
-        title = meta['title']
-        video_url = meta['all_urls'][0]
-
-        subtitles = {}
-        for language in (meta.get('languages') or []):
-            subtitles_uri = language.get('subtitles_uri')
-            if not (subtitles_uri and language.get('published')):
-                continue
-            subtitle = subtitles.setdefault(language.get('code') or 'en', [])
-            for f in ('json', 'srt', 'vtt'):
-                subtitle.append({
-                    'ext': f,
-                    'url': update_url_query(subtitles_uri, {'format': f}),
-                })
-
-        info = {
-            'url': video_url,
-            'id': video_id,
-            'subtitles': subtitles,
-            'title': title,
-            'description': meta.get('description'),
-            'thumbnail': meta.get('thumbnail'),
-            'duration': int_or_none(meta.get('duration')),
-            'timestamp': parse_iso8601(meta.get('created')),
-        }
-
-        for ie in (YoutubeIE, VimeoIE):
-            if ie.suitable(video_url):
-                info.update({
-                    '_type': 'url_transparent',
-                    'ie_key': ie.ie_key(),
-                })
-                break
-
-        return info
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -36,7 +36,6 @@ from .afreecatv import AfreecaTVIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
-from .amara import AmaraIE
 from .amcnetworks import AMCNetworksIE
 from .americastestkitchen import AmericasTestKitchenIE
 from .animeondemand import AnimeOnDemandIE
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@ -3,13 +3,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_parse_qs
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    lowercase_escape,
-    try_get,
    update_url_query,
 )

@ -40,10 +38,21 @@ class GoogleDriveIE(InfoExtractor):
        # video can't be watched anonymously due to view count limit reached,
        # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
        'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
-        'only_matching': True,
+        'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+        'info_dict': {
+            'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
+            'ext': 'mp4',
+            'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
+        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+        'info_dict': {
+            'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
+            'ext': 'mp4',
+            'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
+            'duration': 189,
+        },
        'only_matching': True,
    }, {
        'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
@ -162,21 +171,23 @@ class GoogleDriveIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        video_info = compat_parse_qs(self._download_webpage(
-            'https://drive.google.com/get_video_info',
-            video_id, query={'docid': video_id}))
+        webpage = self._download_webpage(
+            'http://docs.google.com/file/d/%s' % video_id, video_id)

-        def get_value(key):
-            return try_get(video_info, lambda x: x[key][0])
-
-        reason = get_value('reason')
-        title = get_value('title')
-        if not title and reason:
-            raise ExtractorError(reason, expected=True)
+        title = self._search_regex(
+            r'"title"\s*,\s*"([^"]+)', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
+        duration = int_or_none(self._search_regex(
+            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
+            default=None))

        formats = []
-        fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
-        fmt_list = (get_value('fmt_list') or '').split(',')
+        fmt_stream_map = self._search_regex(
+            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
+            'fmt stream map', default='').split(',')
+        fmt_list = self._search_regex(
+            r'"fmt_list"\s*,\s*"([^"]+)', webpage,
+            'fmt_list', default='').split(',')
        if fmt_stream_map and fmt_list:
            resolutions = {}
            for fmt in fmt_list:
@ -246,14 +257,19 @@ class GoogleDriveIE(InfoExtractor):
                        if urlh and urlh.headers.get('Content-Disposition'):
                            add_source_format(urlh)

-        if not formats and reason:
-            raise ExtractorError(reason, expected=True)
+        if not formats:
+            reason = self._search_regex(
+                r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+            if reason:
+                raise ExtractorError(reason, expected=True)

        self._sort_formats(formats)

-        hl = get_value('hl')
+        hl = self._search_regex(
+            r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
        subtitles_id = None
-        ttsurl = get_value('ttsurl')
+        ttsurl = self._search_regex(
+            r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
        if ttsurl:
            # the video Id for subtitles will be the last value in the ttsurl
            # query string
@ -263,8 +279,8 @@ class GoogleDriveIE(InfoExtractor):
        return {
            'id': video_id,
            'title': title,
-            'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
-            'duration': int_or_none(get_value('length_seconds')),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'duration': duration,
            'formats': formats,
            'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
            'automatic_captions': self.extract_automatic_captions(
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import base64
 import hashlib
 import hmac
 import itertools
@ -10,10 +9,6 @@ import re
 import time

 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
-)
 from ..utils import (
    ExtractorError,
    int_or_none,
@ -170,20 +165,19 @@ class VikiIE(VikiBaseIE):
    }, {
        # episode
        'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '94e0e34fd58f169f40c184f232356cfe',
+        'md5': '5fa476a902e902783ac7a4d615cdbc7a',
        'info_dict': {
            'id': '44699v',
            'ext': 'mp4',
            'title': 'Boys Over Flowers - Episode 1',
            'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
-            'duration': 4172,
+            'duration': 4204,
            'timestamp': 1270496524,
            'upload_date': '20100405',
            'uploader': 'group8',
            'like_count': int,
            'age_limit': 13,
-        },
-        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
+        }
    }, {
        # youtube external
        'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -200,15 +194,14 @@ class VikiIE(VikiBaseIE):
            'uploader_id': 'ad14065n',
            'like_count': int,
            'age_limit': 13,
-        },
-        'skip': 'Page not found!',
+        }
    }, {
        'url': 'http://www.viki.com/player/44699v',
        'only_matching': True,
    }, {
        # non-English description
        'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': 'adf9e321a0ae5d0aace349efaaff7691',
+        'md5': '1713ae35df5a521b31f6dc40730e7c9c',
        'info_dict': {
            'id': '158036v',
            'ext': 'mp4',
@ -224,11 +217,8 @@ class VikiIE(VikiBaseIE):
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        resp = self._download_json(
-            'https://www.viki.com/api/videos/' + video_id,
-            video_id, 'Downloading video JSON',
-            headers={'x-viki-app-ver': '4.0.57'})
-        video = resp['video']
+        video = self._call_api(
+            'videos/%s.json' % video_id, video_id, 'Downloading video JSON')

        self._check_errors(video)

@ -275,74 +265,57 @@ class VikiIE(VikiBaseIE):
            'subtitles': subtitles,
        }

+        streams = self._call_api(
+            'videos/%s/streams.json' % video_id, video_id,
+            'Downloading video streams JSON')
+
+        if 'external' in streams:
+            result.update({
+                '_type': 'url_transparent',
+                'url': streams['external']['url'],
+            })
+            return result
+
        formats = []
-
-        def add_format(format_id, format_dict, protocol='http'):
-            # rtmps URLs does not seem to work
-            if protocol == 'rtmps':
-                return
-            format_url = format_dict.get('url')
-            if not format_url:
-                return
-            qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
-            stream = qs.get('stream', [None])[0]
-            if stream:
-                format_url = base64.b64decode(stream).decode()
-            if format_id in ('m3u8', 'hls'):
-                m3u8_formats = self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4',
-                    entry_protocol='m3u8_native',
-                    m3u8_id='m3u8-%s' % protocol, fatal=False)
-                # Despite CODECS metadata in m3u8 all video-only formats
-                # are actually video+audio
-                for f in m3u8_formats:
-                    if '_drm/index_' in f['url']:
+        for format_id, stream_dict in streams.items():
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]$', format_id, 'height', default=None))
+            for protocol, format_dict in stream_dict.items():
+                # rtmps URLs do not seem to work
+                if protocol == 'rtmps':
+                    continue
+                format_url = format_dict['url']
+                if format_id == 'm3u8':
+                    m3u8_formats = self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native',
+                        m3u8_id='m3u8-%s' % protocol, fatal=False)
+                    # Despite CODECS metadata in m3u8 all video-only formats
+                    # are actually video+audio
+                    for f in m3u8_formats:
+                        if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
+                            f['acodec'] = None
+                    formats.extend(m3u8_formats)
+                elif format_url.startswith('rtmp'):
+                    mobj = re.search(
+                        r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
+                        format_url)
+                    if not mobj:
                        continue
-                    if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
-                        f['acodec'] = None
-                    formats.append(f)
-            elif format_id in ('mpd', 'dash'):
-                formats.extend(self._extract_mpd_formats(
-                    format_url, video_id, 'mpd-%s' % protocol, fatal=False))
-            elif format_url.startswith('rtmp'):
-                mobj = re.search(
-                    r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
-                    format_url)
-                if not mobj:
-                    return
-                formats.append({
-                    'format_id': 'rtmp-%s' % format_id,
-                    'ext': 'flv',
-                    'url': mobj.group('url'),
-                    'play_path': mobj.group('playpath'),
-                    'app': mobj.group('app'),
-                    'page_url': url,
-                })
-            else:
-                formats.append({
-                    'url': format_url,
-                    'format_id': '%s-%s' % (format_id, protocol),
-                    'height': int_or_none(self._search_regex(
-                        r'^(\d+)[pP]$', format_id, 'height', default=None)),
-                })
-
-        for format_id, format_dict in (resp.get('streams') or {}).items():
-            add_format(format_id, format_dict)
-        if not formats:
-            streams = self._call_api(
-                'videos/%s/streams.json' % video_id, video_id,
-                'Downloading video streams JSON')
-
-            if 'external' in streams:
-                result.update({
-                    '_type': 'url_transparent',
-                    'url': streams['external']['url'],
-                })
-                return result
-
-            for format_id, stream_dict in streams.items():
-                for protocol, format_dict in stream_dict.items():
-                    add_format(format_id, format_dict, protocol)
+                    formats.append({
+                        'format_id': 'rtmp-%s' % format_id,
+                        'ext': 'flv',
+                        'url': mobj.group('url'),
+                        'play_path': mobj.group('playpath'),
+                        'app': mobj.group('app'),
+                        'page_url': url,
+                    })
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': '%s-%s' % (format_id, protocol),
+                        'height': height,
+                    })
        self._sort_formats(formats)

        result['formats'] = formats