Compare commits


7 Commits

SHA1        Author        Message                                                                       Date
4ef1fc9707  Remita Amine  [youtube] fix automatic captions extraction(closes #27162)(closes #27388)    2020-12-24 16:05:03 +01:00
f9e6aa1dcf  Remita Amine  [sonyliv] fix title for movies                                                2020-12-24 13:33:12 +01:00
f83db9064b  Remita Amine  [sonyliv] fix extraction(closes #25667)                                       2020-12-24 13:10:20 +01:00
2da9a86399  Remita Amine  [streetvoice] fix extraction(closes #27455)(closes #27492)                    2020-12-24 13:10:20 +01:00
ecaa535cf4  Remita Amine  [facebook] add support for watchparty pages(closes #27507)                    2020-12-24 13:10:20 +01:00
79dd92b1fe  Remita Amine  [cbslocal] fix video extraction                                               2020-12-24 13:10:20 +01:00
bd3844c9c2  Remita Amine  [brightcove] add another method to extract policyKey                         2020-12-24 13:10:20 +01:00
7 changed files with 378 additions and 180 deletions

youtube_dl/extractor/brightcove.py

@@ -28,6 +28,7 @@ from ..utils import (
     parse_iso8601,
     smuggle_url,
     str_or_none,
+    try_get,
     unescapeHTML,
     unsmuggle_url,
     UnsupportedError,
@@ -600,24 +601,27 @@ class BrightcoveNewIE(AdobePassIE):
         store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)

         def extract_policy_key():
-            webpage = self._download_webpage(
-                'http://players.brightcove.net/%s/%s_%s/index.min.js'
-                % (account_id, player_id, embed), video_id)
-
-            policy_key = None
-
-            catalog = self._search_regex(
-                r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
-            if catalog:
-                catalog = self._parse_json(
-                    js_to_json(catalog), video_id, fatal=False)
-                if catalog:
-                    policy_key = catalog.get('policyKey')
-
+            base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
+            config = self._download_json(
+                base_url + 'config.json', video_id, fatal=False) or {}
+            policy_key = try_get(
+                config, lambda x: x['video_cloud']['policy_key'])
             if not policy_key:
-                policy_key = self._search_regex(
-                    r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
-                    webpage, 'policy key', group='pk')
+                webpage = self._download_webpage(
+                    base_url + 'index.min.js', video_id)
+
+                catalog = self._search_regex(
+                    r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+                if catalog:
+                    catalog = self._parse_json(
+                        js_to_json(catalog), video_id, fatal=False)
+                    if catalog:
+                        policy_key = catalog.get('policyKey')
+
+                if not policy_key:
+                    policy_key = self._search_regex(
+                        r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                        webpage, 'policy key', group='pk')

             store_pk(policy_key)
             return policy_key
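The change above makes the player's config.json the primary source of the Brightcove policy key and keeps the index.min.js scrape as a fallback. Below is a minimal standalone sketch of that lookup order using only the Python standard library; the function name and arguments are illustrative, and unlike the extractor it skips the catalog({...}) JSON step and the key caching.

```python
import json
import re
from urllib.request import urlopen


def fetch_policy_key(account_id, player_id, embed='default'):
    base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
    # 1. Preferred source: the player's config.json exposes the key directly.
    try:
        config = json.load(urlopen(base_url + 'config.json'))
        policy_key = (config.get('video_cloud') or {}).get('policy_key')
        if policy_key:
            return policy_key
    except Exception:
        pass
    # 2. Fallback: scrape index.min.js for a policyKey assignment, mirroring the
    #    regex used above (the extractor also tries the catalog({...}) JSON first).
    js = urlopen(base_url + 'index.min.js').read().decode('utf-8', 'replace')
    m = re.search(r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', js)
    return m.group('pk') if m else None
```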

youtube_dl/extractor/cbslocal.py

@@ -11,7 +11,47 @@ from ..utils import (

 class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
+    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
+    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        mcp_id = self._match_id(url)
+        return self.url_result(
+            'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
+
+
+class CBSLocalArticleIE(AnvatoIE):
+    _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

     _TESTS = [{
         # Anvato backend
@@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
             # m3u8 download
             'skip_download': True,
         },
-    }, {
-        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
-        'info_dict': {
-            'id': '3580809',
-            'ext': 'mp4',
-            'title': 'A Very Blue Anniversary',
-            'description': 'CBS2s Cindy Hsu has more.',
-            'thumbnail': 're:^https?://.*',
-            'timestamp': int,
-            'upload_date': r're:^\d{8}$',
-            'uploader': 'CBS',
-            'subtitles': {
-                'en': 'mincount:5',
-            },
-            'categories': [
-                'Stations\\Spoken Word\\WCBSTV',
-                'Syndication\\AOL',
-                'Syndication\\MSN',
-                'Syndication\\NDN',
-                'Syndication\\Yahoo',
-                'Content\\News',
-                'Content\\News\\Local News',
-            ],
-            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
-        },
     }]

     def _real_extract(self, url):
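The split above routes cbslocal.com video pages (numeric /video/<id> URLs) through CBSLocalIE, which hands the id straight to the Anvato extractor, while dated article URLs go through the new CBSLocalArticleIE. A quick standalone check of how the two regexes divide the URLs; the article URL below is a made-up example for illustration.

```python
import re

VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
VIDEO_URL = VALID_URL_BASE + r'video/(?P<id>\d+)'                  # -> CBSLocalIE
ARTICLE_URL = VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'   # -> CBSLocalArticleIE

urls = [
    'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
    'http://newyork.cbslocal.com/2020/12/24/some-local-news-story/',  # hypothetical article URL
]
for url in urls:
    for name, pattern in (('CBSLocalIE', VIDEO_URL), ('CBSLocalArticleIE', ARTICLE_URL)):
        m = re.match(pattern, url)
        if m:
            print(name, '->', m.group('id'))
```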

youtube_dl/extractor/extractors.py

@@ -163,7 +163,10 @@ from .cbc import (
     CBCOlympicsIE,
 )
 from .cbs import CBSIE
-from .cbslocal import CBSLocalIE
+from .cbslocal import (
+    CBSLocalIE,
+    CBSLocalArticleIE,
+)
 from .cbsinteractive import CBSInteractiveIE
 from .cbsnews import (
     CBSNewsEmbedIE,

youtube_dl/extractor/facebook.py

@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
 import re
 import socket
@@ -8,6 +9,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
     compat_http_client,
+    compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
@@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
                         )\?(?:.*?)(?:v|video_id|story_fbid)=|
                         [^/]+/videos/(?:[^/]+/)?|
                         [^/]+/posts/|
-                        groups/[^/]+/permalink/
+                        groups/[^/]+/permalink/|
+                        watchparty/
                     )|
                     facebook:
                 )
@@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
         # data.video.creation_story.attachments[].media
         'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
         'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/watchparty/211641140192478',
+        'info_dict': {
+            'id': '211641140192478',
+        },
+        'playlist_count': 1,
+        'skip': 'Requires logging in',
     }]
     _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
+    _api_config = {
+        'graphURI': '/api/graphql/'
+    }

     @staticmethod
     def _extract_urls(webpage):
@@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
         self._sort_formats(formats)

+        def extract_relay_data(_filter):
+            return self._parse_json(self._search_regex(
+                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
+                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
+
+        def extract_relay_prefetched_data(_filter):
+            replay_data = extract_relay_data(_filter)
+            for require in (replay_data.get('require') or []):
+                if require[0] == 'RelayPrefetchedStreamCache':
+                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
+
         if not video_data:
             server_js_data = self._parse_json(self._search_regex([
                 r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
@@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
             video_data = extract_from_jsmods_instances(server_js_data)

         if not video_data:
-            graphql_data = self._parse_json(self._search_regex(
-                r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
-                webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
-            for require in (graphql_data.get('require') or []):
-                if require[0] == 'RelayPrefetchedStreamCache':
-                    entries = []
+            data = extract_relay_prefetched_data(
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
+            if data:
+                entries = []

                 def parse_graphql_video(video):
                     formats = []
                     q = qualities(['sd', 'hd'])
                     for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
                         playable_url = video.get('playable_url' + suffix)
                         if not playable_url:
                             continue
                         formats.append({
                             'format_id': format_id,
                             'quality': q(format_id),
                             'url': playable_url,
                         })
                     extract_dash_manifest(video, formats)
                     process_formats(formats)
                     v_id = video.get('videoId') or video.get('id') or video_id
                     info = {
                         'id': v_id,
                         'formats': formats,
                         'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
                         'uploader_id': try_get(video, lambda x: x['owner']['id']),
                         'timestamp': int_or_none(video.get('publish_time')),
                         'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
                     }
                     description = try_get(video, lambda x: x['savable_description']['text'])
                     title = video.get('name')
                     if title:
                         info.update({
                             'title': title,
                             'description': description,
                         })
                     else:
                         info['title'] = description or 'Facebook video #%s' % v_id
                     entries.append(info)

                 def parse_attachment(attachment, key='media'):
                     media = attachment.get(key) or {}
                     if media.get('__typename') == 'Video':
                         return parse_graphql_video(media)

-                    data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
-
                 nodes = data.get('nodes') or []
                 node = data.get('node') or {}
                 if not nodes and node:
                     nodes.append(node)
                 for node in nodes:
                     story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
                     attachments = try_get(story, [
                         lambda x: x['attached_story']['attachments'],
                         lambda x: x['attachments']
                     ], list) or []
                     for attachment in attachments:
                         attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
                         ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
                         for n in ns:
                             parse_attachment(n)
                         parse_attachment(attachment)

                 edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
                 for edge in edges:
                     parse_attachment(edge, key='node')

                 video = data.get('video') or {}
                 if video:
                     attachments = try_get(video, [
                         lambda x: x['story']['attachments'],
                         lambda x: x['creation_story']['attachments']
                     ], list) or []
                     for attachment in attachments:
                         parse_attachment(attachment)
                     if not entries:
                         parse_graphql_video(video)

                 return self.playlist_result(entries, video_id)

         if not video_data:
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
@@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
             elif '>You must log in to continue' in webpage:
                 self.raise_login_required()

+        if not video_data and '/watchparty/' in url:
+            post_data = {
+                'doc_id': 3731964053542869,
+                'variables': json.dumps({
+                    'livingRoomID': video_id,
+                }),
+            }
+
+            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
+            if prefetched_data:
+                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
+                if lsd:
+                    post_data[lsd['name']] = lsd['value']
+
+            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
+            for define in (relay_data.get('define') or []):
+                if define[0] == 'RelayAPIConfigDefaults':
+                    self._api_config = define[2]
+
+            living_room = self._download_json(
+                urljoin(url, self._api_config['graphURI']), video_id,
+                data=urlencode_postdata(post_data))['data']['living_room']
+
+            entries = []
+            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
+                video = try_get(edge, lambda x: x['node']['video']) or {}
+                v_id = video.get('id')
+                if not v_id:
+                    continue
+                v_id = compat_str(v_id)
+                entries.append(self.url_result(
+                    self._VIDEO_PAGE_TEMPLATE % v_id,
+                    self.ie_key(), v_id, video.get('name')))
+
+            return self.playlist_result(entries, video_id)
+
         if not video_data:
             # Video info not in first request, do a secondary request using
             # tahoe player specific URL
             tahoe_data = self._download_webpage(
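For watch-party URLs the new code does not find video data in the page itself; it posts a GraphQL query (doc_id 3731964053542869 with a livingRoomID variable) to the endpoint taken from RelayAPIConfigDefaults and turns each watched_content edge into a regular video page URL. Below is a rough standard-library sketch of the request shape only; in practice it succeeds only with logged-in cookies and the lsd anti-CSRF token scraped from the page, which is why the test above is skipped.

```python
import json
from urllib.parse import urlencode
from urllib.request import Request


def build_watchparty_request(living_room_id, lsd_name=None, lsd_value=None):
    post_data = {
        'doc_id': 3731964053542869,  # GraphQL query id from the diff
        'variables': json.dumps({'livingRoomID': living_room_id}),
    }
    if lsd_name and lsd_value:  # anti-CSRF token scraped from the page markup
        post_data[lsd_name] = lsd_value
    return Request(
        'https://www.facebook.com/api/graphql/',  # default graphURI from _api_config
        data=urlencode(post_data).encode(),
        headers={'Content-Type': 'application/x-www-form-urlencoded'})

# The JSON response carries data.living_room.recap.watched_content.edges; each
# edge's node.video.id is then re-emitted as a normal facebook.com video URL.
```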

youtube_dl/extractor/sonyliv.py

@@ -1,40 +1,112 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import time
+import uuid
+
 from .common import InfoExtractor
-from ..utils import smuggle_url
+from ..compat import compat_HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)


 class SonyLIVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
     _TESTS = [{
-        'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
+        'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
         'info_dict': {
-            'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
-            'id': 'ref:5024612095001',
+            'title': 'Bachelors Delight - Achaari Cheese Toast',
+            'id': '1000022678',
             'ext': 'mp4',
-            'upload_date': '20170923',
-            'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
-            'uploader_id': '5182475815001',
-            'timestamp': 1506200547,
+            'upload_date': '20200411',
+            'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
+            'timestamp': 1586632091,
+            'duration': 185,
+            'season_number': 1,
+            'episode': 'Achaari Cheese Toast',
+            'episode_number': 1,
+            'release_year': 2016,
         },
         'params': {
             'skip_download': True,
         },
-        'add_ie': ['BrightcoveNew'],
     }, {
-        'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
+        'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
         'only_matching': True,
     }]
+    _GEO_COUNTRIES = ['IN']
+    _TOKEN = None

-    # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
+    def _call_api(self, version, path, video_id):
+        headers = {}
+        if self._TOKEN:
+            headers['security_token'] = self._TOKEN
+        try:
+            return self._download_json(
+                'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
+                video_id, headers=headers)['resultObj']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                message = self._parse_json(
+                    e.cause.read().decode(), video_id)['message']
+                if message == 'Geoblocked Country':
+                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+                raise ExtractorError(message)
+            raise
+
+    def _real_initialize(self):
+        self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)

     def _real_extract(self, url):
-        brightcove_id = self._match_id(url)
-        return self.url_result(
-            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
-                'geo_countries': ['IN'],
-                'referrer': url,
-            }),
-            'BrightcoveNew', brightcove_id)
+        video_id = self._match_id(url)
+        content = self._call_api(
+            '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
+        if content.get('isEncrypted'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+        dash_url = content['videoURL']
+        headers = {
+            'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
+        }
+        formats = self._extract_mpd_formats(
+            dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
+        formats.extend(self._extract_m3u8_formats(
+            dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
+            video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
+        for f in formats:
+            f.setdefault('http_headers', {}).update(headers)
+        self._sort_formats(formats)
+
+        metadata = self._call_api(
+            '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
+        title = metadata['title']
+        episode = metadata.get('episodeTitle')
+        if episode and title != episode:
+            title += ' - ' + episode
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': content.get('posterURL'),
+            'description': metadata.get('longDescription') or metadata.get('shortDescription'),
+            'timestamp': int_or_none(metadata.get('creationDate'), 1000),
+            'duration': int_or_none(metadata.get('duration')),
+            'season_number': int_or_none(metadata.get('season')),
+            'episode': episode,
+            'episode_number': int_or_none(metadata.get('episodeNumber')),
+            'release_year': int_or_none(metadata.get('year')),
+        }
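The rewritten extractor replaces the old Brightcove hand-off with direct calls to the sonyliv.com API: fetch a token, resolve the manifest URL, then pull the metadata. A condensed standard-library sketch of that flow follows; the endpoints and the security_token header come from the diff, while error handling, the x-playback-session-id header and the HLS/DASH format parsing are left out.

```python
import json
from urllib.request import Request, urlopen

API = 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s'


def call_api(version, path, token=None):
    headers = {'security_token': token} if token else {}
    with urlopen(Request(API % (version, path), headers=headers)) as resp:
        return json.load(resp)['resultObj']


def get_stream(video_id):
    token = call_api('1.4', 'ALL/GETTOKEN')                                   # _real_initialize
    content = call_api('1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, token)   # manifest URL
    metadata = call_api('1.6', 'IN/DETAIL/' + video_id, token)['containers'][0]['metadata']
    # content['videoURL'] is the DASH manifest; swapping /DASH/ for /HLS/ and
    # .mpd for .m3u8 yields the HLS variant, as the extractor does above.
    return content['videoURL'], metadata.get('title')
```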

youtube_dl/extractor/streetvoice.py

@@ -2,25 +2,40 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import unified_strdate
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    str_or_none,
+    strip_or_none,
+    try_get,
+    urljoin,
+)


 class StreetVoiceIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://streetvoice.com/skippylu/songs/94440/',
-        'md5': '15974627fc01a29e492c98593c2fd472',
+        'url': 'https://streetvoice.com/skippylu/songs/123688/',
+        'md5': '0eb535970629a5195685355f3ed60bfd',
         'info_dict': {
-            'id': '94440',
+            'id': '123688',
             'ext': 'mp3',
-            'title': '輸',
-            'description': 'Crispy脆樂團 - 輸',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 260,
-            'upload_date': '20091018',
+            'title': '流浪',
+            'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 270,
+            'upload_date': '20100923',
             'uploader': 'Crispy脆樂團',
             'uploader_id': '627810',
+            'uploader_url': 're:^https?://streetvoice.com/skippylu/',
+            'timestamp': 1285261661,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
+            'track': '流浪',
+            'track_id': '123688',
+            'album': '2010',
         }
     }, {
         'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
@@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):

     def _real_extract(self, url):
         song_id = self._match_id(url)
-
-        song = self._download_json(
-            'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
-
+        base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
+        song = self._download_json(base_url, song_id, query={
+            'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
+        })
         title = song['name']
-        author = song['user']['nickname']
+
+        formats = []
+        for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
+            f_url = (self._download_json(
+                base_url + suffix + '/', song_id,
+                'Downloading %s format URL' % format_id,
+                data=b'', fatal=False) or {}).get('file')
+            if not f_url:
+                continue
+            f = {
+                'ext': 'mp3',
+                'format_id': format_id,
+                'url': f_url,
+                'vcodec': 'none',
+            }
+            if format_id == 'hls':
+                f['protocol'] = 'm3u8_native'
+            abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
+            if abr:
+                abr = int(abr)
+                f.update({
+                    'abr': abr,
+                    'tbr': abr,
+                })
+            formats.append(f)
+
+        user = song.get('user') or {}
+        username = user.get('username')
+        get_count = lambda x: int_or_none(song.get(x + '_count'))

         return {
             'id': song_id,
-            'url': song['file'],
+            'formats': formats,
             'title': title,
-            'description': '%s - %s' % (author, title),
-            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
-            'duration': song.get('length'),
-            'upload_date': unified_strdate(song.get('created_at')),
-            'uploader': author,
-            'uploader_id': compat_str(song['user']['id']),
+            'description': strip_or_none(song.get('synopsis')),
+            'thumbnail': song.get('image'),
+            'duration': int_or_none(song.get('length')),
+            'timestamp': parse_iso8601(song.get('created_at')),
+            'uploader': try_get(user, lambda x: x['profile']['nickname']),
+            'uploader_id': str_or_none(user.get('id')),
+            'uploader_url': urljoin(url, '/%s/' % username) if username else None,
+            'view_count': get_count('plays'),
+            'like_count': get_count('likes'),
+            'comment_count': get_count('comments'),
+            'repost_count': get_count('share'),
+            'track': title,
+            'track_id': song_id,
+            'album': try_get(song, lambda x: x['album']['name']),
         }
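The extractor now talks to the v4 API: one GET for the song metadata (with an explicit fields list) and one empty-body POST per format endpoint (hls/file, file, file/original). A rough standalone sketch of those calls is below; the fields list is shortened for brevity and the helper name is illustrative.

```python
import json
from urllib.request import Request, urlopen


def streetvoice_song(song_id):
    base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
    # Song metadata: a plain GET, limited to a few of the fields requested above.
    song = json.load(urlopen(base_url + '?fields=name,length,synopsis,user'))
    # Per-format file URLs: each endpoint expects an empty POST body.
    urls = {}
    for suffix, format_id in (('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')):
        try:
            info = json.load(urlopen(Request(base_url + suffix + '/', data=b'')))
        except Exception:
            continue
        if info.get('file'):
            urls[format_id] = info['file']
    return song.get('name'), urls
```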

youtube_dl/extractor/youtube.py

@@ -1322,17 +1322,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return self._parse_json(
             uppercase_escape(config), video_id, fatal=False)

-    def _get_automatic_captions(self, video_id, webpage):
+    def _get_automatic_captions(self, video_id, player_response, player_config):
         """We need the webpage for getting the captions url, pass it as an
         argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
-        player_config = self._get_ytplayer_config(video_id, webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if not player_config:
+        if not (player_response or player_config):
             self._downloader.report_warning(err_msg)
             return {}
         try:
-            args = player_config['args']
+            args = player_config.get('args') if player_config else {}
             caption_url = args.get('ttsurl')
             if caption_url:
                 timestamp = args['timestamp']
@@ -1391,19 +1390,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 return captions

             # New captions format as of 22.06.2017
-            player_response = args.get('player_response')
-            if player_response and isinstance(player_response, compat_str):
-                player_response = self._parse_json(
-                    player_response, video_id, fatal=False)
-                if player_response:
-                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
-                    base_url = renderer['captionTracks'][0]['baseUrl']
-                    sub_lang_list = []
-                    for lang in renderer['translationLanguages']:
-                        lang_code = lang.get('languageCode')
-                        if lang_code:
-                            sub_lang_list.append(lang_code)
-                    return make_captions(base_url, sub_lang_list)
+            if player_response:
+                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+                base_url = renderer['captionTracks'][0]['baseUrl']
+                sub_lang_list = []
+                for lang in renderer['translationLanguages']:
+                    lang_code = lang.get('languageCode')
+                    if lang_code:
+                        sub_lang_list.append(lang_code)
+                return make_captions(base_url, sub_lang_list)

             # Some videos don't provide ttsurl but rather caption_tracks and
             # caption_translation_languages (e.g. 20LmZk1hakA)
@@ -1652,6 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Get video info
         video_info = {}
         embed_webpage = None
+        ytplayer_config = None

         if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
             age_gate = True
@@ -2276,7 +2272,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
-        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+        automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)

         video_duration = try_get(
             video_info, lambda x: int_or_none(x['length_seconds'][0]))
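The automatic-captions path no longer re-downloads the player config from the webpage; the already-parsed player_response (and, when available, the ytplayer config) is passed in from _real_extract. A minimal sketch of the new lookup, given a player_response dict with the fields the diff reads:

```python
def automatic_caption_langs(player_response):
    # Caption track base URL plus the list of translation language codes, exactly
    # the fields the updated _get_automatic_captions reads from player_response.
    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
    base_url = renderer['captionTracks'][0]['baseUrl']
    langs = [lang['languageCode']
             for lang in renderer['translationLanguages']
             if lang.get('languageCode')]
    return base_url, langs
```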