[twitch] Refactor

[twitch] Drop legacy kraken API v5 code altogether
[twitch:vod] Switch to GraphQL for video metadata
2025-10-24 09:08:36 +09:00 · 2021-01-06 03:54:33 +07:00 · 2021-01-06 03:46:45 +07:00 · 2021-01-06 03:34:36 +07:00 · 2021-01-05 21:17:39 +01:00 · 2021-01-06 02:11:49 +07:00
5 changed files with 234 additions and 279 deletions
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -7,12 +7,12 @@ from .common import InfoExtractor
 from .gigya import GigyaBaseIE
 from ..compat import compat_HTTPError
 from ..utils import (
+    extract_attributes,
    ExtractorError,
    strip_or_none,
    float_or_none,
    int_or_none,
    merge_dicts,
-    parse_iso8601,
    str_or_none,
    url_or_none,
 )
@@ -37,6 +37,7 @@ class CanvasIE(InfoExtractor):
        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
        'only_matching': True,
    }]
+    _GEO_BYPASS = False
    _HLS_ENTRY_PROTOCOLS_MAP = {
        'HLS': 'm3u8_native',
        'HLS_AES': 'm3u8',
@@ -47,29 +48,34 @@ class CanvasIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        site_id, video_id = mobj.group('site_id'), mobj.group('id')

-        # Old API endpoint, serves more formats but may fail for some videos
-        data = self._download_json(
-            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
-            % (site_id, video_id), video_id, 'Downloading asset JSON',
-            'Unable to download asset JSON', fatal=False)
+        data = None
+        if site_id != 'vrtvideo':
+            # Old API endpoint, serves more formats but may fail for some videos
+            data = self._download_json(
+                'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+                % (site_id, video_id), video_id, 'Downloading asset JSON',
+                'Unable to download asset JSON', fatal=False)

        # New API endpoint
        if not data:
+            headers = self.geo_verification_headers()
+            headers.update({'Content-Type': 'application/json'})
            token = self._download_json(
                '%s/tokens' % self._REST_API_BASE, video_id,
-                'Downloading token', data=b'',
-                headers={'Content-Type': 'application/json'})['vrtPlayerToken']
+                'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
            data = self._download_json(
                '%s/videos/%s' % (self._REST_API_BASE, video_id),
-                video_id, 'Downloading video JSON', fatal=False, query={
+                video_id, 'Downloading video JSON', query={
                    'vrtPlayerToken': token,
                    'client': '%s@PROD' % site_id,
                }, expected_status=400)
-            message = data.get('message')
-            if message and not data.get('title'):
-                if data.get('code') == 'AUTHENTICATION_REQUIRED':
-                    self.raise_login_required(message)
-                raise ExtractorError(message, expected=True)
+            if not data.get('title'):
+                code = data.get('code')
+                if code == 'AUTHENTICATION_REQUIRED':
+                    self.raise_login_required()
+                elif code == 'INVALID_LOCATION':
+                    self.raise_geo_restricted(countries=['BE'])
+                raise ExtractorError(data.get('message') or code, expected=True)

        title = data['title']
        description = data.get('description')
@@ -208,17 +214,21 @@ class VrtNUIE(GigyaBaseIE):
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        # Available via old API endpoint
-        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
        'info_dict': {
-            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+            'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
            'ext': 'mp4',
-            'title': 'De zwarte weduwe',
-            'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
+            'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
+            'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
            'duration': 1457.04,
            'thumbnail': r're:^https?://.*\.jpg$',
-            'season': 'Season 1',
-            'season_number': 1,
+            'series': 'Postbus X',
+            'season': 'Seizoen 1989',
+            'season_number': 1989,
+            'episode': 'De zwarte weduwe',
            'episode_number': 1,
+            'timestamp': 1595822400,
+            'upload_date': '20200727',
        },
        'skip': 'This video is only available for registered users',
        'params': {
@@ -300,69 +310,25 @@ class VrtNUIE(GigyaBaseIE):
    def _real_extract(self, url):
        display_id = self._match_id(url)

-        webpage, urlh = self._download_webpage_handle(url, display_id)
+        webpage = self._download_webpage(url, display_id)
+
+        attrs = extract_attributes(self._search_regex(
+            r'(<nui-media[^>]+>)', webpage, 'media element'))
+        video_id = attrs['videoid']
+        publication_id = attrs.get('publicationid')
+        if publication_id:
+            video_id = publication_id + '$' + video_id
+
+        page = (self._parse_json(self._search_regex(
+            r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
+            default='{}'), video_id, fatal=False) or {}).get('page') or {}

        info = self._search_json_ld(webpage, display_id, default={})
-
-        # title is optional here since it may be extracted by extractor
-        # that is delegated from here
-        title = strip_or_none(self._html_search_regex(
-            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
-            webpage, 'title', default=None))
-
-        description = self._html_search_regex(
-            r'(?ms)<div class="content__description">(.+?)</div>',
-            webpage, 'description', default=None)
-
-        season = self._html_search_regex(
-            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
-                    <span>seizoen\ (.+?)</span>\s*
-                </div>''',
-             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
-            webpage, 'season', default=None)
-
-        season_number = int_or_none(season)
-
-        episode_number = int_or_none(self._html_search_regex(
-            r'''(?xms)<div\ class="content__episode">\s*
-                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
-                </div>''',
-            webpage, 'episode_number', default=None))
-
-        release_date = parse_iso8601(self._html_search_regex(
-            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
-            webpage, 'release_date', default=None))
-
-        # If there's a ? or a # in the URL, remove them and everything after
-        clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
-        securevideo_url = clean_url + '.mssecurevideo.json'
-
-        try:
-            video = self._download_json(securevideo_url, display_id)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
-                self.raise_login_required()
-            raise
-
-        # We are dealing with a '../<show>.relevant' URL
-        redirect_url = video.get('url')
-        if redirect_url:
-            return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
-
-        # There is only one entry, but with an unknown key, so just get
-        # the first one
-        video_id = list(video.values())[0].get('videoid')
-
        return merge_dicts(info, {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
-            'title': title,
-            'description': description,
-            'season': season,
-            'season_number': season_number,
-            'episode_number': episode_number,
-            'release_date': release_date,
+            'season_number': int_or_none(page.get('episode_season')),
        })
--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@@ -17,7 +17,12 @@ from ..utils import (
 class DPlayIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://
        (?P<domain>
-            (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
+            (?:www\.)?(?P<host>d
+                (?:
+                    play\.(?P<country>dk|fi|jp|se|no)|
+                    iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
+                )
+            )|
            (?P<subdomain_country>es|it)\.dplay\.com
        )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''

@@ -126,6 +131,24 @@ class DPlayIE(InfoExtractor):
    }, {
        'url': 'https://www.dplay.jp/video/gold-rush/24086',
        'only_matching': True,
+    }, {
+        'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
+        'only_matching': True,
    }]

    def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
@@ -241,7 +264,7 @@ class DPlayIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        domain = mobj.group('domain').lstrip('www.')
-        country = mobj.group('country') or mobj.group('subdomain_country')
-        host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
+        country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
+        host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
        return self._get_disco_api_info(
            url, display_id, host, 'dplay' + country, country)
--- a/youtube_dl/extractor/ketnet.py
+++ b/youtube_dl/extractor/ketnet.py
@@ -2,92 +2,71 @@ from __future__ import unicode_literals

 from .canvas import CanvasIE
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)


 class KetnetIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
-        'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
-        'md5': '6bdeb65998930251bbd1c510750edba9',
+        'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
+        'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
        'info_dict': {
-            'id': 'zomerse-filmpjes',
+            'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
            'ext': 'mp4',
-            'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
-            'description': 'Gluur mee met Ghost Rockers op de filmset',
+            'title': 'Nachtwacht - Reeks 3: Aflevering 1',
+            'description': 'De Nachtwacht krijgt te maken met een parasiet',
            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }, {
-        # mzid in playerConfig instead of sources
-        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
-        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
-        'info_dict': {
-            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
-            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
-            'ext': 'flv',
-            'title': 'Nachtwacht: De Greystook',
-            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 1468.03,
+            'duration': 1468.02,
+            'timestamp': 1609225200,
+            'upload_date': '20201229',
+            'series': 'Nachtwacht',
+            'season': 'Reeks 3',
+            'episode': 'De Greystook',
+            'episode_number': 1,
        },
        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
    }, {
-        'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
-        'only_matching': True,
-    }, {
-        # mzsource, geo restricted to Belgium
-        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
+        'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
        'only_matching': True,
    }]

    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        display_id = self._match_id(url)

-        webpage = self._download_webpage(url, video_id)
+        video = self._download_json(
+            'https://senior-bff.ketnet.be/graphql', display_id, query={
+                'query': '''{
+  video(id: "content/ketnet/nl/%s.model.json") {
+    description
+    episodeNr
+    imageUrl
+    mediaReference
+    programTitle
+    publicationDate
+    seasonTitle
+    subtitleVideodetail
+    titleVideodetail
+  }
+}''' % display_id,
+            })['data']['video']

-        config = self._parse_json(
-            self._search_regex(
-                r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
-                'player config'),
-            video_id)
-
-        mzid = config.get('mzid')
-        if mzid:
-            return self.url_result(
-                'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
-                CanvasIE.ie_key(), video_id=mzid)
-
-        title = config['title']
-
-        formats = []
-        for source_key in ('', 'mz'):
-            source = config.get('%ssource' % source_key)
-            if not isinstance(source, dict):
-                continue
-            for format_id, format_url in source.items():
-                if format_id == 'hls':
-                    formats.extend(self._extract_m3u8_formats(
-                        format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id=format_id,
-                        fatal=False))
-                elif format_id == 'hds':
-                    formats.extend(self._extract_f4m_formats(
-                        format_url, video_id, f4m_id=format_id, fatal=False))
-                else:
-                    formats.append({
-                        'url': format_url,
-                        'format_id': format_id,
-                    })
-        self._sort_formats(formats)
+        mz_id = compat_urllib_parse_unquote(video['mediaReference'])

        return {
-            'id': video_id,
-            'title': title,
-            'description': config.get('description'),
-            'thumbnail': config.get('image'),
-            'series': config.get('program'),
-            'episode': config.get('episode'),
-            'formats': formats,
+            '_type': 'url_transparent',
+            'id': mz_id,
+            'title': video['titleVideodetail'],
+            'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
+            'thumbnail': video.get('imageUrl'),
+            'description': video.get('description'),
+            'timestamp': parse_iso8601(video.get('publicationDate')),
+            'series': video.get('programTitle'),
+            'season': video.get('seasonTitle'),
+            'episode': video.get('subtitleVideodetail'),
+            'episode_number': int_or_none(video.get('episodeNr')),
+            'ie_key': CanvasIE.ie_key(),
        }
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -326,6 +326,19 @@ class RaiIE(RaiBaseIE):
        'params': {
            'skip_download': True,
        },
+    }, {
+        # ContentItem in iframe (see #12652)
+        'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
+        'info_dict': {
+            'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
+            'ext': 'mp4',
+            'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
+            'description': 'md5:d291b03407ec505f95f27970c0b025f4',
+            'upload_date': '20150913',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        # Direct MMS URL
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
@@ -403,7 +416,8 @@ class RaiIE(RaiBaseIE):
                r'''(?x)
                    (?:
                        (?:initEdizione|drawMediaRaiTV)\(|
-                        <(?:[^>]+\bdata-id|var\s+uniquename)=
+                        <(?:[^>]+\bdata-id|var\s+uniquename)=|
+                        <iframe[^>]+\bsrc=
                    )
                    (["\'])
                    (?:(?!\1).)*\bContentItem-(?P<id>%s)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -9,7 +9,6 @@ import re

 from .common import InfoExtractor
 from ..compat import (
-    compat_kwargs,
    compat_parse_qs,
    compat_str,
    compat_urlparse,
@@ -42,30 +41,16 @@ class TwitchBaseIE(InfoExtractor):
    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

-    def _handle_error(self, response):
-        if not isinstance(response, dict):
-            return
-        error = response.get('error')
-        if error:
-            raise ExtractorError(
-                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
-                expected=True)
-
-    def _call_api(self, path, item_id, *args, **kwargs):
-        headers = kwargs.get('headers', {}).copy()
-        headers.update({
-            'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
-            'Client-ID': self._CLIENT_ID,
-        })
-        kwargs.update({
-            'headers': headers,
-            'expected_status': (400, 410),
-        })
-        response = self._download_json(
-            '%s/%s' % (self._API_BASE, path), item_id,
-            *args, **compat_kwargs(kwargs))
-        self._handle_error(response)
-        return response
+    _OPERATION_HASHES = {
+        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
+        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
+        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
+        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
+        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
+        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
+        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
+        'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
+    }

    def _real_initialize(self):
        self._login()
@@ -151,13 +136,46 @@ class TwitchBaseIE(InfoExtractor):
                    })
        self._sort_formats(formats)

-    def _download_access_token(self, channel_name):
-        return self._call_api(
-            'api/channels/%s/access_token' % channel_name, channel_name,
-            'Downloading access token JSON')
+    def _download_base_gql(self, video_id, ops, note, fatal=True):
+        # Send `ops` (a single operation dict or a list of them), JSON-encoded,
+        # as the request body to the Twitch GraphQL endpoint and return the
+        # parsed JSON response. `note` and `fatal` are forwarded unchanged to
+        # _download_json. Every request carries the web Client-ID.
+        return self._download_json(
+            'https://gql.twitch.tv/gql', video_id, note,
+            data=json.dumps(ops).encode(),
+            headers={
+                'Content-Type': 'text/plain;charset=UTF-8',
+                'Client-ID': self._CLIENT_ID,
+            }, fatal=fatal)

-    def _extract_channel_id(self, token, channel_name):
-        return compat_str(self._parse_json(token, channel_name)['channel_id'])
+    def _download_gql(self, video_id, ops, note, fatal=True):
+        # Run a batch of persisted GraphQL operations.
+        #
+        # Each dict in `ops` must carry an 'operationName' that is a key of
+        # _OPERATION_HASHES (an unknown name raises KeyError). The matching
+        # persisted-query hash is attached to the dict in place before the
+        # batch is sent through _download_base_gql.
+        #
+        # `fatal` is forwarded so that callers passing fatal=False really get
+        # non-fatal behaviour instead of having the flag silently dropped.
+        for op in ops:
+            op['extensions'] = {
+                'persistedQuery': {
+                    'version': 1,
+                    'sha256Hash': self._OPERATION_HASHES[op['operationName']],
+                }
+            }
+        return self._download_base_gql(video_id, ops, note, fatal=fatal)
+
+    def _download_access_token(self, video_id, token_kind, param_name):
+        # Fetch a playback access token through a raw (non-persisted) GraphQL
+        # query and return the dict found under ['data'][method], which the
+        # query asks to contain 'value' and 'signature'.
+        #
+        # token_kind selects the query field: '<token_kind>PlaybackAccessToken'.
+        # param_name is the argument name that receives video_id in the query.
+        # NOTE(review): video_id is interpolated into the query text unescaped;
+        # confirm callers only pass values that cannot contain a double quote.
+        method = '%sPlaybackAccessToken' % token_kind
+        ops = {
+            'query': '''{
+              %s(
+                %s: "%s",
+                params: {
+                  platform: "web",
+                  playerBackend: "mediaplayer",
+                  playerType: "site"
+                }
+              )
+              {
+                value
+                signature
+              }
+            }''' % (method, param_name, video_id),
+        }
+        return self._download_base_gql(
+            video_id, ops,
+            'Downloading %s access token GraphQL' % token_kind)['data'][method]


 class TwitchVodIE(TwitchBaseIE):
@@ -170,8 +188,6 @@ class TwitchVodIE(TwitchBaseIE):
                        )
                        (?P<id>\d+)
                    '''
-    _ITEM_TYPE = 'vod'
-    _ITEM_SHORTCUT = 'v'

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
@@ -181,7 +197,7 @@ class TwitchVodIE(TwitchBaseIE):
            'title': 'LCK Summer Split - Week 6 Day 1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 17208,
-            'timestamp': 1435131709,
+            'timestamp': 1435131734,
            'upload_date': '20150624',
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
@@ -230,10 +246,20 @@ class TwitchVodIE(TwitchBaseIE):
    }]

    def _download_info(self, item_id):
-        return self._extract_info(
-            self._call_api(
-                'kraken/videos/%s' % item_id, item_id,
-                'Downloading video info JSON'))
+        data = self._download_gql(
+            item_id, [{
+                'operationName': 'VideoMetadata',
+                'variables': {
+                    'channelLogin': '',
+                    'videoID': item_id,
+                },
+            }],
+            'Downloading stream metadata GraphQL')[0]['data']
+        video = data.get('video')
+        if video is None:
+            raise ExtractorError(
+                'Video %s does not exist' % item_id, expected=True)
+        return self._extract_info_gql(video, item_id)

    @staticmethod
    def _extract_info(info):
@@ -272,13 +298,33 @@ class TwitchVodIE(TwitchBaseIE):
            'is_live': is_live,
        }

+    @staticmethod
+    def _extract_info_gql(info, item_id):
+        # Build the info dict for a VOD from the GraphQL video object `info`.
+        # item_id is used as the id when `info` carries no 'id' of its own.
+        vod_id = info.get('id') or item_id
+        # id backward compatibility for download archives
+        if vod_id[0] != 'v':
+            vod_id = 'v%s' % vod_id
+        thumbnail = url_or_none(info.get('previewThumbnailURL'))
+        if thumbnail:
+            # Fill the {width}/{height} placeholders of the URL template with 0
+            # (presumably this selects the unscaled image; TODO confirm).
+            for p in ('width', 'height'):
+                thumbnail = thumbnail.replace('{%s}' % p, '0')
+        return {
+            'id': vod_id,
+            # Fall back to a fixed title when the video object has none
+            'title': info.get('title') or 'Untitled Broadcast',
+            'description': info.get('description'),
+            'duration': int_or_none(info.get('lengthSeconds')),
+            'thumbnail': thumbnail,
+            'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
+            'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
+            'timestamp': unified_timestamp(info.get('publishedAt')),
+            'view_count': int_or_none(info.get('viewCount')),
+        }
+
    def _real_extract(self, url):
        vod_id = self._match_id(url)

        info = self._download_info(vod_id)
-        access_token = self._call_api(
-            'api/vods/%s/access_token' % vod_id, vod_id,
-            'Downloading %s access token' % self._ITEM_TYPE)
+        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_m3u8_formats(
            '%s/vod/%s.m3u8?%s' % (
@@ -289,8 +335,8 @@ class TwitchVodIE(TwitchBaseIE):
                    'allow_spectre': 'true',
                    'player': 'twitchweb',
                    'playlist_include_framerate': 'true',
-                    'nauth': access_token['token'],
-                    'nauthsig': access_token['sig'],
+                    'nauth': access_token['value'],
+                    'nauthsig': access_token['signature'],
                })),
            vod_id, 'mp4', entry_protocol='m3u8_native')

@@ -333,37 +379,7 @@ def _make_video_result(node):
    }


-class TwitchGraphQLBaseIE(TwitchBaseIE):
-    _PAGE_LIMIT = 100
-
-    _OPERATION_HASHES = {
-        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
-        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
-        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
-        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
-        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
-        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
-        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
-    }
-
-    def _download_gql(self, video_id, ops, note, fatal=True):
-        for op in ops:
-            op['extensions'] = {
-                'persistedQuery': {
-                    'version': 1,
-                    'sha256Hash': self._OPERATION_HASHES[op['operationName']],
-                }
-            }
-        return self._download_json(
-            'https://gql.twitch.tv/gql', video_id, note,
-            data=json.dumps(ops).encode(),
-            headers={
-                'Content-Type': 'text/plain;charset=UTF-8',
-                'Client-ID': self._CLIENT_ID,
-            }, fatal=fatal)
-
-
-class TwitchCollectionIE(TwitchGraphQLBaseIE):
+class TwitchCollectionIE(TwitchBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'

    _TESTS = [{
@@ -400,7 +416,9 @@ class TwitchCollectionIE(TwitchGraphQLBaseIE):
            entries, playlist_id=collection_id, playlist_title=title)


-class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+    _PAGE_LIMIT = 100
+
    def _entries(self, channel_name, *args):
        cursor = None
        variables_common = self._make_variables(channel_name, *args)
@@ -440,49 +458,6 @@ class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
            if not cursor or not isinstance(cursor, compat_str):
                break

-    # Deprecated kraken v5 API
-    def _entries_kraken(self, channel_name, broadcast_type, sort):
-        access_token = self._download_access_token(channel_name)
-        channel_id = self._extract_channel_id(access_token['token'], channel_name)
-        offset = 0
-        counter_override = None
-        for counter in itertools.count(1):
-            response = self._call_api(
-                'kraken/channels/%s/videos/' % channel_id,
-                channel_id,
-                'Downloading video JSON page %s' % (counter_override or counter),
-                query={
-                    'offset': offset,
-                    'limit': self._PAGE_LIMIT,
-                    'broadcast_type': broadcast_type,
-                    'sort': sort,
-                })
-            videos = response.get('videos')
-            if not isinstance(videos, list):
-                break
-            for video in videos:
-                if not isinstance(video, dict):
-                    continue
-                video_url = url_or_none(video.get('url'))
-                if not video_url:
-                    continue
-                yield {
-                    '_type': 'url_transparent',
-                    'ie_key': TwitchVodIE.ie_key(),
-                    'id': video.get('_id'),
-                    'url': video_url,
-                    'title': video.get('title'),
-                    'description': video.get('description'),
-                    'timestamp': unified_timestamp(video.get('published_at')),
-                    'duration': float_or_none(video.get('length')),
-                    'view_count': int_or_none(video.get('views')),
-                    'language': video.get('language'),
-                }
-            offset += self._PAGE_LIMIT
-            total = int_or_none(response.get('_total'))
-            if total and offset >= total:
-                break
-

 class TwitchVideosIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
@@ -724,7 +699,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
            playlist_title='%s - Collections' % channel_name)


-class TwitchStreamIE(TwitchGraphQLBaseIE):
+class TwitchStreamIE(TwitchBaseIE):
    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
                    https?://
@@ -814,8 +789,9 @@ class TwitchStreamIE(TwitchGraphQLBaseIE):
        if not stream:
            raise ExtractorError('%s is offline' % channel_name, expected=True)

-        access_token = self._download_access_token(channel_name)
-        token = access_token['token']
+        access_token = self._download_access_token(
+            channel_name, 'stream', 'channelName')
+        token = access_token['value']

        stream_id = stream.get('id') or channel_name
        query = {
@@ -826,7 +802,7 @@ class TwitchStreamIE(TwitchGraphQLBaseIE):
            'player': 'twitchweb',
            'playlist_include_framerate': 'true',
            'segment_preference': '4',
-            'sig': access_token['sig'].encode('utf-8'),
+            'sig': access_token['signature'].encode('utf-8'),
            'token': token.encode('utf-8'),
        }
        formats = self._extract_m3u8_formats(
@@ -912,8 +888,8 @@ class TwitchClipsIE(TwitchBaseIE):
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        clip = self._download_json(
-            'https://gql.twitch.tv/gql', video_id, data=json.dumps({
+        clip = self._download_base_gql(
+            video_id, {
                'query': '''{
  clip(slug: "%s") {
    broadcaster {
@@ -937,10 +913,7 @@ class TwitchClipsIE(TwitchBaseIE):
    }
    viewCount
  }
-}''' % video_id,
-            }).encode(), headers={
-                'Client-ID': self._CLIENT_ID,
-            })['data']['clip']
+}''' % video_id}, 'Downloading clip GraphQL')['data']['clip']

        if not clip:
            raise ExtractorError(
Author	SHA1	Message	Date
Sergey M․	4ae243fc6c	[twitch] Refactor	2021-01-06 03:54:33 +07:00
Sergey M․	8f20ad36dc	[twitch] Drop legacy kraken API v5 code altogether	2021-01-06 03:46:45 +07:00
Sergey M․	799c794947	[twitch:vod] Switch to GraphQL for video metadata	2021-01-06 03:34:36 +07:00
Remita Amine	1ae7ae0b96	[canvas] Fix VRT NU extraction(closes #26957 )(closes #27053 )	2021-01-05 21:17:39 +01:00
Sergey M․	ccc7112291	[twitch] Improve access token extraction and remove unused code (closes #27646 )	2021-01-06 02:11:49 +07:00
23rd	5b24f8f505	[twitch] Switch access token to GraphQL and refactor.	2021-01-06 02:11:42 +07:00
nixxo	fcd90d2583	[rai] Detect ContentItem in iframe (closes #12652 ) (#27673 ) Co-authored-by: Sergey M. <dstftw@gmail.com>	2021-01-06 01:49:56 +07:00
Remita Amine	8f757c7353	[ketnet] fix extraction(closes #27662 )	2021-01-05 18:17:04 +01:00
Remita Amine	be1a3f2d11	[dplay] Add support for Discovery+ domains (closes #27680)	2021-01-05 17:31:58 +01:00