[twitch] Rename extractor and support channel videos

2025-10-20 07:08:37 +09:00 · 2014-10-17 22:58:18 +07:00
parent 4698f0d858
commit 46fd0dd5a5
2 changed files with 28 additions and 76 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -173,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
 from .keezmovies import KeezMoviesIE
@@ -395,6 +394,7 @@ from .tutv import TutvIE
 from .tvigle import TvigleIE
 from .tvp import TvpIE
 from .tvplay import TVPlayIE
 from .twitch import TwitchIE
 from .ubu import UbuIE
 from .udemy import (
    UdemyIE,
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,26 +1,20 @@
 from __future__ import unicode_literals
 import itertools
 import json
 import os
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_str,
    ExtractorError,
    formatSeconds,
    parse_iso8601,
 )
-class JustinTVIE(InfoExtractor):
+class TwitchIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?
-
+    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
@@ -28,9 +22,8 @@ class JustinTVIE(InfoExtractor):
        )
        /?(?:\#.*)?$
        """
-    _JUSTIN_PAGE_LIMIT = 100
+    _PAGE_LIMIT = 100
-    IE_NAME = 'justin.tv'
+    _API_BASE = 'https://api.twitch.tv'
    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
        'md5': 'ecaa8a790c22a40770901460af191c9a',
@@ -44,39 +37,6 @@ class JustinTVIE(InfoExtractor):
        }
    }
    _API_BASE = 'https://api.twitch.tv'
    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id, counter):
        info_json = self._download_webpage(
            url, video_id,
            'Downloading video info JSON on page %d' % counter,
            'Unable to download video info JSON %d' % counter)
        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
            raise ExtractorError('Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
                    'id': compat_str(video_id),
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)
    def _handle_error(self, response):
        if not isinstance(response, dict):
            return
@@ -87,25 +47,21 @@ class JustinTVIE(InfoExtractor):
                expected=True)
    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
-        response = super(JustinTVIE, self)._download_json(url, video_id, note)
+        response = super(TwitchIE, self)._download_json(url, video_id, note)
        self._handle_error(response)
        return response
    def _extract_media(self, item, item_id):
        ITEMS = {
            'a': 'video',
            'c': 'chapter',
        }
        info = self._extract_info(self._download_json(
            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s info JSON' % ITEMS[item]))
        response = self._download_json(
            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
            'Downloading %s playlist JSON' % ITEMS[item])
        entries = []
        chunks = response['chunks']
        qualities = list(chunks.keys())
@@ -144,14 +100,7 @@ class JustinTVIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-
+        if mobj.group('chapterid'):
        api_base = 'http://api.twitch.tv'
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            return self._extract_media('c', mobj.group('chapterid'))
            """
@@ -203,22 +152,25 @@ class JustinTVIE(InfoExtractor):
            }
            return info
            """
-        else:
+        elif mobj.group('videoid'):
            return self._extract_media('a', mobj.group('videoid'))
-
+        elif mobj.group('channelid'):
-        entries = []
+            channel_id = mobj.group('channelid')
-        offset = 0
+            info = self._download_json(
-        limit = self._JUSTIN_PAGE_LIMIT
+                '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
-        for counter in itertools.count(1):
+                channel_id, 'Downloading channel info JSON')
-            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
+            channel_name = info.get('display_name') or info.get('name')
-            page_count, page_info = self._parse_page(
+            entries = []
-                page_url, video_id, counter)
+            offset = 0
-            entries.extend(page_info)
+            limit = self._PAGE_LIMIT
-            if not paged or page_count != limit:
+            for counter in itertools.count(1):
-                break
+                response = self._download_json(
-            offset += limit
+                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
-        return {
+                    % (self._API_BASE, channel_id, offset, limit),
-            '_type': 'playlist',
+                    channel_id, 'Downloading channel videos JSON page %d' % counter)
-            'id': video_id,
+                videos = response['videos']
-            'entries': entries,
+                if not videos:
-        }
+                    break
                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
                offset += limit
            return self.playlist_result(entries, channel_id, channel_name)