From 61cead323579fd8d15d39566dae708f4b53e4a76 Mon Sep 17 00:00:00 2001
From: Henrik Heimbuerger <henrik@heimbuerger.de>
Date: Thu, 16 Apr 2020 04:35:05 +0200
Subject: [PATCH] [nebula] Add better channel title extraction (refs #21258)

---
 youtube_dl/extractor/nebula.py | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/nebula.py b/youtube_dl/extractor/nebula.py
index 6aa4e1da4..828ea1c6f 100644
--- a/youtube_dl/extractor/nebula.py
+++ b/youtube_dl/extractor/nebula.py
@@ -105,13 +105,32 @@ class NebulaIE(InfoExtractor):
 
         return video_url
 
-    def _extract_uploader(self, video_meta):
+    def _extract_channel(self, video_meta):
         """
-        Nebula doesn't really seem to have the concept of an uploader internally, videos are often organized
-        more like a (TV) series than by uploader. But in the example case, Lindsay Ellis is the creator, so
-        I'll go with this for now.
+        Extract the channel title, by going through the list of categories and finding the first value of the
+        first category that has a value.
+
+        I know this look like a terrible approach. But actually, it's just reproducing the behavior of the
+        React code the Nebula frontend uses (as of 2020-04-07):
+
+            let channel;
+            if (video && video.categories && video.categories.length) {
+                const channelTitle = video.categories.map((category) => (category.value[0]))
+                                                     .filter((title) => (!!title))[0];
+                channel = getChannelByTitle(state, { title: channelTitle });
+            }
+
+        Basically, it finds the first (truthy) value in the category list and that's assumed to be the
+        channel title. And then the channel details (e.g. the URL) are looked up by title (!) (not by any
+        kind of ID) via an additional API call.
+
+        TODO: Implement the API calls giving us the channel list, so that we can do the title lookup and then figure out the channel URL
         """
-        return video_meta['categories'][0]['value'][0]
+        categories = video_meta['categories']
+        for category in categories:
+            if category['value']:
+                return category['value'][0]
+        return None
 
     def _real_extract(self, url):
         # FIXME: a workaround for testing, because I couldn't figure out how to supply a cookiejar when running the unit tests
@@ -127,6 +146,7 @@ class NebulaIE(InfoExtractor):
         # extract the state object from the webpage, and then retrieve video meta data from it
         state_object = self._extract_state_object(webpage, display_id)
         video_id, video_meta = self._extract_video_metadata(state_object, display_id)
+        channel_title = self._extract_channel(video_meta)
 
         # extract the video URL from the webpage
         video_url = self._extract_video_url(webpage, state_object, video_id)
@@ -146,7 +166,6 @@ class NebulaIE(InfoExtractor):
             'title': video_meta['title'],
             'description': video_meta['description'],
             'timestamp': parse_iso8601(video_meta['published_at']),
-            #'uploader': self._extract_uploader(video_meta),   # TODO: removed because unreliable/sometimes incorrect
             'thumbnails': [
                 {
                     'id': tn['name'],   # this appears to be null in all cases I've seen
@@ -156,8 +175,9 @@ class NebulaIE(InfoExtractor):
                 } for tn in video_meta['thumbnails']
             ],
             'duration': video_meta['duration'],
+            'channel': channel_title,
+            'uploader': channel_title,   # we chose here to declare the channel name as the 'uploader' -- that's certainly arguable, as sometimes it's more of a series
             # TODO: uploader_url: the video page clearly links to this (in the example case: /lindsayellis), but I cannot figure out where it gets it from!
-            # TODO: channel
             # TODO: channel_id
             # TODO: channel_url
         }