Use _og_search_title as the fallback playlist title; use the digitalconcerthall API for individual playlist item titles

2025-10-16 21:28:37 +09:00 · 2020-04-04 19:51:09 -04:00
parent cddf55300d
commit 32fdbd27fa
1 changed files with 14 additions and 11 deletions
--- a/youtube_dl/extractor/digitalconcerthall.py
+++ b/youtube_dl/extractor/digitalconcerthall.py
@@ -4,10 +4,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    clean_html,
-    get_element_by_id,
-)


 class DigitalConcertHallIE(InfoExtractor):
@@ -32,18 +28,26 @@ class DigitalConcertHallIE(InfoExtractor):
        self.to_screen('[debug] %s' % args)

    def _real_extract(self, url):
+        MAX_TITLE_LENGTH = 128
        language, video_id = re.match(self._VALID_URL, url).groups()
        if not language:
            language = 'en'
        self.debug_out("url: " + url + " video_id: " + video_id + " language: " + language)
        webpage = self._download_webpage(url, video_id)
-        playlist_title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        playlist_title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') \
+           or self._og_search_title(webpage)
        self.debug_out("playlist_title: " + playlist_title)

        # this returns JSON containing the urls of the playlist
+        # Note: you must be authenticated to get the stream info
        playlist_dict = self._download_json(
            'https://www.digitalconcerthall.com/json_services/get_stream_urls?id='
-            + video_id + "&language=" + language, video_id).get('urls')
+            + video_id + "&language=" + language, video_id, note='Downloading Stream JSON').get('urls')
+        # use the API to get other information about the concert
+        vid_info_dict = self._download_json(
+            'https://api.digitalconcerthall.com/v2/concert/'
+            + video_id, video_id, headers={'Accept': 'application/json',
+                                           'Accept-Language': language}).get('_embedded')

        entries = []
        for key in playlist_dict:
@@ -53,11 +57,10 @@ class DigitalConcertHallIE(InfoExtractor):
            formats = self._extract_m3u8_formats(
                m3u8_url, key, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
            self.debug_out(formats)
-            # the div with id=key contains the video title
-            vid_info_div = clean_html(get_element_by_id(key, webpage))
-            self.debug_out("vid_info_div:\n" + vid_info_div)
-            title = re.sub(r'\s+', ' ', vid_info_div) \
-                or self._og_search_title(webpage)
+            title = [vid_info_dict.get(x)[0].get('title',"unknown title") for x in vid_info_dict
+                     if vid_info_dict.get(x)[0].get('id') == key][0]
+            # avoid filenames that exceed filesystem limits
+            title = (title[:MAX_TITLE_LENGTH] + '..') if len(title) > MAX_TITLE_LENGTH else title
            self.debug_out("title: " + title)
            entries.append({
                'id': key,