diff --git a/youtube_dl/extractor/digitalconcerthall.py b/youtube_dl/extractor/digitalconcerthall.py index b010be267..d64ed3e24 100644 --- a/youtube_dl/extractor/digitalconcerthall.py +++ b/youtube_dl/extractor/digitalconcerthall.py @@ -4,6 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_id, + get_element_by_attribute, + get_element_by_class, +) class DigitalConcertHallIE(InfoExtractor): @@ -18,23 +25,23 @@ class DigitalConcertHallIE(InfoExtractor): 'ext': 'mp4', 'title': 'Video title goes here', 'thumbnail': r're:^https?://.*/images/core/Phil.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } },] + def debug_out(self, args): + if not self._downloader.params.get('verbose', False): + return + + self.to_screen('[debug] %s' % args) + def _real_extract(self, url): - #video_id = self._match_id(url) language, video_id = re.match(self._VALID_URL, url).groups() if not language: language = 'en' - print("url: ", url, " video_id: ", video_id, " language: ", language, "\n") + self.debug_out("url: " + url + " video_id: " + video_id + " language: " + language) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') - print("title: ", title, "\n") + self.to_screen("title: " + title) # this returns JSON, which contains the urls of the playlist #video_data = self._download_webpage( @@ -44,11 +51,15 @@ class DigitalConcertHallIE(InfoExtractor): entries = [] for key in playlist_dict: - print("key: ", key, "\n") + self.debug_out("key: " + key) m3u8_url = playlist_dict[key][0]['url'] - print("key url: ", m3u8_url, "\n") + self.debug_out("key url: " + m3u8_url) formats = self._extract_m3u8_formats(m3u8_url, key, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - print("formats:\n", formats) + self.debug_out(formats) + vid_info_div = clean_html(get_element_by_id(key, webpage)) + self.debug_out("vid_info_div:\n" + vid_info_div) + title = re.sub('\s+', ' ', vid_info_div) + self.to_screen("title: " + title ) entries.append({ 'id': key, 'title': title,