From 59e9536b9838c4616330ceea54036a2846eb19a3 Mon Sep 17 00:00:00 2001 From: fourjr <28086837+fourjr@users.noreply.github.com> Date: Sun, 22 Nov 2020 18:03:12 +0800 Subject: [PATCH] [channelnewsasia] fix tests, more relaxed regex --- youtube_dl/extractor/channelnewsasia.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/channelnewsasia.py b/youtube_dl/extractor/channelnewsasia.py index 4739bbb58..7a59192d7 100644 --- a/youtube_dl/extractor/channelnewsasia.py +++ b/youtube_dl/extractor/channelnewsasia.py @@ -9,7 +9,7 @@ class ChannelNewsAsiaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?channelnewsasia\.com/(?:(?:-|\w|\d)+)/(?:(?:-|\w|\d)+)/(?P<id>(?:-|\w|\d)+)' _TESTS = [ { - 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech', + 'url': 'https://www.channelnewsasia.com/news/video-on-demand/wizards-of-tech/wizards-of-tech-body-13515106', 'md5': 'ed9ed143052f0da3ee8a8fa59ba16870', 'info_dict': { 'id': 'w0ZWRzajE6qDPXDb7DSeaOCJ3bJ3GDqC', @@ -34,11 +34,15 @@ class ChannelNewsAsiaIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - url_obj = ( - re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL) - or re.search(r'(?:\d|\w|-)+)".*', webpage, flags=re.DOTALL)) + + ooyala_id = ( + self._search_regex( + r'id="ooyala-\d+-((?:\d|\w|-)+)--\d+', webpage, 'ooyala id', + default=None, fatal=False) + or self._search_regex( + r'video-asset-id="((?:\d|\w|-)+)', webpage, 'ooyala id', + default=None, fatal=False)) - ooyala_id = url_obj.group('id') return self.url_result( 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id )