From 39d4c1be4d601a3297157315652637ca52000965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Jun 2017 23:29:45 +0700 Subject: [PATCH] [youtube] Improve chapters extraction (closes #13247) --- test/test_youtube_chapters.py | 7 +++++++ youtube_dl/extractor/youtube.py | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py index cb12f8384..324ca8525 100644 --- a/test/test_youtube_chapters.py +++ b/test/test_youtube_chapters.py @@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase): 'title': '3 - Из серпов луны...[Iz serpov luny]', }] ), + ( + # https://www.youtube.com/watch?v=xZW70zEasOk + # time point more than duration + '''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''', + 283, + [] + ), ] def test_youtube_chapters(self): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 40ac1a019..bf4f4e139 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1353,10 +1353,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): start_time = parse_duration(time_point) if start_time is None: continue + if start_time > duration: + break end_time = (duration if next_num == len(chapter_lines) else parse_duration(chapter_lines[next_num][1])) if end_time is None: continue + if end_time > duration: + end_time = duration + if start_time > end_time: + break chapter_title = re.sub( r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-') chapter_title = re.sub(r'\s+', ' ', chapter_title)