[youtube] Improve chapters extraction (closes #13247)
author Sergey M․ <dstftw@gmail.com>
Thu, 1 Jun 2017 16:29:45 +0000 (23:29 +0700)
committer Sergey M․ <dstftw@gmail.com>
Thu, 1 Jun 2017 16:29:45 +0000 (23:29 +0700)
test/test_youtube_chapters.py
youtube_dl/extractor/youtube.py

index cb12f83848949524cffb88e53b4f25d0be8eb42e..324ca852578531757d9964f2c90cf6f8e1c4d3b1 100644 (file)
@@ -254,6 +254,13 @@ class TestYoutubeChapters(unittest.TestCase):
                 'title': '3 - Из серпов луны...[Iz serpov luny]',
             }]
         ),
+        (
+            # https://www.youtube.com/watch?v=xZW70zEasOk
+            # the only time point (13:30) exceeds the video duration, so no chapters are expected
+            '''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
+            283,
+            []
+        ),
     ]
 
     def test_youtube_chapters(self):
index 40ac1a0191a320fe521071fbbf7aac620aafccdd..bf4f4e139b1973eef02b5ea4867b269895a0832b 100644 (file)
@@ -1353,10 +1353,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             start_time = parse_duration(time_point)
             if start_time is None:
                 continue
+            if start_time > duration:
+                break
             end_time = (duration if next_num == len(chapter_lines)
                         else parse_duration(chapter_lines[next_num][1]))
             if end_time is None:
                 continue
+            if end_time > duration:
+                end_time = duration
+            if start_time > end_time:
+                break
             chapter_title = re.sub(
                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
             chapter_title = re.sub(r'\s+', ' ', chapter_title)