[pbs] Fix multi part videos extraction
author: Sergey M․ <dstftw@gmail.com>
Thu, 11 Feb 2016 16:02:37 +0000 (22:02 +0600)
committer: Sergey M․ <dstftw@gmail.com>
Thu, 11 Feb 2016 16:02:37 +0000 (22:02 +0600)
youtube_dl/extractor/pbs.py

index c159c007186a740c972059f37a1bbfb4451c49a1..cca012953d01d173e638924b4992015625815291 100644 (file)
@@ -366,10 +366,14 @@ class PBSIE(InfoExtractor):
                 webpage, 'upload date', default=None))
 
             # tabbed frontline videos
-            tabbed_videos = re.findall(
-                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
-            if tabbed_videos:
-                return tabbed_videos, presumptive_id, upload_date
+            MULTI_PART_REGEXES = (
+                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
+                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+            )
+            for p in MULTI_PART_REGEXES:
+                tabbed_videos = re.findall(p, webpage)
+                if tabbed_videos:
+                    return tabbed_videos, presumptive_id, upload_date
 
             MEDIA_ID_REGEXES = [
                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed