X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fpbs.py;h=16cc667d025514f64a66852d7e4ad0e8952297b8;hb=9cafc3fd8b54b9b91a145cddf9e4db0bd59e1b5f;hp=6166dc2adf41a431c79244e01256ab94f0dcb704;hpb=a1ebfd449486ae5c4e8c38a38e6d5142ce4740e6;p=youtube-dl

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 6166dc2ad..16cc667d0 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -8,7 +8,9 @@ from ..utils import (
     ExtractorError,
     determine_ext,
     int_or_none,
+    float_or_none,
     js_to_json,
+    orderedSet,
     strip_jsonp,
     strip_or_none,
     unified_strdate,
@@ -263,6 +265,13 @@ class PBSIE(InfoExtractor):
             },
             'playlist_count': 2,
         },
+        {
+            'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
+            'info_dict': {
+                'id': 'great-war',
+            },
+            'playlist_count': 3,
+        },
         {
             'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
             'info_dict': {
@@ -381,10 +390,10 @@ class PBSIE(InfoExtractor):
             # tabbed frontline videos
             MULTI_PART_REGEXES = (
                 r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
-                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+                r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
             )
             for p in MULTI_PART_REGEXES:
-                tabbed_videos = re.findall(p, webpage)
+                tabbed_videos = orderedSet(re.findall(p, webpage))
                 if tabbed_videos:
                     return tabbed_videos, presumptive_id, upload_date, description
 
@@ -464,6 +473,7 @@ class PBSIE(InfoExtractor):
                     redirects.append(redirect)
                     redirect_urls.add(redirect_url)
 
+        chapters = []
         # Player pages may also serve different qualities
         for page in ('widget/partnerplayer', 'portalplayer'):
             player = self._download_webpage(
@@ -479,6 +489,20 @@ class PBSIE(InfoExtractor):
                     extract_redirect_urls(video_info)
                     if not info:
                         info = video_info
+                if not chapters:
+                    for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+                        chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+                        if not chapter:
+                            continue
+                        start_time = float_or_none(chapter.get('start_time'), 1000)
+                        duration = float_or_none(chapter.get('duration'), 1000)
+                        if start_time is None or duration is None:
+                            continue
+                        chapters.append({
+                            'start_time': start_time,
+                            'end_time': start_time + duration,
+                            'title': chapter.get('title'),
+                        })
 
         formats = []
         http_url = None
@@ -588,4 +612,5 @@ class PBSIE(InfoExtractor):
             'upload_date': upload_date,
             'formats': formats,
             'subtitles': subtitles,
+            'chapters': chapters,
         }