X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fpbs.py;h=8889e4a1aaa3e41f49a63b53c010cf69d0842b1b;hb=685e87b61f785b096745cda5ea64ea0b950f56d1;hp=0727e381b52e3c2b4e520d38c88762b285604c3b;hpb=7a6d33a9a5390d0dab7e9162d6b2552cb0fe23a5;p=youtube-dl

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 0727e381b..8889e4a1a 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     float_or_none,
     js_to_json,
+    orderedSet,
     strip_jsonp,
     strip_or_none,
     unified_strdate,
@@ -188,7 +189,7 @@ class PBSIE(InfoExtractor):
            # Direct video URL
            (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
            # Article with embedded player (or direct video)
-           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
+           (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
            (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
         )
@@ -264,6 +265,13 @@ class PBSIE(InfoExtractor):
             },
             'playlist_count': 2,
         },
+        {
+            'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
+            'info_dict': {
+                'id': 'great-war',
+            },
+            'playlist_count': 3,
+        },
         {
             'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
             'info_dict': {
@@ -337,6 +345,21 @@ class PBSIE(InfoExtractor):
                 'formats': 'mincount:8',
             },
         },
+        {
+            # https://github.com/rg3/youtube-dl/issues/13801
+            'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
+            'info_dict': {
+                'id': '3003333873',
+                'ext': 'mp4',
+                'title': 'PBS NewsHour - full episode July 31, 2017',
+                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+                'duration': 3265,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
             'only_matching': True,
@@ -382,10 +405,10 @@ class PBSIE(InfoExtractor):
             # tabbed frontline videos
             MULTI_PART_REGEXES = (
                 r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
-                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+                r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
             )
             for p in MULTI_PART_REGEXES:
-                tabbed_videos = re.findall(p, webpage)
+                tabbed_videos = orderedSet(re.findall(p, webpage))
                 if tabbed_videos:
                     return tabbed_videos, presumptive_id, upload_date, description
 
@@ -425,6 +448,9 @@ class PBSIE(InfoExtractor):
                 if url:
                     break
 
+            if not url:
+                url = self._og_search_url(webpage)
+
             mobj = re.match(self._VALID_URL, url)
 
         player_id = mobj.group('player_id')