[pbs] Improve multipart video support (closes #12981)

author Sergey M․ <dstftw@gmail.com>
Thu, 4 May 2017 15:42:49 +0000 (22:42 +0700)
committer Sergey M․ <dstftw@gmail.com>
Thu, 4 May 2017 15:42:49 +0000 (22:42 +0700)
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py

index 0727e381b52e3c2b4e520d38c88762b285604c3b..16cc667d025514f64a66852d7e4ad0e8952297b8 100644 (file)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -10,6 +10,7 @@ from ..utils import (
      int_or_none,
      float_or_none,
      js_to_json,
+    orderedSet,
      strip_jsonp,
      strip_or_none,
      unified_strdate,
@@ -264,6 +265,13 @@ class PBSIE(InfoExtractor):
              },
              'playlist_count': 2,
          },
+        {
+            'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
+            'info_dict': {
+                'id': 'great-war',
+            },
+            'playlist_count': 3,
+        },
          {
              'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
              'info_dict': {
@@ -382,10 +390,10 @@ class PBSIE(InfoExtractor):
              # tabbed frontline videos
              MULTI_PART_REGEXES = (
                  r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
-                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+                r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
              )
              for p in MULTI_PART_REGEXES:
-                tabbed_videos = re.findall(p, webpage)
+                tabbed_videos = orderedSet(re.findall(p, webpage))
                  if tabbed_videos:
                      return tabbed_videos, presumptive_id, upload_date, description
author	Sergey M․ <dstftw@gmail.com>
	Thu, 4 May 2017 15:42:49 +0000 (22:42 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Thu, 4 May 2017 15:42:49 +0000 (22:42 +0700)