[pbs] Allow empty attribute in player regex
[youtube-dl] / youtube_dl / extractor / pbs.py
index ccbe2a9f3967a1285310c58d46619bebe18367ef..66b3dda47a098d93261cadf40fbdee8a34c7c6ad 100644 (file)
@@ -92,6 +92,7 @@ class PBSIE(InfoExtractor):
                 'duration': 3172,
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
+                'age_limit': 10,
             },
             'params': {
                 'skip_download': True,  # requires ffmpeg
@@ -166,7 +167,7 @@ class PBSIE(InfoExtractor):
                 return media_id, presumptive_id, upload_date
 
             url = self._search_regex(
-                r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
+                r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
                 webpage, 'player URL')
             mobj = re.match(self._VALID_URL, url)
 
@@ -247,11 +248,11 @@ class PBSIE(InfoExtractor):
                 'url': closed_captions_url,
             }]
 
-        # video.pbs.org video.pbs.org/videoInfo/... frequently provides an obscure 'title' value, like
-        # 'Full Episode', 'Episode 5', etc. prepend program->title
+        # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc.)
+        # Try turning it into the 'program - title' naming scheme if possible
         alt_title = info.get('program', {}).get('title')
         if alt_title:
-            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-\:]+', '', info['title'])
+            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
 
         return {
             'id': video_id,