[pbs] Add test for #7059
[youtube-dl] / youtube_dl / extractor / pbs.py
index ccbe2a9f3967a1285310c58d46619bebe18367ef..6923c609481a83401fa7f0fa030923d080001a89 100644 (file)
@@ -92,6 +92,7 @@ class PBSIE(InfoExtractor):
                 'duration': 3172,
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
+                'age_limit': 10,
             },
             'params': {
                 'skip_download': True,  # requires ffmpeg
@@ -133,6 +134,24 @@ class PBSIE(InfoExtractor):
             'params': {
                 'skip_download': True,  # requires ffmpeg
             },
+        },
+        {
+            # Video embedded in an iframe whose attribute value contains angle brackets (e.g.
+            # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
+            # https://github.com/rg3/youtube-dl/issues/7059)
+            'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+            'info_dict': {
+                'id': '2365546844',
+                'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
+                'ext': 'mp4',
+                'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
+                'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
+                'duration': 1480,
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': True,  # requires ffmpeg
+            },
         }
     ]
 
@@ -166,7 +185,7 @@ class PBSIE(InfoExtractor):
                 return media_id, presumptive_id, upload_date
 
             url = self._search_regex(
-                r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
+                r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
                 webpage, 'player URL')
             mobj = re.match(self._VALID_URL, url)
 
@@ -247,11 +266,11 @@ class PBSIE(InfoExtractor):
                 'url': closed_captions_url,
             }]
 
-        # video.pbs.org video.pbs.org/videoInfo/... frequently provides an obscure 'title' value, like
-        # 'Full Episode', 'Episode 5', etc. prepend program->title
+        # info['title'] is often incomplete (e.g. 'Full Episode', 'Episode 5', etc.).
+        # Try to turn it into the 'program - title' naming scheme if possible.
         alt_title = info.get('program', {}).get('title')
         if alt_title:
-            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-\:]+', '', info['title'])
+            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
 
         return {
             'id': video_id,