[pbs] Add support for frontline videos (Closes #3414, closes #3405)
[youtube-dl] / youtube_dl / extractor / pbs.py
index e7e0042fb4e39a77061976078d4662a9cc17f522..ec95d070411f97e1dad2fde881c9e5f847caafc8 100644 (file)
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    US_RATINGS,
+)
 
 
 class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
             # Article with embedded player
            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
            # Player
-           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+           video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
         )
     '''
 
@@ -29,13 +32,21 @@ class PBSIE(InfoExtractor):
         },
     }
 
-    def _real_extract(self, url):
+    def _extract_ids(self, url):
         mobj = re.match(self._VALID_URL, url)
 
         presumptive_id = mobj.group('presumptive_id')
         display_id = presumptive_id
         if presumptive_id:
             webpage = self._download_webpage(url, display_id)
+
+            # frontline video embed
+            media_id = self._search_regex(
+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
+                webpage, 'frontline video ID', fatal=False, default=None)
+            if media_id:
+                return media_id, presumptive_id
+
             url = self._search_regex(
                 r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                 webpage, 'player URL')
@@ -54,9 +65,19 @@ class PBSIE(InfoExtractor):
             video_id = mobj.group('id')
             display_id = video_id
 
+        return video_id, display_id
+
+    def _real_extract(self, url):
+        video_id, display_id = self._extract_ids(url)
+
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
         info = self._download_json(info_url, display_id)
 
+        rating_str = info.get('rating')
+        if rating_str is not None:
+            rating_str = rating_str.rpartition('-')[2]
+        age_limit = US_RATINGS.get(rating_str)
+
         return {
             'id': video_id,
             'title': info['title'],
@@ -65,4 +86,5 @@ class PBSIE(InfoExtractor):
             'description': info['program'].get('description'),
             'thumbnail': info.get('image_url'),
             'duration': info.get('duration'),
+            'age_limit': age_limit,
         }