[pbs] Add support for Frontline videos (Closes #3414, #3405)

[youtube-dl] / youtube_dl / extractor / pbs.py
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py

index e7e0042fb4e39a77061976078d4662a9cc17f522..ec95d070411f97e1dad2fde881c9e5f847caafc8 100644 (file)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..utils import (
+    US_RATINGS,
+)
  
  
  class PBSIE(InfoExtractor):
@@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
              # Article with embedded player
             (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
             # Player
-           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+           video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
          )
      '''
  
@@ -29,13 +32,21 @@ class PBSIE(InfoExtractor):
          },
      }
  
-    def _real_extract(self, url):
+    def _extract_ids(self, url):
          mobj = re.match(self._VALID_URL, url)
  
          presumptive_id = mobj.group('presumptive_id')
          display_id = presumptive_id
          if presumptive_id:
              webpage = self._download_webpage(url, display_id)
+
+            # frontline video embed
+            media_id = self._search_regex(
+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
+                webpage, 'frontline video ID', fatal=False, default=None)
+            if media_id:
+                return media_id, presumptive_id
+
              url = self._search_regex(
                  r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                  webpage, 'player URL')
@@ -54,9 +65,19 @@ class PBSIE(InfoExtractor):
              video_id = mobj.group('id')
              display_id = video_id
  
+        return video_id, display_id
+
+    def _real_extract(self, url):
+        video_id, display_id = self._extract_ids(url)
+
          info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
          info = self._download_json(info_url, display_id)
  
+        rating_str = info.get('rating')
+        if rating_str is not None:
+            rating_str = rating_str.rpartition('-')[2]
+        age_limit = US_RATINGS.get(rating_str)
+
          return {
              'id': video_id,
              'title': info['title'],
@@ -65,4 +86,5 @@ class PBSIE(InfoExtractor):
              'description': info['program'].get('description'),
              'thumbnail': info.get('image_url'),
              'duration': info.get('duration'),
+            'age_limit': age_limit,
          }