[theplatform] Extract captions (Closes #4495)
author Sergey M․ <dstftw@gmail.com>
Wed, 17 Dec 2014 14:20:40 +0000 (20:20 +0600)
committer Sergey M․ <dstftw@gmail.com>
Wed, 17 Dec 2014 14:20:40 +0000 (20:20 +0600)
youtube_dl/extractor/theplatform.py

index af6ef0033af061713d81785734381a6beffc2c6d..110ed976de3d1a3a31c8c9a88cd976482f7d78ca 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
 import json
 
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_str,
 )
@@ -16,7 +16,7 @@ from ..utils import (
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
 
 
-class ThePlatformIE(InfoExtractor):
+class ThePlatformIE(SubtitlesInfoExtractor):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@@ -66,6 +66,20 @@ class ThePlatformIE(InfoExtractor):
         info_json = self._download_webpage(info_url, video_id)
         info = json.loads(info_json)
 
+        subtitles = {}
+        captions = info.get('captions')
+        if isinstance(captions, list):
+            for caption in captions:
+                lang, src = caption.get('lang'), caption.get('src')
+                if lang and src:
+                    subtitles[lang] = src
+
+        if self._downloader.params.get('listsubtitles', False):
+            self._list_available_subtitles(video_id, subtitles)
+            return
+
+        subtitles = self.extract_subtitles(video_id, subtitles)
+
         head = meta.find(_x('smil:head'))
         body = meta.find(_x('smil:body'))
 
@@ -117,6 +131,7 @@ class ThePlatformIE(InfoExtractor):
         return {
             'id': video_id,
             'title': info['title'],
+            'subtitles': subtitles,
             'formats': formats,
             'description': info['description'],
             'thumbnail': info['defaultThumbnailUrl'],