[sbs] improve extraction (fixes #3811)
author remitamine <remitamine@gmail.com>
Thu, 17 Mar 2016 01:02:18 +0000 (02:02 +0100)
committer remitamine <remitamine@gmail.com>
Thu, 17 Mar 2016 01:07:06 +0000 (02:07 +0100)
- extract error messages
- force the platform SMIL URL (previously the manifest param
in the query was not respected, which made theplatform return non-working
mp4 files for some videos)

youtube_dl/extractor/sbs.py

index d6ee2d9e2245475d236c12fb6967af68558d8598..2f96477ca9f9cef7e684dfec1c3ff7fe5ac4fecf 100644 (file)
@@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    ExtractorError,
+)
 
 
 class SBSIE(InfoExtractor):
@@ -31,21 +35,28 @@ class SBSIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
-        webpage = self._download_webpage(
-            'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id)
-
-        player_params = self._parse_json(
-            self._search_regex(
-                r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'),
-            video_id)
+        player_params = self._download_json(
+            'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
+
+        error = player_params.get('error')
+        if error:
+            error_message = 'Sorry, The video you are looking for does not exist.'
+            video_data = error.get('results') or {}
+            error_code = error.get('errorCode')
+            if error_code == 'ComingSoon':
+                error_message = '%s is not yet available.' % video_data.get('title', '')
+            elif error_code in ('Forbidden', 'intranetAccessOnly'):
+                error_message = 'Sorry, This video cannot be accessed via this website'
+            elif error_code == 'Expired':
+                error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
 
         urls = player_params['releaseUrls']
-        theplatform_url = (urls.get('progressive') or urls.get('standard') or
-                           urls.get('html') or player_params['relatedItemsURL'])
+        theplatform_url = (urls.get('progressive') or urls.get('html') or
+                           urls.get('standard') or player_params['relatedItemsURL'])
 
         return {
             '_type': 'url_transparent',
             'id': video_id,
-            'url': theplatform_url,
+            'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
         }