[godtv] Improve and add support for playlists (Closes #9608)
author Sergey M․ <dstftw@gmail.com>
Thu, 9 Jun 2016 14:29:41 +0000 (21:29 +0700)
committer Sergey M․ <dstftw@gmail.com>
Thu, 9 Jun 2016 14:29:41 +0000 (21:29 +0700)
youtube_dl/extractor/godtv.py

index 50f093acef3f54081737baa43beac31910e1c355..78d638cf07adb29f503753f69219730ff097c080 100644 (file)
@@ -1,13 +1,13 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
+from ..utils import js_to_json
 
 
 class GodTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
         'info_dict': {
             'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
@@ -18,12 +18,40 @@ class GodTVIE(InfoExtractor):
         'params': {
             'skip_download': True,
         }
-    }
+    }, {
+        'url': 'http://god.tv/playlist/bible-study',
+        'info_dict': {
+            'id': 'bible-study',
+        },
+        'playlist_mincount': 37,
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
-        ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id)
+
+        settings = self._parse_json(
+            self._search_regex(
+                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+                webpage, 'settings', default='{}'),
+            display_id, transform_source=js_to_json, fatal=False)
+
+        ooyala_id = None
+
+        if settings:
+            playlist = settings.get('playlist')
+            if playlist and isinstance(playlist, list):
+                entries = [
+                    OoyalaIE._build_url_result(video['content_id'])
+                    for video in playlist if video.get('content_id')]
+                if entries:
+                    return self.playlist_result(entries, display_id)
+            ooyala_id = settings.get('ooyala', {}).get('content_id')
+
+        if not ooyala_id:
+            ooyala_id = self._search_regex(
+                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
+                webpage, 'ooyala id', group='id')
 
         return OoyalaIE._build_url_result(ooyala_id)