[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / escapist.py
index 802943dc2f765110104dbfe16d4a038b5ef876e9..4cd815ebc4c1aabfc321cf5f3c74f8328be81d1f 100644 (file)
@@ -1,14 +1,11 @@
 from __future__ import unicode_literals
 
-import json
-
 from .common import InfoExtractor
-from ..compat import compat_urllib_request
-
 from ..utils import (
     determine_ext,
     clean_html,
     int_or_none,
+    float_or_none,
 )
 
 
@@ -36,52 +33,64 @@ def _decrypt_config(key, string):
 
 
 class EscapistIE(InfoExtractor):
-    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+    _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
-        'md5': 'c6793dbda81388f4264c1ba18684a74d',
+        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
         'info_dict': {
             'id': '6618',
             'ext': 'mp4',
             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
             'title': "Breaking Down Baldur's Gate",
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 264,
+            'uploader': 'The Escapist',
         }
     }, {
         'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
-        'md5': 'cf8842a8a46444d241f9a9980d7874f2',
+        'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
         'info_dict': {
             'id': '10044',
             'ext': 'mp4',
             'description': 'This week, Zero Punctuation reviews Evolve.',
             'title': 'Evolve - One vs Multiplayer',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 304,
+            'uploader': 'The Escapist',
         }
+    }, {
+        'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
+        'only_matching': True,
+    }, {
+        'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        imsVideo = self._parse_json(
+        ims_video = self._parse_json(
             self._search_regex(
                 r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
             video_id)
-        video_id = imsVideo['videoID']
-        key = imsVideo['hash']
+        video_id = ims_video['videoID']
+        key = ims_video['hash']
+
+        config = self._download_webpage(
+            'http://www.escapistmagazine.com/videos/vidconfig.php',
+            video_id, 'Downloading video config', headers={
+                'Referer': url,
+            }, query={
+                'videoID': video_id,
+                'hash': key,
+            })
 
-        config_req = compat_urllib_request.Request(
-            'http://www.escapistmagazine.com/videos/'
-            'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
-        config_req.add_header('Referer', url)
-        config = self._download_webpage(config_req, video_id, 'Downloading video config')
+        data = self._parse_json(_decrypt_config(key, config), video_id)
 
-        data = json.loads(_decrypt_config(key, config))
+        video_data = data['videoData']
 
-        title = clean_html(data['videoData']['title'])
-        duration = data['videoData']['duration'] / 1000
+        title = clean_html(video_data['title'])
 
         formats = [{
             'url': video['src'],
@@ -94,7 +103,9 @@ class EscapistIE(InfoExtractor):
             'id': video_id,
             'formats': formats,
             'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
             'description': self._og_search_description(webpage),
-            'duration': duration,
+            'duration': float_or_none(video_data.get('duration'), 1000),
+            'uploader': video_data.get('publisher'),
+            'series': video_data.get('show'),
         }