Merge branch 'paged-lists'
[youtube-dl] / youtube_dl / extractor / wimp.py
index 84f065a3df97c4d4b1c40c37cac5d4494d6ff8f6..9a6bb0c768a046e96bac0aa3dd39875821119e83 100644 (file)
@@ -1,28 +1,33 @@
+from __future__ import unicode_literals
+
 import re
-import base64
 
 from .common import InfoExtractor
 
 
 class WimpIE(InfoExtractor):
     _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
+    _TEST = {
+        'url': 'http://www.wimp.com/deerfence/',
+        'file': 'deerfence.flv',
+        'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
+        'info_dict': {
+            "title": "Watch Till End: Herd of deer jump over a fence.",
+            "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
+        }
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group(1)
         webpage = self._download_webpage(url, video_id)
-        title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title')
-        thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')
-        googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')
-        googleString = base64.b64decode(googleString).decode('ascii')
-        final_url = self._search_regex('","(.*?)"', googleString,'final video url')
-        ext = final_url.rpartition(u'.')[2]
-
-        return [{
-            'id':        video_id,
-            'url':       final_url,
-            'ext':       ext,
-            'title':     title,
-            'thumbnail': thumbnail_url,
-        }]
+        video_url = self._search_regex(
+            r's1\.addVariable\("file",\s*"([^"]+)"\);', webpage, 'video URL')
 
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'description': self._og_search_description(webpage),
+        }