[FunnyOrDie] simplify
[youtube-dl] / youtube_dl / extractor / funnyordie.py
index 90ab05338c3b3aa0043f7e6d9c3414896b56a2e3..cdd3baf80065c0bc89f2116dfedbc30f763b94fe 100644 (file)
@@ -1,13 +1,34 @@
+from __future__ import unicode_literals
+
+import json
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
 
 
 class FunnyOrDieIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+    _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
+    _TESTS = [{
+        'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
+        'md5': 'f647e9e90064b53b6e046e75d0241fbd',
+        'info_dict': {
+            'id': '0732f586d7',
+            'ext': 'mp4',
+            'title': 'Heart-Shaped Box: Literal Video Version',
+            'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.funnyordie.com/embed/e402820827',
+        'md5': '0e0c5a7bf45c52b95cd16aa7f28be0b6',
+        'info_dict': {
+            'id': 'e402820827',
+            'ext': 'mp4',
+            'title': 'Please Use This Song (Jon Lajoie)',
+            'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -15,20 +36,19 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
-            webpage, u'video URL', flags=re.DOTALL)
-
-        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
-            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
+        video_url = self._search_regex(
+            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
+            webpage, 'video URL', flags=re.DOTALL)
 
-        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
-            webpage, u'description', fatal=False, flags=re.DOTALL)
+        post_json = self._search_regex(
+            r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
+        post = json.loads(post_json)
 
-        info = {
+        return {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
-            'title': title,
-            'description': video_description,
+            'title': post['name'],
+            'description': post.get('description'),
+            'thumbnail': post.get('picture'),
         }
-        return [info]