cdd3baf80065c0bc89f2116dfedbc30f763b94fe
[youtube-dl] / youtube_dl / extractor / funnyordie.py
1 from __future__ import unicode_literals
2
3 import json
4 import re
5
6 from .common import InfoExtractor
7
8
class FunnyOrDieIE(InfoExtractor):
    """Extractor for funnyordie.com ``/videos/<id>`` and ``/embed/<id>`` pages.

    The video id is the hexadecimal path segment that follows ``videos/`` or
    ``embed/`` in the URL.
    """

    _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
    _TESTS = [{
        'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
        'md5': 'f647e9e90064b53b6e046e75d0241fbd',
        'info_dict': {
            'id': '0732f586d7',
            'ext': 'mp4',
            'title': 'Heart-Shaped Box: Literal Video Version',
            'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^http:.*\.jpg$',
        },
    }, {
        'url': 'http://www.funnyordie.com/embed/e402820827',
        'md5': '0e0c5a7bf45c52b95cd16aa7f28be0b6',
        'info_dict': {
            'id': 'e402820827',
            'ext': 'mp4',
            'title': 'Please Use This Song (Jon Lajoie)',
            'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
            'thumbnail': r're:^http:.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The media URL is taken from the ``src`` attribute that sits next to a
        ``video/mp4`` type attribute in the page; title, description and
        thumbnail come from the JSON object assigned to ``fb_post``.

        Returns a dict with the keys ``id``, ``url``, ``ext``, ``title``,
        ``description`` and ``thumbnail``. ``title`` is required (a missing
        ``name`` key raises ``KeyError``); ``description`` and ``thumbnail``
        are ``None`` when the post lacks them.
        """
        # NOTE(review): assumes url already matches _VALID_URL (mobj is used
        # without a None check) -- presumably guaranteed by the caller.
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # Two attribute orders are tried: type before src (double-quoted
        # type) and src before type (single-quoted type).
        video_url = self._search_regex(
            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
            webpage, 'video URL', flags=re.DOTALL)

        # Non-greedy match up to the first '};' after 'fb_post ='.
        post_json = self._search_regex(
            r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
        post = json.loads(post_json)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': post['name'],
            'description': post.get('description'),
            'thumbnail': post.get('picture'),
        }