Merge branch 'cracked' of https://github.com/hassaanaliw/youtube-dl into hassaanaliw...
[youtube-dl] / youtube_dl / extractor / cracked.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
class CrackedIE(InfoExtractor):
    """Extractor for video pages hosted on cracked.com.

    The page is downloaded once and the media URL, title and player
    dimensions are scraped out of the HTML with regular expressions.
    """

    # `https?` accepts both schemes (the previous `http?` only made the final
    # "p" optional and therefore rejected https URLs).  The id group is
    # restricted to digits so that a slug containing underscores cannot be
    # absorbed into the id by a greedy match.
    _VALID_URL = r'https?://.*?\.cracked\.com/video_+(?P<id>\d+)_.*'
    _TEST = {
        'url': 'http://www.cracked.com/video_18803_4-social-criticisms-hidden-in-sonic-hedgehog-games.html',
        'info_dict': {
            'id': '18803',
            'ext': 'mp4',
            'title': "4 Social Criticisms Hidden in 'Sonic the Hedgehog' Games | Cracked.com",
            'height': 375,
            'width': 666,
        }
    }

    @staticmethod
    def _parse_dimension(value):
        """Return ``value`` converted to int, or None if it is not an integer.

        Width/height are optional metadata, so a missing or non-numeric
        attribute (e.g. ``"100%"``) must not abort the whole extraction.
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Scrape the video page at ``url`` and return its info dict.

        Returns a dict with the keys ``url``, ``id``, ``ext``, ``title``,
        ``height`` and ``width``; the two dimensions are None when they
        cannot be determined from the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(
            r'<title>(.*?)</title>', webpage, 'title')
        # Stop at the first closing quote; the previous greedy `(.*)` ran up
        # to the last double quote on the same line.
        video_url = self._search_regex(
            r'var CK_vidSrc = "+([^"]+)"', webpage, 'url')

        # The dimensions are not required to play the video, so their
        # absence is reported as a warning instead of a fatal error.
        width = self._search_regex(
            r'width="(.*?)"', webpage, 'width', fatal=False)

        # The second `height` attribute on the page is the one used for the
        # player; guard the index so a page with fewer matches does not
        # raise IndexError.
        heights = re.findall(r'height="(.*?)"', webpage)
        height = heights[1] if len(heights) > 1 else None

        return {
            'url': video_url,
            'id': video_id,
            'ext': 'mp4',
            'title': title,
            'height': self._parse_dimension(height),
            'width': self._parse_dimension(width),
        }