[SexyKarma] Add new extractor
[youtube-dl] / youtube_dl / extractor / sexykarma.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class SexyKarmaIE(InfoExtractor):
    """Information extractor for video pages on sexykarma.com.

    Handles URLs of the form
    ``http(s)://[www.]sexykarma.com/gonewild/video/<slug>.html`` where
    ``<slug>`` (letters, digits and hyphens) is used as the video id.
    """

    # The dot before "html" is escaped so only a literal ".html" suffix is
    # accepted; no capturing group is needed for the suffix.
    _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<id>[a-zA-Z0-9\-]+)\.html'
    _TESTS = [{
        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
        'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
        'info_dict': {
            'id': 'taking-a-quick-pee-yHI70cOyIHt',
            'ext': 'mp4',
            'title': 'Taking a quick pee.',
            'uploader': 'wildginger7',
        }
    }, {
        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
        'md5': 'dd216c68d29b49b12842b9babe762a5d',
        'info_dict': {
            'id': 'pot-pixie-tribute-8Id6EZPbuHf',
            'ext': 'mp4',
            'title': 'pot_pixie tribute',
            'uploader': 'banffite',
        }
    }]

    def _real_extract(self, url):
        """Download the video page and scrape its metadata.

        Returns an info dict with the video id (taken from the URL), the
        title, the uploader name and the direct media URL, all located in
        the page HTML via regular expressions.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="he2"><span>(.*?)</span>', webpage, 'title')
        uploader = self._html_search_regex(
            r'class="aupa">\n*(.*?)</a>', webpage, 'uploader')
        # Kept in its own name so the page URL passed in as ``url`` is not
        # shadowed by the media URL.
        video_url = self._html_search_regex(
            r'<p><a href="(.*?)" ?\n*target="_blank"><font color',
            webpage, 'url')

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'url': video_url,
            # NOTE(review): the site hosts adult content, so the video is
            # marked as restricted to viewers aged 18+ - confirm.
            'age_limit': 18,
        }