bdbd7543da0b93617aa53379158cf28a37684ea6
[youtube-dl] / youtube_dl / extractor / vporn.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
class VpornIE(InfoExtractor):
    """Extractor for single video pages on vporn.com.

    The page URL supplies the numeric video id; the title, media URL,
    description, thumbnail and duration are scraped from the page HTML.
    """

    # Matches both http and https.  The category and title segments accept
    # any characters other than '/', so slugs containing digits also match.
    _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<title_dash>[^/]+)/(?P<id>\d+)/?'
    _TEST = {
        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
        'md5': 'facf37c1b86546fa0208058546842c55',
        'info_dict': {
            'id': '497944',
            'ext': 'mp4',
            'title': 'Violet On Her 19th Birthday',
            'description': 'Violet dances in front of the camera which is sure to get you horny.',
            'duration': 393,
            # Raw string: '\.' is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Download the video page at ``url`` and build its info dict.

        Returns a dict with the keys ``id``, ``url``, ``thumbnail``,
        ``title``, ``description`` and ``duration``.  The title and the
        media URL are required; ``description``, ``thumbnail`` and
        ``duration`` are None when the page does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Required fields: without these there is nothing to download.
        title = self._html_search_regex(
            r'<title>(.*?) - Vporn Video</title>', webpage, 'title')
        video_url = self._html_search_regex(
            r'flashvars\.videoUrlMedium\s*=\s*"(.*?)"', webpage, 'video_url')

        # Optional metadata: fatal=False so that a missing field does not
        # abort the extraction of an otherwise playable video.
        description = self._html_search_regex(
            r'<div class="description_txt">(.*?)</div>',
            webpage, 'description', fatal=False)
        thumbnail_path = self._html_search_regex(
            r'flashvars\.imageUrl\s*=\s*"(.*?)"',
            webpage, 'thumbnail', fatal=False)
        # The page value is prefixed with the site origin to form the
        # thumbnail URL; skip the concatenation when nothing was found.
        if thumbnail_path:
            thumbnail = 'http://www.vporn.com' + thumbnail_path
        else:
            thumbnail = None

        # Duration is rendered as "duration M min S sec"; convert to seconds.
        mobj = re.search(
            r'<span class="f_right">duration (?P<minutes>\d+) min (?P<seconds>\d+) sec </span>',
            webpage)
        if mobj:
            duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds'))
        else:
            duration = None

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
        }