[eroprofile] new extractor
[youtube-dl] / youtube_dl / extractor / eroprofile.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6
class EroProfileIE(InfoExtractor):
    """Information extractor for eroprofile.com video pages.

    Handles URLs of the form
    ``http(s)://[www.]eroprofile.com/m/videos/view/<display_id>`` and scrapes
    the video metadata out of the page HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
        'info_dict': {
            'id': '3733775',
            'display_id': 'sexy-babe-softcore',
            'ext': 'm4v',
            'title': 'HD MOVIES - sexy babe softcore',
            'description': 'md5:831ee50526c333eb4f6c1e58d382c295',
            'categories': list,  # NSFW
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and return its info dict.

        The numeric id, media URL and title are looked up with the default
        (fatal) behaviour of ``_html_search_regex``; description, thumbnail
        and categories are looked up with ``fatal=False`` and may be None.
        """
        # The URL only carries a human-readable slug; use it to label the
        # download instead of a meaningless placeholder.
        display_id = re.match(self._VALID_URL, url).group('display_id')

        webpage = self._download_webpage(url, display_id)

        # The numeric id is the second argument of the page's
        # glbUpdViews('...','<id>') call.
        video_id = self._html_search_regex(
            r'glbUpdViews\s*\(\'\d*\',\'(\d+)\'', webpage, 'id')

        video_url = self._html_search_regex(
            r'<source src="([^"]+)', webpage, 'video_url')

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*EroProfile</title>', webpage, 'title')

        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
            webpage, 'thumbnail', fatal=False)

        # Categories come from the comma-separated "keywords" meta tag.
        # Trim whitespace around each entry and drop empty ones so that
        # "a, b," yields ['a', 'b'] rather than ['a', ' b', ''].
        categories_str = self._html_search_meta(
            'keywords', webpage, 'categories', fatal=False)
        if categories_str is None:
            categories = None
        else:
            categories = [
                category.strip()
                for category in categories_str.split(',')
                if category.strip()]

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'ext': 'm4v',
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'age_limit': 18,
        }