da3b05a8dc8ca89345e755225ed7885fa580c973
[youtube-dl] / youtube_dl / extractor / snotr.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     float_or_none,
9     str_to_int,
10     parse_duration,
11 )
12
13
class SnotrIE(InfoExtractor):
    """Information extractor for snotr.com video pages."""

    # The scheme must be written as ``https?`` -- the previous ``http?`` made
    # the final "p" optional (matching "htt://") and rejected "https://" URLs.
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize_approx': 98566144,
            'description': 'A drone flying through Fourth of July Fireworks',
        }
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8912896,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single snotr.com video page.

        The numeric video id is taken from ``url``, the page is downloaded,
        and title/description are read from the page's Open Graph data.
        View count, duration and approximate file size are scraped from the
        page markup with ``fatal=False``.

        Returns a dict with the keys ``id``, ``description``, ``title``,
        ``url``, ``view_count``, ``duration`` and ``filesize_approx``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        # The media location is not read from the page; it is derived
        # directly from the video id on the CDN host.
        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id

        # NOTE(review): fatal=False presumably lets extraction continue when
        # one of these optional fields is missing from the page -- confirm
        # against InfoExtractor._html_search_regex.
        view_count = str_to_int(self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
            webpage, 'duration', fatal=False))

        # The page states the size in megabytes; invscale scales it to bytes
        # (e.g. the first test expects 94 MB -> 98566144).
        filesize_approx = float_or_none(self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
            webpage, 'filesize', fatal=False), invscale=1024 * 1024)

        return {
            'id': video_id,
            'description': description,
            'title': title,
            'url': video_url,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        }