[snotr] Add new extractor
[youtube-dl] / youtube_dl / extractor / snotr.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
8 from ..utils import (
9
10     str_to_int,
11     parse_iso8601,
12
13
14
15 )
16
class SnotrIE(InfoExtractor):
    """Information extractor for video pages on snotr.com.

    The numeric video id is taken from the page URL and the media URL is
    built directly from that id; title, description, view count, duration
    and file size are scraped from the downloaded page.
    """

    # Accept both http and https; a non-empty slug after the id is required
    # but its value is not used.
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/(?:[\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize': 12320768,
        },
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize': 1048576,
        },
    }]

    # Number of bytes counted for each unit of the leading integer shown in
    # the page's "Filesize" field.
    # NOTE(review): 131072 == 1024 * 1024 / 8, which suggests the page reports
    # the size in megabits -- confirm against the live site.
    _FILESIZE_UNIT_BYTES = 131072

    @staticmethod
    def _clock_to_seconds(text):
        """Convert a leading ``M:SS`` or ``H:MM:SS`` clock string to seconds.

        Returns None when ``text`` is empty/None or does not start with a
        colon-separated group of two or three numbers.
        """
        clock = re.match(r'\s*(\d+(?::\d+){1,2})', text or '')
        if clock is None:
            return None
        seconds = 0
        # Fold left to right so each extra leading field is worth 60x more.
        for field in clock.group(1).split(':'):
            seconds = seconds * 60 + int(field)
        return seconds

    def _real_extract(self, url):
        """Download the video page for ``url`` and return its info dict.

        Only the title is mandatory among the scraped fields; view count,
        duration and file size are looked up non-fatally and are None when
        the page does not contain them in the expected form.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        # The media file location is derived from the id alone.
        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id

        views_text = self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(views_text) if views_text else None

        duration = self._clock_to_seconds(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n(.*?)</p>',
            webpage, 'duration', fatal=False))

        size_text = self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n(.*?)</p>',
            webpage, 'filesize', fatal=False)
        # Only the leading integer of the field is used; anything after it
        # (e.g. a unit suffix) is ignored.
        size_digits = re.match(r'\d+', size_text or '')
        if size_digits is None:
            file_size = None
        else:
            file_size = int(size_digits.group()) * self._FILESIZE_UNIT_BYTES

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': video_url,
            'view_count': view_count,
            'duration': duration,
            'filesize': file_size,
        }