ac3e3adf22ad194a8af3e833ae4d8acf7484e8b4
[youtube-dl] / youtube_dl / extractor / sharesix.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8     compat_urllib_parse,
9     compat_urllib_request,
10 )
11 from ..utils import (
12     parse_duration,
13 )
14
15
class ShareSixIE(InfoExtractor):
    """Extractor for videos hosted on sharesix.com."""

    # Matches both ``sharesix.com/f/<id>`` and ``sharesix.com/<id>``;
    # the alphanumeric identifier is captured in the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [
        {
            'url': 'http://sharesix.com/f/OXjQ7Y6',
            'md5': '9e8e95d8823942815a7d7c773110cc93',
            'info_dict': {
                'id': 'OXjQ7Y6',
                'ext': 'mp4',
                'title': 'big_buck_bunny_480p_surround-fix.avi',
                'duration': 596,
                'width': 854,
                'height': 480,
            },
        },
        {
            'url': 'http://sharesix.com/lfrwoxp35zdd',
            'md5': 'dd19f1435b7cec2d7912c64beeee8185',
            'info_dict': {
                'id': 'lfrwoxp35zdd',
                'ext': 'flv',
                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv',
                'duration': 65,
                'width': 1280,
                'height': 720,
            },
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        The page is requested with a form POST carrying
        ``method_free=Free``; the media URL, file name, length and
        dimensions are then scraped from the returned HTML.

        Returns a dict with the keys ``id``, ``title``, ``duration`` and
        ``formats`` (a single ``sd`` entry carrying the media URL and,
        when the page lists them, the integer width and height).

        The duration lookup is made with ``fatal=False``; the video URL
        and title lookups use the helpers' default handling of a missing
        match.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        fields = {
            'method_free': 'Free'
        }
        # The request body must be a byte string: on Python 3 urlencode()
        # returns text, and urllib refuses to send a str as POST data.
        # The urlencoded form is pure ASCII, so this is lossless (and a
        # no-op in effect on Python 2).
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(req, video_id,
                                         'Downloading video page')

        video_url = self._search_regex(
            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL')
        title = self._html_search_regex(
            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title')
        # fatal=False: a page without a "Length" entry should not abort
        # the extraction (presumably parse_duration tolerates a missing
        # value -- confirm against ..utils).
        duration = parse_duration(
            self._search_regex(
                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>',
                webpage,
                'duration',
                fatal=False
            )
        )

        # Dimensions are optional; both stay None when the page has no
        # "Width x Height" entry.
        m = re.search(
            r'''(?xs)<dt>Width\sx\sHeight</dt>.+?
                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''',
            webpage
        )
        width = height = None
        if m:
            width, height = int(m.group('width')), int(m.group('height'))

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'width': width,
            'height': height,
        }]

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }