Merge remote-tracking branch 'MikeCol/playvid_extract'
[youtube-dl] / youtube_dl / extractor / playvid.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
    ExtractorError,
)
10
class PlayvidIE(InfoExtractor):
    """Information extractor for playvid.com watch pages.

    The video metadata and the per-resolution download URLs are read from
    the URL-encoded ``flashvars`` attribute embedded in the watch page.
    """

    _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
        'file': 'agbDDi7WZTV.mp4',
        'md5': '44930f8afa616efdf9482daf4fe53e1e',
        'info_dict': {
            'title': 'Michelle Lewin in Miami Beach',
            'duration': 240,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video behind ``url``.

        Downloads the watch page, decodes the ``flashvars`` attribute and
        collects title, duration, thumbnail and one format entry per
        ``video_urls`` resolution found there.  If the flashvars carry no
        title, the page's ``<title>`` element is used instead.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``thumbnail``, ``duration``, ``description`` and ``age_limit``.

        Raises ExtractorError if no download URL could be found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_title = None
        duration = None
        video_thumbnail = None
        formats = []

        # most of the information is stored in the flashvars
        flashvars_match = re.search(r'flashvars="(.+?)"', webpage)

        if flashvars_match:
            infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&amp;')
            for info in infos:
                videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
                if not videovars_match:
                    # not a video_vars[...] entry; nothing to read from it
                    continue

                key = videovars_match.group(1)
                val = videovars_match.group(2)

                if key == 'title':
                    # spaces arrive encoded as '+' in the flashvars
                    video_title = val.replace('+', ' ')
                if key == 'duration':
                    try:
                        # convert to a number; the raw value is a string
                        duration = int(val)
                    except ValueError:
                        duration = None
                if key == 'big_thumb':
                    video_thumbnail = val

                # The key captured above ends at the first ']=', so a URL
                # entry shows up here as 'video_urls][<N>p...'.
                videourl_match = re.match(r'^video_urls\]\[(?P<resolution>\d+)p', key)
                if videourl_match:
                    resolution = int(videourl_match.group('resolution'))
                    formats.append({
                        'resolution': resolution,            # 360, 480, ...
                        'ext': determine_ext(val),
                        'url': val
                    })

        # fatal error, if no download url is found
        if not formats:
            raise ExtractorError('no video url found')

        # Extract title - should be in the flashvars; if not, look elsewhere
        if video_title is None:
            video_title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18
        }