[playvid] Simplify (#2539)
[youtube-dl] / youtube_dl / extractor / playvid.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     compat_urllib_parse,
8 )
9
10
class PlayvidIE(InfoExtractor):
    """Extractor for playvid.com watch pages.

    Metadata and stream URLs are read from the ``flashvars`` attribute
    embedded in the watch page; see ``_real_extract`` for details.
    """

    # Accepts both ``/watch/<id>`` and ``/watch?v=<id>``; the id runs up to
    # an optional ``#`` fragment or the end of the URL.
    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
        'md5': '44930f8afa616efdf9482daf4fe53e1e',
        'info_dict': {
            'id': 'agbDDi7WZTV',
            'ext': 'mp4',
            'title': 'Michelle Lewin in Miami Beach',
            'duration': 240,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        The page is downloaded, its ``flashvars`` attribute is
        percent-decoded and split on ``&``, and every ``video_vars[...]``
        entry is inspected for the title, duration, thumbnail and the
        per-resolution stream URLs.  When the flashvars carry no title,
        the page's ``<title>`` element is used instead.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``thumbnail``, ``duration``, ``description`` (always ``None``)
        and ``age_limit`` (always 18).
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = None
        length = None
        thumbnail = None
        formats = []

        # Nearly all metadata lives in the flashvars attribute.
        raw_flashvars = self._html_search_regex(
            r'flashvars="(.+?)"', webpage, 'flashvars')

        for pair in compat_urllib_parse.unquote(raw_flashvars).split('&'):
            var_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', pair)
            if var_match is None:
                # Not a video_vars[...] entry (or it has an empty value).
                continue
            name, value = var_match.groups()

            if name == 'title':
                # The title is percent-decoded a second time, with '+'
                # treated as a space.
                title = compat_urllib_parse.unquote_plus(value)
            elif name == 'duration':
                try:
                    length = int(value)
                except ValueError:
                    # Best effort: a non-numeric duration is simply ignored.
                    pass
            elif name == 'big_thumb':
                thumbnail = value
            else:
                # Stream entries look like video_vars[video_urls][480p]=...;
                # the lazy capture above leaves 'video_urls][480p' as name.
                stream_match = re.match(
                    r'^video_urls\]\[(?P<resolution>[0-9]+)p', name)
                if stream_match:
                    formats.append({
                        'height': int(stream_match.group('resolution')),
                        'url': value,
                    })
        self._sort_formats(formats)

        # The title should come from the flashvars; otherwise fall back to
        # the page's <title> element.
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'duration': length,
            'description': None,
            'age_limit': 18
        }