_ Git - youtube-dl/blob - youtube_dl/extractor/playvid.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
    ExtractorError,
)
  10
  11 class PlayvidIE(InfoExtractor):
  12
  13     _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
  14     _TEST = {
  15         'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
  16         'file': 'agbDDi7WZTV.mp4',
  17         'md5': '44930f8afa616efdf9482daf4fe53e1e',
  18         'info_dict': {
  19             'title': 'Michelle Lewin in Miami Beach',
  20             'duration': 240,
  21             'age_limit': 18,
  22         }
  23     }
  24
  25     def _real_extract(self, url):
  26         mobj = re.match(self._VALID_URL, url)
  27         video_id = mobj.group('id')
  28
  29         webpage = self._download_webpage(url, video_id)
  30
  31         self.report_extraction(video_id)
  32
  33         video_title = None
  34         duration = None
  35         video_thumbnail = None
  36         formats = []
  37
  38         # most of the information is stored in the flashvars
  39         flashvars_match = re.search(r'flashvars="(.+?)"',webpage)
  40
  41         if flashvars_match:
  42             infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&amp;')
  43             for info in infos:
  44                 videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$',info)
  45                 if videovars_match:
  46                     key = videovars_match.group(1)
  47                     val = videovars_match.group(2)
  48
  49                     if key == 'title':
  50                         video_title = val.replace('+',' ')
  51                     if key == 'duration':
  52                         try:
  53                             duration = val
  54                         except ValueError:
  55                             duration = None
  56                     if key == 'big_thumb':
  57                         video_thumbnail = val
  58
  59                     videourl_match = re.match(r'^video_urls\]\[(?P<resolution>\d+)p',key)
  60                     if videourl_match:
  61                         resolution = int(videourl_match.group('resolution'))
  62                         formats.append({
  63                             'resolution': resolution,            # 360, 480, ...
  64                             'ext': determine_ext(val),
  65                             'url': val
  66                         })
  67
  68         # fatal error, if no download url is found
  69         if len(formats) == 0:
  70             raise ExtractorError,'no video url found'
  71
  72         # Extract title - should be in the flashvars; if not, look elsewhere
  73         if video_title is None:
  74             video_title = self._html_search_regex(
  75                 r'<title>(.*?)</title', webpage, 'title')
  76
  77         return {
  78             'id': video_id,
  79             'formats': formats,
  80             'title': video_title,
  81             'thumbnail': video_thumbnail,
  82             'duration': duration,
  83             'description': None,
  84             'age_limit': 18
  85         }