_ Git - youtube-dl/blob - youtube_dl/extractor/vporn.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7
   8 class VpornIE(InfoExtractor):
   9     _VALID_URL = r'http?://(?:www\.)?vporn\.com/[a-z]+/(?P<title_dash>[a-z-]+)/(?P<id>\d+)/?'
  10     _TEST = {
  11         'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
  12         'md5': 'facf37c1b86546fa0208058546842c55',
  13         'info_dict': {
  14             'id': '497944',
  15             'ext': 'mp4',
  16             'title': 'Violet On Her 19th Birthday',
  17             'description': 'Violet dances in front of the camera which is sure to get you horny.',
  18             'duration': 393,
  19             'thumbnail': 're:^https?://.*\.jpg$',
  20         }
  21     }
  22
  23     def _real_extract(self, url):
  24         mobj = re.match(self._VALID_URL, url)
  25         video_id = mobj.group('id')
  26
  27         webpage = self._download_webpage(url, video_id)
  28         title = self._html_search_regex(r'<title>(.*?) - Vporn Video</title>', webpage, 'title')
  29         video_url = self._html_search_regex(r'flashvars.videoUrlMedium  = "(.*?)"', webpage, 'video_url')
  30         description = self._html_search_regex(r'<div class="description_txt">(.*?)</div>', webpage, 'description')
  31         thumbnail = 'http://www.vporn.com' + self._html_search_regex(r'flashvars.imageUrl = "(.*?)"', webpage, 'description')
  32
  33         mobj = re.search(
  34             r'<span class="f_right">duration (?P<minutes>\d+) min (?P<seconds>\d+) sec </span>', webpage)
  35         duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
  36
  37         return {
  38             'id': video_id,
  39             'url': video_url,
  40             'thumbnail': thumbnail,
  41             'title': title,
  42             'description': description,
  43             'duration': duration,
  44         }