_ Git - youtube-dl/blob - youtube_dl/extractor/sexykarma.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5
   6
   7 class SexyKarmaIE(InfoExtractor):
   8     _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<id>[a-zA-Z0-9\-]+)(.html)'
   9     _TESTS = [{
  10         'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
  11         'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
  12         'info_dict': {
  13             'id': 'taking-a-quick-pee-yHI70cOyIHt',
  14             'ext': 'mp4',
  15             'title': 'Taking a quick pee.',
  16             'uploader': 'wildginger7',
  17         }
  18     }, {
  19         'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
  20         'md5': 'dd216c68d29b49b12842b9babe762a5d',
  21         'info_dict': {
  22             'id': 'pot-pixie-tribute-8Id6EZPbuHf',
  23             'ext': 'mp4',
  24             'title': 'pot_pixie tribute',
  25             'uploader': 'banffite',
  26         }
  27     }]
  28
  29     def _real_extract(self, url):
  30         video_id = self._match_id(url)
  31
  32         # TODO more code goes here, for example ...
  33         webpage = self._download_webpage(url, video_id)
  34         title = self._html_search_regex(r'<h2 class="he2"><span>(.*?)</span>', webpage, 'title')
  35         uploader = self._html_search_regex(r'class="aupa">\n*(.*?)</a>', webpage, 'uploader')
  36         url = self._html_search_regex(r'<p><a href="(.*?)" ?\n*target="_blank"><font color', webpage, 'url')
  37
  38         return {
  39             'id': video_id,
  40             'title': title,
  41             'uploader': uploader,
  42             'url': url
  43         }