_ Git - youtube-dl/blob - youtube_dl/extractor/porn91.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from ..compat import compat_urllib_parse
   5 from .common import InfoExtractor
   6
   7
   8 class Porn91IE(InfoExtractor):
   9     IE_NAME = '91porn'
  10     _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
  11
  12     _TEST = {
  13         'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
  14         'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
  15         'info_dict': {
  16             'id': '7e42283b4f5ab36da134',
  17             'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
  18             'ext': 'mp4'
  19         }
  20     }
  21
  22     def _real_extract(self, url):
  23         video_id = self._match_id(url)
  24         url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
  25         self._set_cookie('91porn.com', 'language', 'cn_CN')
  26         webpage = self._download_webpage(url, video_id, "get HTML content")
  27         title = self._search_regex(
  28             r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
  29         title = title.replace('\n', '')
  30
  31         # get real url
  32         file_id = self._search_regex(
  33             r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id')
  34         sec_code = self._search_regex(
  35             r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
  36         max_vid = self._search_regex(
  37             r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
  38         url_params = compat_urllib_parse.urlencode({
  39             'VID': file_id,
  40             'mp4': '1',
  41             'seccode': sec_code,
  42             'max_vid': max_vid,
  43         })
  44         info_cn = self._download_webpage(
  45             'http://91porn.com/getfile.php?' + url_params, video_id,
  46             "get real video url")
  47         video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
  48
  49         return {
  50             'id': video_id,
  51             'title': title,
  52             'url': video_url,
  53         }