_ Git - youtube-dl/blob - youtube_dl/extractor/vodlocker.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     determine_ext,
   9     compat_urllib_parse,
  10     compat_urllib_request,
  11 )
  12
  13
  14 class VodlockerIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
  16
  17     _TESTS = [{
  18         'url': 'http://vodlocker.com/e8wvyzz4sl42',
  19         'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
  20         'info_dict': {
  21             'id': 'e8wvyzz4sl42',
  22             'ext': 'mp4',
  23             'title': 'Germany vs Brazil',
  24             'thumbnail': 're:http://.*\.jpg',
  25         },
  26     }]
  27
  28     def _real_extract(self, url):
  29         mobj = re.match(self._VALID_URL, url)
  30         video_id = mobj.group('id')
  31
  32         url = 'http://vodlocker.com/%s' % video_id
  33
  34         webpage = self._download_webpage(url, video_id)
  35
  36         fields = dict(re.findall(r'''(?x)<input\s+
  37             type="hidden"\s+
  38             name="([^"]+)"\s+
  39             (?:id="[^"]+"\s+)?
  40             value="([^"]*)"
  41             ''', webpage))
  42
  43         if fields['op'] == 'download1':
  44             time.sleep(3) #they do detect when requests happen too fast!
  45             post = compat_urllib_parse.urlencode(fields)
  46             req = compat_urllib_request.Request(url, post)
  47             req.add_header('Content-type', 'application/x-www-form-urlencoded')
  48             webpage = self._download_webpage(req, video_id, 'Downloading video page')
  49
  50         title = self._search_regex(r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
  51         thumbnail = self._search_regex(r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
  52         url = self._search_regex(r'file:\s*"(http[^\"]+)",', webpage, 'file url')
  53
  54         formats = [{
  55             'format_id': 'sd',
  56             'url': url,
  57             'ext': determine_ext(url),
  58             'quality': 1,
  59         }]
  60
  61         return {
  62             'id': video_id,
  63             'title': title,
  64             'thumbnail': thumbnail,
  65             'formats': formats,
  66         }