git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/vodlocker.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     determine_ext,
   9     compat_urllib_parse,
  10     compat_urllib_request,
  11 )
  12
  13
  14 class VodlockerIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
  16
  17     _TESTS = [{
  18         'url': 'http://vodlocker.com/e8wvyzz4sl42',
  19         'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
  20         'info_dict': {
  21             'id': 'e8wvyzz4sl42',
  22             'ext': 'mp4',
  23             'title': 'Germany vs Brazil',
  24             'thumbnail': 're:http://.*\.jpg',
  25         },
  26     }]
  27
  28     def _real_extract(self, url):
  29         mobj = re.match(self._VALID_URL, url)
  30         video_id = mobj.group('id')
  31         webpage = self._download_webpage(url, video_id)
  32
  33         fields = dict(re.findall(r'''(?x)<input\s+
  34             type="hidden"\s+
  35             name="([^"]+)"\s+
  36             (?:id="[^"]+"\s+)?
  37             value="([^"]*)"
  38             ''', webpage))
  39
  40         if fields['op'] == 'download1':
  41             self._sleep(3, video_id)  # they do detect when requests happen too fast!
  42             post = compat_urllib_parse.urlencode(fields)
  43             req = compat_urllib_request.Request(url, post)
  44             req.add_header('Content-type', 'application/x-www-form-urlencoded')
  45             webpage = self._download_webpage(
  46                 req, video_id, 'Downloading video page')
  47
  48         title = self._search_regex(
  49             r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
  50         thumbnail = self._search_regex(
  51             r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
  52         url = self._search_regex(
  53             r'file:\s*"(http[^\"]+)",', webpage, 'file url')
  54
  55         formats = [{
  56             'format_id': 'sd',
  57             'url': url,
  58         }]
  59
  60         return {
  61             'id': video_id,
  62             'title': title,
  63             'thumbnail': thumbnail,
  64             'formats': formats,
  65         }