_ Git - youtube-dl/blob - youtube_dl/extractor/fivetv.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     int_or_none,
   7 )
   8
   9
  10 class FiveTVIE(InfoExtractor):
  11     _VALID_URL = r'http://(?:www\.)?5-tv\.ru/[^/]*/(?P<id>\d+)'
  12     _TESTS = [
  13         {
  14             'url': 'http://5-tv.ru/news/96814/',
  15             'md5': 'bbff554ad415ecf5416a2f48c22d9283',
  16             'info_dict': {
  17                 'id': '96814',
  18                 'ext': 'mp4',
  19                 'title': 'Россияне выбрали имя для общенациональной платежной системы',
  20                 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660',
  21                 'thumbnail': 're:^https?://.*\.jpg$',
  22                 'width': 480,
  23                 'height': 360,
  24                 'duration': 180,
  25             },
  26         },
  27         {
  28             'url': 'http://5-tv.ru/video/1021729/',
  29             'md5': '299c8b72960efc9990acd2c784dc2296',
  30             'info_dict': {
  31                 'id': '1021729',
  32                 'ext': 'mp4',
  33                 'title': '3D принтер',
  34                 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41',
  35                 'thumbnail': 're:^https?://.*\.jpg$',
  36                 'width': 480,
  37                 'height': 360,
  38                 'duration': 180,
  39             },
  40         },
  41     ]
  42
  43     def _real_extract(self, url):
  44         video_id = self._match_id(url)
  45
  46         webpage = self._download_webpage(url, video_id)
  47
  48         video_link = self._search_regex(
  49             r'(<a.*?class="videoplayer">)', webpage, 'video link')
  50
  51         url = self._search_regex(r'href="([^"]+)"', video_link, 'video url')
  52         width = int_or_none(self._search_regex(
  53             r'width:(\d+)px', video_link, 'width', default=None, fatal=False))
  54         height = int_or_none(self._search_regex(
  55             r'height:(\d+)px', video_link, 'height', default=None, fatal=False))
  56         duration = int_or_none(self._og_search_property(
  57             'video:duration', webpage, 'duration'))
  58         return {
  59             'id': video_id,
  60             'url': url,
  61             'width': width,
  62             'height': height,
  63             'title': self._og_search_title(webpage),
  64             'description': self._og_search_description(webpage),
  65             'thumbnail': self._og_search_thumbnail(webpage),
  66             'duration': duration,
  67         }