git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/ssa.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     unescapeHTML,
   6     parse_duration,
   7 )
   8
   9
  10 class SSAIE(InfoExtractor):
  11     _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
  12     _TEST = {
  13         'url': 'http://ssa.nls.uk/film/3561',
  14         'info_dict': {
  15             'id': '3561',
  16             'ext': 'flv',
  17             'title': 'SHETLAND WOOL',
  18             'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
  19             'duration': 900,
  20             'thumbnail': 're:^https?://.*\.jpg$',
  21         },
  22         'params': {
  23             # rtmp download
  24             'skip_download': True,
  25         },
  26     }
  27
  28     def _real_extract(self, url):
  29         video_id = self._match_id(url)
  30
  31         webpage = self._download_webpage(url, video_id)
  32
  33         streamer = self._search_regex(
  34             r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
  35         play_path = self._search_regex(
  36             r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
  37
  38         def search_field(field_name, fatal=False):
  39             return self._search_regex(
  40                 r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
  41                 webpage, 'title', fatal=fatal)
  42
  43         title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
  44         description = unescapeHTML(search_field('Description'))
  45         duration = parse_duration(search_field('Running time'))
  46         thumbnail = self._search_regex(
  47             r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
  48
  49         return {
  50             'id': video_id,
  51             'url': streamer,
  52             'play_path': play_path,
  53             'ext': 'flv',
  54             'title': title,
  55             'description': description,
  56             'duration': duration,
  57             'thumbnail': thumbnail,
  58         }