_ Git - youtube-dl/blob - youtube_dl/extractor/movingimage.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     unescapeHTML,
   6     parse_duration,
   7 )
   8
   9
  10 class MovingImageIE(InfoExtractor):
  11     _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
  12     _TEST = {
  13         'url': 'http://movingimage.nls.uk/film/3561',
  14         'md5': '4caa05c2b38453e6f862197571a7be2f',
  15         'info_dict': {
  16             'id': '3561',
  17             'ext': 'mp4',
  18             'title': 'SHETLAND WOOL',
  19             'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
  20             'duration': 900,
  21             'thumbnail': r're:^https?://.*\.jpg$',
  22         },
  23     }
  24
  25     def _real_extract(self, url):
  26         video_id = self._match_id(url)
  27
  28         webpage = self._download_webpage(url, video_id)
  29
  30         formats = self._extract_m3u8_formats(
  31             self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
  32             video_id, ext='mp4', entry_protocol='m3u8_native')
  33
  34         def search_field(field_name, fatal=False):
  35             return self._search_regex(
  36                 r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
  37                 webpage, 'title', fatal=fatal)
  38
  39         title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
  40         description = unescapeHTML(search_field('Description'))
  41         duration = parse_duration(search_field('Running time'))
  42         thumbnail = self._search_regex(
  43             r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
  44
  45         return {
  46             'id': video_id,
  47             'formats': formats,
  48             'title': title,
  49             'description': description,
  50             'duration': duration,
  51             'thumbnail': thumbnail,
  52         }