_ Git - youtube-dl/blob - youtube_dl/extractor/imgur.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     int_or_none,
   8     js_to_json,
   9     mimetype2ext,
  10 )
  11
  12
  13 class ImgurIE(InfoExtractor):
  14     _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
  15
  16     _TESTS = [{
  17         'url': 'https://i.imgur.com/A61SaA1.gifv',
  18         'info_dict': {
  19             'id': 'A61SaA1',
  20             'ext': 'mp4',
  21             'title': 'MRW gifv is up and running without any bugs',
  22             'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
  23         },
  24     }]
  25
  26     def _real_extract(self, url):
  27         video_id = self._match_id(url)
  28         webpage = self._download_webpage(url, video_id)
  29
  30         width = int_or_none(self._search_regex(
  31             r'<param name="width" value="([0-9]+)"',
  32             webpage, 'width', fatal=False))
  33         height = int_or_none(self._search_regex(
  34             r'<param name="height" value="([0-9]+)"',
  35             webpage, 'height', fatal=False))
  36
  37         formats = []
  38         video_elements = self._search_regex(
  39             r'(?s)<div class="video-elements">(.*?)</div>',
  40             webpage, 'video elements')
  41         formats = []
  42         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
  43             formats.append({
  44                 'format_id': m.group('type').partition('/')[2],
  45                 'url': self._proto_relative_url(m.group('src')),
  46                 'ext': mimetype2ext(m.group('type')),
  47                 'acodec': 'none',
  48                 'width': width,
  49                 'height': height,
  50                 'http_headers': {
  51                     'User-Agent': 'youtube-dl (like wget)',
  52                 },
  53             })
  54
  55         gif_json = self._search_regex(
  56             r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
  57             webpage, 'GIF code', fatal=False)
  58         if gif_json:
  59             gifd = self._parse_json(
  60                 gif_json, video_id, transform_source=js_to_json)
  61             formats.append({
  62                 'format_id': 'gif',
  63                 'preference': -10,
  64                 'width': width,
  65                 'height': height,
  66                 'ext': 'gif',
  67                 'acodec': 'none',
  68                 'vcodec': 'gif',
  69                 'container': 'gif',
  70                 'url': self._proto_relative_url(gifd['gifUrl']),
  71                 'filesize': gifd.get('size'),
  72                 'http_headers': {
  73                     'User-Agent': 'youtube-dl (like wget)',
  74                 },
  75             })
  76
  77         self._sort_formats(formats)
  78
  79         return {
  80             'id': video_id,
  81             'formats': formats,
  82             'description': self._og_search_description(webpage),
  83             'title': self._og_search_title(webpage),
  84         }