_ Git - youtube-dl/blob - youtube_dl/extractor/escapist.py

   1 from __future__ import unicode_literals
   2
   3 import json
   4
   5 from .common import InfoExtractor
   6 from ..compat import compat_urllib_request
   7
   8 from ..utils import (
   9     determine_ext,
  10     clean_html,
  11     int_or_none,
  12     float_or_none,
  13 )
  14
  15
  16 def _decrypt_config(key, string):
  17     a = ''
  18     i = ''
  19     r = ''
  20
  21     while len(a) < (len(string) / 2):
  22         a += key
  23
  24     a = a[0:int(len(string) / 2)]
  25
  26     t = 0
  27     while t < len(string):
  28         i += chr(int(string[t] + string[t + 1], 16))
  29         t += 2
  30
  31     icko = [s for s in i]
  32
  33     for t, c in enumerate(a):
  34         r += chr(ord(c) ^ ord(icko[t]))
  35
  36     return r
  37
  38
  39 class EscapistIE(InfoExtractor):
  40     _VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
  41     _TESTS = [{
  42         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
  43         'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
  44         'info_dict': {
  45             'id': '6618',
  46             'ext': 'mp4',
  47             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
  48             'title': "Breaking Down Baldur's Gate",
  49             'thumbnail': 're:^https?://.*\.jpg$',
  50             'duration': 264,
  51         }
  52     }, {
  53         'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
  54         'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf',
  55         'info_dict': {
  56             'id': '10044',
  57             'ext': 'mp4',
  58             'description': 'This week, Zero Punctuation reviews Evolve.',
  59             'title': 'Evolve - One vs Multiplayer',
  60             'thumbnail': 're:^https?://.*\.jpg$',
  61             'duration': 304,
  62         }
  63     }]
  64
  65     def _real_extract(self, url):
  66         video_id = self._match_id(url)
  67         webpage = self._download_webpage(url, video_id)
  68
  69         ims_video = self._parse_json(
  70             self._search_regex(
  71                 r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
  72             video_id)
  73         video_id = ims_video['videoID']
  74         key = ims_video['hash']
  75
  76         config_req = compat_urllib_request.Request(
  77             'http://www.escapistmagazine.com/videos/'
  78             'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
  79         config_req.add_header('Referer', url)
  80         config = self._download_webpage(config_req, video_id, 'Downloading video config')
  81
  82         data = json.loads(_decrypt_config(key, config))
  83
  84         video_data = data['videoData']
  85
  86         title = clean_html(video_data['title'])
  87         duration = float_or_none(video_data.get('duration'), 1000)
  88         uploader = video_data.get('publisher')
  89
  90         formats = [{
  91             'url': video['src'],
  92             'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
  93             'height': int_or_none(video.get('res')),
  94         } for video in data['files']['videos']]
  95         self._sort_formats(formats)
  96
  97         return {
  98             'id': video_id,
  99             'formats': formats,
 100             'title': title,
 101             'thumbnail': self._og_search_thumbnail(webpage),
 102             'description': self._og_search_description(webpage),
 103             'duration': duration,
 104             'uploader': uploader,
 105         }