_ Git - youtube-dl/blob - youtube_dl/extractor/vshare.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..compat import compat_chr
   8 from ..utils import decode_packed_codes
   9
  10
  11 class VShareIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
  13     _TESTS = [{
  14         'url': 'https://vshare.io/d/0f64ce6',
  15         'md5': '17b39f55b5497ae8b59f5fbce8e35886',
  16         'info_dict': {
  17             'id': '0f64ce6',
  18             'title': 'vl14062007715967',
  19             'ext': 'mp4',
  20         }
  21     }, {
  22         'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
  23         'only_matching': True,
  24     }]
  25
  26     def _extract_packed(self, webpage):
  27         packed = self._search_regex(r'(eval\(function.+)', webpage, 'packed code')
  28         unpacked = decode_packed_codes(packed)
  29         digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
  30         digits = digits.split(',')
  31         digits = [int(digit) for digit in digits]
  32         key_digit = self._search_regex(r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
  33         chars = [compat_chr(d - int(key_digit)) for d in digits]
  34         return ''.join(chars)
  35
  36     def _real_extract(self, url):
  37         video_id = self._match_id(url)
  38
  39         webpage = self._download_webpage(
  40             'https://vshare.io/v/%s/width-650/height-430/1' % video_id, video_id)
  41
  42         title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
  43         title = title.split(' - ')[0]
  44
  45         unpacked = self._extract_packed(webpage)
  46         video_urls = re.findall(r'<source src="([^"]+)', unpacked)
  47         formats = [{'url': video_url} for video_url in video_urls]
  48         return {
  49             'id': video_id,
  50             'title': title,
  51             'formats': formats,
  52         }
  53
  54     @staticmethod
  55     def _extract_urls(webpage):
  56         return re.findall(
  57             r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
  58             webpage)