_ Git - youtube-dl/blob - youtube_dl/extractor/streamango.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     determine_ext,
   9     int_or_none,
  10     js_to_json,
  11 )
  12
  13
  14 class StreamangoIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
  16     _TESTS = [{
  17         'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
  18         'md5': 'e992787515a182f55e38fc97588d802a',
  19         'info_dict': {
  20             'id': 'clapasobsptpkdfe',
  21             'ext': 'mp4',
  22             'title': '20170315_150006.mp4',
  23         }
  24     }, {
  25         # no og:title
  26         'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
  27         'info_dict': {
  28             'id': 'foqebrpftarclpob',
  29             'ext': 'mp4',
  30             'title': 'foqebrpftarclpob',
  31         },
  32         'params': {
  33             'skip_download': True,
  34         },
  35     }, {
  36         'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
  37         'only_matching': True,
  38     }]
  39
  40     def _real_extract(self, url):
  41         def decrypt_src(str_, val):
  42             k = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
  43             str_ = re.sub(r'[^A-Za-z0-9+/=]', '', str_)
  44             src = ''
  45             sm = [None] * 4
  46             i = 0
  47             str_len = len(str_)
  48             while i < str_len:
  49                 for j in range(4):
  50                     sm[j % 4] = k.index(str_[i])
  51                     i += 1
  52                 charCode = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
  53                 src += chr(charCode)
  54                 if (sm[2] != 0x40):
  55                     charCode = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
  56                     src += chr(charCode)
  57                 if (sm[3] != 0x40):
  58                     charCode = ((sm[2] & 0x3) << 0x6) | sm[3]
  59                     src += chr(charCode)
  60             return src
  61
  62         video_id = self._match_id(url)
  63
  64         webpage = self._download_webpage(url, video_id)
  65
  66         title = self._og_search_title(webpage, default=video_id)
  67
  68         formats = []
  69         for format_ in re.findall(r'\(\s*({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
  70             mobj = re.search(r'(src\s*:\s*[^(]\(([^)]*)\)[\s,]*)', format_)
  71             if mobj is None:
  72                 continue
  73             format_ = format_.replace(mobj.group(0), '')
  74
  75             video = self._parse_json(
  76                 format_, video_id, transform_source=js_to_json, fatal=False)
  77             if not video:
  78                 continue
  79
  80             mobj = re.search(r'[\'"](?P<src>[^\'"]+)[\'"]\s*,\s*(?P<val>\d+)', mobj.group(1))
  81             if mobj is None:
  82                 continue
  83
  84             src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
  85             ext = determine_ext(src, default_ext=None)
  86             if video.get('type') == 'application/dash+xml' or ext == 'mpd':
  87                 formats.extend(self._extract_mpd_formats(
  88                     src, video_id, mpd_id='dash', fatal=False))
  89             else:
  90                 formats.append({
  91                     'url': src,
  92                     'ext': ext or 'mp4',
  93                     'width': int_or_none(video.get('width')),
  94                     'height': int_or_none(video.get('height')),
  95                     'tbr': int_or_none(video.get('bitrate')),
  96                 })
  97         self._sort_formats(formats)
  98
  99         return {
 100             'id': video_id,
 101             'url': url,
 102             'title': title,
 103             'formats': formats,
 104         }