[YoutubeDL] write raw subtitle files
[youtube-dl] / youtube_dl / extractor / streamango.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     determine_ext,
9     int_or_none,
10     js_to_json,
11 )
12
13
class StreamangoIE(InfoExtractor):
    """Information extractor for streamango.com /f/ and /embed/ pages."""

    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
        'md5': 'e992787515a182f55e38fc97588d802a',
        'info_dict': {
            'id': 'clapasobsptpkdfe',
            'ext': 'mp4',
            'title': '20170315_150006.mp4',
        }
    }, {
        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the streamango page at ``url``.

        The downloaded page is scanned for JavaScript object literals that
        carry a ``src`` property. Each usable literal contributes either the
        formats of a DASH manifest or a single direct format entry.

        Returns a dict with the keys ``id``, ``title`` and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)

        formats = []
        # Every "{ ... src: ... }" literal found in the page is a candidate
        # media source description.
        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
            # The literal is JavaScript, not strict JSON, hence js_to_json;
            # fatal=False so an unparsable literal is skipped, not fatal.
            video = self._parse_json(
                format_, video_id, transform_source=js_to_json, fatal=False)
            if not video:
                continue
            src = video.get('src')
            if not src:
                continue
            ext = determine_ext(src, default_ext=None)
            # A source is treated as DASH when either its declared MIME type
            # or its file extension says so.
            if video.get('type') == 'application/dash+xml' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': src,
                    # Fall back to mp4 when the URL has no known extension.
                    'ext': ext or 'mp4',
                    'width': int_or_none(video.get('width')),
                    'height': int_or_none(video.get('height')),
                    'tbr': int_or_none(video.get('bitrate')),
                })
        self._sort_formats(formats)

        # No top-level 'url' key: the page URL is not a media URL, and the
        # downloadable URLs are already carried by the 'formats' entries.
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
63             'formats': formats,
64         }