Merge pull request #12909 from remitamine/raw-sub
[youtube-dl] / youtube_dl / extractor / streamango.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     determine_ext,
9     int_or_none,
10     js_to_json,
11 )
12
13
class StreamangoIE(InfoExtractor):
    """Information extractor for videos hosted on streamango.com.

    Both the regular (``/f/<id>``) and the embed (``/embed/<id>``) page URLs
    are accepted; the path component following the marker is used as the
    video id.
    """

    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
        'md5': 'e992787515a182f55e38fc97588d802a',
        'info_dict': {
            'id': 'clapasobsptpkdfe',
            'ext': 'mp4',
            'title': '20170315_150006.mp4',
        }
    }, {
        # no og:title
        'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
        'info_dict': {
            'id': 'foqebrpftarclpob',
            'ext': 'mp4',
            'title': 'foqebrpftarclpob',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict for it.

        The title is taken from the page's ``og:title`` metadata and falls
        back to the video id when that is missing.  Formats are collected
        from every JavaScript object literal in the page that carries a
        ``src`` property: DASH manifests are expanded into their individual
        formats, anything else becomes a single direct format.

        Returns a dict with the ``id``, ``title`` and ``formats`` keys.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Embed pages may lack og:title, hence the video id as default.
        title = self._og_search_title(webpage, default=video_id)

        formats = []
        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
            # The matches are JavaScript object literals rather than strict
            # JSON, so they are converted before parsing.  A literal that
            # fails to parse is skipped instead of aborting the extraction.
            video = self._parse_json(
                format_, video_id, transform_source=js_to_json, fatal=False)
            if not video:
                continue
            src = video.get('src')
            if not src:
                continue
            ext = determine_ext(src, default_ext=None)
            if video.get('type') == 'application/dash+xml' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': src,
                    # Assume mp4 when the source URL has no usable extension.
                    'ext': ext or 'mp4',
                    'width': int_or_none(video.get('width')),
                    'height': int_or_none(video.get('height')),
                    'tbr': int_or_none(video.get('bitrate')),
                })
        self._sort_formats(formats)

        # No top-level 'url' here: the page URL is not a media URL, and the
        # actual media URLs are carried by the entries of 'formats'.
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }