Merge branch 'master' of github.com:rg3/youtube-dl
[youtube-dl] / youtube_dl / extractor / ooyala.py
1 from __future__ import unicode_literals
2 import re
3 import json
4 import base64
5
6 from .common import InfoExtractor
7 from ..utils import (
8     unescapeHTML,
9     ExtractorError,
10     determine_ext,
11     int_or_none,
12 )
13
14
class OoyalaIE(InfoExtractor):
    """Information extractor for Ooyala-hosted videos.

    Matches either an ``ooyala:<embed code>`` pseudo-URL or an
    ``*.ooyala.com`` URL that carries the embed code in an ``embedCode=``
    or ``ec=`` query parameter.
    """

    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

    _TESTS = [
        {
            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
            'info_dict': {
                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
                'ext': 'mp4',
                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
            },
        }, {
            # Only available for ipad
            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
            'info_dict': {
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
                'title': 'Simulation Overview - Levels of Simulation',
                'description': '',
            },
        },
        {
            # Information available only through SAS api
            # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
            'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
            'md5': 'a84001441b35ea492bc03736e59e7935',
            'info_dict': {
                'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
                'ext': 'mp4',
                'title': 'Ooyala video',
            }
        }
    ]

    @staticmethod
    def _url_for_embed_code(embed_code):
        """Return the ``player.js`` URL for the given embed code."""
        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code

    @classmethod
    def _build_url_result(cls, embed_code):
        """Return a url_result for *embed_code* that is routed to this extractor."""
        return cls.url_result(cls._url_for_embed_code(embed_code),
                              ie=cls.ie_key())

    def _extract_result(self, info, more_info):
        """Build the info dict for a single video.

        *info* is one entry of the decoded ``streams`` array and supplies
        ``embedCode``, ``title`` and the media URL (``ipad_url`` is preferred
        over ``url`` when present and non-empty). *more_info* is the matching
        metadata object; its ``description`` and ``promo`` (thumbnail) entries
        are treated as optional.
        """
        embed_code = info['embedCode']
        video_url = info.get('ipad_url') or info['url']

        if determine_ext(video_url) == 'm3u8':
            formats = self._extract_m3u8_formats(video_url, embed_code, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': 'mp4',
            }]

        # Description and thumbnail are supplementary: a missing key should
        # not abort the extraction of an otherwise playable video.
        description = more_info.get('description')
        if description is not None:
            description = unescapeHTML(description)

        return {
            'id': embed_code,
            'title': unescapeHTML(info['title']),
            'formats': formats,
            'description': description,
            'thumbnail': more_info.get('promo'),
        }

    def _extract_from_sas_api(self, embed_code):
        """Fallback extraction through the SAS authorization API.

        Returns an info dict when the API lists at least one stream.
        Raises ExtractorError with the API's own message (marked as expected)
        when the embed code is not authorized, and a generic ExtractorError
        when it is authorized but no stream is listed.
        """
        auth_data = self._download_json(
            'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embed_code, embed_code),
            embed_code)

        cur_auth_data = auth_data['authorization_data'][embed_code]

        # Tolerate a missing or empty 'streams' entry so that the
        # authorization check below stays reachable instead of being masked
        # by a KeyError.
        formats = [{
            # The stream URL is delivered base64-encoded.
            'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
            'ext': stream.get('delivery_type'),
            'format': stream.get('video_codec'),
            'format_id': stream.get('profile'),
            'width': int_or_none(stream.get('width')),
            'height': int_or_none(stream.get('height')),
            'abr': int_or_none(stream.get('audio_bitrate')),
            'vbr': int_or_none(stream.get('video_bitrate')),
        } for stream in cur_auth_data.get('streams') or []]

        if formats:
            return {
                'id': embed_code,
                'formats': formats,
                'title': 'Ooyala video',
            }

        if not cur_auth_data['authorized']:
            raise ExtractorError(cur_auth_data['message'], expected=True)

        raise ExtractorError('Unable to extract info')

    def _real_extract(self, url):
        """Extract a single video or a playlist for the embed code in *url*.

        The mobile player JS is tried first, once per device name found in
        the player script; if none of them exposes stream info, the SAS
        authorization API is used as a fallback.
        """
        mobj = re.match(self._VALID_URL, url)
        embed_code = mobj.group('id')
        player = self._download_webpage(
            self._url_for_embed_code(embed_code), embed_code)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
        # Looks like some videos are only available for particular devices
        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
        # is only available for ipad)
        # Working around with fetching URLs for all the devices found starting with 'unknown'
        # until we succeed or eventually fail for each device.
        #
        # Build the list seeded with 'unknown' and skip repeats: this keeps
        # 'unknown' first even when the player never mentions it (list.remove
        # would raise ValueError) and avoids downloading the same URL twice.
        devices = ['unknown']
        for device in re.findall(r'device\s*=\s*"([^"]+)";', player):
            if device not in devices:
                devices.append(device)

        for device in devices:
            mobile_player = self._download_webpage(
                '%s&device=%s' % (mobile_url, device), embed_code,
                'Downloading mobile player JS for %s device' % device)
            # default=None makes a miss non-fatal so the next device is tried.
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
                mobile_player, 'info', fatal=False, default=None)
            if videos_info:
                break

        if not videos_info:
            # Either returns a result or raises; never falls through.
            return self._extract_from_sas_api(embed_code)

        # Both payloads sit inside a JS string literal, so their double
        # quotes are backslash-escaped; undo that before JSON decoding.
        videos_info = videos_info.replace('\\"', '"')
        videos_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
        videos_more_info = json.loads(videos_more_info)

        if videos_more_info.get('lineup'):
            # A lineup means several videos: pair each stream entry with its
            # lineup metadata and return them as a playlist.
            videos = [
                self._extract_result(info, more_info)
                for info, more_info in zip(videos_info, videos_more_info['lineup'])
            ]
            return {
                '_type': 'playlist',
                'id': embed_code,
                'title': unescapeHTML(videos_more_info['title']),
                'entries': videos,
            }

        return self._extract_result(videos_info[0], videos_more_info)