[9c9media] extract mpd formats and subtitles
[youtube-dl] / youtube_dl / extractor / ninecninemedia.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     parse_iso8601,
10     float_or_none,
11     ExtractorError,
12     int_or_none,
13 )
14
15
class NineCNineMediaIE(InfoExtractor):
    """Extractor for URLs of the form ``9c9media:<destination_code>:<id>``.

    Metadata comes from the content API described by ``_API_BASE_TEMPLATE``;
    formats are gathered from the HLS (m3u8), HDS (f4m) and DASH (mpd)
    manifests of the content's single content package.
    """

    IE_NAME = '9c9media'
    _GEO_COUNTRIES = ['CA']
    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
    # Filled with (destination_code, content_id).
    _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'

    def _real_extract(self, url):
        """Build the info dict for a ``9c9media:<destination_code>:<id>`` URL.

        Raises:
            ExtractorError: if the content has no content package or more
                than one, or if the package reports a security constraint
                type (reported to the user as DRM protection).
        """
        destination_code, content_id = re.match(self._VALID_URL, url).groups()
        api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
        content = self._download_json(api_base_url, content_id, query={
            '$include': '[Media,Season,ContentPackages]',
        })
        title = content['Name']

        # Exactly one content package is supported; fail with a clear
        # message rather than an IndexError when the list is empty.
        content_packages = content['ContentPackages']
        if not content_packages:
            raise ExtractorError('no content packages')
        if len(content_packages) > 1:
            raise ExtractorError('multiple content packages')
        package_id = content_packages[0]['Id']
        content_package_url = api_base_url + 'contentpackages/%s/' % package_id
        content_package = self._download_json(
            content_package_url, content_id, query={
                '$include': '[HasClosedCaptions]',
            })

        # ``.get(key, {})`` only covers a missing key; a JSON null would
        # still come back as None, so fall back with ``or {}`` instead.
        constraints = content_package.get('Constraints') or {}
        security = constraints.get('Security') or {}
        if security.get('Type'):
            raise ExtractorError('This video is DRM protected.', expected=True)

        # Every manifest flavour lives at <package url>manifest.<ext>.
        manifest_base_url = content_package_url + 'manifest.'
        formats = []
        # Each manifest is optional (fatal=False): a missing one must not
        # abort extraction of the others.
        formats.extend(self._extract_m3u8_formats(
            manifest_base_url + 'm3u8', content_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False))
        formats.extend(self._extract_f4m_formats(
            manifest_base_url + 'f4m', content_id,
            f4m_id='hds', fatal=False))
        formats.extend(self._extract_mpd_formats(
            manifest_base_url + 'mpd', content_id,
            mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        thumbnails = []
        for image in content.get('Images') or []:
            image_url = image.get('Url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int_or_none(image.get('Width')),
                'height': int_or_none(image.get('Height')),
            })

        # Tags and Genres are both lists of objects carrying a 'Name';
        # entries without a name are skipped.
        tags, categories = [], []
        for source_name, container in (('Tags', tags), ('Genres', categories)):
            for e in content.get(source_name) or []:
                e_name = e.get('Name')
                if not e_name:
                    continue
                container.append(e_name)

        season = content.get('Season') or {}
        media = content.get('Media') or {}

        info = {
            'id': content_id,
            'title': title,
            'description': content.get('Desc') or content.get('ShortDesc'),
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'episode_number': int_or_none(content.get('Episode')),
            'season': season.get('Name'),
            # Coerced like episode_number so both numbers share one type.
            'season_number': int_or_none(season.get('Number')),
            'season_id': season.get('Id'),
            'series': media.get('Name'),
            'tags': tags,
            'categories': categories,
            'duration': float_or_none(content_package.get('Duration')),
            'formats': formats,
            # Previously collected but never returned.
            'thumbnails': thumbnails,
        }

        # Captions are served next to the manifests, in both WebVTT and SRT.
        if content_package.get('HasClosedCaptions'):
            info['subtitles'] = {
                'en': [{
                    'url': manifest_base_url + 'vtt',
                    'ext': 'vtt',
                }, {
                    'url': manifest_base_url + 'srt',
                    'ext': 'srt',
                }]
            }

        return info