[9c9media] fix multiple stacks extraction and extract more metadata (#10016)
[youtube-dl] / youtube_dl / extractor / ninecninemedia.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     parse_iso8601,
10     float_or_none,
11     ExtractorError,
12     int_or_none,
13 )
14
15
class NineCNineMediaBaseIE(InfoExtractor):
    """Common base for the 9c9media extractors; holds the API URL template."""

    # Filled with (destination code, content id). The result ends with '/'
    # so sub-resource paths can be appended directly.
    _API_BASE_TEMPLATE = (
        'http://capi.9c9media.com/destinations/%s'
        '/platforms/desktop/contents/%s/')
18
19
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
    """Extractor for a single stack addressed by an internal URL.

    Handles ``9c9media:stack:<destination>:<content>:<package>:<stack>``
    URLs and returns only the stack id and its formats.
    """
    IE_NAME = '9c9media:stack'
    _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'

    def _real_extract(self, url):
        """Collect HLS, HDS and progressive MP4 formats for one stack."""
        mobj = re.match(self._VALID_URL, url)
        destination_code, content_id, package_id, stack_id = mobj.group(
            'destination_code', 'content_id', 'content_package', 'id')

        # Every manifest flavour shares this prefix; only the suffix differs.
        manifest_prefix = (
            self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
        ) % (destination_code, content_id, package_id, stack_id)

        def manifest_url(suffix):
            return manifest_prefix + suffix

        formats = []
        # All three sources are optional (fatal=False): a failing one is
        # skipped rather than aborting the extraction.
        formats += self._extract_m3u8_formats(
            manifest_url('m3u8'), stack_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        formats += self._extract_f4m_formats(
            manifest_url('f4m'), stack_id,
            f4m_id='hds', fatal=False)

        # The 'pd' endpoint's response body is used as the direct media URL.
        progressive_url = self._download_webpage(
            manifest_url('pd'), stack_id, fatal=False)
        if progressive_url:
            formats.append({
                'url': progressive_url,
                'format_id': 'mp4',
            })

        self._sort_formats(formats)

        result = {
            'id': stack_id,
            'formats': formats,
        }
        return result
48
49
class NineCNineMediaIE(NineCNineMediaBaseIE):
    """Extractor for ``9c9media:<destination_code>:<content_id>`` URLs.

    Fetches the content item and its single content package from the API and
    returns a ``multi_video`` result with one ``url_transparent`` entry per
    stack, each delegated to the ``NineCNineMediaStack`` extractor.
    """
    IE_NAME = '9c9media'
    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'

    def _real_extract(self, url):
        """Build the multi-video result for a content item.

        Raises ExtractorError when the content has no content package, has
        more than one content package, or is flagged as Adobe DRM protected.
        """
        destination_code, content_id = re.match(self._VALID_URL, url).groups()
        api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
        content = self._download_json(api_base_url, content_id, query={
            '$include': '[Media,Season,ContentPackages]',
        })
        title = content['Name']

        content_packages = content['ContentPackages']
        if not content_packages:
            # Report a proper extractor error instead of an IndexError below.
            raise ExtractorError('no content packages')
        if len(content_packages) > 1:
            raise ExtractorError('multiple content packages')
        package_id = content_packages[0]['Id']
        content_package_url = api_base_url + 'contentpackages/%s/' % package_id
        content_package = self._download_json(content_package_url, content_id)

        # `.get(key, {})` only defaults when the key is absent; `or {}` also
        # covers keys that are present with a null value.
        constraints = content_package.get('Constraints') or {}
        security = constraints.get('Security') or {}
        if security.get('Type') == 'adobe-drm':
            raise ExtractorError('This video is DRM protected.', expected=True)

        stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
        multistacks = len(stacks) > 1

        thumbnails = []
        for image in content.get('Images') or []:
            image_url = image.get('Url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int_or_none(image.get('Width')),
                'height': int_or_none(image.get('Height')),
            })

        tags, categories = [], []
        for source_name, container in (('Tags', tags), ('Genres', categories)):
            for e in content.get(source_name) or []:
                e_name = e.get('Name')
                if not e_name:
                    continue
                container.append(e_name)

        description = content.get('Desc') or content.get('ShortDesc')
        season = content.get('Season') or {}
        media = content.get('Media') or {}
        # Metadata shared by every stack entry of this content item.
        base_info = {
            'description': description,
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'episode_number': int_or_none(content.get('Episode')),
            'season': season.get('Name'),
            'season_number': int_or_none(season.get('Number')),
            'season_id': season.get('Id'),
            'series': media.get('Name'),
            'tags': tags,
            'categories': categories,
            # Previously collected but never attached to the result.
            'thumbnails': thumbnails,
        }

        entries = []
        for stack in stacks:
            stack_id = compat_str(stack['Id'])
            entry = {
                '_type': 'url_transparent',
                'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
                'id': stack_id,
                # Disambiguate the parts only when there is more than one.
                'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
                'duration': float_or_none(stack.get('Duration')),
                'ie_key': 'NineCNineMediaStack',
            }
            entry.update(base_info)
            entries.append(entry)

        return {
            '_type': 'multi_video',
            'id': content_id,
            'title': title,
            'description': description,
            'entries': entries,
        }