[curiositystream] Add new extractor
[youtube-dl] / youtube_dl / extractor / curiositystream.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     int_or_none,
7     urlencode_postdata,
8     compat_str,
9     ExtractorError,
10 )
11
12
class CuriosityStreamBaseIE(InfoExtractor):
    """Shared API, login and metadata helpers for CuriosityStream extractors."""

    _NETRC_MACHINE = 'curiositystream'
    # Token sent as the X-Auth-Token header; stays None until a cached or
    # freshly obtained login supplies one.
    _auth_token = None
    _API_BASE_URL = 'https://api.curiositystream.com/v1/'

    def _handle_errors(self, result):
        """Raise ExtractorError if the decoded API response reports an error.

        The message is read from ``result['error']['message']``; when it is
        a dict, its values are joined with ', ' into a single string.
        """
        # ``or {}`` (rather than a .get() default) also covers a response
        # that carries an explicit JSON null under "error".
        error = (result.get('error') or {}).get('message')
        if not error:
            return
        if isinstance(error, dict):
            error = ', '.join(error.values())
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, error), expected=True)

    def _call_api(self, path, video_id):
        """Fetch ``_API_BASE_URL + path`` as JSON and return its 'data' member.

        The auth token is attached as X-Auth-Token when one is available.
        Raises ExtractorError (via _handle_errors) if the response reports
        an error.
        """
        headers = {}
        if self._auth_token:
            headers['X-Auth-Token'] = self._auth_token
        result = self._download_json(
            self._API_BASE_URL + path, video_id, headers=headers)
        self._handle_errors(result)
        return result['data']

    def _real_initialize(self):
        """Obtain an auth token from the cache or by logging in.

        Does nothing when a token is already set.  Otherwise the token is
        looked up in the downloader cache; if none is stored and login
        credentials are available, a login request is made and the
        resulting token is written back to the cache.  Without credentials
        the extractor simply proceeds unauthenticated.
        """
        if self._auth_token:
            return

        user = self._downloader.cache.load('curiositystream', 'user') or {}
        self._auth_token = user.get('auth_token')
        if self._auth_token:
            return

        email, password = self._get_login_info()
        if email is None:
            return

        result = self._download_json(
            self._API_BASE_URL + 'login', None, data=urlencode_postdata({
                'email': email,
                'password': password,
            }))
        self._handle_errors(result)
        self._auth_token = result['message']['auth_token']
        self._downloader.cache.store(
            'curiositystream', 'user', {
                'auth_token': self._auth_token,
            })

    def _extract_media_info(self, media):
        """Build a url_transparent result dict from an API media object.

        ``media`` must provide 'id', 'limelight_media_id' and 'title'
        (KeyError otherwise); every other field is optional.  The actual
        formats are delegated to the LimelightMedia extractor.
        """
        video_id = compat_str(media['id'])
        limelight_media_id = media['limelight_media_id']
        title = media['title']

        subtitles = {}
        # ``or []`` keeps an explicit JSON null from breaking the loop.
        for closed_caption in media.get('closed_captions') or []:
            sub_url = closed_caption.get('file')
            if not sub_url:
                continue
            # Prefer the language code, then the language name, else 'en'.
            lang = (
                closed_caption.get('code')
                or closed_caption.get('language')
                or 'en')
            subtitles.setdefault(lang, []).append({
                'url': sub_url,
            })

        # Use the largest thumbnail that is present.
        thumbnail = (
            media.get('image_large')
            or media.get('image_medium')
            or media.get('image_small'))

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + limelight_media_id,
            'title': title,
            'description': media.get('description'),
            'thumbnail': thumbnail,
            'duration': int_or_none(media.get('duration')),
            'tags': media.get('tags'),
            'subtitles': subtitles,
            'ie_key': 'LimelightMedia',
        }
82
83
class CuriosityStreamIE(CuriosityStreamBaseIE):
    """Extractor for a single CuriosityStream video page."""

    IE_NAME = 'curiositystream'
    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/video/2',
        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
        'info_dict': {
            'id': '2',
            'ext': 'mp4',
            'title': 'How Did You Develop The Internet?',
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
            'timestamp': 1448388615,
            'upload_date': '20151124',
        }
    }

    def _real_extract(self, url):
        """Look up the media object for the URL's id and convert it."""
        media_id = self._match_id(url)
        api_path = 'media/' + media_id
        return self._extract_media_info(self._call_api(api_path, media_id))
104
105
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
    """Extractor for a CuriosityStream collection, returned as a playlist."""

    IE_NAME = 'curiositystream:collection'
    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/collection/2',
        'info_dict': {
            'id': '2',
            'title': 'Curious Minds: The Internet',
            'description': 'How is the internet shaping our lives in the 21st Century?',
        },
        'playlist_mincount': 17,
    }

    def _real_extract(self, url):
        """Fetch the collection and return a playlist of its media entries.

        A collection whose 'media' member is missing or null yields an
        empty playlist instead of failing.
        """
        collection_id = self._match_id(url)
        collection = self._call_api(
            'collections/' + collection_id, collection_id)
        # ``or []`` also covers an explicit JSON null, which a .get()
        # default alone would pass through to the loop.
        entries = [
            self._extract_media_info(media)
            for media in collection.get('media') or []]
        return self.playlist_result(
            entries, collection_id,
            collection.get('title'), collection.get('description'))