[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / younow.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     try_get,
12 )
13
# Root of the CDN-hosted YouNow API; the stream and moment-list URLs built
# in this module are derived from it.
CDN_API_BASE = 'https://cdn.younow.com/php/api'
# Moment lookup URL template. The doubled %% survives this first
# interpolation as a single %s, which callers later fill with a moment id.
MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % (CDN_API_BASE,)
16
17
class YouNowLiveIE(InfoExtractor):
    """Extractor for a YouNow user's live broadcast page.

    Handles ``younow.com/<username>`` URLs that are claimed by neither the
    channel extractor nor the moment extractor of this module.
    """

    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.younow.com/AmandaPadeezy',
        'info_dict': {
            'id': 'AmandaPadeezy',
            'ext': 'mp4',
            'is_live': True,
            'title': 'March 26, 2017',
            'thumbnail': r're:^https?://.*\.jpg$',
            'tags': ['girls'],
            'categories': ['girls'],
            'uploader': 'AmandaPadeezy',
            'uploader_id': '6716501',
            'uploader_url': 'https://www.younow.com/AmandaPadeezy',
            'creator': 'AmandaPadeezy',
        },
        'skip': True,
    }

    @classmethod
    def suitable(cls, url):
        """Reject URLs the channel or moment extractor accepts; otherwise
        defer to the base class check."""
        if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url):
            return False
        return super(YouNowLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        """Download the broadcast info for the user in *url* and return the
        info dict describing the live stream.

        Raises ExtractorError (marked as expected) when the API response
        carries an ``errorCode`` other than 0.
        """
        username = self._match_id(url)

        info_url = (
            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
            % username)
        data = self._download_json(info_url, username)

        if data.get('errorCode') != 0:
            raise ExtractorError(data['errorMsg'], expected=True)

        # Prefer the profile name reported by the API; fall back to the
        # name taken from the URL.
        profile_name = try_get(
            data, lambda x: x['user']['profileUrlString'], compat_str)
        uploader = profile_name or username

        # The same tag list is exposed as both tags and categories.
        tags = data.get('tags')

        result = {
            'id': uploader,
            'is_live': True,
            'title': self._live_title(uploader),
            'thumbnail': data.get('awsUrl'),
            'tags': tags,
            'categories': tags,
            'uploader': uploader,
            'uploader_id': data.get('userId'),
            'uploader_url': 'https://www.younow.com/%s' % username,
            'creator': uploader,
            'view_count': int_or_none(data.get('viewers')),
            'like_count': int_or_none(data.get('likes')),
        }

        # broadcastId and userId are mandatory here: without them no stream
        # URL can be built, so a missing key raises KeyError.
        stream_url = (
            '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
            % (CDN_API_BASE, data['broadcastId'], data['userId']))
        result['formats'] = [{
            'url': stream_url,
            'ext': 'mp4',
            'protocol': 'm3u8',
        }]
        return result
78
79
80 def _extract_moment(item, fatal=True):
81     moment_id = item.get('momentId')
82     if not moment_id:
83         if not fatal:
84             return
85         raise ExtractorError('Unable to extract moment id')
86
87     moment_id = compat_str(moment_id)
88
89     title = item.get('text')
90     if not title:
91         title = 'YouNow %s' % (
92             item.get('momentType') or item.get('titleType') or 'moment')
93
94     uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
95     uploader_id = try_get(item, lambda x: x['owner']['userId'])
96     uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
97
98     entry = {
99         'extractor_key': 'YouNowMoment',
100         'id': moment_id,
101         'title': title,
102         'view_count': int_or_none(item.get('views')),
103         'like_count': int_or_none(item.get('likes')),
104         'timestamp': int_or_none(item.get('created')),
105         'creator': uploader,
106         'uploader': uploader,
107         'uploader_id': uploader_id,
108         'uploader_url': uploader_url,
109         'formats': [{
110             'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
111                    % (moment_id, moment_id),
112             'ext': 'mp4',
113             'protocol': 'm3u8_native',
114         }],
115     }
116
117     return entry
118
119
class YouNowChannelIE(InfoExtractor):
    """Extractor that turns a YouNow ``<username>/channel`` page into a
    playlist of the user's moments."""

    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
    _TEST = {
        'url': 'https://www.younow.com/its_Kateee_/channel',
        'info_dict': {
            'id': '14629760',
            'title': 'its_Kateee_ moments'
        },
        'playlist_mincount': 8,
    }

    def _collection_entries(self, username, moment_ids):
        """Yield an entry for every moment id of a collection.

        Best effort: a moment whose JSON cannot be downloaded, has no dict
        ``item`` payload, or lacks a moment id is skipped instead of
        aborting the whole playlist.
        """
        for moment_id in moment_ids:
            moment = self._download_json(
                MOMENT_URL_FORMAT % moment_id, username,
                note='Downloading %s moment JSON' % moment_id,
                fatal=False)
            if not isinstance(moment, dict):
                continue
            moment_item = moment.get('item')
            if not isinstance(moment_item, dict):
                continue
            # The download above is non-fatal, so the extraction must be
            # non-fatal too; otherwise one bad moment kills the playlist.
            entry = _extract_moment(moment_item, fatal=False)
            if entry:
                yield entry

    def _entries(self, username, channel_id):
        """Lazily yield moment entries for *channel_id*, page by page.

        Pagination is cursor based: each request asks for records created
        before the ``created`` value of the last item seen so far
        (0 for the first page).
        """
        created_before = 0
        for page_num in itertools.count(1):
            if created_before is None:
                break
            info = self._download_json(
                '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
                % (CDN_API_BASE, channel_id, created_before), username,
                note='Downloading moments page %d' % page_num)
            items = info.get('items')
            if not items or not isinstance(items, list):
                break
            page_cursor = created_before
            for item in items:
                if not isinstance(item, dict):
                    continue
                item_type = item.get('type')
                if item_type == 'moment':
                    entry = _extract_moment(item, fatal=False)
                    if entry:
                        yield entry
                elif item_type == 'collection':
                    moments = item.get('momentsIds')
                    if isinstance(moments, list):
                        for entry in self._collection_entries(
                                username, moments):
                            yield entry
                created_before = int_or_none(item.get('created'))
            # If the cursor did not move, the next request would be
            # identical to this one; stop instead of looping forever.
            if created_before == page_cursor:
                break

    def _real_extract(self, url):
        """Resolve the username in *url* to its user id and return a
        playlist result whose entries are produced lazily by _entries."""
        username = self._match_id(url)
        channel_id = compat_str(self._download_json(
            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
            % username, username, note='Downloading user information')['userId'])
        return self.playlist_result(
            self._entries(username, channel_id), channel_id,
            '%s moments' % username)
173
174
class YouNowMomentIE(InfoExtractor):
    """Extractor for a single YouNow moment URL
    (``younow.com/<username>/<moment id>...``)."""

    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
        'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
        'info_dict': {
            'id': '20712117',
            'ext': 'mp4',
            'title': 'YouNow capture',
            'view_count': int,
            'like_count': int,
            'timestamp': 1490432040,
            'upload_date': '20170325',
            'uploader': 'GABO...',
            'uploader_id': 35917228,
        },
    }

    @classmethod
    def suitable(cls, url):
        """Reject URLs the channel extractor accepts; otherwise defer to
        the base class check."""
        if YouNowChannelIE.suitable(url):
            return False
        return super(YouNowMomentIE, cls).suitable(url)

    def _real_extract(self, url):
        """Download the moment JSON for the id in *url* and convert its
        ``item`` payload into an info dict."""
        moment_id = self._match_id(url)
        response = self._download_json(
            MOMENT_URL_FORMAT % moment_id, moment_id)
        return _extract_moment(response['item'])