1 from __future__ import unicode_literals
8 from .common import InfoExtractor
12 compat_urllib_parse_unquote,
# Extractor for individual Mixcloud cloudcast pages.
# NOTE(review): every line of this listing starts with its source line number
# and has lost its indentation; the numbers are not contiguous (e.g. 24 -> 28),
# so parts of the class body are not visible here.
23 class MixcloudIE(InfoExtractor):
# Capture group 1 is the first path segment (used as the uploader) and group 2
# the second (the cloudcast name). The negative lookahead rejects a second
# segment that begins with stream, uploads, favorites, listens or playlists.
24 _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
# Test fixture 1: expected metadata for dholbach/cryptkeeper.
# NOTE(review): values prefixed with 're:' or 'md5:' are presumably pattern /
# checksum expectations rather than literals - confirm against the test harness.
28 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
30 'id': 'dholbach-cryptkeeper',
32 'title': 'Cryptkeeper',
33 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
34 'uploader': 'Daniel Holbach',
35 'uploader_id': 'dholbach',
36 'thumbnail': r're:https?://.*\.jpg',
# Test fixture 2: expected metadata for gillespeterson/caribou-7-inch-vinyl-mix-chat.
40 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
42 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
44 'title': 'Caribou 7 inch Vinyl Mix & Chat',
45 'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
46 'uploader': 'Gilles Peterson Worldwide',
47 'uploader_id': 'gillespeterson',
48 'thumbnail': 're:https?://.*',
# Test fixture 3: a beta.mixcloud.com URL flagged only_matching.
52 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
53 'only_matching': True,
# Decode the base64 play-info string, then try each entry of KEYS in turn:
# every character of the decoded data is XOR-ed with the key character at
# idx % len(key) (i.e. the key repeats cyclically) and the result is handed
# to self._parse_json.
# NOTE(review): source lines 58, 61, 64, 66, 69 and 71-72 are missing from this
# listing, so the KEYS container around the two string literals, the remaining
# _parse_json arguments and the body of the `except ExtractorError` handler are
# not visible - confirm against the full file.
56 # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
57 def _decrypt_play_info(self, play_info, video_id):
# The two string literals below are presumably the members of KEYS.
59 'pleasedontdownloadourmusictheartistswontgetpaid',
60 '(function() { return new Date().toLocaleDateString(); })()'
# play_info arrives as text; it is encoded to ASCII bytes before base64 decoding.
62 play_info = base64.b64decode(play_info.encode('ascii'))
# num is a 1-based counter over the keys; its use is on lines not shown here.
63 for num, key in enumerate(KEYS, start=1):
65 return self._parse_json(
67 compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
68 for idx, ch in enumerate(play_info)]),
70 except ExtractorError:
# Extract the stream URL and metadata for one cloudcast page.
# NOTE(review): source lines 109-113 and 119 are missing from this listing, so
# the opening of the returned mapping (and any entries before 'description')
# is not visible.
74 def _real_extract(self, url):
75 mobj = re.match(self._VALID_URL, url)
76 uploader = mobj.group(1)
77 cloudcast_name = mobj.group(2)
# track_id is "<uploader>-<cloudcast_name>" passed through compat_urllib_parse_unquote.
78 track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
80 webpage = self._download_webpage(url, track_id)
# Text of the "global-message cloudcast-disabled-notice-light" banner, if any;
# default=None is passed, so presumably None when the banner is absent.
82 message = self._html_search_regex(
83 r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
84 webpage, 'error message', default=None)
# The play info is read from the page's m-play-info attribute and decoded by
# _decrypt_play_info.
86 encrypted_play_info = self._search_regex(
87 r'm-play-info="([^"]+)"', webpage, 'play info')
89 play_info = self._decrypt_play_info(encrypted_play_info, track_id)
# The site's notice is raised as an error only when the decoded play info has
# no 'stream_url'; expected=True is passed to ExtractorError.
91 if message and 'stream_url' not in play_info:
92 raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
94 song_url = play_info['stream_url']
# title is looked up without fatal=False or a default, unlike thumbnail,
# uploader and uploader id below. Note that `uploader` is rebound here from the
# URL segment to the m-owner-name value.
96 title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
97 thumbnail = self._proto_relative_url(self._html_search_regex(
98 r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
99 uploader = self._html_search_regex(
100 r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
101 uploader_id = self._search_regex(
102 r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
103 description = self._og_search_description(webpage)
# Three alternative patterns for the play count are supplied as a list; the
# match (default=None when nothing matches) is passed through str_to_int.
104 view_count = str_to_int(self._search_regex(
105 [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
106 r'/listeners/?">([0-9,.]+)</a>',
107 r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
108 webpage, 'play count', default=None))
# Visible tail of the result mapping.
114 'description': description,
115 'thumbnail': thumbnail,
116 'uploader': uploader,
117 'uploader_id': uploader_id,
118 'view_count': view_count,
# Shared base class for the user, playlist and stream extractors in this file.
# NOTE(review): source lines 123-124 are not shown; self._PAGE_SIZE, which
# MixcloudUserIE reads, is not defined on any visible line.
122 class MixcloudPlaylistBaseIE(InfoExtractor):
# Generator: for every m-url attribute that directly follows an m-play-button
# marker in `page`, yields self.url_result(...) for the value passed through
# clean_html and joined onto https://www.mixcloud.com.
# NOTE(review): source line 129 (the remainder of the url_result call) is
# missing from this listing.
125 def _find_urls_in_page(self, page):
126 for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
127 yield self.url_result(
128 compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
    """Download one AJAX listing page below https://www.mixcloud.com/<path>/.

    path: site path of the listing; it is interpolated into the request URL.
    video_id: identifier forwarded to self._download_webpage.
    page_name: human-readable listing name used in the progress and error
        notes.
    current_page: zero-based page counter; the progress note shows
        current_page + 1.
    real_page_number: value sent as the 'page' query parameter. When falsy
        (including the default None) current_page + 1 is sent instead.

    Returns whatever self._download_webpage returns for the request.
    """
    # Callers that follow "next page" links pass the page number they parsed
    # from the link; everyone else gets simple 1-based numbering.
    real_page_number = real_page_number or current_page + 1
    return self._download_webpage(
        'https://www.mixcloud.com/%s/' % path, video_id,
        note='Download %s (page %d)' % (page_name, current_page + 1),
        errnote='Unable to download %s' % page_name,
        query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
        headers={'X-Requested-With': 'XMLHttpRequest'})
# Page callback: downloads the listing page for `page` (forwarded as the path
# argument of _fetch_tracks_page) and iterates over the entries that
# _find_urls_in_page finds in the response.
# NOTE(review): source lines 142 and 144 are missing from this listing, so the
# loop body is not visible - presumably it yields each item; confirm.
140 def _tracks_page_func(self, page, video_id, page_name, current_page):
141 resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
143 for item in self._find_urls_in_page(resp):
def _get_user_description(self, page_content):
    """Look up the profile bio in a downloaded page.

    page_content: HTML text that is searched for a <div> whose class
        attribute is exactly "profile-bio".

    Returns the result of self._html_search_regex for the text between that
    opening tag and the first following </div>. fatal=False is passed,
    presumably so that a page without a bio does not abort extraction -
    confirm against InfoExtractor._html_search_regex.
    """
    return self._html_search_regex(
        r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
        page_content, 'user description', fatal=False)
# Playlist extractor for a user's uploads, favorites or listens.
# NOTE(review): the embedded source line numbers are not contiguous, so parts
# of the fixture list are not visible in this listing.
152 class MixcloudUserIE(MixcloudPlaylistBaseIE):
# 'user' is the first path segment; the optional 'type' group is one of
# uploads|favorites|listens and is None for a bare profile URL.
153 _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
154 IE_NAME = 'mixcloud:user'
# Fixture: bare profile URL; the expected id is the same as for /uploads/.
157 'url': 'http://www.mixcloud.com/dholbach/',
159 'id': 'dholbach_uploads',
160 'title': 'Daniel Holbach (uploads)',
161 'description': 'md5:def36060ac8747b3aabca54924897e47',
163 'playlist_mincount': 11,
# Fixture: explicit uploads URL.
165 'url': 'http://www.mixcloud.com/dholbach/uploads/',
167 'id': 'dholbach_uploads',
168 'title': 'Daniel Holbach (uploads)',
169 'description': 'md5:def36060ac8747b3aabca54924897e47',
171 'playlist_mincount': 11,
# Fixture: favorites, with playlist_items '1-100'.
173 'url': 'http://www.mixcloud.com/dholbach/favorites/',
175 'id': 'dholbach_favorites',
176 'title': 'Daniel Holbach (favorites)',
177 'description': 'md5:def36060ac8747b3aabca54924897e47',
180 'playlist_items': '1-100',
182 'playlist_mincount': 100,
# Fixture: listens, with playlist_items '1-100'.
184 'url': 'http://www.mixcloud.com/dholbach/listens/',
186 'id': 'dholbach_listens',
187 'title': 'Daniel Holbach (listens)',
188 'description': 'md5:def36060ac8747b3aabca54924897e47',
191 'playlist_items': '1-100',
193 'playlist_mincount': 100,
# Build the playlist result for one user list (uploads, favorites or listens).
# NOTE(review): source line 216 is missing from this listing; the call that
# wraps self._tracks_page_func and its three following arguments is not
# visible (presumably functools.partial( - confirm against the full file).
196 def _real_extract(self, url):
197 mobj = re.match(self._VALID_URL, url)
198 user_id = mobj.group('user')
199 list_type = mobj.group('type')
201 # if only a profile URL was supplied, default to download all uploads
202 if list_type is None:
203 list_type = 'uploads'
# The playlist id has the form "<user>_<list type>".
205 video_id = '%s_%s' % (user_id, list_type)
# The profile page is fetched once for the display name (og title) and bio;
# the tracks themselves come from the paged list below.
207 profile = self._download_webpage(
208 'https://www.mixcloud.com/%s/' % user_id, video_id,
209 note='Downloading user profile',
210 errnote='Unable to download user profile')
212 username = self._og_search_title(profile)
213 description = self._get_user_description(profile)
# The arguments after self._tracks_page_func line up with its page, video_id
# and page_name parameters: "<user>/<list type>", video_id and a note label.
215 entries = OnDemandPagedList(
217 self._tracks_page_func,
218 '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
219 self._PAGE_SIZE, use_cache=True)
# The playlist title is "<username> (<list type>)".
221 return self.playlist_result(
222 entries, video_id, '%s (%s)' % (username, list_type), description)
# Playlist extractor for URLs of the form /<user>/playlists/<playlist>/.
# NOTE(review): the embedded source line numbers are not contiguous, so parts
# of the fixture list are not visible in this listing.
225 class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
226 _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
227 IE_NAME = 'mixcloud:playlist'
# Fixture: the expected id is "<user>_<playlist>".
230 'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
232 'id': 'RedBullThre3style_tokyo-finalists-2015',
233 'title': 'National Champions 2015',
234 'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
236 'playlist_mincount': 16,
# Fixture: flagged only_matching.
238 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
239 'only_matching': True,
# Build the playlist result for one named playlist of a user.
# NOTE(review): source lines 249, 260 and 263-264 are missing from this
# listing: the first arguments of _download_webpage, the call wrapping
# self._tracks_page_func and the remaining OnDemandPagedList arguments are not
# visible - confirm against the full file.
242 def _real_extract(self, url):
243 mobj = re.match(self._VALID_URL, url)
244 user_id = mobj.group('user')
245 playlist_id = mobj.group('playlist')
# The playlist id has the form "<user>_<playlist>".
246 video_id = '%s_%s' % (user_id, playlist_id)
248 webpage = self._download_webpage(
250 note='Downloading playlist page',
251 errnote='Unable to download playlist page')
# Title: first the text of the <span> inside the "parent active" link; when
# that lookup yields a falsy value (default=None), the og title is used, with
# fatal=False.
253 title = self._html_search_regex(
254 r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
255 webpage, 'playlist title',
256 default=None) or self._og_search_title(webpage, fatal=False)
257 description = self._get_user_description(webpage)
# The arguments after self._tracks_page_func line up with its page, video_id
# and page_name parameters.
259 entries = OnDemandPagedList(
261 self._tracks_page_func,
262 '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
265 return self.playlist_result(entries, video_id, title, description)
# Playlist extractor for a user's stream page (/<id>/stream/).
# NOTE(review): the embedded source line numbers are not contiguous, so parts
# of the fixture are not visible in this listing.
268 class MixcloudStreamIE(MixcloudPlaylistBaseIE):
269 _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
270 IE_NAME = 'mixcloud:stream'
# Fixture: the expected description contains a newline.
273 'url': 'https://www.mixcloud.com/FirstEar/stream/',
276 'title': 'First Ear',
277 'description': 'Curators of good music\nfirstearmusic.com',
279 'playlist_mincount': 192,
# Collect the entries of a user's stream by following the m-next-page-url
# attribute from page to page, then return them as one playlist.
# NOTE(review): several source lines are missing from this listing (e.g.
# 286-289 and 300-301): `entries` and `prev_page_url` are used below but are
# initialised on lines not shown, and the body of the `if` at 299 is not
# visible (presumably it leaves the loop - confirm against the full file).
282 def _real_extract(self, url):
283 user_id = self._match_id(url)
285 webpage = self._download_webpage(url, user_id)
# Helper: adds the entries found in `page` to `entries` and returns the value
# of its m-next-page-url attribute (default=None when it is not found).
290 def _handle_page(page):
291 entries.extend(self._find_urls_in_page(page))
292 return self._search_regex(
293 r'm-next-page-url="([^"]+)"', page,
294 'next page URL', default=None)
# The first page is the already-downloaded stream page itself.
296 next_page_url = _handle_page(webpage)
# idx only counts fetched pages for the progress note; the page number that is
# actually requested is parsed from the ?page= parameter of next_page_url.
298 for idx in itertools.count(0):
# The condition is true when there is no next URL or the same URL comes back
# twice in a row.
299 if not next_page_url or prev_page_url == next_page_url:
302 prev_page_url = next_page_url
303 current_page = int(self._search_regex(
304 r'\?page=(\d+)', next_page_url, 'next page number'))
306 next_page_url = _handle_page(self._fetch_tracks_page(
307 '%s/stream' % user_id, user_id, 'stream', idx,
308 real_page_number=current_page))
# Playlist title and description come from the first page: og title and bio.
310 username = self._og_search_title(webpage)
311 description = self._get_user_description(webpage)
313 return self.playlist_result(entries, user_id, username, description)