X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fmixcloud.py;h=f6360cce6767c7281a661f6e1cf18bbfe8924fe9;hb=0b4a8eb3ac823c26b037eb368c114ce6d976c5c3;hp=5f64e7bd0d98b74aea2a4350a51f057b4d0280ba;hpb=b65c3e77e8fa893a41eb058102422f42276ebc11;p=youtube-dl diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 5f64e7bd0..f6360cce6 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,106 +1,334 @@ from __future__ import unicode_literals +import base64 +import functools +import itertools import re from .common import InfoExtractor +from ..compat import ( + compat_chr, + compat_ord, + compat_str, + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import ( - compat_urllib_parse, + clean_html, ExtractorError, - int_or_none, - parse_iso8601, + OnDemandPagedList, + str_to_int, ) class MixcloudIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)' + _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)' IE_NAME = 'mixcloud' - _TEST = { + _TESTS = [{ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/', 'info_dict': { 'id': 'dholbach-cryptkeeper', - 'ext': 'mp3', + 'ext': 'm4a', 'title': 'Cryptkeeper', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'uploader': 'Daniel Holbach', 'uploader_id': 'dholbach', - 'upload_date': '20111115', - 'timestamp': 1321359578, - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, - 'like_count': int, }, - } + }, { + 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', + 'info_dict': { + 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', + 'ext': 'mp3', + 'title': 'Caribou 7 inch Vinyl Mix & Chat', + 'description': 'md5:2b8aec6adce69f9d41724647c65875e8', + 'uploader': 'Gilles Peterson Worldwide', + 'uploader_id': 'gillespeterson', + 'thumbnail': 're:https?://.*', + 'view_count': int, + }, + }, { + 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', + 'only_matching': True, + }] - def check_urls(self, url_list): - """Returns 1st active url from list""" - for url in url_list: + _keys = [ + 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', + 'pleasedontdownloadourmusictheartistswontgetpaid', + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()' + ] + _current_key = None + + # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js + def _decrypt_play_info(self, play_info, video_id): + play_info = base64.b64decode(play_info.encode('ascii')) + for num, key in enumerate(self._keys, start=1): try: - # We only want to know if the request succeed - # don't download the whole file - self._request_webpage(url, None, False) - return url + return self._parse_json( + ''.join([ + compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)])) + for idx, ch in enumerate(play_info)]), + video_id) except ExtractorError: - url = None - - return None - - def _get_url(self, template_url): - return self.check_urls(template_url % i for i in range(30)) + if num == len(self._keys): + raise def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group(1) cloudcast_name = mobj.group(2) - track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) + track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name))) webpage = self._download_webpage(url, track_id) - preview_url = self._search_regex( - r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') - song_url = preview_url.replace('/previews/', '/c/originals/') - template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) - final_song_url = self._get_url(template_url) - if final_song_url is None: - self.to_screen('Trying with m4a extension') - template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') - final_song_url = self._get_url(template_url) - if final_song_url is None: - raise ExtractorError('Unable to extract track url') - - PREFIX = ( - r'