X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmixcloud.py;h=c2b7ed9abbd27a2b2c8e0d9d95c59e387630180a;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=84f29155841007f3088a86470040407073726067;hpb=f777397aca868bd56905d0df8cdbc026c5938e4d;p=youtube-dl diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 84f291558..c2b7ed9ab 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,12 +1,9 @@ from __future__ import unicode_literals import re -import itertools from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, HEADRequest, @@ -46,41 +43,33 @@ class MixcloudIE(InfoExtractor): }, }] - def _get_url(self, track_id, template_url, server_number): - boundaries = (1, 30) - for nr in server_numbers(server_number, boundaries): - url = template_url % nr - try: - # We only want to know if the request succeed - # don't download the whole file - self._request_webpage( - HEADRequest(url), track_id, - 'Checking URL %d/%d ...' % (nr, boundaries[-1])) - return url - except ExtractorError: - pass - return None + def _check_url(self, url, track_id, ext): + try: + # We only want to know if the request succeed + # don't download the whole file + self._request_webpage( + HEADRequest(url), track_id, + 'Trying %s URL' % ext) + return True + except ExtractorError: + return False def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group(1) cloudcast_name = mobj.group(2) - track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) + track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name))) webpage = self._download_webpage(url, track_id) preview_url = self._search_regex( r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') - song_url = preview_url.replace('/previews/', '/c/originals/') - server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number')) - template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) - final_song_url = self._get_url(track_id, template_url, server_number) - if final_song_url is None: - self.to_screen('Trying with m4a extension') - template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') - final_song_url = self._get_url(track_id, template_url, server_number) - if final_song_url is None: - raise ExtractorError('Unable to extract track url') + song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url) + song_url = song_url.replace('/previews/', '/c/originals/') + if not self._check_url(song_url, track_id, 'mp3'): + song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') + if not self._check_url(song_url, track_id, 'm4a'): + raise ExtractorError('Unable to extract track url') PREFIX = ( r'm-play-on-spacebar[^>]+' @@ -107,7 +96,7 @@ class MixcloudIE(InfoExtractor): return { 'id': track_id, 'title': title, - 'url': final_song_url, + 'url': song_url, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -115,35 +104,3 @@ class MixcloudIE(InfoExtractor): 'view_count': view_count, 'like_count': like_count, } - - -def server_numbers(first, boundaries): - """ Server numbers to try in descending order of probable availability. - Starting from first (i.e. the number of the server hosting the preview file) - and going further and further up to the higher boundary and down to the - lower one in an alternating fashion. Namely: - - server_numbers(2, (1, 5)) - - # Where the preview server is 2, min number is 1 and max is 5. - # Yields: 2, 3, 1, 4, 5 - - Why not random numbers or increasing sequences? Since from what I've seen, - full length files seem to be hosted on servers whose number is closer to - that of the preview; to be confirmed. - """ - zip_longest = getattr(itertools, 'zip_longest', None) - if zip_longest is None: - # python 2.x - zip_longest = itertools.izip_longest - - if len(boundaries) != 2: - raise ValueError("boundaries should be a two-element tuple") - min, max = boundaries - highs = range(first + 1, max + 1) - lows = range(first - 1, min - 1, -1) - rest = filter( - None, itertools.chain.from_iterable(zip_longest(highs, lows))) - yield first - for n in rest: - yield n