Merge remote-tracking branch 'alphapapa/master'
[youtube-dl] / youtube_dl / extractor / mixcloud.py
import json
import re
import socket

from .common import InfoExtractor
from ..utils import (
    compat_http_client,
    compat_urllib_error,
    compat_urllib_request,
    ExtractorError,
    unified_strdate,
)
12
13
class MixcloudIE(InfoExtractor):
    """Information extractor for cloudcast pages on mixcloud.com.

    Track metadata is read from the JSON document served by
    api.mixcloud.com; the audio URL is derived from the preview URL found
    in the cloudcast's web page.
    """

    # Group 1 is the uploader part of the path, group 2 the cloudcast name.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    _TEST = {
        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
        u'file': u'dholbach-cryptkeeper.mp3',
        u'info_dict': {
            u'title': u'Cryptkeeper',
            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            u'uploader': u'Daniel Holbach',
            u'uploader_id': u'dholbach',
            u'upload_date': u'20111115',
        },
    }

    def check_urls(self, url_list):
        """Return the first URL of url_list that can be opened, or None.

        url_list may be any iterable (a generator works too). URLs are
        probed in order and probing stops at the first one that opens
        without a URL, HTTP or socket error.
        """
        for url in url_list:
            try:
                response = compat_urllib_request.urlopen(url)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
                # This candidate is not reachable; try the next one.
                continue
            # Only reachability matters here, so release the connection
            # immediately instead of leaving the response open.
            response.close()
            return url

        return None

    def _real_extract(self, url):
        """Build the info dict for the cloudcast at url.

        Raises ExtractorError when none of the candidate audio URLs can
        be opened. Helpers such as _download_webpage and _search_regex
        are defined outside this class and may raise on their own.
        """
        mobj = re.match(self._VALID_URL, url)

        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = '-'.join((uploader, cloudcast_name))
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
        webpage = self._download_webpage(url, track_id)
        json_data = self._download_webpage(api_url, track_id,
            u'Downloading cloudcast info')
        info = json.loads(json_data)

        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
        # Try the same URL with every "stream<N>" occurrence renumbered
        # from 0 to 29. The number is substituted with re.sub for each
        # candidate rather than through a '%d' template, so a literal '%'
        # in the URL (e.g. percent-encoding) cannot break the formatting.
        candidate_urls = (
            re.sub(r'(stream\d*)', 'stream%d' % i, song_url)
            for i in range(30))
        final_song_url = self.check_urls(candidate_urls)
        if final_song_url is None:
            # Fail here with a clear message rather than returning an
            # info dict whose 'url' is None.
            raise ExtractorError(u'Unable to extract track url')

        return {
            'id': track_id,
            'title': info['name'],
            'url': final_song_url,
            'ext': 'mp3',
            'description': info['description'],
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
            'uploader_id': info['user']['username'],
            'upload_date': unified_strdate(info['created_time']),
            'view_count': info['play_count'],
        }