Finished audiomack extractor
[youtube-dl] / youtube_dl / extractor / audiomack.py
1 # Xavier Beynon 2014
2 # coding: utf-8
3 from __future__ import unicode_literals
4
5 from .common import InfoExtractor
6 from .soundcloud import SoundcloudIE
7 import datetime
8 import time
9
10
class AudiomackIE(InfoExtractor):
    """Extractor for single songs published under audiomack.com/song/.

    Audiomack either hosts the audio itself or wraps a Soundcloud track in
    its own branded page. The real media URL is obtained from Audiomack's
    JSON API; when that URL is a Soundcloud URL the work is handed to the
    Soundcloud extractor, otherwise artist/title metadata is scraped from
    the song page.
    """

    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
    IE_NAME = 'audiomack'
    _TESTS = [
        # hosted on audiomack
        {
            'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
            'file': 'Roosh Williams - Extraordinary.mp3',
            'info_dict':
            {
                'ext': 'mp3',
                'title': 'Roosh Williams - Extraordinary'
            }
        },
        # hosted on soundcloud via audiomack
        {
            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
            'file': '172419696.mp3',
            'info_dict':
            {
                'ext': 'mp3',
                'title': 'Young Thug ft Lil Wayne - Take Kare',
                "upload_date": "20141016",
                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
                "uploader": "Young Thug World"
            }
        }
    ]

    def _real_extract(self, url):
        """Resolve an Audiomack song page to a downloadable track.

        Args:
            url: Song page URL; it must contain the "/song/" path segment
                (guaranteed for URLs matching ``_VALID_URL``).

        Returns:
            The info dict produced by the Soundcloud extractor when the
            track is hosted on Soundcloud, otherwise a dict with ``id``,
            ``title``, ``url`` and ``ext`` keys for the Audiomack-hosted
            file.

        Raises:
            Exception: if the API response does not contain a "url" key.
        """
        # The song id is whatever follows "/song/" in the page URL, usually
        # "<uploader>/<title>". Skip the whole marker (including its
        # trailing slash) so the id does not start with "/".
        marker = "/song/"
        song_id = url[url.index(marker) + len(marker):]
        # A query string or fragment on the page URL is not part of the id
        # and would otherwise corrupt the API URL built below.
        song_id = song_id.split("?", 1)[0].split("#", 1)[0]

        # Call the API, which returns a JSON document with the real media
        # URL inside. The "_" parameter is the current Unix timestamp.
        timestamp = int(time.time())
        api_url = "http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
            song_id, timestamp)
        api_response = self._download_json(api_url, song_id)
        # Look for the literal "url" key (not the page URL variable).
        if "url" not in api_response:
            raise Exception("Unable to deduce api url of song")
        real_url = api_response["url"]

        # Audiomack wraps a lot of Soundcloud tracks in their branded
        # wrapper - if so, pass the work off to the Soundcloud extractor.
        if SoundcloudIE.suitable(real_url):
            soundcloud_ie = SoundcloudIE(downloader=self._downloader)
            return soundcloud_ie._real_extract(real_url)

        # Hosted on Audiomack itself: pull the metadata out of the page.
        # Non-greedy groups keep each match inside its own tag even when
        # several closing tags share one line.
        page = self._download_webpage(url, song_id)
        artist = self._html_search_regex(
            r'<span class="artist">(.*?)</span>', page, "artist")
        song_title = self._html_search_regex(
            r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
            page, "title")
        title = artist + " - " + song_title
        return {
            'id': title,  # the URL-derived id is not useful in the song name
            'title': title,
            'url': real_url,
            'ext': 'mp3'
        }