ab2b591036a6371fc31e7437c368858dfb708efa
[youtube-dl] / youtube_dl / extractor / hypem.py
1 import json
2 import re
3 import time
4
5 from .common import InfoExtractor
6 from ..utils import (
7     compat_str,
8     compat_urllib_parse,
9     compat_urllib_request,
10
11     ExtractorError,
12 )
13
14
class HypemIE(InfoExtractor):
    """Information Extractor for hypem track pages (hypem.com/track/<id>/<slug>)."""
    # Group 1 is the track id taken from the URL; group 2 is the trailing slug.
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
    _TEST = {
        u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
        u'file': u'1v6ga.mp3',
        u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
        u'info_dict': {
            u"title": u"Tame"
        }
    }

    def _real_extract(self, url):
        """Resolve a hypem track page URL to its downloadable audio URL.

        The track page is requested with ``ax`` and ``ts`` query parameters,
        the JSON embedded in its ``displayList-data`` script tag is parsed to
        obtain the first track's key/id/artist/song, and the
        ``/serve/source/<id>/<key>`` endpoint is then queried (re-sending the
        ``Set-Cookie`` value received with the page) to obtain the media URL.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title`` and ``artist``.

        Raises ExtractorError if the URL does not match ``_VALID_URL``, if
        either response is not valid JSON, or if the expected fields are
        absent from the parsed data.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        # _VALID_URL does not exclude a query string, so extend an existing
        # one with '&' instead of appending a second '?'.
        separator = "&" if "?" in url else "?"
        complete_url = url + separator + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # The raw Set-Cookie value is sent back on the follow-up request below.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        # Well-formed JSON can still lack the fields we need (e.g. an empty
        # track list); report that as an extraction failure rather than
        # letting a bare KeyError/IndexError/TypeError escape.
        try:
            track = track_list[u'tracks'][0]
            key = track[u"key"]
            track_id = track[u"id"]
            artist = track[u"artist"]
            title = track[u"song"]
        except (KeyError, IndexError, TypeError):
            raise ExtractorError(u'Hypemachine track list did not contain the expected track data.')

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        # A non-None body turns this into a POST. It must be a byte string:
        # urllib on Python 3 rejects a text (str) body, while b"" is the same
        # object type as "" on Python 2.
        request = compat_urllib_request.Request(serve_url, b"", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        try:
            final_url = song_data[u"url"]
        except (KeyError, TypeError):
            raise ExtractorError(u'Hypemachine metadata did not contain a media URL.')

        return [{
            'id':       track_id,
            'url':      final_url,
            'ext':      "mp3",
            'title':    title,
            'artist':   artist,
        }]