[hypem] Move into own file
[youtube-dl] / youtube_dl / extractor / hypem.py
1 import json
2 import re
3 import time
4
5 from .common import InfoExtractor
6 from ..utils import (
7     compat_str,
8     compat_urllib_parse,
9     compat_urllib_request,
10
11     ExtractorError,
12 )
13
14
class HypemIE(InfoExtractor):
    """Information Extractor for hypem (hypem.com track pages).

    Extraction takes two HTTP requests:

    1. The track page is fetched with ``ax`` and ``ts`` query parameters;
       the JSON embedded in its ``displayList-data`` script tag supplies
       the track's ``id``, ``key``, ``artist`` and ``song``.
    2. ``/serve/source/<id>/<key>`` is requested with the cookie set by
       the first response; its JSON reply carries the media ``url``.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for ``url``.

        The dict has the keys ``id``, ``url``, ``ext`` (always ``"mp3"``),
        ``title`` and ``artist``.

        Raises ExtractorError if the URL does not match ``_VALID_URL``, if
        either JSON payload cannot be decoded, or if an expected field is
        missing from it.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        # _VALID_URL is only anchored at the start, so the matched URL may
        # already carry a query string; extend it instead of adding a
        # second "?".
        separator = "&" if "?" in url else "?"
        complete_url = url + separator + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # The Set-Cookie value is replayed verbatim on the serve request below.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        # Well-formed JSON can still lack the expected structure (no
        # "tracks" key, an empty list, a missing field); report that as an
        # extraction failure rather than leaking KeyError/IndexError.
        try:
            track = track_list[u'tracks'][0]
            key = track[u"key"]
            track_id = track[u"id"]
            artist = track[u"artist"]
            title = track[u"song"]
        except (KeyError, IndexError, TypeError):
            raise ExtractorError(u'Hypemachine track list is empty or malformed.')

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        # An empty (non-None) body is passed as the request data.
        request = compat_urllib_request.Request(serve_url, "", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        try:
            final_url = song_data[u"url"]
        except (KeyError, TypeError):
            raise ExtractorError(u'Hypemachine metadata did not contain a media URL.')

        return [{
            'id':       track_id,
            'url':      final_url,
            'ext':      "mp3",
            'title':    title,
            'artist':   artist,
        }]