[YandexMusic] Add new extractor
[youtube-dl] / youtube_dl / extractor / yamusic.py
1 # coding=utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import hashlib
6 import time
7
8 from .common import InfoExtractor
9
class YandexMusicAlbumIE(InfoExtractor):
    """Extractor for music.yandex.ru album pages; returns the album as a playlist.

    The helper methods defined here are also inherited by the extractors
    in this module that subclass this class.
    """

    # Host dots are escaped so they only match a literal '.'.
    _VALID_URL = r'http://music\.yandex\.ru/album/(?P<id>\d+)'

    def _get_track_url(self, storage_dir, track_id):
        """Build the direct MP3 URL for one track.

        storage_dir -- the track's 'storageDir' value from the page data;
                       it must contain a '.', because the part after the
                       first dot is sent back as the ``track-id`` parameter.
        track_id    -- passed to _download_json as the id of the item
                       being processed.

        The JSON answer is expected to provide the keys 'host', 'path',
        's' and 'ts'; a missing key raises KeyError.
        """
        data = self._download_json(
            'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?requestId=2&nc=%d&action=getTrackSrc&p=download-info/%s/2.mp3' % (time.time(), storage_dir),
            track_id)

        # hashlib only accepts bytes. This module uses unicode_literals, so
        # the concatenation below is text and must be encoded explicitly
        # (passing text raises TypeError on Python 3).
        sign_source = 'XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']
        signature = hashlib.md5(sign_source.encode('utf-8')).hexdigest()
        storage = storage_dir.split('.')

        return 'http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default' % (data['host'], signature, data['ts'] + data['path'], storage[1])

    def _get_album_id_and_data(self, url):
        """Return ``(id, page_data)`` for *url*.

        The id is the 'id' group of the class's own _VALID_URL, so each
        subclass gets the id its pattern defines. page_data is the
        'pageData' member of the JSON object assigned to ``var Mu`` in the
        downloaded page.
        """
        page_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, page_id)
        mu = self._parse_json(
            self._search_regex(
                r'var\s+Mu\s+=\s+(.+?);\s+<\/script>', webpage, 'player'),
            page_id)
        return page_id, mu['pageData']

    def _get_track_info(self, track):
        """Convert one track dict from the page data into an info dict."""
        return {
            'id': track['id'],
            'ext': 'mp3',
            'url': self._get_track_url(track['storageDir'], track['id']),
            'title': track['artists'][0]['name'] + ' - ' + track['title'],
        }

    def _real_extract(self, url):
        """Return a playlist result containing every track of the album."""
        album_id, album = self._get_album_id_and_data(url)

        # 'volumes' is a list of track lists; walk all of them so that
        # albums with more than one volume are not truncated to the first.
        entries = [
            self._get_track_info(track)
            for volume in album['volumes']
            for track in volume
        ]

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': album_id,
            'title': album['title'],
        }
54
class YandexMusicPlaylistIE(YandexMusicAlbumIE):
    """Extractor for user playlists on music.yandex.ru."""

    _VALID_URL = r'http://music.yandex.ru/users/(?P<user_name>[^/]+)/playlists/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a playlist result built from the page's 'playlist' data."""
        playlist_id, page_data = self._get_album_id_and_data(url)
        playlist = page_data['playlist']

        def make_entry(item):
            # One info dict per track; the media URL is resolved through
            # the inherited _get_track_url helper.
            return {
                'id': item['id'],
                'ext': 'mp3',
                'url': self._get_track_url(item['storageDir'], item['id']),
                'title': item['artists'][0]['name'] + ' - ' + item['title'],
            }

        tracks = [make_entry(item) for item in playlist['tracks']]

        result = {'_type': 'playlist'}
        result['entries'] = tracks
        result['id'] = playlist_id
        result['title'] = playlist['title']
        return result
78
class YandexMusicTrackIE(YandexMusicAlbumIE):
    """Extractor for a single track addressed through its album page."""

    # Host dots are escaped so they only match a literal '.'.
    _VALID_URL = r'http://music\.yandex\.ru/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
    _TEST = {
        'url': 'http://music.yandex.ru/album/540508/track/4878838',
        'info_dict': {
            'id': '4878838',
            'ext': 'mp3',
            'title': 'Carlo Ambrosio - Gypsy Eyes 1',
        }
    }

    def _real_extract(self, url):
        """Return the info dict of the track whose id appears in *url*.

        Raises ValueError when the page data contains no track with that
        id; previously this case failed on an unbound local variable.
        """
        # The 'id' group of this class's _VALID_URL is the track id.
        track_id, album = self._get_album_id_and_data(url)

        # Search every volume, not just the first, so tracks located on
        # later volumes are found as well.
        for volume in album['volumes']:
            for track in volume:
                # Compare as text: the id taken from the URL is a string,
                # while the page data may hold a string or a number.
                if ('%s' % track['id']) == track_id:
                    return {
                        'id': track_id,
                        'ext': 'mp3',
                        'url': self._get_track_url(track['storageDir'], track_id),
                        'title': track['artists'][0]['name'] + ' - ' + track['title'],
                    }

        raise ValueError(
            'Track %s was not found in the album page data' % track_id)