_ Git - youtube-dl/blob - youtube_dl/extractor/xiami.py

   1 # -*- coding: utf-8 -*-
   2
   3 from __future__ import unicode_literals
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     xpath_element,
   8     xpath_text,
   9     xpath_with_ns,
  10     int_or_none,
  11     ExtractorError
  12 )
  13 from ..compat import compat_urllib_parse_unquote
  14
  15
  16 class XiamiBaseIE(InfoExtractor):
  17
  18     _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
  19     _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}
  20
  21     def _extract_track(self, track):
  22         artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='')
  23         artist = artist.split(';')
  24
  25         ret = {
  26             'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)),
  27             'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)),
  28             'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)),
  29             'artist': ';'.join(artist) if artist else None,
  30             'creator': artist[0] if artist else None,
  31             'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))),
  32             'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None),
  33             'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))),
  34         }
  35
  36         lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP))
  37         if lyrics_url and lyrics_url.endswith('.lrc'):
  38             ret['description'] = self._download_webpage(lyrics_url, ret['id'])
  39         return ret
  40
  41     def _extract_xml(self, _id, typ=''):
  42         playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
  43         tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))
  44
  45         if not len(tracklist):
  46             raise ExtractorError('No track found')
  47         return [self._extract_track(track) for track in tracklist]
  48
  49     @staticmethod
  50     def _decrypt(origin):
  51         n = int(origin[0])
  52         origin = origin[1:]
  53         short_lenth = len(origin) // n
  54         long_num = len(origin) - short_lenth * n
  55         l = tuple()
  56         for i in range(0, n):
  57             length = short_lenth
  58             if i < long_num:
  59                 length += 1
  60             l += (origin[0:length], )
  61             origin = origin[length:]
  62         ans = ''
  63         for i in range(0, short_lenth + 1):
  64             for j in range(0, n):
  65                 if len(l[j])>i:
  66                     ans += l[j][i]
  67         return compat_urllib_parse_unquote(ans).replace('^', '0')
  68
  69
  70 class XiamiIE(XiamiBaseIE):
  71     IE_NAME = 'xiami:song'
  72     IE_DESC = '虾米音乐'
  73     _VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)'
  74     _TESTS = [
  75         {
  76             'url': 'http://www.xiami.com/song/1775610518',
  77             'md5': '521dd6bea40fd5c9c69f913c232cb57e',
  78             'info_dict': {
  79                 'id': '1775610518',
  80                 'ext': 'mp3',
  81                 'title': 'Woman',
  82                 'creator': 'HONNE',
  83                 'album': 'Woman',
  84                 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
  85                 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
  86             }
  87         },
  88         {
  89             'url': 'http://www.xiami.com/song/1775256504',
  90             'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
  91             'info_dict': {
  92                 'id': '1775256504',
  93                 'ext': 'mp3',
  94                 'title': '悟空',
  95                 'creator': '戴荃',
  96                 'album': '悟空',
  97                 'description': 'md5:206e67e84f9bed1d473d04196a00b990',
  98             }
  99         },
 100     ]
 101
 102     def _real_extract(self, url):
 103         _id = self._match_id(url)
 104         return self._extract_xml(_id)[0]
 105
 106
 107 class XiamiAlbumIE(XiamiBaseIE):
 108     IE_NAME = 'xiami:album'
 109     IE_DESC = '虾米音乐 - 专辑'
 110     _VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)'
 111     _TESTS = [
 112         {
 113             'url': 'http://www.xiami.com/album/2100300444',
 114             'info_dict': {
 115                 'id': '2100300444',
 116             },
 117             'playlist_count': 10,
 118         },
 119         {
 120             'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
 121             'only_matching': True,
 122         }
 123     ]
 124
 125     def _real_extract(self, url):
 126         _id = self._match_id(url)
 127         return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
 128
 129
 130 class XiamiArtistIE(XiamiBaseIE):
 131     IE_NAME = 'xiami:artist'
 132     IE_DESC = '虾米音乐 - 歌手'
 133     _VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)'
 134     _TEST = {
 135         'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
 136         'info_dict': {
 137             'id': '2132',
 138         },
 139         'playlist_count': 20,
 140     }
 141
 142     def _real_extract(self, url):
 143         _id = self._match_id(url)
 144         return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
 145
 146
 147 class XiamiCollectionIE(XiamiBaseIE):
 148     IE_NAME = 'xiami:collection'
 149     IE_DESC = '虾米音乐 - 精选集'
 150     _VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)'
 151     _TEST = {
 152         'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
 153         'info_dict': {
 154             'id': '156527391',
 155         },
 156         'playlist_count': 26,
 157     }
 158
 159     def _real_extract(self, url):
 160         _id = self._match_id(url)
 161         return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)