X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fxiami.py;h=618da8382bcbbb3800dc553c80edc2c74fea5c5e;hb=HEAD;hp=a28d63c488aec3882fdd27ad9c70e7f1c204b9a5;hpb=89c0dc9a5fadc3927f7c03f5829e4f2ef8555888;p=youtube-dl diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index a28d63c48..618da8382 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -1,50 +1,54 @@ -# -*- coding: utf-8 -*- - +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - xpath_element, - xpath_text, - xpath_with_ns, - int_or_none, - ExtractorError -) from ..compat import compat_urllib_parse_unquote +from ..utils import int_or_none class XiamiBaseIE(InfoExtractor): - - _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id' - _NS_MAP = {'xm': 'http://xspf.org/ns/0/'} - - def _extract_track(self, track): - artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='') - artist = artist.split(';') - - ret = { - 'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)), - 'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)), - 'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)), - 'artist': ';'.join(artist) if artist else None, - 'creator': artist[0] if artist else None, - 'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))), - 'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None), - 'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))), + _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id' + + def _download_webpage_handle(self, *args, **kwargs): + webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) + if '>Xiami is currently not available in your country.<' in webpage: + self.raise_geo_restricted('Xiami is currently not available in your country') + return webpage + + def _extract_track(self, track, track_id=None): + track_name = track.get('songName') or track.get('name') or track['subName'] + artist = track.get('artist') or track.get('artist_name') or track.get('singers') + title = '%s - %s' % (artist, track_name) if artist else track_name + track_url = self._decrypt(track['location']) + + subtitles = {} + lyrics_url = track.get('lyric_url') or track.get('lyric') + if lyrics_url and lyrics_url.startswith('http'): + subtitles['origin'] = [{'url': lyrics_url}] + + return { + 'id': track.get('song_id') or track_id, + 'url': track_url, + 'title': title, + 'thumbnail': track.get('pic') or track.get('album_pic'), + 'duration': int_or_none(track.get('length')), + 'creator': track.get('artist', '').split(';')[0], + 'track': track_name, + 'track_number': int_or_none(track.get('track')), + 'album': track.get('album_name') or track.get('title'), + 'artist': artist, + 'subtitles': subtitles, } - lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP)) - if lyrics_url and lyrics_url.endswith('.lrc'): - ret['description'] = self._download_webpage(lyrics_url, ret['id']) - return ret - - def _extract_xml(self, _id, typ=''): - playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id) - tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP)) - - if not len(tracklist): - raise ExtractorError('No track found') - return [self._extract_track(track) for track in tracklist] + def _extract_tracks(self, item_id, referer, typ=None): + playlist = self._download_json( + '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), + item_id, headers={ + 'Referer': referer, + }) + return [ + self._extract_track(track, item_id) + for track in playlist['data']['trackList']] @staticmethod def _decrypt(origin): @@ -62,100 +66,136 @@ class XiamiBaseIE(InfoExtractor): ans = '' for i in range(0, short_lenth + 1): for j in range(0, n): - if len(l[j])>i: + if len(l[j]) > i: ans += l[j][i] return compat_urllib_parse_unquote(ans).replace('^', '0') -class XiamiIE(XiamiBaseIE): +class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'http://www\.xiami\.com/song/(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.xiami.com/song/1775610518', - 'md5': '521dd6bea40fd5c9c69f913c232cb57e', - 'info_dict': { - 'id': '1775610518', - 'ext': 'mp3', - 'title': 'Woman', - 'creator': 'HONNE', - 'album': 'Woman', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b', - } + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.xiami.com/song/1775610518', + 'md5': '521dd6bea40fd5c9c69f913c232cb57e', + 'info_dict': { + 'id': '1775610518', + 'ext': 'mp3', + 'title': 'HONNE - Woman', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 265, + 'creator': 'HONNE', + 'track': 'Woman', + 'album': 'Woman', + 'artist': 'HONNE', + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + }, + 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775256504', + 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', + 'info_dict': { + 'id': '1775256504', + 'ext': 'mp3', + 'title': '戴荃 - 悟空', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 200, + 'creator': '戴荃', + 'track': '悟空', + 'album': '悟空', + 'artist': '戴荃', + 'subtitles': { + 'origin': [{ + 'ext': 'lrc', + }], + }, + }, + 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775953850', + 'info_dict': { + 'id': '1775953850', + 'ext': 'mp3', + 'title': 'До Скону - Чума Пожирает Землю', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 683, + 'creator': 'До Скону', + 'track': 'Чума Пожирает Землю', + 'track_number': 7, + 'album': 'Ад', + 'artist': 'До Скону', }, - { - 'url': 'http://www.xiami.com/song/1775256504', - 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', - 'info_dict': { - 'id': '1775256504', - 'ext': 'mp3', - 'title': '悟空', - 'creator': '戴荃', - 'album': '悟空', - 'description': 'md5:206e67e84f9bed1d473d04196a00b990', - } + 'params': { + 'skip_download': True, }, - ] + }, { + 'url': 'http://www.xiami.com/song/xLHGwgd07a1', + 'only_matching': True, + }] def _real_extract(self, url): - _id = self._match_id(url) - return self._extract_xml(_id)[0] + return self._extract_tracks(self._match_id(url), url)[0] -class XiamiAlbumIE(XiamiBaseIE): - IE_NAME = 'xiami:album' - IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'http://www\.xiami\.com/album/(?P[0-9]+)' - _TESTS = [ - { - 'url': 'http://www.xiami.com/album/2100300444', - 'info_dict': { - 'id': '2100300444', - }, - 'playlist_count': 10, - }, - { - 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', - 'only_matching': True, - } - ] - +class XiamiPlaylistBaseIE(XiamiBaseIE): def _real_extract(self, url): - _id = self._match_id(url) - return self.playlist_result(self._extract_xml(_id, '/type/1'), _id) + item_id = self._match_id(url) + return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id) -class XiamiArtistIE(XiamiBaseIE): +class XiamiAlbumIE(XiamiPlaylistBaseIE): + IE_NAME = 'xiami:album' + IE_DESC = '虾米音乐 - 专辑' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' + _TYPE = '1' + _TESTS = [{ + 'url': 'http://www.xiami.com/album/2100300444', + 'info_dict': { + 'id': '2100300444', + }, + 'playlist_count': 10, + 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', + 'only_matching': True, + }, { + 'url': 'http://www.xiami.com/album/URVDji2a506', + 'only_matching': True, + }] + + +class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'http://www\.xiami\.com/artist/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' + _TYPE = '2' + _TESTS = [{ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { 'id': '2132', }, 'playlist_count': 20, - } - - def _real_extract(self, url): - _id = self._match_id(url) - return self.playlist_result(self._extract_xml(_id, '/type/2'), _id) + 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', + 'only_matching': True, + }] -class XiamiCollectionIE(XiamiBaseIE): +class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'http://www\.xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' + _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', 'info_dict': { 'id': '156527391', }, - 'playlist_count': 26, + 'playlist_mincount': 29, + 'skip': 'Georestricted', } - - def _real_extract(self, url): - _id = self._match_id(url) - return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)