X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fqqmusic.py;h=3401dcaef1ec25168f2629e2914396465e2fc193;hb=fd4eefed39595850b864d3be9711224e4e8e9dd4;hp=d0ea4a7698d57ef418c7d0eec56004f0c2c274e3;hpb=5edea45fab73874c269655b4cf08da0bbc5ea479;p=youtube-dl diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index d0ea4a769..3401dcaef 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, + js_to_json, ) from ..compat import compat_urllib_request @@ -24,6 +25,7 @@ class QQMusicIE(InfoExtractor): 'title': '可惜没如果', 'upload_date': '20141227', 'creator': '林俊杰', + 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30', } }] @@ -40,17 +42,23 @@ class QQMusicIE(InfoExtractor): detail_info_page = self._download_webpage( 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, mid, note='Download song detail info', - errnote='Unable to get song detail info') + errnote='Unable to get song detail info', encoding='gbk') song_name = self._html_search_regex( r"songname:\s*'([^']+)'", detail_info_page, 'song name') publish_time = self._html_search_regex( r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, - 'publish time').replace('-', '') + 'publish time', default=None) + if publish_time: + publish_time = publish_time.replace('-', '') singer = self._html_search_regex( - r"singer:\s*'([^']+)", detail_info_page, 'singer') + r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None) + + lrc_content = self._html_search_regex( + r'
]*>([^<>]+)
', + detail_info_page, 'LRC lyrics', default=None) guid = self.m_r_get_ruin() @@ -66,6 +74,7 @@ class QQMusicIE(InfoExtractor): 'title': song_name, 'upload_date': publish_time, 'creator': singer, + 'description': lrc_content, } @@ -74,10 +83,6 @@ class QQPlaylistBaseIE(InfoExtractor): def qq_static_url(category, mid): return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) - @staticmethod - def qq_song_url(mid): - return 'http://y.qq.com/#type=song&mid=%s' % mid - @classmethod def get_entries_from_page(cls, page): entries = [] @@ -85,7 +90,8 @@ class QQPlaylistBaseIE(InfoExtractor): for item in re.findall(r'class="data"[^<>]*>([^<>]+)(top|global)_[0-9]+)' + + _TESTS = [{ + 'url': 'http://y.qq.com/#type=toplist&p=global_12', + 'info_dict': { + 'id': 'global_12', + 'title': 'itunes榜', + }, + 'playlist_count': 10, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=top_6', + 'info_dict': { + 'id': 'top_6', + 'title': 'QQ音乐巅峰榜·欧美', + }, + 'playlist_count': 100, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=global_5', + 'info_dict': { + 'id': 'global_5', + 'title': '韩国mnet排行榜', + }, + 'playlist_count': 50, + }] + + @staticmethod + def strip_qq_jsonp(code): + return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_type = list_id.split("_")[0] + num_id = list_id.split("_")[1] + + list_page = self._download_webpage( + "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, + list_id, 'Download toplist page') + + entries = [] + jsonp_url = "" + if list_type == 'top': + jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id + elif list_type == 'global': + jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id + + list = self._download_json(jsonp_url, list_id, note='Retrieve toplist json', + errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + + for song in list['l']: + s = song['s'] + song_mid = s.split("|")[20] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) + + list_name = self._html_search_regex( + r'

([^\']+)

', list_page, 'top list name', + default=None) + list_desc = None + + return self.playlist_result(entries, list_id, list_name, list_desc) + \ No newline at end of file