X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbaidu.py;h=234a661d34623b0b2da3028b20bcc23fc11e2991;hb=a6211d237b4e7051ca018cc09440502561fedaa7;hp=84fab551bf93a97b93e0572594f776875cc2668c;hpb=c9c194053dfeb1b3b1a7a2cb29b1b9f34081ca5e;p=youtube-dl diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index 84fab551b..234a661d3 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -4,18 +4,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..utils import unescapeHTML class BaiduVideoIE(InfoExtractor): IE_DESC = '百度视频' - _VALID_URL = r'http://v\.baidu\.com/(?P[a-z]+)/(?P\d+)\.htm' + _VALID_URL = r'https?://v\.baidu\.com/(?P[a-z]+)/(?P\d+)\.htm' _TESTS = [{ 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6', 'info_dict': { 'id': '1069', 'title': '中华小当家 TV版国语', - 'description': 'md5:40a9c1b1c7f4e05d642e7bb1c84eeda0', + 'description': 'md5:51be07afe461cf99fa61231421b5397c', }, 'playlist_count': 52, }, { @@ -28,8 +28,9 @@ class BaiduVideoIE(InfoExtractor): 'playlist_mincount': 12, }] - def _call_api(self, path, category, playlist_id): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (path, category, playlist_id), playlist_id) + def _call_api(self, path, category, playlist_id, note): + return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( + path, category, playlist_id), playlist_id, note) def _real_extract(self, url): category, playlist_id = re.match(self._VALID_URL, url).groups() @@ -38,19 +39,18 @@ class BaiduVideoIE(InfoExtractor): if category == 'tv': category = 'tvplay' - playlist_detail = self._call_api('xqinfo', category, playlist_id) + playlist_detail = self._call_api( + 'xqinfo', category, playlist_id, 'Download playlist JSON metadata') playlist_title = playlist_detail['title'] - playlist_description = playlist_detail.get('intro') + playlist_description = unescapeHTML(playlist_detail.get('intro')) - episodes_detail = self._call_api('xqsingle', category, playlist_id) + episodes_detail = self._call_api( + 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') - entries = [] - for episode in episodes_detail['videos']: - episode_id = '%s_%s' % (playlist_id, episode['episode']) - - entries.append(self.url_result( - episode['url'], video_title=episode['title'])) + entries = [self.url_result( + episode['url'], video_title=episode['title'] + ) for episode in episodes_detail['videos']] return self.playlist_result( entries, playlist_id, playlist_title, playlist_description)