[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / baidu.py
index 734b50d1d9a2ddf1617e7e724e673892fb5f2b42..234a661d34623b0b2da3028b20bcc23fc11e2991 100644 (file)
@@ -4,17 +4,18 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import unescapeHTML
 
 
 class BaiduVideoIE(InfoExtractor):
     IE_DESC = '百度视频'
-    _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+    _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
     _TESTS = [{
         'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
         'info_dict': {
             'id': '1069',
             'title': '中华小当家 TV版国语',
-            'description': 'md5:40a9c1b1c7f4e05d642e7bb1c84eeda0',
+            'description': 'md5:51be07afe461cf99fa61231421b5397c',
         },
         'playlist_count': 52,
     }, {
@@ -27,8 +28,9 @@ class BaiduVideoIE(InfoExtractor):
         'playlist_mincount': 12,
     }]
 
-    def _call_api(self, path, category, playlist_id):
-        return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (path, category, playlist_id), playlist_id)
+    def _call_api(self, path, category, playlist_id, note):
+        return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
+            path, category, playlist_id), playlist_id, note)
 
     def _real_extract(self, url):
         category, playlist_id = re.match(self._VALID_URL, url).groups()
@@ -37,17 +39,18 @@ class BaiduVideoIE(InfoExtractor):
         if category == 'tv':
             category = 'tvplay'
 
-        playlist_detail = self._call_api('xqinfo', category, playlist_id)
+        playlist_detail = self._call_api(
+            'xqinfo', category, playlist_id, 'Download playlist JSON metadata')
 
         playlist_title = playlist_detail['title']
-        playlist_description = playlist_detail.get('intro')
+        playlist_description = unescapeHTML(playlist_detail.get('intro'))
 
-        episodes_detail = self._call_api('xqsingle', category, playlist_id)
+        episodes_detail = self._call_api(
+            'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
 
-        entries = []
-        for episode in episodes_detail['videos']:
-            entries.append(self.url_result(
-                episode['url'], video_title=episode['title']))
+        entries = [self.url_result(
+            episode['url'], video_title=episode['title']
+        ) for episode in episodes_detail['videos']]
 
         return self.playlist_result(
             entries, playlist_id, playlist_title, playlist_description)