[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / yinyuetai.py
index 661c346027643c3dca8b1b98e3eeac5f77a71f71..1fd8d35c637224a8609b23c87db3206a24987a63 100644 (file)
@@ -7,8 +7,9 @@ from ..utils import ExtractorError
 
 class YinYueTaiIE(InfoExtractor):
     IE_NAME = 'yinyuetai:video'
-    _VALID_URL = r'https?://v\.yinyuetai\.com/video(/h5)?/(?P<id>[0-9]+)'
-    _TEST = {
+    IE_DESC = '音悦Tai'
+    _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://v.yinyuetai.com/video/2322376',
         'md5': '6e3abe28d38e3a54b591f9f040595ce0',
         'info_dict': {
@@ -16,8 +17,13 @@ class YinYueTaiIE(InfoExtractor):
             'ext': 'mp4',
             'title': '少女时代_PARTY_Music Video Teaser',
             'creator': '少女时代',
+            'duration': 25,
+            'thumbnail': r're:^https?://.*\.jpg$',
         },
-    }
+    }, {
+        'url': 'http://v.yinyuetai.com/video/h5/2322376',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -29,19 +35,22 @@ class YinYueTaiIE(InfoExtractor):
         if info['error']:
             raise ExtractorError(info['errorMsg'], expected=True)
 
-        formats = [
-            {'url': format_info['videoUrl'], 'format_id': format_info['qualityLevel'],
-             'format': format_info['qualityLevelName'], 'filesize': format_info['fileSize'],
-             'ext': 'mp4', 'preference': format_info['bitrate']}
-            for format_info in info['videoUrlModels']
-        ]
+        formats = [{
+            'url': format_info['videoUrl'],
+            'format_id': format_info['qualityLevel'],
+            'format': format_info.get('qualityLevelName'),
+            'filesize': format_info.get('fileSize'),
+            # though URLs end with .flv, the downloaded files are in fact mp4
+            'ext': 'mp4',
+            'tbr': format_info.get('bitrate'),
+        } for format_info in info['videoUrlModels']]
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': info['videoName'],
-            'thumbnail': info['bigHeadImage'],
-            'creator': info['artistNames'],
-            'duration': info['duration'],
+            'thumbnail': info.get('bigHeadImage'),
+            'creator': info.get('artistNames'),
+            'duration': info.get('duration'),
             'formats': formats,
         }