[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / yinyuetai.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import ExtractorError
6
7
class YinYueTaiIE(InfoExtractor):
    """Extractor for single video pages hosted on v.yinyuetai.com.

    Matches both the regular ``/video/<id>`` URLs and the ``/video/h5/<id>``
    variant; the numeric id is captured as the ``id`` group.
    """

    IE_NAME = 'yinyuetai:video'
    IE_DESC = '音悦Tai'
    _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://v.yinyuetai.com/video/2322376',
        'md5': '6e3abe28d38e3a54b591f9f040595ce0',
        'info_dict': {
            'id': '2322376',
            'ext': 'mp4',
            'title': '少女时代_PARTY_Music Video Teaser',
            'creator': '少女时代',
            'duration': 25,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        # The h5 URL form is only checked against _VALID_URL.
        'url': 'http://v.yinyuetai.com/video/h5/2322376',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Fetches the JSON metadata for the video id found in *url*, raises
        ExtractorError (marked as expected) carrying the service's
        ``errorMsg`` when the response has a truthy ``error`` field, and
        otherwise returns a dict with the id, title, thumbnail, creator,
        duration and the sorted list of formats.
        """
        video_id = self._match_id(url)

        api_url = 'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id
        response = self._download_json(api_url, video_id, 'Downloading mv info')
        info = response['videoInfo']['coreVideoInfo']

        if info['error']:
            raise ExtractorError(info['errorMsg'], expected=True)

        formats = []
        for model in info['videoUrlModels']:
            formats.append({
                'url': model['videoUrl'],
                'format_id': model['qualityLevel'],
                'format': model.get('qualityLevelName'),
                'filesize': model.get('fileSize'),
                # The URLs end in .flv, but the downloaded files are
                # actually mp4, so the extension is forced here.
                'ext': 'mp4',
                'tbr': model.get('bitrate'),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['videoName'],
            'thumbnail': info.get('bigHeadImage'),
            'creator': info.get('artistNames'),
            'duration': info.get('duration'),
            'formats': formats,
        }