Merge branch 'webofstories' of https://github.com/dufferzafar/youtube-dl into dufferz...
[youtube-dl] / youtube_dl / extractor / yinyuetai.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import ExtractorError
6
7
class YinYueTaiIE(InfoExtractor):
    """Extractor for single videos hosted on v.yinyuetai.com.

    Matches both ``/video/<id>`` and ``/video/h5/<id>`` page URLs; the
    numeric id is captured by the ``id`` group of ``_VALID_URL``.
    """

    IE_NAME = 'yinyuetai:video'
    _VALID_URL = r'https?://v\.yinyuetai\.com/video(?:/h5)?/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://v.yinyuetai.com/video/2322376',
        'md5': '6e3abe28d38e3a54b591f9f040595ce0',
        'info_dict': {
            'id': '2322376',
            'ext': 'mp4',
            'title': '少女时代_PARTY_Music Video Teaser',
            'creator': '少女时代',
            'duration': 25,
            # Raw string: '\.' is not a valid string escape, so a plain
            # literal emits an invalid-escape warning on Python 3.6+.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://v.yinyuetai.com/video/h5/2322376',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Downloads the video's metadata as JSON from the ``get-h-mv-info``
        endpoint and turns every entry of its ``videoUrlModels`` list into
        one format.

        Raises:
            ExtractorError: if the metadata has a truthy ``error`` flag;
                the accompanying ``errorMsg`` is used as the message.
        """
        video_id = self._match_id(url)

        # The fields used below live under videoInfo -> coreVideoInfo.
        info = self._download_json(
            'http://ext.yinyuetai.com/main/get-h-mv-info?json=true&videoId=%s' % video_id, video_id,
            'Downloading mv info')['videoInfo']['coreVideoInfo']

        if info['error']:
            # expected=True marks this as a service-reported failure rather
            # than a bug in the extractor.
            raise ExtractorError(info['errorMsg'], expected=True)

        formats = [{
            'url': format_info['videoUrl'],
            'format_id': format_info['qualityLevel'],
            'format': format_info.get('qualityLevelName'),
            'filesize': format_info.get('fileSize'),
            # Though the URLs end with .flv, the downloaded files are in
            # fact mp4.
            'ext': 'mp4',
            'tbr': format_info.get('bitrate'),
        } for format_info in info['videoUrlModels']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['videoName'],
            'thumbnail': info.get('bigHeadImage'),
            'creator': info.get('artistNames'),
            'duration': info.get('duration'),
            'formats': formats,
        }