[huajiao] Add new extractor
[youtube-dl] / youtube_dl / extractor / huajiao.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from ..utils import parse_duration, parse_iso8601
5 from .common import InfoExtractor
6
7
class HuajiaoIE(InfoExtractor):
    """Extractor for recorded streams on huajiao.com (``/l/<numeric id>`` pages).

    The watch page embeds a JavaScript assignment ``var feed = {...}``; that
    object is parsed as JSON and supplies the title, the m3u8 playlist URL and
    the remaining metadata.
    """

    IE_DESC = '花椒直播'
    _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.huajiao.com/l/38941232',
        'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
        'info_dict': {
            'id': '38941232',
            'ext': 'mp4',
            'title': '#新人求关注#',
            'description': 're:.*',
            'duration': 2424.0,
            # Raw string: '\.' is not a valid escape sequence in a normal
            # string literal; the resulting pattern text is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1475866459,
            'upload_date': '20161007',
            'uploader': 'Penny_余姿昀',
            'uploader_id': '75206005',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single huajiao.com video page.

        The title and the m3u8 URL are mandatory: if either is absent from
        the embedded feed object a KeyError propagates.  Every other field is
        optional and becomes None when the page does not provide it, so a
        missing piece of metadata does not abort the extraction.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        feed_json = self._search_regex(
            r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str')
        feed = self._parse_json(feed_json, video_id)

        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False)

        def optional(section, field):
            # Tolerate a missing (or null) section as well as a missing field.
            return (feed.get(section) or {}).get(field)

        # NOTE(review): parse_duration / parse_iso8601 are assumed to return
        # None for a None argument -- confirm against ..utils.
        return {
            'id': video_id,
            'title': feed['feed']['formated_title'],
            'description': description,
            'duration': parse_duration(optional('feed', 'duration')),
            'thumbnail': optional('feed', 'image'),
            'timestamp': parse_iso8601(feed.get('creatime'), ' '),
            'uploader': optional('author', 'nickname'),
            'uploader_id': optional('author', 'uid'),
            'formats': self._extract_m3u8_formats(
                feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
        }