[huffpost] Add support
[youtube-dl] / youtube_dl / extractor / huffpost.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     parse_duration,
8     unified_strdate,
9 )
10
11
class HuffPostIE(InfoExtractor):
    """Extractor for HuffPost Live segments.

    Handles both the regular segment pages and the embeddable player URLs
    matched by ``_VALID_URL``. The hexadecimal segment id taken from the URL
    is used to query the JSON segment API, from which the title, formats and
    optional metadata are read.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # NOTE(review): 'md5', 'title' and 'description' are still placeholders
    # and must be filled in with real values before this test can pass.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the segment referenced by *url*.

        The title and the ``sources.live`` mapping are required (a missing
        key raises ``KeyError``); duration, upload date, description and
        thumbnails are optional and are simply left empty when the API
        response does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']

        # Optional metadata: never abort the extraction over a missing field.
        running_time = data.get('running_time')
        duration = parse_duration(running_time) if running_time else None

        started_at = (data.get('schedule') or {}).get('started_at')
        upload_date = unified_strdate(started_at) if started_at else None

        # The test definition expects a description.
        # NOTE(review): assumes the API exposes it under 'description' --
        # confirm against a real response.
        description = data.get('description')

        thumbnails = []
        for image_url in (data.get('images') or {}).values():
            if not image_url:
                # Skip null/empty entries instead of crashing in re.match.
                continue
            # The resolution is encoded in the file name as "-<W>x<H>.".
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        # Keys of the 'live' mapping are used as the format name; entries
        # whose key starts with 'audio/' are flagged as having no video.
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': format_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, format_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }