[Canvas] Add new extractor
[youtube-dl] / youtube_dl / extractor / canvas.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import os
5 import urlparse
6
7 from youtube_dl import utils
8 from .common import InfoExtractor
9
10
class CanvasIE(InfoExtractor):
    """Information extractor for videos published on canvas.be.

    The video page carries an asset id in a ``data-video`` attribute. That id
    is looked up in the mediazone.vrt.be JSON API, whose ``targetUrls`` list
    describes the available streams.
    """

    _VALID_URL = r'(?:https?://)?(?:www\.)?canvas\.be/video/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
        'md5': 'ea838375a547ac787d4064d8c7860a6c',
        'info_dict': {
            'id': 'de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
            'title': 'De afspraak veilt voor de Warmste Week',
            'ext': 'mp4',
            'duration': 49,
        }
    }

    # Stream types that map to a single-URL format, keyed by the API's
    # ``type`` value and giving the lower-case downloader protocol name.
    # HLS is not listed because its manifest is expanded into several formats.
    _PROTOCOLS = {
        'PROGRESSIVE_DOWNLOAD': 'http',
        'HDS': 'f4m',
        'RTMP': 'rtmp',
        'RTSP': 'rtsp',
    }

    def _real_extract(self, url):
        """Build the info dict for a canvas.be video page.

        Returns a dict with 'id', 'title', 'formats' and 'duration'.
        'duration' is None when the API response carries no duration.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'<h1 class="video__body__header__title">(.+?)</h1>', webpage,
            'title')
        data_video = self._html_search_regex(
            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'data-video', group='id')
        json_url = 'https://mediazone.vrt.be/api/v1/canvas/assets/' + data_video
        data = self._download_json(json_url, video_id)

        formats = []
        for target in data['targetUrls']:
            target_url = target.get('url')
            target_type = target.get('type')
            # Both fields are required; skip incomplete entries instead of
            # failing on a missing key.
            if not target_url or not target_type:
                continue

            if target_type == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    target_url, video_id, entry_protocol='m3u8_native',
                    ext='mp4',
                    preference=0,
                    fatal=False,
                    m3u8_id='hls'))
                continue

            protocol = self._PROTOCOLS.get(target_type)
            if protocol is None:
                # Unknown stream type: ignore it.
                continue
            formats.append({
                'format_id': utils.determine_ext(target_url),
                'url': target_url,
                'protocol': protocol,
            })

        self._sort_formats(formats)

        # The raw value is scaled down by 1000 (presumably milliseconds to
        # seconds). int_or_none does the scaling itself, so a missing duration
        # yields None instead of raising, and the result stays an integer.
        duration = utils.int_or_none(data.get('duration'), scale=1000)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
        }