[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / viqeo.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     int_or_none,
9     str_or_none,
10     url_or_none,
11 )
12
13
class ViqeoIE(InfoExtractor):
    """Extractor for Viqeo videos (cdn.viqeo.tv embeds / api.viqeo.tv URLs).

    Three URL shapes are accepted: the internal ``viqeo:<id>`` scheme, embed
    player URLs carrying a ``vid`` query parameter and startup API URLs
    carrying a ``video[]`` query parameter. In every case only the
    hexadecimal video id is taken from the URL; the embed page for that id
    is what actually gets downloaded.
    """

    # The "?" that opens the query string is escaped in both URL
    # alternatives: an unescaped "?" would only make the preceding character
    # optional instead of matching a literal question mark.
    _VALID_URL = r'''(?x)
                        (?:
                            viqeo:|
                            https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
                            https?://api\.viqeo\.tv/v\d+/data/startup/*\?.*?\bvideo(?:%5B%5D|\[\])=
                        )
                        (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
        'md5': 'a169dd1a6426b350dca4296226f21e76',
        'info_dict': {
            'id': 'cde96f09d25f39bee837',
            'ext': 'mp4',
            'title': 'cde96f09d25f39bee837',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 76,
        },
    }, {
        'url': 'viqeo:cde96f09d25f39bee837',
        'only_matching': True,
    }, {
        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Collect Viqeo embed URLs from ``<iframe src=...>`` attributes.

        :param webpage: HTML text to scan.
        :return: list with the ``src`` value of every matching iframe, in
            document order. Values are returned exactly as written in the
            page, so scheme-relative ``//cdn.viqeo.tv/...`` URLs stay
            scheme-relative.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                # (?:(?!\1).)* consumes any character except the quote that
                # opened the attribute, so neither wildcard can run past the
                # end of the src value and pick up a "vid=" from unrelated
                # markup later on the same line.
                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?(?:(?!\1).)*?\bvid=[\da-f]+(?:(?!\1).)*)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        The embed page for the video id is downloaded, the JSON object
        assigned to ``SLOT_DATA`` in that page is parsed, and each entry of
        its ``mediaFiles`` list is classified by the major part of its MIME
        type: ``image/*`` entries become thumbnails, ``video/*`` and
        ``audio/*`` entries become formats, anything else is ignored.

        :param url: any URL accepted by ``_VALID_URL``.
        :return: dict with ``id``, ``title`` (the video id is used, no other
            title is read from the page), ``duration``, ``thumbnails`` and
            ``formats``.
        """
        video_id = self._match_id(url)

        # Whatever URL shape was given, the canonical embed page is fetched.
        webpage = self._download_webpage(
            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)

        data = self._parse_json(
            self._search_regex(
                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
            video_id)

        formats = []
        thumbnails = []
        # 'mediaFiles' is indexed directly: slot data without that key
        # raises KeyError here rather than yielding an empty result.
        for media_file in data['mediaFiles']:
            if not isinstance(media_file, dict):
                continue
            media_url = url_or_none(media_file.get('url'))
            if not media_url or not media_url.startswith(('http', '//')):
                continue
            media_type = str_or_none(media_file.get('type'))
            if not media_type:
                continue
            # Major MIME type, e.g. 'video' for 'video/mp4'.
            media_kind = media_type.split('/')[0].lower()
            # Fields shared by thumbnail and format entries.
            f = {
                'url': media_url,
                'width': int_or_none(media_file.get('width')),
                'height': int_or_none(media_file.get('height')),
            }
            format_id = str_or_none(media_file.get('quality'))
            if media_kind == 'image':
                f['id'] = format_id
                thumbnails.append(f)
            elif media_kind in ('video', 'audio'):
                is_audio = media_kind == 'audio'
                f.update({
                    # Audio entries get the fixed id 'audio' instead of
                    # their 'quality' value and are flagged as video-less.
                    'format_id': 'audio' if is_audio else format_id,
                    'fps': int_or_none(media_file.get('fps')),
                    'vcodec': 'none' if is_audio else None,
                })
                formats.append(f)
        self._sort_formats(formats)

        duration = int_or_none(data.get('duration'))

        return {
            'id': video_id,
            'title': video_id,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }