git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/vimple.py
[brightcove:new] extract subtitles and strip video title
[youtube-dl] / youtube_dl / extractor / vimple.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import int_or_none
5
6
class SprutoBaseIE(InfoExtractor):
    """Base extractor that converts a parsed spruto data dict into an info dict."""

    def _extract_spruto(self, spruto, video_id):
        """Build the info dict from the first entry of ``spruto['playlist']``.

        ``spruto`` must contain a non-empty ``playlist`` list whose first
        entry has a ``title`` and a ``video`` list of dicts with a ``url``
        key; a missing mandatory key raises ``KeyError``.  ``video_id`` is
        only a fallback used when the entry has no truthy ``videoId``.
        """
        entry = spruto['playlist'][0]

        # The title is mandatory and is looked up before anything else.
        video_title = entry['title']
        resolved_id = entry.get('videoId') or video_id
        # Prefer the poster image, fall back to the plain thumbnail.
        poster = entry.get('posterUrl') or entry.get('thumbnailUrl')
        length = int_or_none(entry.get('duration'))

        # One format per listed video source; only the URL is carried over.
        formats = []
        for source in entry['video']:
            formats.append({'url': source['url']})
        self._sort_formats(formats)

        return {
            'id': resolved_id,
            'title': video_title,
            'thumbnail': poster,
            'duration': length,
            'formats': formats,
        }
27
28
class VimpleIE(SprutoBaseIE):
    """Extractor for vimple.ru video pages and player.vimple.ru iframes."""

    IE_DESC = 'Vimple - one-click video hosting'
    # The id is 32-36 characters of hex digits and dashes, so both the
    # dashed and the undashed form of the identifier are accepted.
    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
    _TESTS = [
        {
            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
            'md5': '2e750a330ed211d3fd41821c6ad9a279',
            'info_dict': {
                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
                'ext': 'mp4',
                'title': 'Sunset',
                'duration': 20,
                # Raw string: '\.' is an invalid escape sequence in a
                # normal string literal and triggers a warning on newer
                # Python versions.
                'thumbnail': r're:https?://.*?\.jpg',
            },
        }, {
            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Download the player iframe for ``url`` and extract its spruto data.

        The id matched from ``url`` is used to fetch the
        ``player.vimple.ru/iframe/<id>`` page regardless of which URL form
        was given; the ``sprutoData`` object found in that page is parsed
        as JSON and passed to ``_extract_spruto``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://player.vimple.ru/iframe/%s' % video_id, video_id)

        # The pattern expects the sprutoData object to be followed by a
        # comma and a CRLF line ending.
        spruto = self._parse_json(
            self._search_regex(
                r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
            video_id)

        return self._extract_spruto(spruto, video_id)