[testtube] Add new extractor (Fixes #4759)
[youtube-dl] / youtube_dl / extractor / testtube.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import int_or_none
5
6
class TestTubeIE(InfoExtractor):
    """Information extractor for video pages on testtube.com.

    The last path component of the page URL is used as the display ID.
    The numeric video ID is scraped from the page markup and then passed
    to the site's ``getPlaylist.json`` endpoint, which supplies the
    metadata and the media URLs.
    """

    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
        'info_dict': {
            'id': '60163',
            'display_id': '5-weird-ways-plants-can-eat-animals',
            'duration': 275,
            'ext': 'mp4',
            'title': '5 Weird Ways Plants Can Eat Animals',
            'description': 'Why have some plants evolved to eat meat?',
            # Raw string: "\." is not a valid string escape, so a non-raw
            # literal triggers an invalid-escape warning on newer Pythons.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'DNews',
            'uploader_id': 'dnews',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Returns a dict with the keys ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``uploader``, ``uploader_id``,
        ``duration`` and ``formats``.

        Raises ``KeyError`` or ``IndexError`` if the API response lacks
        ``items``, has an empty ``items`` list, or the first item lacks
        ``title``, ``media`` or a per-format ``url``.  Errors raised by the
        inherited download/regex helpers propagate unchanged.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)
        # The numeric ID only appears inside the inline player bootstrap call.
        video_id = self._search_regex(
            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
            webpage, 'video ID')

        all_info = self._download_json(
            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id,
            video_id)
        info = all_info['items'][0]

        # 'media' maps a codec name to a mapping of variant name -> format
        # data; every (codec, variant) pair becomes one format entry.
        formats = []
        for vcodec, fdatas in info['media'].items():
            for name, fdata in fdatas.items():
                formats.append({
                    'format_id': '%s-%s' % (vcodec, name),
                    'url': fdata['url'],
                    'vcodec': vcodec,
                    'tbr': fdata.get('bitrate'),
                })
        self._sort_formats(formats)

        duration = int_or_none(info.get('duration'))

        # ``dict.get(key, {})`` only covers a *missing* key.  An explicit JSON
        # null would come back as None and break the chained ``.get``, so
        # fall back with ``or {}`` for these optional sub-objects.
        images = info.get('images') or {}
        show = info.get('show') or {}

        return {
            'id': video_id,
            'display_id': display_id,
            'title': info['title'],
            'description': info.get('summary'),
            'thumbnail': images.get('large'),
            'uploader': show.get('name'),
            'uploader_id': show.get('slug'),
            'duration': duration,
            'formats': formats,
        }
60         }