[streamable] Add helper for extracting embedded videos
[youtube-dl] / youtube_dl / extractor / streamable.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     ExtractorError,
9     float_or_none,
10     int_or_none,
11 )
12
13
class StreamableIE(InfoExtractor):
    """Extractor for streamable.com video pages and their ``/e/`` embed URLs."""

    _VALID_URL = r'https?://streamable\.com/(?:e/)?(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://streamable.com/dnd1',
            'md5': '3e3bc5ca088b48c2d436529b64397fef',
            'info_dict': {
                'id': 'dnd1',
                'ext': 'mp4',
                'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
                'thumbnail': r're:https?://.*\.jpg$',
                'uploader': 'teabaker',
                'timestamp': 1454964157.35115,
                'upload_date': '20160208',
                'duration': 61.516,
                'view_count': int,
            }
        },
        # older video without bitrate, width/height, etc. info
        {
            'url': 'https://streamable.com/moo',
            'md5': '2cf6923639b87fba3279ad0df3a64e73',
            'info_dict': {
                'id': 'moo',
                'ext': 'mp4',
                'title': '"Please don\'t eat me!"',
                'thumbnail': r're:https?://.*\.jpg$',
                'timestamp': 1426115495,
                'upload_date': '20150311',
                'duration': 12,
                'view_count': int,
            }
        },
        {
            'url': 'https://streamable.com/e/dnd1',
            'only_matching': True,
        }
    ]

    @staticmethod
    def _extract_url(webpage):
        """Return the ``src`` of the first Streamable ``<iframe>`` in a page.

        ``webpage`` is the HTML text to scan. The returned URL is exactly
        what the attribute contains (it may be protocol-relative, i.e. start
        with ``//``). Returns None when ``webpage`` is empty/None or when no
        Streamable iframe is found.
        """
        if not webpage:
            return None

        # The tempered token ``(?:(?!(?P=q1)).)+`` re-checks the lookahead
        # before every character, so the capture stops at the first closing
        # quote instead of greedily running on to a later attribute's quote.
        mobj = re.search(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!(?P=q1)).)+)(?P=q1)',
            webpage)
        return mobj.group('src') if mobj else None

    def _real_extract(self, url):
        """Build the info dict for the Streamable video addressed by ``url``.

        Fetches the video's JSON description from the ajax endpoint, turns
        every entry of its ``files`` mapping that has a URL into a format and
        returns the collected metadata.

        Raises ExtractorError (marked as expected) when the reported status
        is anything other than 2, i.e. no file is ready for download.
        """
        video_id = self._match_id(url)

        # Note: Using the ajax API, as the public Streamable API doesn't seem
        # to return video info like the title properly sometimes, and doesn't
        # include info like the video duration
        video = self._download_json(
            'https://streamable.com/ajax/videos/%s' % video_id, video_id)

        # Status codes:
        # 0 The video is being uploaded
        # 1 The video is being processed
        # 2 The video has at least one file ready
        # 3 The video is unavailable due to an error
        status = video.get('status')
        if status != 2:
            raise ExtractorError(
                'This video is currently unavailable. It may still be uploading or processing.',
                expected=True)

        title = video.get('reddit_title') or video['title']

        formats = []
        for key, info in video['files'].items():
            # Entries without a URL cannot be downloaded; skip them.
            if not info.get('url'):
                continue
            formats.append({
                'format_id': key,
                'url': self._proto_relative_url(info['url']),
                'width': int_or_none(info.get('width')),
                'height': int_or_none(info.get('height')),
                'filesize': int_or_none(info.get('size')),
                'fps': int_or_none(info.get('framerate')),
                'vbr': float_or_none(info.get('bitrate'), 1000)
            })
        self._sort_formats(formats)

        # 'owner' may be absent or null in the JSON; treat both as "no
        # uploader" instead of failing on None.get().
        owner = video.get('owner') or {}

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
            'uploader': owner.get('user_name'),
            'timestamp': float_or_none(video.get('date_added')),
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('plays')),
            'formats': formats
        }