[googledrive] raise ExtractorError instead of warning
[youtube-dl] / youtube_dl / extractor / googledrive.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5     RegexNotFoundError,
6     ExtractorError,
7 )
8
class GoogleDriveEmbedIE(InfoExtractor):
    """Redirect embedded Google Drive players to the ``GoogleDrive`` extractor.

    Handles ``video.google.com/get_player?...docid=<id>`` and
    ``docs``/``drive`` ``.google.com/file/d/<id>`` URLs.  No page is
    downloaded here: the 28-character file id is taken from the URL and a
    ``url``-type result pointing at the canonical Drive URL is returned.
    """

    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
        'info_dict': {
            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
        }
    }

    # Canonical URL every match is rewritten to.
    _DRIVE_URL_TEMPLATE = 'https://drive.google.com/file/d/%s'

    # Same URL shapes as _VALID_URL, located inside an <iframe> tag.
    # ``[^>]+`` lets other attributes precede ``src`` without leaving the
    # tag, and both quote styles are accepted for the attribute value.
    _EMBED_RE = r'<iframe[^>]+src=["\']https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'

    @staticmethod
    def _extract_url(webpage):
        """Return the canonical Drive URL of the first embedded player.

        ``webpage`` is the HTML text to scan.  Returns ``None`` when no
        matching ``<iframe>`` is found.
        """
        mobj = re.search(GoogleDriveEmbedIE._EMBED_RE, webpage)
        if mobj is None:
            return None
        return GoogleDriveEmbedIE._DRIVE_URL_TEMPLATE % mobj.group('id')

    def _real_extract(self, url):
        """Return a ``url`` result delegating to the ``GoogleDrive`` extractor."""
        video_id = self._match_id(url)
        return {
            '_type': 'url',
            'ie_key': 'GoogleDrive',
            'url': self._DRIVE_URL_TEMPLATE % video_id
        }
35
class GoogleDriveIE(InfoExtractor):
    """Extract a video stored on Google Drive / Google Docs.

    The file page is downloaded and the ``"title"``, ``"fmt_stream_map"``,
    ``"fmt_list"`` and ``"length_seconds"`` values embedded in it are read
    with regular expressions.
    """

    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
        }
    }
    # File extension for each known format id found in fmt_stream_map.
    _formats = {
        '5': {'ext': 'flv'},
        '6': {'ext': 'flv'},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp'},
        '18': {'ext': 'mp4'},
        '22': {'ext': 'mp4'},
        '34': {'ext': 'flv'},
        '35': {'ext': 'flv'},
        '36': {'ext': '3gp'},
        '37': {'ext': 'mp4'},
        '38': {'ext': 'mp4'},
        '43': {'ext': 'webm'},
        '44': {'ext': 'webm'},
        '45': {'ext': 'webm'},
        '46': {'ext': 'webm'},
        '59': {'ext': 'mp4'}
    }

    def _failure_reason(self, webpage):
        """Return the page's ``"reason"`` text, or ``'not a video'`` if absent."""
        try:
            return self._html_search_regex(
                r'"reason","([^"]+)',
                webpage,
                'reason'
            )
        except RegexNotFoundError:
            return 'not a video'

    def _build_formats(self, fmt_stream_map, fmt_list):
        """Turn the raw comma-separated page values into format dicts.

        ``fmt_stream_map`` holds ``<format_id>|<url>`` entries.  The entry
        at the same position in ``fmt_list`` is split on ``/`` and its
        second field is used as the ``<width>x<height>`` resolution.
        Entries that cannot be parsed are skipped or left partially filled
        instead of aborting the whole extraction.
        """
        specs = fmt_list.split(',')
        formats = []
        for index, stream in enumerate(fmt_stream_map.split(',')):
            # Split on the first '|' only so a '|' inside the URL is kept.
            fmt_id, sep, fmt_url = stream.partition('|')
            if not sep or not fmt_url:
                continue
            fmt = {
                'url': fmt_url,
                'format_id': fmt_id,
            }
            # Unknown format ids simply get no 'ext' rather than a KeyError.
            ext = self._formats.get(fmt_id, {}).get('ext')
            if ext:
                fmt['ext'] = ext
            if index < len(specs):
                fields = specs[index].split('/')
                if len(fields) > 1:
                    resolution = fields[1]
                    fmt['resolution'] = resolution
                    width, _, height = resolution.partition('x')
                    if width.isdigit() and height.isdigit():
                        fmt['width'] = int(width)
                        fmt['height'] = int(height)
            formats.append(fmt)
        return formats

    def _real_extract(self, url):
        """Download the file page and return the info dict for the video.

        Raises ``ExtractorError`` (marked as expected) carrying the page's
        ``"reason"`` text, or ``'not a video'``, when the title or the
        stream information cannot be found.
        """
        video_id = self._match_id(url)
        # NOTE(review): the page is decoded with 'unicode_escape', presumably
        # because the embedded values are backslash-escaped - confirm.
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
        )
        try:
            title = self._html_search_regex(
                r'"title"\s*,\s*"([^"]+)',
                webpage,
                'title'
            )
            fmt_stream_map = self._html_search_regex(
                r'"fmt_stream_map"\s*,\s*"([^"]+)',
                webpage,
                'fmt_stream_map'
            )
            fmt_list = self._html_search_regex(
                r'"fmt_list"\s*,\s*"([^"]+)',
                webpage,
                'fmt_list'
            )
        except RegexNotFoundError:
            # The failure comes from the site/file, not from a broken
            # extractor, so flag it as expected.
            raise ExtractorError(self._failure_reason(webpage), expected=True)

        # Duration is optional metadata: a missing or malformed value must
        # not prevent an otherwise playable video from being extracted.
        length_seconds = self._html_search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)',
            webpage,
            'length_seconds',
            default=None
        )
        try:
            duration = int(length_seconds)
        except (TypeError, ValueError):
            duration = None

        formats = self._build_formats(fmt_stream_map, fmt_list)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats
        }
132         }