3 from .common import InfoExtractor
4 from ..utils import RegexNotFoundError
# NOTE(review): this listing keeps the original file line number as a prefix on
# every line and omits some lines (the numbering jumps), so the statements that
# open or close the literals and calls below are not all visible here.
#
# Extractor for embedded Google Drive / Google Video players. The visible part
# of _real_extract only derives the file id from the URL and builds a
# drive.google.com file URL tagged with the GoogleDrive ie_key.
6 class GoogleDriveEmbedIE(InfoExtractor):
# Accepts video.google.com/get_player?...docid=<id> as well as
# docs.google.com and drive.google.com /file/d/<id>. The named group id is
# exactly 28 characters drawn from [a-zA-Z0-9_-].
7 _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
# Sample URL together with the id and title associated with it.
# NOTE(review): presumably the extractor test case; the enclosing assignment is
# not visible in this listing - confirm.
9 'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
11 'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
13 'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
# Takes page markup (no self parameter). The pattern below targets an
# <iframe src="..."> whose address has the same shape as _VALID_URL.
18 def _extract_url(webpage):
20 r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
# Builds the drive.google.com/file/d/<id> form from the captured id group.
# mobj is bound on a line that is not shown here.
23 return 'https://drive.google.com/file/d/%s' % mobj.group('id')
# Derives the file id from url through _match_id.
25 def _real_extract(self, url):
26 video_id = self._match_id(url)
# Fields of the result mapping (its opening line is not shown). The url field
# is the drive.google.com file URL for the id.
# NOTE(review): ie_key presumably routes that URL to the GoogleDrive
# extractor - confirm against the InfoExtractor conventions.
29 'ie_key': 'GoogleDrive',
30 'url': 'https://drive.google.com/file/d/%s' % video_id
# Extractor for Google Drive file URLs.
# NOTE(review): only a sample of the lines of this class is visible in this
# listing (the original line numbers in the prefixes jump).
33 class GoogleDriveIE(InfoExtractor):
# Accepts docs.google.com and drive.google.com URLs in either the
# uc?...id=<id> form or the file/d/<id> form. The named group id is exactly
# 28 characters drawn from [a-zA-Z0-9_-].
34 _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
# Sample URL together with the id and title associated with it.
# NOTE(review): presumably the extractor test case; the enclosing assignment is
# not visible in this listing - confirm.
36 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
38 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
40 'title': 'Big Buck Bunny.mp4',
# Entries mapping the format ids 43 to 46 to the webm extension.
# NOTE(review): _real_extract reads self._formats[fmt_id]['ext'], so these
# presumably belong to the _formats table whose opening line is not visible -
# confirm.
55 '43': {'ext': 'webm'},
56 '44': {'ext': 'webm'},
57 '45': {'ext': 'webm'},
58 '46': {'ext': 'webm'},
# Downloads the docs.google.com page for the file id, pulls title,
# fmt_stream_map, fmt_list and length_seconds out of it with regular
# expressions, and turns the two format lists into per-format fields.
# NOTE(review): several lines of this method are missing from this listing and
# it ends inside the result mapping, so the comments below describe only the
# visible statements.
62 def _real_extract(self, url):
63 video_id = self._match_id(url)
# NOTE(review): the page is requested over plain http, while the URLs built
# elsewhere in this file use https - consider switching.
# NOTE(review): encoding='unicode_escape' presumably decodes backslash escapes
# embedded in the page data - confirm.
64 webpage = self._download_webpage(
65 'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
# Each pattern below captures the double-quoted value that follows a
# double-quoted key and a comma, i.e. "key" , "value".
68 title = self._html_search_regex(
69 r'"title"\s*,\s*"([^"]+)',
73 fmt_stream_map = self._html_search_regex(
74 r'"fmt_stream_map"\s*,\s*"([^"]+)',
78 fmt_list = self._html_search_regex(
79 r'"fmt_list"\s*,\s*"([^"]+)',
# Timestamp extraction is disabled (commented out) here and in the result.
83 # timestamp = self._html_search_regex(
84 # r'"timestamp"\s*,\s*"([^"]+)',
88 length_seconds = self._html_search_regex(
89 r'"length_seconds"\s*,\s*"([^"]+)',
# Fallback for a failed lookup: a reason is searched for and reported as a
# warning; the second handler reports 'not a video' instead.
# NOTE(review): the try statements these handlers belong to, and the pattern
# used for the reason, are not visible - confirm the exact nesting.
93 except RegexNotFoundError:
95 reason = self._html_search_regex(
100 self.report_warning(reason)
102 except RegexNotFoundError:
103 self.report_warning('not a video')
# Both captured values are comma-separated lists.
106 fmt_stream_map = fmt_stream_map.split(',')
107 fmt_list = fmt_list.split(',')
# Entry i of fmt_stream_map is paired with entry i of fmt_list.
# NOTE(review): this assumes both lists have the same length; a shorter
# fmt_list makes fmt_list[i] raise IndexError.
109 for i in range(len(fmt_stream_map)):
# A stream-map entry is split on '|' into exactly two parts: format id, url.
110 fmt_id, fmt_url = fmt_stream_map[i].split('|')
# A fmt_list entry is slash-separated; its second field is read as the
# resolution, which is split on 'x' into width and height.
111 resolution = fmt_list[i].split('/')[1]
112 width, height = resolution.split('x')
116 'resolution': resolution,
118 'height': int(height),
# NOTE(review): indexing self._formats with an id that has no entry raises
# instead of skipping the format - verify that every id is covered.
119 'ext': self._formats[fmt_id]['ext']
121 self._sort_formats(formats)
126 # 'timestamp': int(timestamp),
# length_seconds is captured as text and converted to int for the duration.
127 'duration': int(length_seconds),