remove unnecessary regex group names
[youtube-dl] / youtube_dl / extractor / googledrive.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import RegexNotFoundError
5
class GoogleDriveEmbedIE(InfoExtractor):
    """Resolve Google Drive/Docs player and embed URLs to a canonical file URL.

    This extractor downloads nothing itself: it pulls the 28-character file
    id out of the URL and delegates a
    ``https://drive.google.com/file/d/<id>`` URL to the ``GoogleDrive``
    extractor.
    """

    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
        'info_dict': {
            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Find an embedded Google Drive player in *webpage*.

        Returns the canonical ``https://drive.google.com/file/d/<id>`` URL
        for the first ``<iframe src="...">`` pointing at a Google Drive/Docs
        file or a video.google.com player, or None when there is no such
        iframe.
        """
        mobj = re.search(
            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')
        return None

    def _real_extract(self, url):
        """Return a ``url``-type result that hands *url* over to GoogleDrive."""
        video_id = self._match_id(url)
        return {
            '_type': 'url',
            # The key must be spelled 'ie_key' (underscore); 'ie-key' is not
            # a recognised field. Naming the target extractor explicitly
            # matters here because the URL below also matches this class's
            # own _VALID_URL.
            'ie_key': 'GoogleDrive',
            'url': 'https://drive.google.com/file/d/%s' % video_id,
        }
32
class GoogleDriveIE(InfoExtractor):
    """Extract the video streams of a Google Drive/Docs file page."""

    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
        }
    }
    # Container extension for each format id that may appear in the page's
    # "fmt_stream_map" entry.
    _formats = {
        '5': {'ext': 'flv'},
        '6': {'ext': 'flv'},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp'},
        '18': {'ext': 'mp4'},
        '22': {'ext': 'mp4'},
        '34': {'ext': 'flv'},
        '35': {'ext': 'flv'},
        '36': {'ext': '3gp'},
        '37': {'ext': 'mp4'},
        '38': {'ext': 'mp4'},
        '43': {'ext': 'webm'},
        '44': {'ext': 'webm'},
        '45': {'ext': 'webm'},
        '46': {'ext': 'webm'},
        '59': {'ext': 'mp4'},
    }

    def _real_extract(self, url):
        """Download the file page and build the info dict for the video.

        The page is searched for ``"<name>","<value>"`` pairs named
        ``title``, ``fmt_stream_map``, ``fmt_list`` and ``length_seconds``.

        Returns a dict with ``id``, ``title``, ``duration`` and ``formats``.
        When any of the required pairs is missing, a warning is reported
        (the page's ``reason`` value if present, otherwise 'not a video')
        and None is returned.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'https://docs.google.com/file/d/' + video_id, video_id,
            encoding='unicode_escape')

        # Each pattern captures only the quoted value. Without a capture
        # group the match would include the '"name" , "' prefix, which is
        # neither a usable title nor a parsable stream map.
        try:
            title = self._html_search_regex(
                r'"title"\s*,\s*"([^"]+)', webpage, 'title')
            fmt_stream_map = self._html_search_regex(
                r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt_stream_map')
            fmt_list = self._html_search_regex(
                r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list')
            length_seconds = self._html_search_regex(
                r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length_seconds')
        except RegexNotFoundError:
            # Prefer the explanation given by the page itself, if any.
            try:
                reason = self._html_search_regex(
                    r'"reason"\s*,\s*"([^"]+)', webpage, 'reason')
            except RegexNotFoundError:
                reason = 'not a video'
            self.report_warning(reason)
            return None

        # NOTE: the "timestamp" entry is currently not extracted; the
        # extraction code for it was disabled in an earlier revision.

        formats = []
        # fmt_stream_map entries look like '<format id>|<url>' and are paired
        # positionally with fmt_list entries, whose second '/'-separated
        # field is '<width>x<height>'. zip() stops at the shorter list, so a
        # length mismatch drops the unpaired entries instead of raising.
        for stream, fmt in zip(fmt_stream_map.split(','), fmt_list.split(',')):
            # Split only on the first '|' so a '|' inside the URL is kept.
            fmt_id, fmt_url = stream.split('|', 1)
            resolution = fmt.split('/')[1]
            width, height = resolution.split('x')
            formats.append({
                'url': fmt_url,
                'format_id': fmt_id,
                'resolution': resolution,
                'width': int(width),
                'height': int(height),
                # Unknown format ids leave 'ext' as None instead of raising
                # KeyError and aborting the whole extraction.
                'ext': self._formats.get(fmt_id, {}).get('ext'),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': int(length_seconds),
            'formats': formats,
        }