_ Git - youtube-dl/blob - youtube_dl/extractor/googledrive.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import RegexNotFoundError
   5
   6 class GoogleDriveEmbedIE(InfoExtractor):
   7     _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
   8     _TEST = {
   9         'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
  10         'info_dict': {
  11             'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
  12             'ext': 'mp4',
  13             'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
  14         }
  15     }
  16
  17     @staticmethod
  18     def _extract_url(webpage):
  19         mobj = re.search(
  20             r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
  21             webpage)
  22         if mobj:
  23             return 'https://drive.google.com/file/d/%s' % mobj.group('id')
  24
  25     def _real_extract(self, url):
  26         video_id = self._match_id(url)
  27         return {
  28             '_type': 'url',
  29             'ie-key': 'GoogleDrive',
  30             'url': 'https://drive.google.com/file/d/%s' % video_id
  31         }
  32
  33 class GoogleDriveIE(InfoExtractor):
  34     _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
  35     _TEST = {
  36         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
  37         'info_dict': {
  38             'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
  39             'ext': 'mp4',
  40             'title': 'Big Buck Bunny.mp4',
  41         }
  42     }
  43     _formats = {
  44         '5': {'ext': 'flv'},
  45         '6': {'ext': 'flv'},
  46         '13': {'ext': '3gp'},
  47         '17': {'ext': '3gp'},
  48         '18': {'ext': 'mp4'},
  49         '22': {'ext': 'mp4'},
  50         '34': {'ext': 'flv'},
  51         '35': {'ext': 'flv'},
  52         '36': {'ext': '3gp'},
  53         '37': {'ext': 'mp4'},
  54         '38': {'ext': 'mp4'},
  55         '43': {'ext': 'webm'},
  56         '44': {'ext': 'webm'},
  57         '45': {'ext': 'webm'},
  58         '46': {'ext': 'webm'},
  59         '59': {'ext': 'mp4'}
  60     }
  61
  62     def _real_extract(self, url):
  63         video_id = self._match_id(url)
  64         webpage = self._download_webpage(
  65             'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
  66         )
  67         try:
  68             title = self._html_search_regex(
  69                 r'"title","(?P<title>.*?)"',
  70                 webpage,
  71                 'title',
  72                 group='title'
  73             )
  74             fmt_stream_map = self._html_search_regex(
  75                 r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
  76                 webpage,
  77                 'fmt_stream_map',
  78                 group='fmt_stream_map'
  79             )
  80             fmt_list = self._html_search_regex(
  81                 r'"fmt_list","(?P<fmt_list>.*?)"',
  82                 webpage,
  83                 'fmt_list',
  84                 group='fmt_list'
  85             )
  86 #                       timestamp = self._html_search_regex(
  87 #                               r'"timestamp","(?P<timestamp>.*?)"',
  88 #                               webpage,
  89 #                               'timestamp',
  90 #                               group='timestamp'
  91 #                       )
  92             length_seconds = self._html_search_regex(
  93                 r'"length_seconds","(?P<length_seconds>.*?)"',
  94                 webpage,
  95                 'length_seconds',
  96                 group='length_seconds'
  97             )
  98         except RegexNotFoundError:
  99             try:
 100                 reason = self._html_search_regex(
 101                     r'"reason","(?P<reason>.*?)"',
 102                     webpage,
 103                     'reason',
 104                     group='reason'
 105                 )
 106                 self.report_warning(reason)
 107                 return
 108             except RegexNotFoundError:
 109                 self.report_warning('not a video')
 110                 return
 111
 112         fmt_stream_map = fmt_stream_map.split(',')
 113         fmt_list = fmt_list.split(',')
 114         formats = []
 115         for i in range(len(fmt_stream_map)):
 116             fmt_id, fmt_url = fmt_stream_map[i].split('|')
 117             resolution = fmt_list[i].split('/')[1]
 118             width, height = resolution.split('x')
 119             formats.append({
 120                 'url': fmt_url,
 121                 'format_id': fmt_id,
 122                 'resolution': resolution,
 123                 'width': int(width),
 124                 'height': int(height),
 125                 'ext': self._formats[fmt_id]['ext']
 126             })
 127         self._sort_formats(formats)
 128
 129         return {
 130             'id': video_id,
 131             'title': title,
 132 #           'timestamp': int(timestamp),
 133             'duration': int(length_seconds),
 134             'formats': formats
 135         }