X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgoogledrive.py;h=f2cc57e447660f2d047be5bc306aaa4397bbf6af;hb=540b9f5164d50eb99d9c988ece6eb6775ccaf94a;hp=37d37390197b0393c1415bd86e9e30e2b93d7c5c;hpb=fea82c1780cc751267fd2b9b4145996bfc0c1994;p=youtube-dl diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 37d373901..f2cc57e44 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -13,7 +13,18 @@ from ..utils import ( class GoogleDriveIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:docs|drive)\.google\.com/ + (?: + (?:uc|open)\?.*?id=| + file/d/ + )| + video\.google\.com/get_player\?.*?docid= + ) + (?P<id>[a-zA-Z0-9_-]{28,}) + ''' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', 'md5': '5c602afbbf2c1db91831f5d82f678554', @@ -25,7 +36,7 @@ class GoogleDriveIE(InfoExtractor): } }, { # video can't be watched anonymously due to view count limit reached, - # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046) + # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046) 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', 'md5': 'bfbd670d03a470bb1e6d4a257adec12e', 'info_dict': { @@ -42,7 +53,13 @@ class GoogleDriveIE(InfoExtractor): 'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4', 'duration': 189, }, - 'only_matching': True + 'only_matching': True, + }, { + 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', + 'only_matching': True, + }, { + 'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28', + 'only_matching': True, }] _FORMATS_EXT = { '5': 'flv', @@ -203,19 +220,27 @@ class GoogleDriveIE(InfoExtractor): 'id': video_id, 
'export': 'download', }) - urlh = self._request_webpage( - source_url, video_id, note='Requesting source file', - errnote='Unable to request source file', fatal=False) + + def request_source_file(source_url, kind): + return self._request_webpage( + source_url, video_id, note='Requesting %s file' % kind, + errnote='Unable to request %s file' % kind, fatal=False) + urlh = request_source_file(source_url, 'source') if urlh: - def add_source_format(src_url): + def add_source_format(urlh): formats.append({ - 'url': src_url, + # Use redirect URLs as download URLs in order to calculate + # correct cookies in _calc_cookies. + # Using original URLs may result in redirect loop due to + # google.com's cookies mistakenly used for googleusercontent.com + # redirect URLs (see #23919). + 'url': urlh.geturl(), 'ext': determine_ext(title, 'mp4').lower(), 'format_id': 'source', 'quality': 1, }) if urlh.headers.get('Content-Disposition'): - add_source_format(source_url) + add_source_format(urlh) else: confirmation_webpage = self._webpage_read_content( urlh, url, video_id, note='Downloading confirmation page', @@ -225,9 +250,12 @@ class GoogleDriveIE(InfoExtractor): r'confirm=([^&"\']+)', confirmation_webpage, 'confirmation code', fatal=False) if confirm: - add_source_format(update_url_query(source_url, { + confirmed_source_url = update_url_query(source_url, { 'confirm': confirm, - })) + }) + urlh = request_source_file(confirmed_source_url, 'confirmed source') + if urlh and urlh.headers.get('Content-Disposition'): + add_source_format(urlh) if not formats: reason = self._search_regex(