From: Yen Chi Hsuan Date: Sun, 5 Feb 2017 13:41:08 +0000 (+0800) Subject: [googledrive] Fix extraction on Python 3.6 X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=e4e50f60b1040a4b6aa8ecb9139f7d5de195f407;p=youtube-dl [googledrive] Fix extraction on Python 3.6 Since Python 3.6, invalid escape sequences are deprecated. It's likely that there are invalid escape sequences somewhere on the webpage, so instead of unescaping the whole webpage, just unescape the URL. See https://bugs.python.org/issue27364. That change was designed for string literals, while it affects the 'unicode_escape' encoding as well. The code path is: str.decode('unicode_escape') codecs.unicode_escape_decode() PyUnicode_DecodeUnicodeEscape() --- diff --git a/ChangeLog b/ChangeLog index 23a729559..a0025ab91 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [googledrive] Fix extraction on Python 3.6 + + version 2017.02.04.1 Extractors diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 766fc26d0..fec36cbbb 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + lowercase_escape, ) @@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P[a-zA-Z0-9_-]{28,})' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', - 'md5': '881f7700aec4f538571fa1e0eed4a7b6', + 'md5': 'd109872761f7e7ecf353fa108c0dbe1e', 'info_dict': { 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', 'ext': 'mp4', 'title': 'Big Buck Bunny.mp4', - 'duration': 46, + 'duration': 45, } }, { # video id is longer than 28 characters @@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape') + 'http://docs.google.com/file/d/%s' % video_id, video_id) reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) if reason: @@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor): resolution = fmt.split('/')[1] width, height = resolution.split('x') formats.append({ - 'url': fmt_url, + 'url': lowercase_escape(fmt_url), 'format_id': fmt_id, 'resolution': resolution, 'width': int_or_none(width),