summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
6ef3e65)
Since Python 3.6, invalid escape sequences are deprecated. It's likely
that there are invalid escape sequences somewhere on the webpage, so
instead of unescaping the whole webpage, just unescape the URL.
See https://bugs.python.org/issue27364. That change was aimed at
string literals, but it also affects the 'unicode_escape' codec.
The code path (each call invokes the next) is:
    str.decode('unicode_escape')
    codecs.unicode_escape_decode()
    PyUnicode_DecodeUnicodeEscape()
+version <unreleased>
+
+Extractors
+* [googledrive] Fix extraction on Python 3.6
+
+
version 2017.02.04.1
Extractors
version 2017.02.04.1
Extractors
from ..utils import (
ExtractorError,
int_or_none,
from ..utils import (
ExtractorError,
int_or_none,
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
_TESTS = [{
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
_TESTS = [{
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
- 'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+ 'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
}
}, {
# video id is longer than 28 characters
}
}, {
# video id is longer than 28 characters
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
- 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+ 'http://docs.google.com/file/d/%s' % video_id, video_id)
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
resolution = fmt.split('/')[1]
width, height = resolution.split('x')
formats.append({
resolution = fmt.split('/')[1]
width, height = resolution.split('x')
formats.append({
+ 'url': lowercase_escape(fmt_url),
'format_id': fmt_id,
'resolution': resolution,
'width': int_or_none(width),
'format_id': fmt_id,
'resolution': resolution,
'width': int_or_none(width),