X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=c7db75f80b5a544a0893fb11a371a682fee98e41;hb=bd690a9f9368095f561184778fb2f3ef12c66342;hp=78dc2b449417c5a8f1b30db15fd7b7a74eb84745;hpb=ee114368ad0bb9822449295910263a99f9de4e1e;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 78dc2b449..c7db75f80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): return ret def http_request(self, req): + # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not + # always respected by websites: some give out URLs with non-percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around the aforementioned issue, we replace the request's original URL with + # a percent-encoded one + # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) + # the code of this workaround has been moved here from YoutubeDL.urlopen() + url = req.get_full_url() + url_escaped = escape_url(url) + + # Substitute the URL if escaping changed it + if url != url_escaped: + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + new_req = req_type( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + new_req.timeout = req.timeout + req = new_req + for h, v in std_headers.items(): # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 # urllib capitalizes the dict keys because of this bug