From: Philipp Hagemeister
Date: Tue, 21 Jan 2014 00:39:39 +0000 (+0100)
Subject: Deal with implicitly UTF-16 decoded webpages
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=b60016e83139ace517fc823cf2b22756e64c2e63;p=youtube-dl

Deal with implicitly UTF-16 decoded webpages

These webpages don't specify an encoding and rely on the BOM
---

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 692d828da..6c5d77e58 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -220,6 +220,8 @@ class InfoExtractor(object):
                           webpage_bytes[:1024])
             if m:
                 encoding = m.group(1).decode('ascii')
+            elif webpage_bytes.startswith(b'\xff\xfe'):
+                encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):