[extractor/common] fallback on utf-8 when charset is not found
author Anisse Astier <anisse@astier.eu>
Mon, 7 Apr 2014 21:09:53 +0000 (23:09 +0200)
committer Anisse Astier <anisse@astier.eu>
Mon, 7 Apr 2014 21:10:16 +0000 (23:10 +0200)
fixes #2721

youtube_dl/extractor/common.py

index da4193734971122c2ef72f3dca4acecd93e5f784..9653d44eb33db4efb74e97529cdd2aaea0dc1c03 100644 (file)
@@ -251,7 +251,10 @@ class InfoExtractor(object):
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
 
-        content = webpage_bytes.decode(encoding, 'replace')
+        try:
+            content = webpage_bytes.decode(encoding, 'replace')
+        except LookupError:
+            content = webpage_bytes.decode('utf-8', 'replace')
 
         if (u'<title>Access to this site is blocked</title>' in content and
                 u'Websense' in content[:512]):