Fix detection of the webpage charset when it is declared with single quotes (') instead of double quotes (")
[youtube-dl] / youtube_dl / extractor / common.py
index a2986cebe5db15d54fbbdf90e87669982ce08aec..77726ee2432fc2bcd6df6ce89dcc560419524051 100644 (file)
@@ -150,7 +150,7 @@ class InfoExtractor(object):
         if m:
             encoding = m.group(1)
         else:
-            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
                           webpage_bytes[:1024])
             if m:
                 encoding = m.group(1).decode('ascii')