import os
import re
import sys
+import traceback
import zlib
import email.utils
import json
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-us,en;q=0.5',
}
+
def preferredencoding():
"""Get preferred encoding.
with open(fn, 'w', encoding='utf-8') as f:
json.dump(obj, f)
-
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
"""Clean an HTML snippet into a readable string"""
# Newline vs <br />
html = html.replace('\n', ' ')
- html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
# Strip html tags
html = re.sub('<.*?>', '', html)
# Replace html entities
else:
return s.encode(sys.getfilesystemencoding(), 'ignore')
+
+class ExtractorError(Exception):
+ """Error during info extraction.
+
+ Besides the message, an instance keeps an optional traceback object in
+ self.traceback so the original failure can be printed later.
+ """
+ def __init__(self, msg, tb=None):
+ """ tb, if given, is the original traceback (so that it can be printed out). """
+ # msg is handed to Exception.__init__; tb is stored unchanged (None when not given).
+ super(ExtractorError, self).__init__(msg)
+ self.traceback = tb
+
+ def format_traceback(self):
+ """Return the stored traceback formatted as a single string, or None if no traceback was given."""
+ if self.traceback is None:
+ return None
+ # traceback.format_tb returns a list of strings; join them into one string.
+ return u''.join(traceback.format_tb(self.traceback))
+
+
class DownloadError(Exception):
"""Download Error exception.
self.downloaded = downloaded
self.expected = expected
-
-class Trouble(Exception):
- """Trouble helper exception
-
- This is an exception to be handled with
- FileDownloader.trouble
- """
-
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
"""Handler for HTTP requests and responses.