import zlib
import email.utils
import json
+import datetime
try:
import urllib.request as compat_urllib_request
html = re.sub('<.*?>', '', html)
# Replace html entities
html = unescapeHTML(html)
- return html
+ return html.strip()
def sanitize_open(filename, open_mode):
""" tb, if given, is the original traceback (so that it can be printed out). """
super(ExtractorError, self).__init__(msg)
self.traceback = tb
+ self.exc_info = sys.exc_info() # preserve original exception
def format_traceback(self):
if self.traceback is None:
configured to continue on errors. They will contain the appropriate
error message.
"""
- pass
+ def __init__(self, msg, exc_info=None):
+ """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+ super(DownloadError, self).__init__(msg)
+ self.exc_info = exc_info
class SameFileError(Exception):
https_request = http_request
https_response = http_response
+
+def date_from_str(date_str):
+ """Return a datetime object from a string in the format YYYYMMDD"""
+ return datetime.datetime.strptime(date_str, "%Y%m%d").date()
+
+class DateRange(object):
+ """Represents a time interval between two dates"""
+ def __init__(self, start=None, end=None):
+ """start and end must be strings in the format accepted by date"""
+ if start is not None:
+ self.start = date_from_str(start)
+ else:
+ self.start = datetime.datetime.min.date()
+ if end is not None:
+ self.end = date_from_str(end)
+ else:
+ self.end = datetime.datetime.max.date()
+ if self.start >= self.end:
+ raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+ @classmethod
+ def day(cls, day):
+ """Returns a range that only contains the given day"""
+ return cls(day,day)
+ def __contains__(self, date):
+ """Check if the date is in the range"""
+ date = date_from_str(date)
+ return self.start <= date and date <= self.end
+ def __str__(self):
+ return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())