def clean_html(html):
"""Clean an HTML snippet into a readable string"""
+
+ if html is None: # Convenience for sanitizing descriptions etc.
+ return html
+
# Newline vs <br />
html = html.replace('\n', ' ')
html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
    """Error signalling that the given URL is not supported.

    The offending URL is kept on the instance as ``url``.
    """

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        # Passed to the base class with expected=True, as in the original.
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
+
+
class RegexNotFoundError(ExtractorError):
    """Error for a regular expression that did not match"""
'%b %dth %Y %I:%M%p',
'%Y-%m-%d',
'%Y/%m/%d',
- '%d.%m.%Y',
- '%d/%m/%Y',
- '%d/%m/%y',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
]
if day_first:
format_expressions.extend([
+ '%d.%m.%Y',
+ '%d/%m/%Y',
+ '%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
else:
format_expressions.extend([
+ '%m.%d.%Y',
+ '%m/%d/%Y',
+ '%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
for expression in format_expressions:
def args_to_str(args):
    """Build a short one-line string for a subprocess command.

    Every argument is passed through shlex_quote and the results are
    joined with single spaces.
    """
    quoted_args = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted_args)
+
+
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the Content-Type of an opened URL.

    url_handle is a response object exposing either a ``headers`` mapping
    or, failing that, ``info().getheader``.

    Returns the MIME subtype (the part after the "/") with any parameters
    such as "; charset=utf-8" removed, or None when the header is missing,
    empty or does not contain a subtype.
    """
    try:
        getheader = url_handle.headers.get
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    content_type = getheader('Content-Type')
    if not content_type:
        return None

    # Only the media type matters here; drop parameters like
    # '; charset=utf-8' or '; codecs="..."' before splitting.
    mime_type = content_type.split(';', 1)[0].strip()
    parts = mime_type.split('/')
    if len(parts) < 2 or not parts[1]:
        return None
    return parts[1]
+
+
def age_restricted(content_limit, age_limit):
    """ Tell whether the content has to be blocked (True) or not (False) """

    # With no limit configured, or with content that carries no limit of
    # its own, nothing is blocked.
    if age_limit is None or content_limit is None:
        return False

    # Block only when the configured limit is below the content's limit.
    return age_limit < content_limit