return unescapeHTML(res)
+def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'):
+    """Collect the name/value attribute pairs found in attributes_str.
+
+    Every non-overlapping match of attributes_regex contributes one
+    entry, keyed by the first captured group with the second captured
+    group as its value. When the same name matches more than once, the
+    last occurrence wins. An empty dict is returned if nothing matches.
+
+    With the default pattern a value is only recognised when it is
+    wrapped in single or double quotes, is non-empty, and contains
+    neither quote character. Values are returned exactly as written;
+    no entity decoding is performed here.
+    """
+    # findall() yields (name, value) tuples for a two-group pattern,
+    # which is exactly the pair sequence dict() consumes; an empty
+    # result list simply produces an empty dict.
+    return dict(re.findall(attributes_regex, attributes_str))
+
+
def clean_html(html):
"""Clean an HTML snippet into a readable string"""
if drive_or_unc:
norm_path.pop(0)
sanitized_path = [
- path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+ path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)
for path_part in norm_path]
if drive_or_unc:
sanitized_path.insert(0, drive_or_unc + os.path.sep)
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
- return compat_str(upload_date)
+ if upload_date is not None:
+ return compat_str(upload_date)
def determine_ext(url, default_ext='unknown_video'):