release 2013.01.02

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index c18c9beedb85904578dad654c624d97ccc1f68a0..91e1803265dd88d3c50943c44cbb598fd4a139b1 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -154,6 +154,7 @@ std_headers = {
      'Accept-Encoding': 'gzip, deflate',
      'Accept-Language': 'en-us,en;q=0.5',
  }
+
  def preferredencoding():
      """Get preferred encoding.
  
@@ -187,7 +188,6 @@ else:
          with open(fn, 'w', encoding='utf-8') as f:
              json.dump(obj, f)
  
-
  def htmlentity_transform(matchobj):
      """Transforms an HTML entity to a character.
  
@@ -298,7 +298,8 @@ def clean_html(html):
      """Clean an HTML snippet into a readable string"""
      # Newline vs <br />
      html = html.replace('\n', ' ')
-    html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
      # Strip html tags
      html = re.sub('<.*?>', '', html)
      # Replace html entities
@@ -409,6 +410,17 @@ def encodeFilename(s):
      else:
          return s.encode(sys.getfilesystemencoding(), 'ignore')
  
+
+class ExtractorError(Exception):
+    """Error during info extraction."""
+    def __init__(self, msg, tb=None):
+        """msg is the error message; tb is the original traceback, kept so that it can be printed out later."""
+        super(ExtractorError, self).__init__(msg)
+        if tb is None:  # no traceback supplied by the caller
+            tb = sys.exc_info()[2]  # traceback of the exception currently being handled, or None
+        self.traceback = tb  # stored on the instance for later reporting
+
+
  class DownloadError(Exception):
      """Download Error exception.