[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index cf46711b9f5556c01c8a2eba5300dbe5df14ae94..3f9e592e36033b2377e72872e2826cfc1a00764b 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -337,17 +337,30 @@ def get_element_by_id(id, html):
  
  
  def get_element_by_class(class_name, html):
-    return get_element_by_attribute(
+    """Return the content of the first tag with the specified class in the passed HTML document"""
+    retval = get_elements_by_class(class_name, html)
+    return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, escape_value=True):
+    retval = get_elements_by_attribute(attribute, value, html, escape_value)
+    return retval[0] if retval else None
+
+
+def get_elements_by_class(class_name, html):
+    """Return the content of all tags with the specified class in the passed HTML document as a list"""
+    return get_elements_by_attribute(
          'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
          html, escape_value=False)
  
  
-def get_element_by_attribute(attribute, value, html, escape_value=True):
+def get_elements_by_attribute(attribute, value, html, escape_value=True):
      """Return the content of the tag with the specified attribute in the passed HTML document"""
  
      value = re.escape(value) if escape_value else value
  
-    m = re.search(r'''(?xs)
+    retlist = []
+    for m in re.finditer(r'''(?xs)
          <([a-zA-Z0-9:._-]+)
           (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
           \s+%s=['"]?%s['"]?
@@ -355,16 +368,15 @@ def get_element_by_attribute(attribute, value, html, escape_value=True):
          \s*>
          (?P<content>.*?)
          </\1>
-    ''' % (re.escape(attribute), value), html)
+    ''' % (re.escape(attribute), value), html):
+        res = m.group('content')
  
-    if not m:
-        return None
-    res = m.group('content')
+        if res.startswith('"') or res.startswith("'"):
+            res = res[1:-1]
  
-    if res.startswith('"') or res.startswith("'"):
-        res = res[1:-1]
+        retlist.append(unescapeHTML(res))
  
-    return unescapeHTML(res)
+    return retlist
  
  
  class HTMLAttributeParser(compat_HTMLParser):
@@ -689,7 +701,12 @@ def bug_reports_message():
      return msg
  
  
-class ExtractorError(Exception):
+class YoutubeDLError(Exception):
+    """Base exception for YoutubeDL errors."""
+    pass
+
+
+class ExtractorError(YoutubeDLError):
      """Error during info extraction."""
  
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
@@ -730,7 +747,7 @@ class RegexNotFoundError(ExtractorError):
      pass
  
  
-class DownloadError(Exception):
+class DownloadError(YoutubeDLError):
      """Download Error exception.
  
      This exception may be thrown by FileDownloader objects if they are not
@@ -744,7 +761,7 @@ class DownloadError(Exception):
          self.exc_info = exc_info
  
  
-class SameFileError(Exception):
+class SameFileError(YoutubeDLError):
      """Same File exception.
  
      This exception will be thrown by FileDownloader objects if they detect
@@ -753,7 +770,7 @@ class SameFileError(Exception):
      pass
  
  
-class PostProcessingError(Exception):
+class PostProcessingError(YoutubeDLError):
      """Post Processing exception.
  
      This exception may be raised by PostProcessor's .run() method to
@@ -761,15 +778,16 @@ class PostProcessingError(Exception):
      """
  
      def __init__(self, msg):
+        super(PostProcessingError, self).__init__(msg)
          self.msg = msg
  
  
-class MaxDownloadsReached(Exception):
+class MaxDownloadsReached(YoutubeDLError):
      """ --max-downloads limit has been reached. """
      pass
  
  
-class UnavailableVideoError(Exception):
+class UnavailableVideoError(YoutubeDLError):
      """Unavailable Format exception.
  
      This exception will be thrown when a video is requested
@@ -778,7 +796,7 @@ class UnavailableVideoError(Exception):
      pass
  
  
-class ContentTooShortError(Exception):
+class ContentTooShortError(YoutubeDLError):
      """Content Too Short exception.
  
      This exception may be raised by FileDownloader objects when a file they
@@ -787,12 +805,15 @@ class ContentTooShortError(Exception):
      """
  
      def __init__(self, downloaded, expected):
+        super(ContentTooShortError, self).__init__(
+            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
+        )
          # Both in bytes
          self.downloaded = downloaded
          self.expected = expected
  
  
-class XAttrMetadataError(Exception):
+class XAttrMetadataError(YoutubeDLError):
      def __init__(self, code=None, msg='Unknown error'):
          super(XAttrMetadataError, self).__init__(msg)
          self.code = code
@@ -808,7 +829,7 @@ class XAttrMetadataError(Exception):
              self.reason = 'NOT_SUPPORTED'
  
  
-class XAttrUnavailableError(Exception):
+class XAttrUnavailableError(YoutubeDLError):
      pass
  
  
@@ -1672,6 +1693,11 @@ def setproctitle(title):
          libc = ctypes.cdll.LoadLibrary('libc.so.6')
      except OSError:
          return
+    except TypeError:
+        # LoadLibrary in Windows Python 2.7.13 only expects
+        # a bytestring, but since unicode_literals turns
+        # every string into a unicode string, it fails.
+        return
      title_bytes = title.encode('utf-8')
      buf = ctypes.create_string_buffer(len(title_bytes))
      buf.value = title_bytes
@@ -2103,11 +2129,18 @@ def strip_jsonp(code):
  
  
  def js_to_json(code):
+    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+    INTEGER_TABLE = (
+        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+    )
+
      def fix_kv(m):
          v = m.group(0)
          if v in ('true', 'false', 'null'):
              return v
-        elif v.startswith('/*') or v == ',':
+        elif v.startswith('/*') or v.startswith('//') or v == ',':
              return ""
  
          if v[0] in ("'", '"'):
@@ -2118,11 +2151,6 @@ def js_to_json(code):
                  '\\x': '\\u00',
              }.get(m.group(0), m.group(0)), v[1:-1])
  
-        INTEGER_TABLE = (
-            (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
-            (r'^(0+[0-7]+)\s*:?$', 8),
-        )
-
          for regex, base in INTEGER_TABLE:
              im = re.match(regex, v)
              if im:
@@ -2134,11 +2162,11 @@ def js_to_json(code):
      return re.sub(r'''(?sx)
          "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
          '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
-        /\*.*?\*/|,(?=\s*[\]}])|
+        {comment}|,(?={skip}[\]}}])|
          [a-zA-Z_][.a-zA-Z_0-9]*|
-        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
-        [0-9]+(?=\s*:)
-        ''', fix_kv, code)
+        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+        [0-9]+(?={skip}:)
+        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
  
  
  def qualities(quality_ids):
@@ -2364,6 +2392,7 @@ def _match_one(filter_part, dct):
          \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
          (?:
              (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
              (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
          )
          \s*$
@@ -2372,7 +2401,8 @@ def _match_one(filter_part, dct):
      if m:
          op = COMPARISON_OPERATORS[m.group('op')]
          actual_value = dct.get(m.group('key'))
-        if (m.group('strval') is not None or
+        if (m.group('quotedstrval') is not None or
+            m.group('strval') is not None or
              # If the original field is a string and matching comparisonvalue is
              # a number we should respect the origin of the original field
              # and process comparison value as a string (see
@@ -2382,7 +2412,10 @@ def _match_one(filter_part, dct):
              if m.group('op') not in ('=', '!='):
                  raise ValueError(
                      'Operator %s does not support string values!' % m.group('op'))
-            comparison_value = m.group('strval') or m.group('intval')
+            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
+            quote = m.group('quote')
+            if quote is not None:
+                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
          else:
              try:
                  comparison_value = int(m.group('intval'))