Merge pull request #8092 from bpfoley/twitter-thumbnail

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 31d60f3233bb3ea95775f390d4f4afc0681b52d3..ec186918cd8672ada2da2d5521e0ba8b22eb273d 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -35,6 +35,7 @@ import xml.etree.ElementTree
  import zlib
  
  from .compat import (
+    compat_HTMLParser,
      compat_basestring,
      compat_chr,
      compat_etree_fromstring,
@@ -272,6 +273,35 @@ def get_element_by_attribute(attribute, value, html):
  
      return unescapeHTML(res)
  
+class HTMLAttributeParser(compat_HTMLParser):
+    """Trivial HTML parser to gather the attributes for a single element"""
+    def __init__(self):
+        self.attrs = { }
+        compat_HTMLParser.__init__(self)
+
+    def handle_starttag(self, tag, attrs):
+        self.attrs = dict(attrs)
+
+def extract_attributes(html_element):
+    """Given a string for an HTML element such as
+    <el
+         a="foo" B="bar" c="&#98;az" d=boz
+         empty= noval entity="&amp;"
+         sq='"' dq="'"
+    >
+    Decode and return a dictionary of attributes.
+    {
+        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
+        'empty': '', 'noval': None, 'entity': '&',
+        'sq': '"', 'dq': '\''
+    }.
+    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+    """
+    parser = HTMLAttributeParser()
+    parser.feed(html_element)
+    parser.close()
+    return parser.attrs
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
@@ -465,6 +495,10 @@ def encodeFilename(s, for_subprocess=False):
      if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
          return s
  
+    # Jython assumes filenames are Unicode strings, even though it reports itself as Python 2.x compatible
+    if sys.platform.startswith('java'):
+        return s
+
      return s.encode(get_subprocess_encoding(), 'ignore')
  
  
@@ -1215,13 +1249,23 @@ if sys.platform == 'win32':
              raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
  
  else:
-    import fcntl
+    # Some platforms, such as Jython, are missing fcntl
+    try:
+        import fcntl
  
-    def _lock_file(f, exclusive):
-        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+        def _lock_file(f, exclusive):
+            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
  
-    def _unlock_file(f):
-        fcntl.flock(f, fcntl.LOCK_UN)
+        def _unlock_file(f):
+            fcntl.flock(f, fcntl.LOCK_UN)
+    except ImportError:
+        UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+        def _lock_file(f, exclusive):
+            raise IOError(UNSUPPORTED_MSG)
+
+        def _unlock_file(f):
+            raise IOError(UNSUPPORTED_MSG)
  
  
  class locked_file(object):
@@ -1302,6 +1346,17 @@ def format_bytes(bytes):
      return '%.2f%s' % (converted, suffix)
  
  
+def lookup_unit_table(unit_table, s):
+    units_re = '|'.join(re.escape(u) for u in unit_table)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    if not m:
+        return None
+    num_str = m.group('num').replace(',', '.')
+    mult = unit_table[m.group('unit')]
+    return int(float(num_str) * mult)
+
+
  def parse_filesize(s):
      if s is None:
          return None
@@ -1345,15 +1400,28 @@ def parse_filesize(s):
          'Yb': 1000 ** 8,
      }
  
-    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
-    m = re.match(
-        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
-    if not m:
+    return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_count(s):
+    if s is None:
          return None
  
-    num_str = m.group('num').replace(',', '.')
-    mult = _UNIT_TABLE[m.group('unit')]
-    return int(float(num_str) * mult)
+    s = s.strip()
+
+    if re.match(r'^[\d,.]+$', s):
+        return str_to_int(s)
+
+    _UNIT_TABLE = {
+        'k': 1000,
+        'K': 1000,
+        'm': 1000 ** 2,
+        'M': 1000 ** 2,
+        'kk': 1000 ** 2,
+        'KK': 1000 ** 2,
+    }
+
+    return lookup_unit_table(_UNIT_TABLE, s)
  
  
  def month_by_name(name):
@@ -1385,6 +1453,12 @@ def fix_xml_ampersands(xml_str):
  
  def setproctitle(title):
      assert isinstance(title, compat_str)
+
+    # ctypes in Jython is not complete
+    # http://bugs.jython.org/issue2148
+    if sys.platform.startswith('java'):
+        return
+
      try:
          libc = ctypes.cdll.LoadLibrary('libc.so.6')
      except OSError:
@@ -1723,6 +1797,7 @@ def update_url_query(url, query):
      parsed_url = compat_urlparse.urlparse(url)
      qs = compat_parse_qs(parsed_url.query)
      qs.update(query)
+    qs = encode_dict(qs)
      return compat_urlparse.urlunparse(parsed_url._replace(
          query=compat_urllib_parse.urlencode(qs, True)))