#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import calendar
+import contextlib
import ctypes
import datetime
import email.utils
import errno
+import getpass
import gzip
import itertools
import io
import re
import ssl
import socket
+import struct
import subprocess
import sys
import traceback
+import xml.etree.ElementTree
import zlib
try:
except NameError:
compat_chr = chr
+try:
+ from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError: # Python 2.6
+ from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
def compat_ord(c):
    """Return the integer code for *c*.

    A value whose type is exactly ``int`` is handed back untouched; any
    other value is converted with ``ord``.
    """
    return c if type(c) is int else ord(c)
res.append(el)
return res
+
def unescapeHTML(s):
    """Replace ``&...;`` entities in *s* using ``htmlentity_transform``.

    ``None`` is passed through unchanged. Any other value must be exactly
    of type ``compat_str`` (checked with an assertion).
    """
    if s is None:
        return None
    assert type(s) == compat_str

    # Every non-greedy "&...;" span is handed to htmlentity_transform,
    # whose return value replaces the span.
    return re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
encoding = 'utf-8'
return s.encode(encoding, 'ignore')
-
def decodeOption(optval):
if optval is None:
return optval
https_response = http_response
def parse_iso8601(date_str):
    """Return a UNIX timestamp from the given ISO 8601 date string.

    Accepted input looks like ``YYYY-MM-DDTHH:MM:SS`` optionally followed by
    a fractional-seconds part (``.123``, which is discarded) and by a
    timezone designator: ``Z`` or a ``+HH:MM`` / ``-HHMM`` style offset
    (optionally preceded by a space).  Without a designator the time is
    taken to be UTC.

    Returns ``None`` when *date_str* is ``None``.
    Raises ``ValueError`` (from ``strptime``) when the remaining text does
    not match ``%Y-%m-%dT%H:%M:%S``.
    """
    if date_str is None:
        return None

    tz_match = re.search(
        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
        date_str)

    # Default: no offset (also used for a bare "Z").
    timezone = datetime.timedelta()
    if tz_match:
        # Cut the designator off so that strptime only sees date and time.
        date_str = date_str[:-len(tz_match.group(0))]
        if tz_match.group('sign'):
            sign = 1 if tz_match.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(tz_match.group('hours')),
                minutes=sign * int(tz_match.group('minutes')))

    # The strptime format below has no sub-second field, so a trailing
    # ".123456" would otherwise make parsing fail with ValueError.
    date_str = re.sub(r'\.[0-9]+$', '', date_str)

    dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
    return calendar.timegm(dt.timetuple())
+
+
def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD"""
+
+ if date_str is None:
+ return None
+
upload_date = None
#Replace commas
date_str = date_str.replace(',', ' ')
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
format_expressions = [
'%d %B %Y',
+ '%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%Y-%m-%d',
+ '%d.%m.%Y',
'%d/%m/%Y',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%d.%m.%Y %H:%M',
+ '%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
for expression in format_expressions:
libc = ctypes.cdll.LoadLibrary("libc.so.6")
except OSError:
return
- title = title
- buf = ctypes.create_string_buffer(len(title) + 1)
- buf.value = title.encode('utf-8')
+ title_bytes = title.encode('utf-8')
+ buf = ctypes.create_string_buffer(len(title_bytes))
+ buf.value = title_bytes
try:
- libc.prctl(15, ctypes.byref(buf), 0, 0, 0)
+ libc.prctl(15, buf, 0, 0, 0)
except AttributeError:
return # Strange libc, just skip this
return v if v is None else (int(v) // scale)
def float_or_none(v, scale=1):
    """Convert *v* to ``float`` and divide it by *scale*.

    ``None`` is returned unchanged instead of being converted.
    """
    if v is None:
        return v
    return float(v) / scale
+
+
def parse_duration(s):
if s is None:
return None
m = re.match(
- r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
+ r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
if not m:
return None
res = int(m.group('secs'))
def uppercase_escape(s):
    """Expand literal ``\\UXXXXXXXX`` escape sequences found in *s*.

    Each backslash + ``U`` + eight hex digits sequence is replaced by the
    character it denotes; all other text is returned unchanged.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import codecs

    # str has no .decode() on Python 3; the codec function accepts the
    # (pure ASCII) matched text on both Python 2 and Python 3.
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
+
def _struct_accepts_text_spec():
    """Tell whether ``struct.pack`` accepts a unicode format string."""
    try:
        struct.pack(u'!I', 0)
    except TypeError:
        return False
    return True


if _struct_accepts_text_spec():
    struct_pack = struct.pack
    struct_unpack = struct.unpack
else:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """``struct.pack`` wrapper that ASCII-encodes a text format spec."""
        spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        """``struct.unpack`` wrapper that ASCII-encodes a text format spec."""
        spec = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(spec, *args)
+
+
def read_batch_urls(batch_fd):
    """Read URLs, one per line, from the file-like object *batch_fd*.

    Byte lines are decoded as UTF-8 (undecodable bytes are replaced).  A
    leading byte order mark is removed, surrounding whitespace is stripped,
    and empty lines as well as lines starting with ``#``, ``;`` or ``]``
    are dropped.  *batch_fd* is closed before returning.

    Returns the list of remaining URLs in file order.
    """
    # A UTF-8 BOM shows up as U+FEFF when the data was decoded as UTF-8 and
    # as the three characters below when the raw bytes were decoded with a
    # single-byte codec instead.
    bom_variants = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in bom_variants:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        # fixup() yields False for comments and '' for blank lines; both
        # are filtered out by the truthiness test.
        return [url for url in map(fixup, fd) if url]
+
+
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes.

    All arguments are forwarded unchanged to
    ``compat_urllib_parse.urlencode``.
    """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
+
+
def parse_xml(s):
    """Parse the XML text *s* and return the resulting root element.

    *s* is encoded to UTF-8 before parsing.  On Python 2.7 and newer a
    parser whose tree builder ignores doctype declarations is used; on
    older versions the default parser is used.
    """
    etree = xml.etree.ElementTree

    class _DoctypeIgnoringBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            # Doctypes are deliberately dropped.
            pass

    custom_parser = etree.XMLParser(target=_DoctypeIgnoringBuilder())
    extra = {}
    if sys.version_info >= (2, 7):
        extra['parser'] = custom_parser
    return etree.XML(s.encode('utf-8'), **extra)
+
+
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        """Call ``getpass.getpass`` with a byte-string prompt.

        Used only on Python 2 under win32: a ``compat_str`` prompt is
        encoded with ``preferredencoding()`` before being passed on.
        """
        if isinstance(prompt, compat_str):
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
+
+
# Maps a US content-rating label to a number (presumably the minimum viewer
# age for that rating -- confirm against the code that reads this table).
US_RATINGS = {'G': 0, 'PG': 10, 'PG-13': 13, 'R': 16, 'NC': 18}
+
+
def strip_jsonp(code):
    """Remove a JSONP wrapper ``callback( ... );`` from *code*.

    The callback name may consist of letters, digits, ``_``, ``.`` and
    ``$`` (e.g. ``jQuery123_456`` or ``ns.cb``); the trailing semicolon is
    optional.  Text that does not look like a JSONP call is returned
    unchanged.

    Returns the text between the outermost parentheses.
    """
    # (?s) lets "." span newlines so multi-line payloads are captured; the
    # greedy group backtracks to the last ")" before the optional ";".
    return re.sub(
        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*$', r'\1', code)