X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=a2c6780ca354f98514910752cec596a4eee172e5;hb=765ac263dbf17442f41715a1cfce13fc61a4b315;hp=178d1dcb353c9cb68c874f6203a9f488250bf122;hpb=f6861ec96f33722c9b1ba4f2d8ca307dcbe64ac1;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 178d1dcb3..a2c6780ca 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 +import binascii import calendar import codecs import contextlib @@ -56,7 +57,7 @@ from .compat import ( compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', @@ -248,7 +249,7 @@ def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT): def get_element_by_id(id, html): """Return the content of the tag with the specified ID in the passed HTML document""" - return get_element_by_attribute("id", id, html) + return get_element_by_attribute('id', id, html) def get_element_by_attribute(attribute, value, html): @@ -994,7 +995,7 @@ def date_from_str(date_str): unit += 's' delta = datetime.timedelta(**{unit: time}) return today + delta - return datetime.datetime.strptime(date_str, "%Y%m%d").date() + return datetime.datetime.strptime(date_str, '%Y%m%d').date() def hyphenate_date(date_str): @@ -1074,22 +1075,22 @@ def _windows_write_string(s, out): GetStdHandle = ctypes.WINFUNCTYPE( ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( - (b"GetStdHandle", ctypes.windll.kernel32)) + (b'GetStdHandle', ctypes.windll.kernel32)) h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) WriteConsoleW = ctypes.WINFUNCTYPE( ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), - ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32)) + ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32)) written = ctypes.wintypes.DWORD(0) - GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32)) + GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32)) FILE_TYPE_CHAR = 0x0002 FILE_TYPE_REMOTE = 0x8000 GetConsoleMode = ctypes.WINFUNCTYPE( ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.POINTER(ctypes.wintypes.DWORD))( - (b"GetConsoleMode", ctypes.windll.kernel32)) + (b'GetConsoleMode', ctypes.windll.kernel32)) INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value def not_a_console(handle): @@ -1387,7 +1388,7 @@ def fix_xml_ampersands(xml_str): def setproctitle(title): assert isinstance(title, compat_str) try: - libc = ctypes.cdll.LoadLibrary("libc.so.6") + libc = ctypes.cdll.LoadLibrary('libc.so.6') except OSError: return title_bytes = title.encode('utf-8') @@ -1427,7 +1428,7 @@ def url_basename(url): class HEADRequest(compat_urllib_request.Request): def get_method(self): - return "HEAD" + return 'HEAD' def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): @@ -1717,6 +1718,16 @@ def encode_dict(d, encoding='utf-8'): return dict((encode(k), encode(v)) for k, v in d.items()) +def dict_get(d, key_or_keys, default=None, skip_false_values=True): + if isinstance(key_or_keys, (list, tuple)): + for key in key_or_keys: + if key not in d or d[key] is None or skip_false_values and not d[key]: + continue + return d[key] + return default + return d.get(key_or_keys, default) + + def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) @@ -1734,12 +1745,12 @@ def parse_age_limit(s): if s is None: return None m = re.match(r'^(?P\d{1,2})\+?$', s) - return int(m.group('age')) if m else US_RATINGS.get(s, None) + return int(m.group('age')) if m else US_RATINGS.get(s) def strip_jsonp(code): return re.sub( - r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) + r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) def js_to_json(code): @@ -1825,14 +1836,20 @@ def error_to_compat_str(err): def mimetype2ext(mt): + ext = { + 'audio/mp4': 'm4a', + }.get(mt) + if ext is not None: + return ext + _, _, res = mt.rpartition('/') return { - 'x-ms-wmv': 'wmv', - 'x-mp4-fragmented': 'mp4', - 'ttml+xml': 'ttml', '3gpp': '3gp', + 'ttml+xml': 'ttml', 'x-flv': 'flv', + 'x-mp4-fragmented': 'mp4', + 'x-ms-wmv': 'wmv', }.get(res, res) @@ -2017,20 +2034,27 @@ def dfxp2srt(dfxp_data): 'ttaf1': 'http://www.w3.org/2006/10/ttaf1', }) - def parse_node(node): - str_or_empty = functools.partial(str_or_none, default='') + class TTMLPElementParser(object): + out = '' - out = str_or_empty(node.text) + def start(self, tag, attrib): + if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): + self.out += '\n' - for child in node: - if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): - out += '\n' + str_or_empty(child.tail) - elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'): - out += str_or_empty(parse_node(child)) - else: - out += str_or_empty(xml.etree.ElementTree.tostring(child)) + def end(self, tag): + pass - return out + def data(self, data): + self.out += data + + def close(self): + return self.out.strip() + + def parse_node(node): + target = TTMLPElementParser() + parser = xml.etree.ElementTree.XMLParser(target=target) + parser.feed(xml.etree.ElementTree.tostring(node)) + return parser.close() dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) out = [] @@ -2565,3 +2589,20 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): return None # No Proxy return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) + + +def ohdave_rsa_encrypt(data, exponent, modulus): + ''' + Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/ + + Input: + data: data to encrypt, bytes-like object + exponent, modulus: parameter e and N of RSA algorithm, both integer + Output: hex string of encrypted data + + Limitation: supports one block encryption only + ''' + + payload = int(binascii.hexlify(data[::-1]), 16) + encrypted = pow(payload, exponent, modulus) + return '%x' % encrypted