X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=d4951c406c73d8216803f5d9149200787883676b;hb=ecd1936695e73ba850d0618828b4a40d7d16c091;hp=5be7cf99200c57639af17b1dc89f9a861658f331;hpb=732ea2f09bbcf250e376d6a16dfa0ae8c406ff34;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5be7cf992..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -41,6 +41,7 @@ from .compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, + shlex_quote, ) @@ -55,6 +56,7 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } + def preferredencoding(): """Get preferred encoding. @@ -129,7 +131,7 @@ if sys.version_info >= (2, 7): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z-]+$', key) assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val) - expr = xpath + u"[@%s='%s']" % (key, val) + expr = xpath + "[@%s='%s']" % (key, val) return node.find(expr) else: def find_xpath_attr(node, xpath, key, val): @@ -145,6 +147,8 @@ else: # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter + + def xpath_with_ns(path, ns_map): components = [c.split(':') for c in path.split('/')] replaced = [] @@ -162,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False): xpath = xpath.encode('ascii') n = node.find(xpath) - if n is None: + if n is None or n.text is None: if fatal: name = xpath if name is None else name raise ExtractorError('Could not find XML element %s' % name) @@ -236,9 +240,9 @@ def sanitize_open(filename, open_mode): # In case of error, try to remove win32 forbidden chars alt_filename = os.path.join( - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) - for path_part in os.path.split(filename) - ) + re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + for path_part in os.path.split(filename) + ) if alt_filename == filename: raise else: @@ -255,6 +259,7 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp + def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. @@ -287,6 +292,7 @@ def sanitize_filename(s, restricted=False, is_id=False): result = '_' return result + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -357,7 +363,7 @@ def encodeArgument(s): if not isinstance(s, compat_str): # Legacy code that uses byte strings # Uncomment the following line after fixing all post processors - #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) + # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) s = s.decode('ascii') return encodeFilename(s, True) @@ -371,6 +377,7 @@ def decodeOption(optval): assert isinstance(optval, compat_str) return optval + def formatSeconds(secs): if secs > 3600: return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) @@ -381,6 +388,17 @@ def formatSeconds(secs): def make_HTTPS_handler(opts_no_check_certificate, **kwargs): + if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 + context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + if opts_no_check_certificate: + context.verify_mode = ssl.CERT_NONE + try: + return compat_urllib_request.HTTPSHandler(context=context, **kwargs) + except TypeError: + # Python 2.7.8 + # (create_default_context present but HTTPSHandler has no context=) + pass + if sys.version_info < (3, 2): import httplib @@ -402,27 +420,18 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): def https_open(self, req): return self.do_open(HTTPSConnectionV3, req) return HTTPSHandlerV3(**kwargs) - elif hasattr(ssl, 'create_default_context'): # Python >= 3.4 - context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) - context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3 - if opts_no_check_certificate: - context.verify_mode = ssl.CERT_NONE - return compat_urllib_request.HTTPSHandler(context=context, **kwargs) else: # Python < 3.4 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.verify_mode = (ssl.CERT_NONE if opts_no_check_certificate else ssl.CERT_REQUIRED) context.set_default_verify_paths() - try: - context.load_default_certs() - except AttributeError: - pass # Python < 3.4 return compat_urllib_request.HTTPSHandler(context=context, **kwargs) class ExtractorError(Exception): """Error during info extraction.""" + def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): """ tb, if given, is the original traceback (so that it can be printed out). If expected is set, this is a normal error message and most likely not a bug in youtube-dl. @@ -455,6 +464,13 @@ class ExtractorError(Exception): return ''.join(traceback.format_tb(self.traceback)) +class UnsupportedError(ExtractorError): + def __init__(self, url): + super(UnsupportedError, self).__init__( + 'Unsupported URL: %s' % url, expected=True) + self.url = url + + class RegexNotFoundError(ExtractorError): """Error when a regex didn't match""" pass @@ -467,6 +483,7 @@ class DownloadError(Exception): configured to continue on errors. They will contain the appropriate error message. """ + def __init__(self, msg, exc_info=None): """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ super(DownloadError, self).__init__(msg) @@ -488,9 +505,11 @@ class PostProcessingError(Exception): This exception may be raised by PostProcessor's .run() method to indicate an error in the postprocessing task. """ + def __init__(self, msg): self.msg = msg + class MaxDownloadsReached(Exception): """ --max-downloads limit has been reached. """ pass @@ -520,6 +539,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(compat_urllib_request.HTTPHandler): """Handler for HTTP requests and responses. @@ -632,17 +652,19 @@ def parse_iso8601(date_str, delimiter='T'): return calendar.timegm(dt.timetuple()) -def unified_strdate(date_str): +def unified_strdate(date_str, day_first=True): """Return a string with the date in the format YYYYMMDD""" if date_str is None: return None - upload_date = None - #Replace commas + # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + # Remove AM/PM + timezone + date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) + format_expressions = [ '%d %B %Y', '%d %b %Y', @@ -657,7 +679,6 @@ def unified_strdate(date_str): '%d/%m/%Y', '%d/%m/%y', '%Y/%m/%d %H:%M:%S', - '%d/%m/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', @@ -669,6 +690,14 @@ def unified_strdate(date_str): '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', ] + if day_first: + format_expressions.extend([ + '%d/%m/%Y %H:%M:%S', + ]) + else: + format_expressions.extend([ + '%m/%d/%Y %H:%M:%S', + ]) for expression in format_expressions: try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -680,6 +709,7 @@ def unified_strdate(date_str): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') return upload_date + def determine_ext(url, default_ext='unknown_video'): if url is None: return default_ext @@ -689,16 +719,20 @@ def determine_ext(url, default_ext='unknown_video'): else: return default_ext + def subtitles_filename(filename, sub_lang, sub_format): return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format + def date_from_str(date_str): """ Return a datetime object from a string in the format YYYYMMDD or (now|today)[+-][0-9](day|week|month|year)(s)?""" today = datetime.date.today() - if date_str == 'now'or date_str == 'today': + if date_str in ('now', 'today'): return today + if date_str == 'yesterday': + return today - datetime.timedelta(days=1) match = re.match('(now|today)(?P[+-])(?P