X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=f9e7ce956573e85289466f775b5e6e295dabb968;hb=a3c776203f17d221ca394aeaf6c008425f77ad05;hp=532e8c7825066ef822506a7589f9d3f48163f1f8;hpb=6df40dcbe0a0d6889fffbdc1dd7bf14452d3a605;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 532e8c782..f9e7ce956 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import errno import gzip import io import json @@ -11,7 +12,8 @@ import sys import traceback import zlib import email.utils -import json +import socket +import datetime try: import urllib.request as compat_urllib_request @@ -148,6 +150,13 @@ try: except NameError: compat_chr = chr +def compat_ord(c): + if type(c) is int: return c + else: return ord(c) + +# This is not clearly defined otherwise +compiled_regex_type = type(re.compile('')) + std_headers = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', @@ -280,6 +289,12 @@ class AttrParser(compat_html_parser.HTMLParser): lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] lines[-1] = lines[-1][:self.result[2][1]] return '\n'.join(lines).strip() +# Hack for https://github.com/rg3/youtube-dl/issues/662 +if sys.version_info < (2, 7, 3): + AttrParser.parse_endtag = (lambda self, i: + i + len("") + if self.rawdata[i:].startswith("") + else compat_html_parser.HTMLParser.parse_endtag(self, i)) def get_element_by_id(id, html): """Return the content of the tag with the specified ID in the passed HTML document""" @@ -305,7 +320,7 @@ def clean_html(html): html = re.sub('<.*?>', '', html) # Replace html entities html = unescapeHTML(html) - return html + return html.strip() def sanitize_open(filename, open_mode): @@ -323,16 +338,24 @@ def sanitize_open(filename, open_mode): if sys.platform == 'win32': import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) - return (sys.stdout, filename) + return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) stream = open(encodeFilename(filename), open_mode) return (stream, filename) except (IOError, OSError) as err: - # In case of error, try to remove win32 forbidden chars - filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename) + if err.errno in (errno.EACCES,): + raise - # An exception here should be caught in the caller - stream = open(encodeFilename(filename), open_mode) - return (stream, filename) + # In case of error, try to remove win32 forbidden chars + alt_filename = os.path.join( + re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part) + for path_part in os.path.split(filename) + ) + if alt_filename == filename: + raise + else: + # An exception here should be caught in the caller + stream = open(encodeFilename(filename), open_mode) + return (stream, alt_filename) def timeconvert(timestr): @@ -414,13 +437,48 @@ def encodeFilename(s): encoding = 'utf-8' return s.encode(encoding, 'ignore') +def decodeOption(optval): + if optval is None: + return optval + if isinstance(optval, bytes): + optval = optval.decode(preferredencoding()) + + assert isinstance(optval, compat_str) + return optval + +def formatSeconds(secs): + if secs > 3600: + return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) + elif secs > 60: + return '%d:%02d' % (secs // 60, secs % 60) + else: + return '%d' % secs + +def make_HTTPS_handler(opts): + if sys.version_info < (3,2): + # Python's 2.x handler is very simplistic + return compat_urllib_request.HTTPSHandler() + else: + import ssl + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.set_default_verify_paths() + + context.verify_mode = (ssl.CERT_NONE + if opts.no_check_certificate + else ssl.CERT_REQUIRED) + return compat_urllib_request.HTTPSHandler(context=context) class ExtractorError(Exception): """Error during info extraction.""" def __init__(self, msg, tb=None): """ tb, if given, is the original traceback (so that it can be printed out). """ + + if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' super(ExtractorError, self).__init__(msg) + self.traceback = tb + self.exc_info = sys.exc_info() # preserve original exception def format_traceback(self): if self.traceback is None: @@ -435,7 +493,10 @@ class DownloadError(Exception): configured to continue on errors. They will contain the appropriate error message. """ - pass + def __init__(self, msg, exc_info=None): + """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ + super(DownloadError, self).__init__(msg) + self.exc_info = exc_info class SameFileError(Exception): @@ -550,3 +611,70 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_request = http_request https_response = http_response + +def unified_strdate(date_str): + """Return a string with the date in the format YYYYMMDD""" + upload_date = None + #Replace commas + date_str = date_str.replace(',',' ') + # %z (UTC offset) is only supported in python>=3.2 + date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) + format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] + for expression in format_expressions: + try: + upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') + except: + pass + return upload_date + +def date_from_str(date_str): + """ + Return a datetime object from a string in the format YYYYMMDD or + (now|today)[+-][0-9](day|week|month|year)(s)?""" + today = datetime.date.today() + if date_str == 'now'or date_str == 'today': + return today + match = re.match('(now|today)(?P[+-])(?P