X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=6c5b5df4cc138254e0d7680ef7477fc0f11e6ed0;hb=c43e57242e10eae327d4348390cfb9e574fad5dd;hp=8f856ee8c073dd9095f2b62b1567eabdb321c117;hpb=258d5850c91e0d37a36c6bae0a25314f8149b05a;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8f856ee8c..6c5b5df4c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -12,6 +12,7 @@ import traceback import zlib import email.utils import json +import datetime try: import urllib.request as compat_urllib_request @@ -280,6 +281,12 @@ class AttrParser(compat_html_parser.HTMLParser): lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] lines[-1] = lines[-1][:self.result[2][1]] return '\n'.join(lines).strip() +# Hack for https://github.com/rg3/youtube-dl/issues/662 +if sys.version_info < (2, 7, 3): + AttrParser.parse_endtag = (lambda self, i: + i + len("") + if self.rawdata[i:].startswith("") + else compat_html_parser.HTMLParser.parse_endtag(self, i)) def get_element_by_id(id, html): """Return the content of the tag with the specified ID in the passed HTML document""" @@ -305,7 +312,7 @@ def clean_html(html): html = re.sub('<.*?>', '', html) # Replace html entities html = unescapeHTML(html) - return html + return html.strip() def sanitize_open(filename, open_mode): @@ -323,7 +330,7 @@ def sanitize_open(filename, open_mode): if sys.platform == 'win32': import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) - return (sys.stdout, filename) + return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) stream = open(encodeFilename(filename), open_mode) return (stream, filename) except (IOError, OSError) as err: @@ -409,8 +416,19 @@ def encodeFilename(s): # match Windows 9x series as well. Besides, NT 4 is obsolete.) return s else: - return s.encode(sys.getfilesystemencoding(), 'ignore') + encoding = sys.getfilesystemencoding() + if encoding is None: + encoding = 'utf-8' + return s.encode(encoding, 'ignore') + +def decodeOption(optval): + if optval is None: + return optval + if isinstance(optval, bytes): + optval = optval.decode(preferredencoding()) + assert isinstance(optval, compat_str) + return optval class ExtractorError(Exception): """Error during info extraction.""" @@ -418,6 +436,7 @@ class ExtractorError(Exception): """ tb, if given, is the original traceback (so that it can be printed out). """ super(ExtractorError, self).__init__(msg) self.traceback = tb + self.exc_info = sys.exc_info() # preserve original exception def format_traceback(self): if self.traceback is None: @@ -432,7 +451,10 @@ class DownloadError(Exception): configured to continue on errors. They will contain the appropriate error message. """ - pass + def __init__(self, msg, exc_info=None): + """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ + super(DownloadError, self).__init__(msg) + self.exc_info = exc_info class SameFileError(Exception): @@ -450,7 +472,8 @@ class PostProcessingError(Exception): This exception may be raised by PostProcessor's .run() method to indicate an error in the postprocessing task. """ - pass + def __init__(self, msg): + self.msg = msg class MaxDownloadsReached(Exception): """ --max-downloads limit has been reached. """ @@ -515,14 +538,19 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): return ret def http_request(self, req): - for h in std_headers: + for h,v in std_headers.items(): if h in req.headers: del req.headers[h] - req.add_header(h, std_headers[h]) + req.add_header(h, v) if 'Youtubedl-no-compression' in req.headers: if 'Accept-encoding' in req.headers: del req.headers['Accept-encoding'] del req.headers['Youtubedl-no-compression'] + if 'Youtubedl-user-agent' in req.headers: + if 'User-agent' in req.headers: + del req.headers['User-agent'] + req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] + del req.headers['Youtubedl-user-agent'] return req def http_response(self, req, resp): @@ -541,3 +569,70 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_request = http_request https_response = http_response + +def unified_strdate(date_str): + """Return a string with the date in the format YYYYMMDD""" + upload_date = None + #Replace commas + date_str = date_str.replace(',',' ') + # %z (UTC offset) is only supported in python>=3.2 + date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) + format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] + for expression in format_expressions: + try: + upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') + except: + pass + return upload_date + +def date_from_str(date_str): + """ + Return a datetime object from a string in the format YYYYMMDD or + (now|today)[+-][0-9](day|week|month|year)(s)?""" + today = datetime.date.today() + if date_str == 'now'or date_str == 'today': + return today + match = re.match('(now|today)(?P[+-])(?P