X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=ab1049cc0d99b17adcfc83f951d38e89a89a4bc9;hb=99859d436cdee9acc9c869254e734eba5b748260;hp=66ae41e319e39d7c75597a31034315f996f00797;hpb=8756c5fe7a7eed00361fc6543ccf3a66aa768be0;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 66ae41e31..ab1049cc0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -35,6 +35,11 @@ try: except ImportError: # Python 2 from urlparse import urlparse as compat_urllib_parse_urlparse +try: + import urllib.parse as compat_urlparse +except ImportError: # Python 2 + import urlparse as compat_urlparse + try: import http.cookiejar as compat_cookiejar except ImportError: # Python 2 @@ -198,6 +203,20 @@ else: with open(fn, 'w', encoding='utf-8') as f: json.dump(obj, f) +if sys.version_info >= (2,7): + def find_xpath_attr(node, xpath, key, val): + """ Find the xpath xpath[@key=val] """ + assert re.match(r'^[a-zA-Z]+$', key) + assert re.match(r'^[a-zA-Z@\s]*$', val) + expr = xpath + u"[@%s='%s']" % (key, val) + return node.find(expr) +else: + def find_xpath_attr(node, xpath, key, val): + for f in node.findall(xpath): + if f.attrib.get(key) == val: + return f + return None + def htmlentity_transform(matchobj): """Transforms an HTML entity to a character. @@ -457,7 +476,7 @@ def formatSeconds(secs): def make_HTTPS_handler(opts): if sys.version_info < (3,2): # Python's 2.x handler is very simplistic - return compat_urllib_request.HTTPSHandler() + return YoutubeDLHandlerHTTPS() else: import ssl context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) @@ -466,15 +485,19 @@ def make_HTTPS_handler(opts): context.verify_mode = (ssl.CERT_NONE if opts.no_check_certificate else ssl.CERT_REQUIRED) - return compat_urllib_request.HTTPSHandler(context=context) + return YoutubeDLHandlerHTTPS(context=context) class ExtractorError(Exception): """Error during info extraction.""" - def __init__(self, msg, tb=None): - """ tb, if given, is the original traceback (so that it can be printed out). """ - - if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): - msg = msg + u'; please report this issue on GitHub.' + def __init__(self, msg, tb=None, expected=False): + """ tb, if given, is the original traceback (so that it can be printed out). + If expected is set, this is a normal error message and most likely not a bug in youtube-dl. + """ + + if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): + expected = True + if not expected: + msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.' super(ExtractorError, self).__init__(msg) self.traceback = tb @@ -546,7 +569,8 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected -class YoutubeDLHandler(compat_urllib_request.HTTPHandler): + +class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler """Handler for HTTP requests and responses. This class, when installed with an OpenerDirector, automatically adds @@ -579,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): ret.code = code return ret - def http_request(self, req): - for h,v in std_headers.items(): + def _http_request(self, req): + for h, v in std_headers.items(): if h in req.headers: del req.headers[h] req.add_header(h, v) @@ -595,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): del req.headers['Youtubedl-user-agent'] return req - def http_response(self, req, resp): + def _http_response(self, req, resp): old_resp = resp # gzip if resp.headers.get('Content-encoding', '') == 'gzip': @@ -609,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): resp.msg = old_resp.msg return resp - https_request = http_request - https_response = http_response + +class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler): + http_request = YoutubeDLHandler_Template._http_request + http_response = YoutubeDLHandler_Template._http_response + + +class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler): + https_request = YoutubeDLHandler_Template._http_request + https_response = YoutubeDLHandler_Template._http_response + def unified_strdate(date_str): """Return a string with the date in the format YYYYMMDD""" @@ -619,7 +651,7 @@ def unified_strdate(date_str): date_str = date_str.replace(',',' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) - format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] + format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M'] for expression in format_expressions: try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -627,6 +659,16 @@ def unified_strdate(date_str): pass return upload_date +def determine_ext(url, default_ext=u'unknown_video'): + guess = url.partition(u'?')[0].rpartition(u'.')[2] + if re.match(r'^[A-Za-z0-9]+$', guess): + return guess + else: + return default_ext + +def subtitles_filename(filename, sub_lang, sub_format): + return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format + def date_from_str(date_str): """ Return a datetime object from a string in the format YYYYMMDD or