#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import errno
import gzip
import io
import json
import traceback
import zlib
import email.utils
-import json
+import socket
import datetime
try:
except ImportError: # Python 2
from urlparse import urlparse as compat_urllib_parse_urlparse
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
except NameError:
compat_chr = chr
+def compat_ord(c):
+ if type(c) is int: return c
+ else: return ord(c)
+
+# This is not clearly defined otherwise
+compiled_regex_type = type(re.compile(''))
+
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
with open(fn, 'w', encoding='utf-8') as f:
json.dump(obj, f)
+if sys.version_info >= (2,7):
+ def find_xpath_attr(node, xpath, key, val):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z]+$', key)
+ assert re.match(r'^[a-zA-Z@]*$', val)
+ expr = xpath + u"[@%s='%s']" % (key, val)
+ return node.find(expr)
+else:
+ def find_xpath_attr(node, xpath, key, val):
+ for f in node.findall(xpath):
+ if f.attrib.get(key) == val:
+ return f
+ return None
+
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
except (IOError, OSError) as err:
- # In case of error, try to remove win32 forbidden chars
- filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
+ if err.errno in (errno.EACCES,):
+ raise
- # An exception here should be caught in the caller
- stream = open(encodeFilename(filename), open_mode)
- return (stream, filename)
+ # In case of error, try to remove win32 forbidden chars
+ alt_filename = os.path.join(
+ re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
+ for path_part in os.path.split(filename)
+ )
+ if alt_filename == filename:
+ raise
+ else:
+ # An exception here should be caught in the caller
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, alt_filename)
def timeconvert(timestr):
else:
return '%d' % secs
+def make_HTTPS_handler(opts):
+ if sys.version_info < (3,2):
+ # Python's 2.x handler is very simplistic
+ return compat_urllib_request.HTTPSHandler()
+ else:
+ import ssl
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.set_default_verify_paths()
+
+ context.verify_mode = (ssl.CERT_NONE
+ if opts.no_check_certificate
+ else ssl.CERT_REQUIRED)
+ return compat_urllib_request.HTTPSHandler(context=context)
+
class ExtractorError(Exception):
"""Error during info extraction."""
- def __init__(self, msg, tb=None):
- """ tb, if given, is the original traceback (so that it can be printed out). """
+ def __init__(self, msg, tb=None, expected=False):
+ """ tb, if given, is the original traceback (so that it can be printed out).
+ If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
+ """
+
+ if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ expected = True
+ if not expected:
+ msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
super(ExtractorError, self).__init__(msg)
+
self.traceback = tb
self.exc_info = sys.exc_info() # preserve original exception
date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
+ format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
pass
return upload_date
+def determine_ext(url, default_ext=u'unknown_video'):
+ guess = url.partition(u'?')[0].rpartition(u'.')[2]
+ if re.match(r'^[A-Za-z0-9]+$', guess):
+ return guess
+ else:
+ return default_ext
+
def date_from_str(date_str):
"""
Return a datetime object from a string in the format YYYYMMDD or