#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import errno
import gzip
import io
import json
import traceback
import zlib
import email.utils
-import json
+import socket
import datetime
try:
except ImportError: # Python 2
from urlparse import urlparse as compat_urllib_parse_urlparse
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
except NameError:
compat_chr = chr
def compat_ord(c):
    """Return the integer value of *c*.

    If *c* is already an int it is returned unchanged; otherwise ``ord(c)``
    is returned. This lets callers treat items that may be either integers
    or one-character strings uniformly (e.g. the elements obtained when
    indexing a byte string, which differ between Python 2 and Python 3).
    """
    if isinstance(c, int):
        return c
    return ord(c)
+
# The type of a compiled regular expression is not clearly defined under a
# public name otherwise, so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))
+
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
with open(fn, 'w', encoding='utf-8') as f:
json.dump(obj, f)
def find_xpath_attr(node, xpath, key, val):
    """Find the first element matching xpath[@key=val].

    The attribute is compared in Python rather than being interpolated into
    the path expression, so the same code runs on every Python version and
    *val* may contain any characters (digits, quotes, punctuation, ...).

    node  -- element (or tree) to search from; must provide ``findall``.
    xpath -- path expression understood by ``node.findall``.
    key   -- attribute name to look up on each candidate.
    val   -- required attribute value.

    Returns the first matching element in ``findall`` order, or None when
    no candidate carries the attribute with that value.
    """
    for candidate in node.findall(xpath):
        if candidate.attrib.get(key) == val:
            return candidate
    return None
+
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
stream = open(encodeFilename(filename), open_mode)
return (stream, filename)
except (IOError, OSError) as err:
- # In case of error, try to remove win32 forbidden chars
- filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
+ if err.errno in (errno.EACCES,):
+ raise
- # An exception here should be caught in the caller
- stream = open(encodeFilename(filename), open_mode)
- return (stream, filename)
+ # In case of error, try to remove win32 forbidden chars
+ alt_filename = os.path.join(
+ re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
+ for path_part in os.path.split(filename)
+ )
+ if alt_filename == filename:
+ raise
+ else:
+ # An exception here should be caught in the caller
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, alt_filename)
def timeconvert(timestr):
assert isinstance(optval, compat_str)
return optval
def formatSeconds(secs):
    """Format a duration given in seconds as a short clock string.

    Returns ``H:MM:SS`` for durations of one hour or more, ``M:SS`` for
    durations of one minute or more, and the plain number of seconds
    otherwise. The boundaries are inclusive so that exactly 60 seconds is
    rendered as ``1:00`` and exactly 3600 seconds as ``1:00:00``.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    if secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    return '%d' % secs
+
def make_HTTPS_handler(opts):
    """Create the HTTPS handler used for urllib requests.

    On Python older than 3.2 a default ``HTTPSHandler`` is returned and
    *opts* is not consulted. On newer versions an SSL context using the
    default verification paths is built; certificate verification is
    disabled when ``opts.no_check_certificate`` is true and required
    otherwise.
    """
    if sys.version_info < (3, 2):
        # Python's 2.x handler is very simplistic
        return compat_urllib_request.HTTPSHandler()

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.set_default_verify_paths()

    if opts.no_check_certificate:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    return compat_urllib_request.HTTPSHandler(context=context)
+
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False):
        """Build the error, adding a bug-report hint for unexpected failures.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        The error is also treated as expected when the exception currently
        being handled is exactly URLError, socket.timeout or
        UnavailableVideoError (subclasses are not matched).
        """
        # Capture once: this is both the trigger for "expected" and the
        # original exception preserved on the instance.
        exc_info = sys.exc_info()

        if exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = exc_info  # preserve original exception
date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
+ format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
pass
return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess a file extension from a URL.

    The query string (everything from the first ``?``) is ignored and the
    text after the last ``.`` of the remainder is taken as the candidate.
    The candidate is returned only if it is purely alphanumeric; otherwise,
    or when the URL is None or contains no dot at all, *default_ext* is
    returned.
    """
    if url is None:
        return default_ext
    _, dot, guess = url.partition(u'?')[0].rpartition(u'.')
    # Without a dot rpartition hands back the whole string as "guess";
    # that is not an extension.
    if dot and re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    return default_ext
+
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Relative dates are computed from today's date. Months and years are
    approximated as 30 and 365 days respectively. Any other string is
    parsed as YYYYMMDD, which raises ValueError if it does not match.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    match = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if match is not None:
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # timedelta has no month/year units, so fall back to a fixed
        # number of days (a rough approximation).
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        # timedelta keyword arguments are plural: days=..., weeks=...
        delta = datetime.timedelta(**{unit + 's': amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
class DateRange(object):
self.end = date_from_str(end)
else:
self.end = datetime.datetime.max.date()
- if self.start >= self.end:
+ if self.start > self.end:
raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
@classmethod
def day(cls, day):
    """Return a range whose start and end are both the given day."""
    return cls(day, day)
def __contains__(self, date):
"""Check if the date is in the range"""
- date = date_from_str(date)
- return self.start <= date and date <= self.end
+ if not isinstance(date, datetime.date):
+ date = date_from_str(date)
+ return self.start <= date <= self.end
def __str__(self):
return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())