import traceback
import zlib
import email.utils
-import json
+import socket
import datetime
try:
except ImportError: # Python 2
from urlparse import urlparse as compat_urllib_parse_urlparse
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
if type(c) is int: return c
else: return ord(c)
+# The re module exposes no public name for the compiled-pattern type on every supported Python version, so derive it from an instance
+compiled_regex_type = type(re.compile(''))
+
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
with open(fn, 'w', encoding='utf-8') as f:
json.dump(obj, f)
def find_xpath_attr(node, xpath, key, val):
    """Find the first element matching xpath[@key=val].

    node  -- ElementTree element (or tree) to search from
    xpath -- path expression handed unchanged to node.findall()
    key   -- attribute name to test
    val   -- value the attribute must be equal to

    Returns the first matching element in document order, or None when
    nothing matches.

    The attribute test is done in Python rather than by pasting key/val
    into an XPath predicate. That works on every Python version, needs no
    quoting, and therefore accepts any attribute value (digits,
    punctuation, quotes) instead of guarding the expression with asserts
    that vanish under ``python -O``.
    """
    for candidate in node.findall(xpath):
        if candidate.attrib.get(key) == val:
            return candidate
    return None
+
def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a character.
def make_HTTPS_handler(opts):
    """Build the HTTPS handler to install in the URL opener.

    opts -- parsed options object; only ``opts.no_check_certificate`` is
            read (true disables certificate verification).

    Returns a YoutubeDLHandlerHTTPS instance. From Python 3.2 on it is
    given an SSLContext that enforces, or skips, certificate verification
    according to the option.
    """
    if sys.version_info < (3, 2):
        # Python's 2.x handler is very simplistic
        return YoutubeDLHandlerHTTPS()

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.verify_mode = (ssl.CERT_NONE
                           if opts.no_check_certificate
                           else ssl.CERT_REQUIRED)
    # A bare SSLContext trusts no certificate authority at all, so with
    # CERT_REQUIRED every handshake would fail; load the system CA store.
    context.set_default_verify_paths()
    return YoutubeDLHandlerHTTPS(context=context)
class ExtractorError(Exception):
"""Error during info extraction."""
- def __init__(self, msg, tb=None):
- """ tb, if given, is the original traceback (so that it can be printed out). """
+ def __init__(self, msg, tb=None, expected=False):
+ """ tb, if given, is the original traceback (so that it can be printed out).
+ If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
+ """
+
+ if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ expected = True
+ if not expected:
+ msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
super(ExtractorError, self).__init__(msg)
+
self.traceback = tb
self.exc_info = sys.exc_info() # preserve original exception
self.downloaded = downloaded
self.expected = expected
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+
+class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
"""Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds
ret.code = code
return ret
- def http_request(self, req):
- for h,v in std_headers.items():
+ def _http_request(self, req):
+ for h, v in std_headers.items():
if h in req.headers:
del req.headers[h]
req.add_header(h, v)
del req.headers['Youtubedl-user-agent']
return req
- def http_response(self, req, resp):
+ def _http_response(self, req, resp):
old_resp = resp
# gzip
if resp.headers.get('Content-encoding', '') == 'gzip':
resp.msg = old_resp.msg
return resp
- https_request = http_request
- https_response = http_response
+
+class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):  # HTTP flavour: re-exports the template's hooks under the http_* names
+ http_request = YoutubeDLHandler_Template._http_request  # urllib openers find pre-processing hooks by the <protocol>_request naming convention
+ http_response = YoutubeDLHandler_Template._http_response  # likewise <protocol>_response for post-processing
+
+
+class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):  # HTTPS flavour: same template hooks, under the https_* names
+ https_request = YoutubeDLHandler_Template._http_request  # same implementation as the HTTP request hook
+ https_response = YoutubeDLHandler_Template._http_response  # same implementation as the HTTP response hook
+
def unified_strdate(date_str):
"""Return a string with the date in the format YYYYMMDD"""
date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
+ format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
pass
return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a URL.

    url         -- URL or path to inspect; None is tolerated
    default_ext -- value returned when no plausible extension is found

    Only the part before the query string is considered. The text after
    its last dot is returned when it consists solely of ASCII letters and
    digits; otherwise default_ext is returned.
    """
    if url is None:
        return default_ext
    path = url.partition(u'?')[0]
    _, dot, guess = path.rpartition(u'.')
    # rpartition() returns the whole string as the tail when there is no
    # dot at all; without this check a bare name like u'clip' would be
    # reported as its own extension.
    if dot and re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    return default_ext
+
def subtitles_filename(filename, sub_lang, sub_format):
    """Return the subtitle file name that belongs to a media file name.

    filename   -- name (optionally with directories) of the media file
    sub_lang   -- subtitle language code, inserted before the extension
    sub_format -- subtitle format, used as the new extension

    The media file's own extension, if any, is replaced by
    ``.<sub_lang>.<sub_format>``.
    """
    # Local import keeps this block self-contained.
    import os.path
    # splitext() only looks at the last path component and ignores a
    # leading dot, so dots in directory names or in dotfile names are not
    # mistaken for the start of an extension.
    stem = os.path.splitext(filename)[0]
    return stem + u'.' + sub_lang + u'.' + sub_format
+
def date_from_str(date_str):
"""
Return a datetime object from a string in the format YYYYMMDD or