if os.name == 'nt':
import ctypes
-from Utils import *
+from utils import *
class FileDownloader(object):
except ImportError: # Python<2.5: Not officially supported, but let it slip
warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
-from Utils import *
+from utils import *
class InfoExtractor(object):
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import os
-import subprocess
-import sys
-import time
-
-from Utils import *
-
-
-class PostProcessor(object):
- """Post Processor class.
-
- PostProcessor objects can be added to downloaders with their
- add_post_processor() method. When the downloader has finished a
- successful download, it will take its internal chain of PostProcessors
- and start calling the run() method on each one of them, first with
- an initial argument and then with the returned value of the previous
- PostProcessor.
-
- The chain will be stopped if one of them ever returns None or the end
- of the chain is reached.
-
- PostProcessor objects follow a "mutual registration" process similar
- to InfoExtractor objects.
- """
-
- _downloader = None
-
- def __init__(self, downloader=None):
- self._downloader = downloader
-
- def set_downloader(self, downloader):
- """Sets the downloader for this PP."""
- self._downloader = downloader
-
- def run(self, information):
- """Run the PostProcessor.
-
- The "information" argument is a dictionary like the ones
- composed by InfoExtractors. The only difference is that this
- one has an extra field called "filepath" that points to the
- downloaded file.
-
- When this method returns None, the postprocessing chain is
- stopped. However, this method may return an information
- dictionary that will be passed to the next postprocessing
- object in the chain. It can be the one it received after
- changing some fields.
-
- In addition, this method may raise a PostProcessingError
- exception that will be taken into account by the downloader
- it was called from.
- """
- return information # by default, do nothing
-
-class AudioConversionError(BaseException):
- def __init__(self, message):
- self.message = message
-
-class FFmpegExtractAudioPP(PostProcessor):
-
- def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
- PostProcessor.__init__(self, downloader)
- if preferredcodec is None:
- preferredcodec = 'best'
- self._preferredcodec = preferredcodec
- self._preferredquality = preferredquality
- self._keepvideo = keepvideo
-
- @staticmethod
- def get_audio_codec(path):
- try:
- cmd = ['ffprobe', '-show_streams', '--', encodeFilename(path)]
- handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
- output = handle.communicate()[0]
- if handle.wait() != 0:
- return None
- except (IOError, OSError):
- return None
- audio_codec = None
- for line in output.split('\n'):
- if line.startswith('codec_name='):
- audio_codec = line.split('=')[1].strip()
- elif line.strip() == 'codec_type=audio' and audio_codec is not None:
- return audio_codec
- return None
-
- @staticmethod
- def run_ffmpeg(path, out_path, codec, more_opts):
- if codec is None:
- acodec_opts = []
- else:
- acodec_opts = ['-acodec', codec]
- cmd = ['ffmpeg', '-y', '-i', encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', encodeFilename(out_path)]
- try:
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout,stderr = p.communicate()
- except (IOError, OSError):
- e = sys.exc_info()[1]
- if isinstance(e, OSError) and e.errno == 2:
- raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
- else:
- raise e
- if p.returncode != 0:
- msg = stderr.strip().split('\n')[-1]
- raise AudioConversionError(msg)
-
- def run(self, information):
- path = information['filepath']
-
- filecodec = self.get_audio_codec(path)
- if filecodec is None:
- self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
- return None
-
- more_opts = []
- if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
- if self._preferredcodec == 'm4a' and filecodec == 'aac':
- # Lossless, but in another container
- acodec = 'copy'
- extension = self._preferredcodec
- more_opts = ['-absf', 'aac_adtstoasc']
- elif filecodec in ['aac', 'mp3', 'vorbis']:
- # Lossless if possible
- acodec = 'copy'
- extension = filecodec
- if filecodec == 'aac':
- more_opts = ['-f', 'adts']
- if filecodec == 'vorbis':
- extension = 'ogg'
- else:
- # MP3 otherwise.
- acodec = 'libmp3lame'
- extension = 'mp3'
- more_opts = []
- if self._preferredquality is not None:
- more_opts += ['-ab', self._preferredquality]
- else:
- # We convert the audio (lossy)
- acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
- extension = self._preferredcodec
- more_opts = []
- if self._preferredquality is not None:
- more_opts += ['-ab', self._preferredquality]
- if self._preferredcodec == 'aac':
- more_opts += ['-f', 'adts']
- if self._preferredcodec == 'm4a':
- more_opts += ['-absf', 'aac_adtstoasc']
- if self._preferredcodec == 'vorbis':
- extension = 'ogg'
- if self._preferredcodec == 'wav':
- extension = 'wav'
- more_opts += ['-f', 'wav']
-
- prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
- new_path = prefix + sep + extension
- self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
- try:
- self.run_ffmpeg(path, new_path, acodec, more_opts)
- except:
- etype,e,tb = sys.exc_info()
- if isinstance(e, AudioConversionError):
- self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
- else:
- self._downloader.to_stderr(u'ERROR: error running ffmpeg')
- return None
-
- # Try to update the date time for extracted audio file.
- if information.get('filetime') is not None:
- try:
- os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
- except:
- self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
-
- if not self._keepvideo:
- try:
- os.remove(encodeFilename(path))
- except (IOError, OSError):
- self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
- return None
-
- information['filepath'] = new_path
- return information
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import subprocess
+import sys
+import time
+
+from utils import *
+
+
+class PostProcessor(object):
+ """Post Processor class.
+
+ PostProcessor objects can be added to downloaders with their
+ add_post_processor() method. When the downloader has finished a
+ successful download, it will take its internal chain of PostProcessors
+ and start calling the run() method on each one of them, first with
+ an initial argument and then with the returned value of the previous
+ PostProcessor.
+
+ The chain will be stopped if one of them ever returns None or the end
+ of the chain is reached.
+
+ PostProcessor objects follow a "mutual registration" process similar
+ to InfoExtractor objects.
+ """
+
+ # Downloader this post processor is attached to. Stays None until one
+ # is passed to __init__() or set_downloader().
+ _downloader = None
+
+ def __init__(self, downloader=None):
+ """Create the post processor, optionally attached to a downloader."""
+ self._downloader = downloader
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this PP."""
+ self._downloader = downloader
+
+ def run(self, information):
+ """Run the PostProcessor.
+
+ The "information" argument is a dictionary like the ones
+ composed by InfoExtractors. The only difference is that this
+ one has an extra field called "filepath" that points to the
+ downloaded file.
+
+ When this method returns None, the postprocessing chain is
+ stopped. However, this method may return an information
+ dictionary that will be passed to the next postprocessing
+ object in the chain. It can be the one it received after
+ changing some fields.
+
+ In addition, this method may raise a PostProcessingError
+ exception that will be taken into account by the downloader
+ it was called from.
+
+ This base implementation returns "information" unchanged;
+ subclasses override it to do the actual work.
+ """
+ return information # by default, do nothing
+
+class AudioConversionError(Exception):
+    """Raised when ffmpeg is missing or fails to convert the audio track.
+
+    The human-readable reason is available as the ``message`` attribute
+    and is also handed to Exception, so that str(error) returns it.
+
+    This derives from Exception rather than BaseException: BaseException
+    is meant for interpreter-level events such as KeyboardInterrupt and
+    SystemExit, and an error derived from it slips past ordinary
+    ``except Exception`` handlers.
+    """
+
+    def __init__(self, message):
+        Exception.__init__(self, message)
+        self.message = message
+
+class FFmpegExtractAudioPP(PostProcessor):
+    """Post processor that extracts the audio track of a downloaded file.
+
+    The file is probed with ffprobe and then passed through ffmpeg with
+    video disabled (-vn). Depending on the preferred codec the audio
+    stream is either copied as is or re-encoded.
+
+    Constructor arguments:
+    downloader        object providing to_screen() and to_stderr()
+    preferredcodec    'best' (used when None is given), 'mp3', 'aac',
+                      'm4a', 'vorbis' or 'wav'
+    preferredquality  value for ffmpeg's -ab option, used only when the
+                      audio is re-encoded; None adds no -ab option
+    keepvideo         when false, the source file is removed after a
+                      successful extraction
+    """
+
+    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
+        PostProcessor.__init__(self, downloader)
+        if preferredcodec is None:
+            preferredcodec = 'best'
+        self._preferredcodec = preferredcodec
+        self._preferredquality = preferredquality
+        self._keepvideo = keepvideo
+
+    @staticmethod
+    def get_audio_codec(path):
+        """Return the codec name of the audio stream reported by ffprobe.
+
+        Returns None when ffprobe cannot be started, exits with a non-zero
+        status, or reports no audio stream.
+        """
+        try:
+            cmd = ['ffprobe', '-show_streams', '--', encodeFilename(path)]
+            # ffprobe's stderr is discarded. The handle is closed explicitly
+            # (try/finally rather than "with", which older Python 2
+            # releases lack) so it is not leaked on every call.
+            devnull = open(os.path.devnull, 'w')
+            try:
+                handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
+                output = handle.communicate()[0]
+            finally:
+                devnull.close()
+        except (IOError, OSError):
+            return None
+        # communicate() has already waited for the process to exit
+        if handle.returncode != 0:
+            return None
+
+        # Remember the most recent codec_name and report it once the
+        # codec_type line that follows identifies the stream as audio.
+        audio_codec = None
+        for line in output.split('\n'):
+            if line.startswith('codec_name='):
+                audio_codec = line.split('=')[1].strip()
+            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+                return audio_codec
+        return None
+
+    @staticmethod
+    def run_ffmpeg(path, out_path, codec, more_opts):
+        """Run ffmpeg to write the audio of path to out_path.
+
+        codec is the value for -acodec (None omits the option) and
+        more_opts is a list of extra command line arguments.
+
+        Raises AudioConversionError when ffmpeg cannot be found or exits
+        with a non-zero status; other IOError/OSError are re-raised.
+        """
+        if codec is None:
+            acodec_opts = []
+        else:
+            acodec_opts = ['-acodec', codec]
+        cmd = ['ffmpeg', '-y', '-i', encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', encodeFilename(out_path)]
+        try:
+            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            stderr = p.communicate()[1]
+        except (IOError, OSError):
+            e = sys.exc_info()[1]
+            # errno 2 is ENOENT: the ffmpeg executable does not exist
+            if isinstance(e, OSError) and e.errno == 2:
+                raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
+            # Bare raise keeps the original traceback
+            raise
+        if p.returncode != 0:
+            # The last line of ffmpeg's stderr is used as the error message
+            msg = stderr.strip().split('\n')[-1]
+            raise AudioConversionError(msg)
+
+    def run(self, information):
+        """Extract the audio of information['filepath'] into a new file.
+
+        Returns the information dictionary with 'filepath' pointing to the
+        audio file. Returns None, which stops the postprocessing chain,
+        when the codec cannot be probed, when ffmpeg fails, or when the
+        source file cannot be removed afterwards. If the destination is
+        the source file itself, nothing is converted or removed and the
+        dictionary is returned unchanged.
+        """
+        path = information['filepath']
+
+        filecodec = self.get_audio_codec(path)
+        if filecodec is None:
+            self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
+            return None
+
+        more_opts = []
+        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
+            if self._preferredcodec == 'm4a' and filecodec == 'aac':
+                # Lossless, but in another container
+                acodec = 'copy'
+                extension = self._preferredcodec
+                more_opts = ['-absf', 'aac_adtstoasc']
+            elif filecodec in ['aac', 'mp3', 'vorbis']:
+                # Lossless if possible
+                acodec = 'copy'
+                extension = filecodec
+                if filecodec == 'aac':
+                    more_opts = ['-f', 'adts']
+                if filecodec == 'vorbis':
+                    extension = 'ogg'
+            else:
+                # MP3 otherwise.
+                acodec = 'libmp3lame'
+                extension = 'mp3'
+                more_opts = []
+                if self._preferredquality is not None:
+                    more_opts += ['-ab', self._preferredquality]
+        else:
+            # We convert the audio (lossy)
+            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
+            extension = self._preferredcodec
+            more_opts = []
+            if self._preferredquality is not None:
+                more_opts += ['-ab', self._preferredquality]
+            if self._preferredcodec == 'aac':
+                more_opts += ['-f', 'adts']
+            if self._preferredcodec == 'm4a':
+                more_opts += ['-absf', 'aac_adtstoasc']
+            if self._preferredcodec == 'vorbis':
+                extension = 'ogg'
+            if self._preferredcodec == 'wav':
+                extension = 'wav'
+                more_opts += ['-f', 'wav']
+
+        # not os.path.splitext, since the latter does not work on unicode in all setups
+        prefix, sep, ext = path.rpartition(u'.')
+        if sep and os.sep not in ext and (os.altsep is None or os.altsep not in ext):
+            new_path = prefix + sep + extension
+        else:
+            # The file name has no extension (no dot at all, or the last
+            # dot belongs to a directory component): append instead of
+            # cutting the path at the wrong place.
+            new_path = path + u'.' + extension
+
+        if new_path == path:
+            # ffmpeg cannot use one file as both input and output, and
+            # removing "the video" afterwards would delete the only copy.
+            self._downloader.to_screen(u'[ffmpeg] Destination is the downloaded file itself, skipping: ' + new_path)
+            return information
+
+        self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
+        try:
+            self.run_ffmpeg(path, new_path, acodec, more_opts)
+        except AudioConversionError:
+            e = sys.exc_info()[1]
+            self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
+            return None
+        except Exception:
+            # KeyboardInterrupt and SystemExit are deliberately not caught
+            self._downloader.to_stderr(u'ERROR: error running ffmpeg')
+            return None
+
+        # Try to update the date time for extracted audio file.
+        if information.get('filetime') is not None:
+            try:
+                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
+            except Exception:
+                self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
+
+        if not self._keepvideo:
+            try:
+                os.remove(encodeFilename(path))
+            except (IOError, OSError):
+                self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
+                return None
+
+        information['filepath'] = new_path
+        return information
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import gzip
-import htmlentitydefs
-import HTMLParser
-import locale
-import os
-import re
-import sys
-import zlib
-import urllib2
-import email.utils
-
-try:
- import cStringIO as StringIO
-except ImportError:
- import StringIO
-
-try:
- import json
-except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
- import trivialjson as json
-
-std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
- 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Encoding': 'gzip, deflate',
- 'Accept-Language': 'en-us,en;q=0.5',
-}
-
-def preferredencoding():
- """Get preferred encoding.
-
- Returns the best encoding scheme for the system, based on
- locale.getpreferredencoding() and some further tweaks.
- """
- def yield_preferredencoding():
- try:
- pref = locale.getpreferredencoding()
- u'TEST'.encode(pref)
- except:
- pref = 'UTF-8'
- while True:
- yield pref
- return yield_preferredencoding().next()
-
-
-def htmlentity_transform(matchobj):
- """Transforms an HTML entity to a Unicode character.
-
- This function receives a match object and is intended to be used with
- the re.sub() function.
- """
- entity = matchobj.group(1)
-
- # Known non-numeric HTML entity
- if entity in htmlentitydefs.name2codepoint:
- return unichr(htmlentitydefs.name2codepoint[entity])
-
- # Unicode character
- mobj = re.match(ur'(?u)#(x?\d+)', entity)
- if mobj is not None:
- numstr = mobj.group(1)
- if numstr.startswith(u'x'):
- base = 16
- numstr = u'0%s' % numstr
- else:
- base = 10
- return unichr(long(numstr, base))
-
- # Unknown entity in name, return its literal representation
- return (u'&%s;' % entity)
-
-
-def sanitize_title(utitle):
- """Sanitizes a video title so it could be used as part of a filename."""
- utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
- return utitle.replace(unicode(os.sep), u'%')
-
-
-def sanitize_open(filename, open_mode):
- """Try to open the given filename, and slightly tweak it if this fails.
-
- Attempts to open the given filename. If this fails, it tries to change
- the filename slightly, step by step, until it's either able to open it
- or it fails and raises a final exception, like the standard open()
- function.
-
- It returns the tuple (stream, definitive_file_name).
- """
- try:
- if filename == u'-':
- if sys.platform == 'win32':
- import msvcrt
- msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
- return (sys.stdout, filename)
- stream = open(encodeFilename(filename), open_mode)
- return (stream, filename)
- except (IOError, OSError), err:
- # In case of error, try to remove win32 forbidden chars
- filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
-
- # An exception here should be caught in the caller
- stream = open(encodeFilename(filename), open_mode)
- return (stream, filename)
-
-
-def timeconvert(timestr):
- """Convert RFC 2822 defined time string into system timestamp"""
- timestamp = None
- timetuple = email.utils.parsedate_tz(timestr)
- if timetuple is not None:
- timestamp = email.utils.mktime_tz(timetuple)
- return timestamp
-
-def simplify_title(title):
- expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
- return expr.sub(u'_', title).strip(u'_')
-
-def orderedSet(iterable):
- """ Remove all duplicates from the input iterable """
- res = []
- for el in iterable:
- if el not in res:
- res.append(el)
- return res
-
-def unescapeHTML(s):
- """
- @param s a string (of type unicode)
- """
- assert type(s) == type(u'')
-
- htmlParser = HTMLParser.HTMLParser()
- return htmlParser.unescape(s)
-
-def encodeFilename(s):
- """
- @param s The name of the file (of type unicode)
- """
-
- assert type(s) == type(u'')
-
- if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
- # Pass u'' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- return s
- else:
- return s.encode(sys.getfilesystemencoding(), 'ignore')
-
-class DownloadError(Exception):
- """Download Error exception.
-
- This exception may be thrown by FileDownloader objects if they are not
- configured to continue on errors. They will contain the appropriate
- error message.
- """
- pass
-
-
-class SameFileError(Exception):
- """Same File exception.
-
- This exception will be thrown by FileDownloader objects if they detect
- multiple files would have to be downloaded to the same file on disk.
- """
- pass
-
-
-class PostProcessingError(Exception):
- """Post Processing exception.
-
- This exception may be raised by PostProcessor's .run() method to
- indicate an error in the postprocessing task.
- """
- pass
-
-class MaxDownloadsReached(Exception):
- """ --max-downloads limit has been reached. """
- pass
-
-
-class UnavailableVideoError(Exception):
- """Unavailable Format exception.
-
- This exception will be thrown when a video is requested
- in a format that is not available for that video.
- """
- pass
-
-
-class ContentTooShortError(Exception):
- """Content Too Short exception.
-
- This exception may be raised by FileDownloader objects when a file they
- download is too small for what the server announced first, indicating
- the connection was probably interrupted.
- """
- # Both in bytes
- downloaded = None
- expected = None
-
- def __init__(self, downloaded, expected):
- self.downloaded = downloaded
- self.expected = expected
-
-
-class YoutubeDLHandler(urllib2.HTTPHandler):
- """Handler for HTTP requests and responses.
-
- This class, when installed with an OpenerDirector, automatically adds
- the standard headers to every HTTP request and handles gzipped and
- deflated responses from web servers. If compression is to be avoided in
- a particular request, the original request in the program code only has
- to include the HTTP header "Youtubedl-No-Compression", which will be
- removed before making the real request.
-
- Part of this code was copied from:
-
- http://techknack.net/python-urllib2-handlers/
-
- Andrew Rowls, the author of that code, agreed to release it to the
- public domain.
- """
-
- @staticmethod
- def deflate(data):
- try:
- return zlib.decompress(data, -zlib.MAX_WBITS)
- except zlib.error:
- return zlib.decompress(data)
-
- @staticmethod
- def addinfourl_wrapper(stream, headers, url, code):
- if hasattr(urllib2.addinfourl, 'getcode'):
- return urllib2.addinfourl(stream, headers, url, code)
- ret = urllib2.addinfourl(stream, headers, url)
- ret.code = code
- return ret
-
- def http_request(self, req):
- for h in std_headers:
- if h in req.headers:
- del req.headers[h]
- req.add_header(h, std_headers[h])
- if 'Youtubedl-no-compression' in req.headers:
- if 'Accept-encoding' in req.headers:
- del req.headers['Accept-encoding']
- del req.headers['Youtubedl-no-compression']
- return req
-
- def http_response(self, req, resp):
- old_resp = resp
- # gzip
- if resp.headers.get('Content-encoding', '') == 'gzip':
- gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
- resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
- resp.msg = old_resp.msg
- # deflate
- if resp.headers.get('Content-encoding', '') == 'deflate':
- gz = StringIO.StringIO(self.deflate(resp.read()))
- resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
- resp.msg = old_resp.msg
- return resp
import urllib2
import warnings
-from Utils import *
+from utils import *
from FileDownloader import *
from InfoExtractors import *
-from PostProcessing import *
+from PostProcessor import *
def updateSelf(downloader, filename):
''' Update the program file with the latest version from the repository '''
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import gzip
+import htmlentitydefs
+import HTMLParser
+import locale
+import os
+import re
+import sys
+import zlib
+import urllib2
+import email.utils
+
+try:
+ import cStringIO as StringIO
+except ImportError:
+ import StringIO
+
+try:
+ import json
+except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
+ import trivialjson as json
+
+# Default HTTP request headers. YoutubeDLHandler.http_request() sets each of
+# them on every request it processes, replacing any value already present.
+std_headers = {
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Encoding': 'gzip, deflate',
+ 'Accept-Language': 'en-us,en;q=0.5',
+}
+
+def preferredencoding():
+    """Get preferred encoding.
+
+    Returns the encoding named by locale.getpreferredencoding() if a test
+    string can actually be encoded with it, and 'UTF-8' otherwise.
+    """
+    try:
+        pref = locale.getpreferredencoding()
+        # Probe the codec: the locale may name an encoding that this
+        # Python installation does not know.
+        u'TEST'.encode(pref)
+    except Exception:
+        # Not a bare except: KeyboardInterrupt/SystemExit must propagate
+        pref = 'UTF-8'
+    return pref
+
+
+def htmlentity_transform(matchobj):
+    """Transforms an HTML entity to a Unicode character.
+
+    This function receives a match object and is intended to be used with
+    the re.sub() function. Group 1 of the match must hold the entity text
+    without the leading '&' and the trailing ';'.
+
+    Named entities listed in htmlentitydefs.name2codepoint, decimal
+    references ('#65') and hexadecimal references ('#x41') are converted.
+    Anything else, including a numeric reference this interpreter cannot
+    represent, is returned in its literal form '&<entity>;'.
+    """
+    entity = matchobj.group(1)
+
+    # Known non-numeric HTML entity
+    if entity in htmlentitydefs.name2codepoint:
+        return unichr(htmlentitydefs.name2codepoint[entity])
+
+    # Numeric character reference. The hexadecimal form must accept the
+    # digits a-f (matching only \d would decode '#x1F' as U+0001), and the
+    # pattern is anchored so that trailing junk is not silently dropped.
+    mobj = re.match(r'#([xX][0-9a-fA-F]+|[0-9]+)$', entity)
+    if mobj is not None:
+        numstr = mobj.group(1)
+        if numstr[0] in u'xX':
+            codepoint = long(numstr[1:], 16)
+        else:
+            codepoint = long(numstr, 10)
+        try:
+            return unichr(codepoint)
+        except (ValueError, OverflowError):
+            # Code point outside the range unichr() supports here;
+            # fall through and keep the entity as written.
+            pass
+
+    # Unknown entity in name, return its literal representation
+    return (u'&%s;' % entity)
+
+
+def sanitize_title(utitle):
+ """Sanitizes a video title so it could be used as part of a filename.
+
+ Every '&...;' sequence is passed through htmlentity_transform(), then
+ each occurrence of os.sep is replaced with '%'.
+ """
+ utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
+ return utitle.replace(unicode(os.sep), u'%')
+
+
+def sanitize_open(filename, open_mode):
+ """Try to open the given filename, and slightly tweak it if this fails.
+
+ Attempts to open the given filename. If this fails with IOError or
+ OSError, the characters / < > : " | ? * are replaced with '#' and the
+ open is attempted once more; a failure of that second attempt is
+ raised to the caller, like the standard open() function.
+
+ The special filename u'-' selects sys.stdout instead of opening a file.
+
+ It returns the tuple (stream, definitive_file_name).
+ """
+ try:
+ if filename == u'-':
+ if sys.platform == 'win32':
+ # Switch stdout to binary mode on Windows before handing it out
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ return (sys.stdout, filename)
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, filename)
+ except (IOError, OSError), err:
+ # In case of error, try to remove win32 forbidden chars
+ filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
+
+ # An exception here should be caught in the caller
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, filename)
+
+
+def timeconvert(timestr):
+    """Translate an RFC 2822 date string into a system timestamp.
+
+    Returns None when email.utils cannot parse the string.
+    """
+    parsed = email.utils.parsedate_tz(timestr)
+    if parsed is None:
+        return None
+    return email.utils.mktime_tz(parsed)
+
+def simplify_title(title):
+ """Replace each run of characters other than word characters and '-'
+ with a single '_', then strip leading and trailing '_'."""
+ expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
+ return expr.sub(u'_', title).strip(u'_')
+
+def orderedSet(iterable):
+    """ Return the distinct elements of iterable as a list, in first-seen order """
+    unique = []
+    for item in iterable:
+        # List membership (not a set) so unhashable elements keep working
+        if item in unique:
+            continue
+        unique.append(item)
+    return unique
+
+def unescapeHTML(s):
+ """
+ Return s with its HTML entities unescaped by HTMLParser.unescape().
+
+ @param s a string (of type unicode)
+ """
+ # Anything other than a unicode object is a caller bug
+ assert type(s) == type(u'')
+
+ htmlParser = HTMLParser.HTMLParser()
+ return htmlParser.unescape(s)
+
+def encodeFilename(s):
+    """
+    Return the form of a file name to hand to file system functions.
+
+    On Windows 2000 and later the unicode object is returned unchanged;
+    elsewhere it is encoded with the file system encoding, dropping
+    characters that encoding cannot represent.
+
+    @param s The name of the file (of type unicode)
+    """
+
+    assert type(s) == type(u'')
+
+    # Index 0 is the major version. Indexing also works on Python
+    # releases older than 2.7, where getwindowsversion() returns a plain
+    # tuple without the .major attribute.
+    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
+        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+        return s
+    else:
+        encoding = sys.getfilesystemencoding()
+        if encoding is None:
+            # Python 2 may report None when the encoding cannot be
+            # determined; str.encode(None, ...) would raise TypeError.
+            encoding = 'utf-8'
+        return s.encode(encoding, 'ignore')
+
+class DownloadError(Exception):
+    """Error raised for a failed download.
+
+    FileDownloader objects may throw this exception when they are not
+    configured to continue on errors; it carries the appropriate error
+    message.
+    """
+
+
+class SameFileError(Exception):
+    """Error raised when several downloads map to one file.
+
+    FileDownloader objects throw this exception when they detect that
+    multiple files would have to be downloaded to the same file on disk.
+    """
+
+
+class PostProcessingError(Exception):
+    """Error raised by a postprocessing task.
+
+    A PostProcessor's .run() method may raise this exception to indicate
+    an error in the postprocessing task.
+    """
+
+class MaxDownloadsReached(Exception):
+    """ The --max-downloads limit has been reached. """
+
+
+class UnavailableVideoError(Exception):
+    """Error raised for an unavailable format.
+
+    This exception is thrown when a video is requested in a format that
+    is not available for that video.
+    """
+
+
+class ContentTooShortError(Exception):
+ """Content Too Short exception.
+
+ This exception may be raised by FileDownloader objects when a file they
+ download is too small for what the server announced first, indicating
+ the connection was probably interrupted.
+ """
+ # Both in bytes
+ # downloaded: size actually received; expected: size that was announced
+ downloaded = None
+ expected = None
+
+ def __init__(self, downloaded, expected):
+ """Record the two sizes on the instance.
+
+ Exception.__init__ is not called here, so the values are only
+ available through the attributes.
+ """
+ self.downloaded = downloaded
+ self.expected = expected
+
+
+class YoutubeDLHandler(urllib2.HTTPHandler):
+ """Handler for HTTP requests and responses.
+
+ This class, when installed with an OpenerDirector, automatically adds
+ the standard headers to every HTTP request and handles gzipped and
+ deflated responses from web servers. If compression is to be avoided in
+ a particular request, the original request in the program code only has
+ to include the HTTP header "Youtubedl-No-Compression", which will be
+ removed before making the real request.
+
+ Part of this code was copied from:
+
+ http://techknack.net/python-urllib2-handlers/
+
+ Andrew Rowls, the author of that code, agreed to release it to the
+ public domain.
+ """
+
+ @staticmethod
+ def deflate(data):
+ """Decompress deflate-encoded data and return the result."""
+ try:
+ # Negative wbits: raw deflate stream without a zlib header
+ return zlib.decompress(data, -zlib.MAX_WBITS)
+ except zlib.error:
+ # Otherwise retry with zlib's default settings
+ return zlib.decompress(data)
+
+ @staticmethod
+ def addinfourl_wrapper(stream, headers, url, code):
+ """Build a urllib2.addinfourl response that carries a status code."""
+ if hasattr(urllib2.addinfourl, 'getcode'):
+ return urllib2.addinfourl(stream, headers, url, code)
+ # No getcode(): presumably an older addinfourl that takes no code
+ # argument (TODO confirm), so the attribute is set by hand.
+ ret = urllib2.addinfourl(stream, headers, url)
+ ret.code = code
+ return ret
+
+ def http_request(self, req):
+ """Set the std_headers on req and honour the no-compression marker.
+
+ Returns the (modified) request object.
+ """
+ for h in std_headers:
+ if h in req.headers:
+ del req.headers[h]
+ req.add_header(h, std_headers[h])
+ # The marker header asks for an uncompressed response: drop
+ # Accept-encoding and the marker itself before the request is sent.
+ if 'Youtubedl-no-compression' in req.headers:
+ if 'Accept-encoding' in req.headers:
+ del req.headers['Accept-encoding']
+ del req.headers['Youtubedl-no-compression']
+ return req
+
+ def http_response(self, req, resp):
+ """Return resp, wrapped so that a gzip or deflate body reads as
+ plain data.
+
+ The whole body is read into memory before it is wrapped. Responses
+ with any other Content-encoding are returned untouched.
+ """
+ old_resp = resp
+ # gzip
+ if resp.headers.get('Content-encoding', '') == 'gzip':
+ gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
+ resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ # deflate
+ if resp.headers.get('Content-encoding', '') == 'deflate':
+ gz = StringIO.StringIO(self.deflate(resp.read()))
+ resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ return resp