Take format 37 into account (fixes issue #65)

[youtube-dl] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index a75af8af35806057582c362819479816c09a2960..52bda1b2533e9b8b5c63274074f285a8cccced95 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -19,7 +19,7 @@ import urllib
  import urllib2
  
  std_headers = {
  import urllib2
  
  std_headers = {
-       'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8',
+       'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
         'Accept-Language': 'en-us,en;q=0.5',
         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
         'Accept-Language': 'en-us,en;q=0.5',
@@ -27,6 +27,22 @@ std_headers = {
  
  simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  
  
  simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  
+def preferredencoding():
+       """Get preferred encoding.
+
+       Returns the best encoding scheme for the system, based on
+       locale.getpreferredencoding() and some further tweaks.
+       """
+       def yield_preferredencoding():
+               try:
+                       pref = locale.getpreferredencoding()
+                       u'TEST'.encode(pref)
+               except:
+                       pref = 'UTF-8'
+               while True:
+                       yield pref
+       return yield_preferredencoding().next()
+
  class DownloadError(Exception):
         """Download Error exception.
         
  class DownloadError(Exception):
         """Download Error exception.
         
@@ -52,6 +68,29 @@ class PostProcessingError(Exception):
         """
         pass
  
         """
         pass
  
+class UnavailableFormatError(Exception):
+       """Unavailable Format exception.
+
+       This exception will be thrown when a video is requested
+       in a format that is not available for that video.
+       """
+       pass
+
+class ContentTooShortError(Exception):
+       """Content Too Short exception.
+
+       This exception may be raised by FileDownloader objects when a file they
+       download is too small for what the server announced first, indicating
+       the connection was probably interrupted.
+       """
+       # Both in bytes
+       downloaded = None
+       expected = None
+
+       def __init__(self, downloaded, expected):
+               self.downloaded = downloaded
+               self.expected = expected
+
  class FileDownloader(object):
         """File Downloader class.
  
  class FileDownloader(object):
         """File Downloader class.
  
@@ -91,6 +130,7 @@ class FileDownloader(object):
         ignoreerrors:   Do not stop on download errors.
         ratelimit:      Download speed limit, in bytes/sec.
         nooverwrites:   Prevent overwriting files.
         ignoreerrors:   Do not stop on download errors.
         ratelimit:      Download speed limit, in bytes/sec.
         nooverwrites:   Prevent overwriting files.
+       continuedl:     Try to continue downloads if possible.
         """
  
         params = None
         """
  
         params = None
@@ -119,10 +159,12 @@ class FileDownloader(object):
         def format_bytes(bytes):
                 if bytes is None:
                         return 'N/A'
         def format_bytes(bytes):
                 if bytes is None:
                         return 'N/A'
-               if bytes == 0:
+               if type(bytes) is str:
+                       bytes = float(bytes)
+               if bytes == 0.0:
                         exponent = 0
                 else:
                         exponent = 0
                 else:
-                       exponent = long(math.log(float(bytes), 1024.0))
+                       exponent = long(math.log(bytes, 1024.0))
                 suffix = 'bkMGTPEZY'[exponent]
                 converted = float(bytes) / float(1024**exponent)
                 return '%.2f%s' % (converted, suffix)
                 suffix = 'bkMGTPEZY'[exponent]
                 converted = float(bytes) / float(1024**exponent)
                 return '%.2f%s' % (converted, suffix)
@@ -159,13 +201,13 @@ class FileDownloader(object):
                 new_min = max(bytes / 2.0, 1.0)
                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
                 if elapsed_time < 0.001:
                 new_min = max(bytes / 2.0, 1.0)
                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
                 if elapsed_time < 0.001:
-                       return int(new_max)
+                       return long(new_max)
                 rate = bytes / elapsed_time
                 if rate > new_max:
                 rate = bytes / elapsed_time
                 if rate > new_max:
-                       return int(new_max)
+                       return long(new_max)
                 if rate < new_min:
                 if rate < new_min:
-                       return int(new_min)
-               return int(rate)
+                       return long(new_min)
+               return long(rate)
  
         @staticmethod
         def parse_bytes(bytestr):
  
         @staticmethod
         def parse_bytes(bytestr):
@@ -177,6 +219,16 @@ class FileDownloader(object):
                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                 return long(round(number * multiplier))
  
                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                 return long(round(number * multiplier))
  
+       @staticmethod
+       def verify_url(url):
+               """Verify a URL is valid and data could be downloaded. Return real data URL."""
+               request = urllib2.Request(url, None, std_headers)
+               data = urllib2.urlopen(request)
+               data.read(1)
+               url = data.geturl()
+               data.close()
+               return url
+
         def add_info_extractor(self, ie):
                 """Add an InfoExtractor object to the end of the list."""
                 self._ies.append(ie)
         def add_info_extractor(self, ie):
                 """Add an InfoExtractor object to the end of the list."""
                 self._ies.append(ie)
@@ -190,12 +242,12 @@ class FileDownloader(object):
         def to_stdout(self, message, skip_eol=False):
                 """Print message to stdout if not in quiet mode."""
                 if not self.params.get('quiet', False):
         def to_stdout(self, message, skip_eol=False):
                 """Print message to stdout if not in quiet mode."""
                 if not self.params.get('quiet', False):
-                       print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
+                       print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
                         sys.stdout.flush()
         
         def to_stderr(self, message):
                 """Print message to stderr."""
                         sys.stdout.flush()
         
         def to_stderr(self, message):
                 """Print message to stderr."""
-               print >>sys.stderr, message
+               print >>sys.stderr, message.encode(preferredencoding())
         
         def fixed_template(self):
                 """Checks if the output template is fixed."""
         
         def fixed_template(self):
                 """Checks if the output template is fixed."""
@@ -235,6 +287,18 @@ class FileDownloader(object):
                 """Report download progress."""
                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
                 """Report download progress."""
                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+
+       def report_resuming_byte(self, resume_len):
+               """Report attempt to resume at given byte."""
+               self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
+       
+       def report_file_already_downloaded(self, file_name):
+               """Report file has already been fully downloaded."""
+               self.to_stdout(u'[download] %s has already been downloaded' % file_name)
+       
+       def report_unable_to_resume(self):
+               """Report it was impossible to resume download."""
+               self.to_stdout(u'[download] Unable to resume')
         
         def report_finish(self):
                 """Report download finished."""
         
         def report_finish(self):
                 """Report download finished."""
@@ -242,50 +306,54 @@ class FileDownloader(object):
  
         def process_info(self, info_dict):
                 """Process a single dictionary returned by an InfoExtractor."""
  
         def process_info(self, info_dict):
                 """Process a single dictionary returned by an InfoExtractor."""
-               # Forced printings
-               if self.params.get('forcetitle', False):
-                       print info_dict['title'].encode(locale.getpreferredencoding())
-               if self.params.get('forceurl', False):
-                       print info_dict['url'].encode(locale.getpreferredencoding())
-                       
                 # Do nothing else if in simulate mode
                 if self.params.get('simulate', False):
                 # Do nothing else if in simulate mode
                 if self.params.get('simulate', False):
-                       return
+                       try:
+                               info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
+                       except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
+                               raise UnavailableFormatError
+
+                       # Forced printings
+                       if self.params.get('forcetitle', False):
+                               print info_dict['title'].encode(preferredencoding())
+                       if self.params.get('forceurl', False):
+                               print info_dict['url'].encode(preferredencoding())
  
  
+                       return
+                       
                 try:
                 try:
-                       filename = self.params['outtmpl'] % info_dict
-                       self.report_destination(filename)
+                       template_dict = dict(info_dict)
+                       template_dict['epoch'] = unicode(long(time.time()))
+                       filename = self.params['outtmpl'] % template_dict
                 except (ValueError, KeyError), err:
                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                 except (ValueError, KeyError), err:
                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-               if self.params['nooverwrites'] and os.path.exists(filename):
-                       self.to_stderr('WARNING: file exists: %s; skipping' % filename)
+               if self.params.get('nooverwrites', False) and os.path.exists(filename):
+                       self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                         return
                         return
+
                 try:
                         self.pmkdir(filename)
                 except (OSError, IOError), err:
                         self.trouble('ERROR: unable to create directories: %s' % str(err))
                         return
                 try:
                         self.pmkdir(filename)
                 except (OSError, IOError), err:
                         self.trouble('ERROR: unable to create directories: %s' % str(err))
                         return
+
                 try:
                 try:
-                       outstream = open(filename, 'wb')
-               except (OSError, IOError), err:
-                       self.trouble('ERROR: unable to open for writing: %s' % str(err))
-                       return
-               try:
-                       self._do_download(outstream, info_dict['url'])
-                       outstream.close()
+                       success = self._do_download(filename, info_dict['url'].encode('utf-8'))
                 except (OSError, IOError), err:
                 except (OSError, IOError), err:
-                       self.trouble('ERROR: unable to write video data: %s' % str(err))
-                       return
+                       raise UnavailableFormatError
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                         self.trouble('ERROR: unable to download video data: %s' % str(err))
                         return
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                         self.trouble('ERROR: unable to download video data: %s' % str(err))
                         return
-               try:
-                       self.post_process(filename, info_dict)
-               except (PostProcessingError), err:
-                       self.trouble('ERROR: postprocessing: %s' % str(err))
+               except (ContentTooShortError, ), err:
+                       self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                         return
  
                         return
  
-               return
+               if success:
+                       try:
+                               self.post_process(filename, info_dict)
+                       except (PostProcessingError), err:
+                               self.trouble('ERROR: postprocessing: %s' % str(err))
+                               return
  
         def download(self, url_list):
                 """Download a given list of URLs."""
  
         def download(self, url_list):
                 """Download a given list of URLs."""
@@ -322,21 +390,43 @@ class FileDownloader(object):
                         if info is None:
                                 break
         
                         if info is None:
                                 break
         
-       def _do_download(self, stream, url):
+       def _do_download(self, filename, url):
+               stream = None
+               open_mode = 'ab'
+
+               basic_request = urllib2.Request(url, None, std_headers)
                 request = urllib2.Request(url, None, std_headers)
                 request = urllib2.Request(url, None, std_headers)
-               data = urllib2.urlopen(request)
+
+               # Attempt to resume download with "continuedl" option
+               if os.path.isfile(filename):
+                       resume_len = os.path.getsize(filename)
+               else:
+                       resume_len = 0
+               if self.params.get('continuedl', False) and resume_len != 0:
+                       self.report_resuming_byte(resume_len)
+                       request.add_header('Range','bytes=%d-' % resume_len)
+
+               # Establish connection
+               try:
+                       data = urllib2.urlopen(request)
+               except (urllib2.HTTPError, ), err:
+                       if err.code != 416: #  416 is 'Requested range not satisfiable'
+                               raise
+                       data = urllib2.urlopen(basic_request)
+                       content_length = data.info()['Content-Length']
+                       if content_length is not None and long(content_length) == resume_len:
+                               self.report_file_already_downloaded(filename)
+                               return True
+                       else:
+                               self.report_unable_to_resume()
+                               open_mode = 'wb'
+
                 data_len = data.info().get('Content-length', None)
                 data_len_str = self.format_bytes(data_len)
                 byte_counter = 0
                 block_size = 1024
                 start = time.time()
                 while True:
                 data_len = data.info().get('Content-length', None)
                 data_len_str = self.format_bytes(data_len)
                 byte_counter = 0
                 block_size = 1024
                 start = time.time()
                 while True:
-                       # Progress message
-                       percent_str = self.calc_percent(byte_counter, data_len)
-                       eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
-                       speed_str = self.calc_speed(start, time.time(), byte_counter)
-                       self.report_progress(percent_str, data_len_str, speed_str, eta_str)
-
                         # Download and write
                         before = time.time()
                         data_block = data.read(block_size)
                         # Download and write
                         before = time.time()
                         data_block = data.read(block_size)
@@ -345,15 +435,31 @@ class FileDownloader(object):
                         if data_block_len == 0:
                                 break
                         byte_counter += data_block_len
                         if data_block_len == 0:
                                 break
                         byte_counter += data_block_len
+
+                       # Open file just in time
+                       if stream is None:
+                               try:
+                                       stream = open(filename, open_mode)
+                                       self.report_destination(filename)
+                               except (OSError, IOError), err:
+                                       self.trouble('ERROR: unable to open for writing: %s' % str(err))
+                                       return False
                         stream.write(data_block)
                         block_size = self.best_block_size(after - before, data_block_len)
  
                         stream.write(data_block)
                         block_size = self.best_block_size(after - before, data_block_len)
  
+                       # Progress message
+                       percent_str = self.calc_percent(byte_counter, data_len)
+                       eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
+                       speed_str = self.calc_speed(start, time.time(), byte_counter)
+                       self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+
                         # Apply rate limit
                         self.slow_down(start, byte_counter)
  
                 self.report_finish()
                 if data_len is not None and str(byte_counter) != data_len:
                         # Apply rate limit
                         self.slow_down(start, byte_counter)
  
                 self.report_finish()
                 if data_len is not None and str(byte_counter) != data_len:
-                       raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
+                       raise ContentTooShortError(byte_counter, long(data_len))
+               return True
  
  class InfoExtractor(object):
         """Information Extractor class.
  
  class InfoExtractor(object):
         """Information Extractor class.
@@ -424,6 +530,14 @@ class YoutubeIE(InfoExtractor):
         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
         _NETRC_MACHINE = 'youtube'
         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
         _NETRC_MACHINE = 'youtube'
+       _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
+       _video_extensions = {
+               '13': '3gp',
+               '17': 'mp4',
+               '18': 'mp4',
+               '22': 'mp4',
+               '37': 'mp4',
+       }
  
         @staticmethod
         def suitable(url):
  
         @staticmethod
         def suitable(url):
@@ -464,17 +578,17 @@ class YoutubeIE(InfoExtractor):
                 """Report attempt to confirm age."""
                 self._downloader.to_stdout(u'[youtube] Confirming age')
         
                 """Report attempt to confirm age."""
                 self._downloader.to_stdout(u'[youtube] Confirming age')
         
-       def report_webpage_download(self, video_id):
-               """Report attempt to download webpage."""
-               self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
+       def report_video_info_webpage_download(self, video_id):
+               """Report attempt to download video info webpage."""
+               self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
         
         def report_information_extraction(self, video_id):
                 """Report attempt to extract video information."""
                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
         
         
         def report_information_extraction(self, video_id):
                 """Report attempt to extract video information."""
                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
         
-       def report_video_url(self, video_id, video_real_url):
+       def report_unavailable_format(self, video_id, format):
                 """Report extracted video URL."""
                 """Report extracted video URL."""
-               self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
+               self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
         
         def _real_initialize(self):
                 if self._downloader is None:
         
         def _real_initialize(self):
                 if self._downloader is None:
@@ -554,70 +668,99 @@ class YoutubeIE(InfoExtractor):
                 video_id = mobj.group(2)
  
                 # Downloader parameters
                 video_id = mobj.group(2)
  
                 # Downloader parameters
+               best_quality = False
                 format_param = None
                 format_param = None
+               quality_index = 0
                 if self._downloader is not None:
                         params = self._downloader.params
                         format_param = params.get('format', None)
                 if self._downloader is not None:
                         params = self._downloader.params
                         format_param = params.get('format', None)
+                       if format_param == '0':
+                               format_param = self._available_formats[quality_index]
+                               best_quality = True
  
  
-               # Extension
-               video_extension = {
-                       '17': '3gp',
-                       '18': 'mp4',
-                       '22': 'mp4',
-               }.get(format_param, 'flv')
-
-               # Normalize URL, including format
-               normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
-               if format_param is not None:
-                       normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
-               request = urllib2.Request(normalized_url, None, std_headers)
-               try:
-                       self.report_webpage_download(video_id)
-                       video_webpage = urllib2.urlopen(request).read()
-               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
-                       return
-               self.report_information_extraction(video_id)
-               
-               # "t" param
-               mobj = re.search(r', "t": "([^"]+)"', video_webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
-                       return
-               video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
-               if format_param is not None:
-                       video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-               self.report_video_url(video_id, video_real_url)
+               while True:
+                       # Extension
+                       video_extension = self._video_extensions.get(format_param, 'flv')
  
  
-               # uploader
-               mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-                       return
-               video_uploader = mobj.group(1)
+                       # Get video info
+                       video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
+                       request = urllib2.Request(video_info_url, None, std_headers)
+                       try:
+                               self.report_video_info_webpage_download(video_id)
+                               video_info_webpage = urllib2.urlopen(request).read()
+                       except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                               self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+                               return
+                       self.report_information_extraction(video_id)
+
+                       # "t" param
+                       mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage)
+                       if mobj is None:
+                               # Attempt to see if YouTube has issued an error message
+                               mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage)
+                               if mobj is None:
+                                       self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
+                                       stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
+                                       stream.write(video_info_webpage)
+                                       stream.close()
+                               else:
+                                       reason = urllib.unquote_plus(mobj.group(1))
+                                       self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
+                               return
+                       token = urllib.unquote(mobj.group(1))
+                       video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
+                       if format_param is not None:
+                               video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
+
+                       # uploader
+                       mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
+                       if mobj is None:
+                               self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+                               return
+                       video_uploader = urllib.unquote(mobj.group(1))
+
+                       # title
+                       mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
+                       if mobj is None:
+                               self._downloader.trouble(u'ERROR: unable to extract video title')
+                               return
+                       video_title = urllib.unquote_plus(mobj.group(1))
+                       video_title = video_title.decode('utf-8')
+                       video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
+                       video_title = video_title.replace(os.sep, u'%')
+
+                       # simplified title
+                       simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+                       simple_title = simple_title.strip(ur'_')
+
+                       try:
+                               # Process video information
+                               self._downloader.process_info({
+                                       'id':           video_id.decode('utf-8'),
+                                       'url':          video_real_url.decode('utf-8'),
+                                       'uploader':     video_uploader.decode('utf-8'),
+                                       'title':        video_title,
+                                       'stitle':       simple_title,
+                                       'ext':          video_extension.decode('utf-8'),
+                               })
+
+                               return
+
+                       except UnavailableFormatError, err:
+                               if best_quality:
+                                       if quality_index == len(self._available_formats) - 1:
+                                               # I don't ever expect this to happen
+                                               self._downloader.trouble(u'ERROR: no known formats available for video')
+                                               return
+                                       else:
+                                               self.report_unavailable_format(video_id, format_param)
+                                               quality_index += 1
+                                               format_param = self._available_formats[quality_index]
+                                               continue
+                               else: 
+                                       self._downloader.trouble('ERROR: format not available for video')
+                                       return
  
  
-               # title
-               mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract video title')
-                       return
-               video_title = mobj.group(1).decode('utf-8')
-               video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
-               video_title = video_title.replace(os.sep, u'%')
-
-               # simplified title
-               simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
-               simple_title = simple_title.strip(ur'_')
-
-               # Process video information
-               self._downloader.process_info({
-                       'id':           video_id.decode('utf-8'),
-                       'url':          video_real_url.decode('utf-8'),
-                       'uploader':     video_uploader.decode('utf-8'),
-                       'title':        video_title,
-                       'stitle':       simple_title,
-                       'ext':          video_extension.decode('utf-8'),
-                       })
  
  class MetacafeIE(InfoExtractor):
         """Information Extractor for metacafe.com."""
  
  class MetacafeIE(InfoExtractor):
         """Information Extractor for metacafe.com."""
@@ -703,19 +846,21 @@ class MetacafeIE(InfoExtractor):
  
                 # Extract URL, uploader and title from webpage
                 self.report_extraction(video_id)
  
                 # Extract URL, uploader and title from webpage
                 self.report_extraction(video_id)
-               mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage)
+               mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract media URL')
                         return
                 mediaURL = urllib.unquote(mobj.group(1))
  
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract media URL')
                         return
                 mediaURL = urllib.unquote(mobj.group(1))
  
-               mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
-                       return
-               gdaKey = mobj.group(1)
+               #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
+               #if mobj is None:
+               #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
+               #       return
+               #gdaKey = mobj.group(1)
+               #
+               #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
  
  
-               video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
+               video_url = mediaURL
  
                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
                 if mobj is None:
  
                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
                 if mobj is None:
@@ -729,15 +874,18 @@ class MetacafeIE(InfoExtractor):
                         return
                 video_uploader = mobj.group(1)
  
                         return
                 video_uploader = mobj.group(1)
  
-               # Process video information
-               self._downloader.process_info({
-                       'id':           video_id.decode('utf-8'),
-                       'url':          video_url.decode('utf-8'),
-                       'uploader':     video_uploader.decode('utf-8'),
-                       'title':        video_title,
-                       'stitle':       simple_title,
-                       'ext':          video_extension.decode('utf-8'),
+               try:
+                       # Process video information
+                       self._downloader.process_info({
+                               'id':           video_id.decode('utf-8'),
+                               'url':          video_url.decode('utf-8'),
+                               'uploader':     video_uploader.decode('utf-8'),
+                               'title':        video_title,
+                               'stitle':       simple_title,
+                               'ext':          video_extension.decode('utf-8'),
                         })
                         })
+               except UnavailableFormatError:
+                       self._downloader.trouble(u'ERROR: format not available for video')
  
  
  class YoutubeSearchIE(InfoExtractor):
  
  
  class YoutubeSearchIE(InfoExtractor):
@@ -745,7 +893,7 @@ class YoutubeSearchIE(InfoExtractor):
         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
-       _MORE_PAGES_INDICATOR = r'>Next</a>'
+       _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
         _youtube_ie = None
         _max_youtube_results = 1000
  
         _youtube_ie = None
         _max_youtube_results = 1000
  
@@ -780,7 +928,7 @@ class YoutubeSearchIE(InfoExtractor):
                         return
                 else:
                         try:
                         return
                 else:
                         try:
-                               n = int(prefix)
+                               n = long(prefix)
                                 if n <= 0:
                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                                         return
                                 if n <= 0:
                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                                         return
@@ -789,7 +937,7 @@ class YoutubeSearchIE(InfoExtractor):
                                         n = self._max_youtube_results
                                 self._download_n_results(query, n)
                                 return
                                         n = self._max_youtube_results
                                 self._download_n_results(query, n)
                                 return
-                       except ValueError: # parsing prefix as int fails
+                       except ValueError: # parsing prefix as integer fails
                                 self._download_n_results(query, 1)
                                 return
  
                                 self._download_n_results(query, 1)
                                 return
  
@@ -822,7 +970,7 @@ class YoutubeSearchIE(InfoExtractor):
                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                                                 return
  
                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                                                 return
  
-                       if self._MORE_PAGES_INDICATOR not in page:
+                       if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                                 for id in video_ids:
                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                                 return
                                 for id in video_ids:
                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                                 return
@@ -832,10 +980,10 @@ class YoutubeSearchIE(InfoExtractor):
  class YoutubePlaylistIE(InfoExtractor):
         """Information Extractor for YouTube playlists."""
  
  class YoutubePlaylistIE(InfoExtractor):
         """Information Extractor for YouTube playlists."""
  
-       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
-       _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&amp;page=%s'
+       _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
         _youtube_ie = None
  
         def __init__(self, youtube_ie, downloader=None):
         _youtube_ie = None
  
         def __init__(self, youtube_ie, downloader=None):
@@ -881,7 +1029,7 @@ class YoutubePlaylistIE(InfoExtractor):
                                         ids_in_page.append(mobj.group(1))
                         video_ids.extend(ids_in_page)
  
                                         ids_in_page.append(mobj.group(1))
                         video_ids.extend(ids_in_page)
  
-                       if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page:
+                       if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
                                 break
                         pagenum = pagenum + 1
  
                                 break
                         pagenum = pagenum + 1
  
@@ -919,7 +1067,7 @@ class PostProcessor(object):
                 """Run the PostProcessor.
  
                 The "information" argument is a dictionary like the ones
                 """Run the PostProcessor.
  
                 The "information" argument is a dictionary like the ones
-               returned by InfoExtractors. The only difference is that this
+               composed by InfoExtractors. The only difference is that this
                 one has an extra field called "filepath" that points to the
                 downloaded file.
  
                 one has an extra field called "filepath" that points to the
                 downloaded file.
  
@@ -942,6 +1090,22 @@ if __name__ == '__main__':
                 import getpass
                 import optparse
  
                 import getpass
                 import optparse
  
+               # Function to update the program file with the latest version from bitbucket.org
+               def update_self(downloader, filename):
+                       # Note: downloader only used for options
+                       if not os.access (filename, os.W_OK):
+                               sys.exit('ERROR: no write permissions on %s' % filename)
+
+                       downloader.to_stdout('Updating to latest stable version...')
+                       latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
+                       latest_version = urllib.urlopen(latest_url).read().strip()
+                       prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
+                       newcontent = urllib.urlopen(prog_url).read()
+                       stream = open(filename, 'w')
+                       stream.write(newcontent)
+                       stream.close()
+                       downloader.to_stdout('Updated to version %s' % latest_version)
+
                 # General configuration
                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
                 # General configuration
                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
@@ -949,76 +1113,96 @@ if __name__ == '__main__':
  
                 # Parse command line
                 parser = optparse.OptionParser(
  
                 # Parse command line
                 parser = optparse.OptionParser(
-                               usage='Usage: %prog [options] url...',
-                               version='2009.04.25',
-                               conflict_handler='resolve',
-                               )
+                       usage='Usage: %prog [options] url...',
+                       version='INTERNAL',
+                       conflict_handler='resolve',
+               )
+
                 parser.add_option('-h', '--help',
                                 action='help', help='print this help text and exit')
                 parser.add_option('-v', '--version',
                                 action='version', help='print program version and exit')
                 parser.add_option('-h', '--help',
                                 action='help', help='print this help text and exit')
                 parser.add_option('-v', '--version',
                                 action='version', help='print program version and exit')
-               parser.add_option('-u', '--username',
+               parser.add_option('-U', '--update',
+                               action='store_true', dest='update_self', help='update this program to latest stable version')
+               parser.add_option('-i', '--ignore-errors',
+                               action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+               parser.add_option('-r', '--rate-limit',
+                               dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
+
+               authentication = optparse.OptionGroup(parser, 'Authentication Options')
+               authentication.add_option('-u', '--username',
                                 dest='username', metavar='UN', help='account username')
                                 dest='username', metavar='UN', help='account username')
-               parser.add_option('-p', '--password',
+               authentication.add_option('-p', '--password',
                                 dest='password', metavar='PW', help='account password')
                                 dest='password', metavar='PW', help='account password')
-               parser.add_option('-o', '--output',
-                               dest='outtmpl', metavar='TPL', help='output filename template')
-               parser.add_option('-q', '--quiet',
+               authentication.add_option('-n', '--netrc',
+                               action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
+               parser.add_option_group(authentication)
+
+               video_format = optparse.OptionGroup(parser, 'Video Format Options')
+               video_format.add_option('-f', '--format',
+                               action='store', dest='format', metavar='FMT', help='video format code')
+               video_format.add_option('-b', '--best-quality',
+                               action='store_const', dest='format', help='download the best quality video possible', const='0')
+               video_format.add_option('-m', '--mobile-version',
+                               action='store_const', dest='format', help='alias for -f 17', const='17')
+               video_format.add_option('-d', '--high-def',
+                               action='store_const', dest='format', help='alias for -f 22', const='22')
+               parser.add_option_group(video_format)
+
+               verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+               verbosity.add_option('-q', '--quiet',
                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
-               parser.add_option('-s', '--simulate',
+               verbosity.add_option('-s', '--simulate',
                                 action='store_true', dest='simulate', help='do not download video', default=False)
                                 action='store_true', dest='simulate', help='do not download video', default=False)
-               parser.add_option('-t', '--title',
-                               action='store_true', dest='usetitle', help='use title in file name', default=False)
-               parser.add_option('-l', '--literal',
-                               action='store_true', dest='useliteral', help='use literal title in file name', default=False)
-               parser.add_option('-n', '--netrc',
-                               action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
-               parser.add_option('-g', '--get-url',
+               verbosity.add_option('-g', '--get-url',
                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
-               parser.add_option('-e', '--get-title',
+               verbosity.add_option('-e', '--get-title',
                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
-               parser.add_option('-f', '--format',
-                               dest='format', metavar='FMT', help='video format code')
-               parser.add_option('-m', '--mobile-version',
-                               action='store_const', dest='format', help='alias for -f 17', const='17')
-               parser.add_option('-d', '--high-def',
-                               action='store_const', dest='format', help='alias for -f 22', const='22')
-               parser.add_option('-i', '--ignore-errors',
-                               action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
-               parser.add_option('-r', '--rate-limit',
-                               dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
-               parser.add_option('-a', '--batch-file',
+               parser.add_option_group(verbosity)
+
+               filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+               filesystem.add_option('-t', '--title',
+                               action='store_true', dest='usetitle', help='use title in file name', default=False)
+               filesystem.add_option('-l', '--literal',
+                               action='store_true', dest='useliteral', help='use literal title in file name', default=False)
+               filesystem.add_option('-o', '--output',
+                               dest='outtmpl', metavar='TPL', help='output filename template')
+               filesystem.add_option('-a', '--batch-file',
                                 dest='batchfile', metavar='F', help='file containing URLs to download')
                                 dest='batchfile', metavar='F', help='file containing URLs to download')
-               parser.add_option('-w', '--no-overwrites',
+               filesystem.add_option('-w', '--no-overwrites',
                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
-               (opts, args) = parser.parse_args()
+               filesystem.add_option('-c', '--continue',
+                               action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
+               parser.add_option_group(filesystem)
  
  
+               (opts, args) = parser.parse_args()
+        
                 # Batch file verification
                 batchurls = []
                 if opts.batchfile is not None:
                         try:
                 # Batch file verification
                 batchurls = []
                 if opts.batchfile is not None:
                         try:
-                               batchurls = [line.strip() for line in open(opts.batchfile, 'r')]
+                               batchurls = open(opts.batchfile, 'r').readlines()
+                               batchurls = [x.strip() for x in batchurls]
+                               batchurls = [x for x in batchurls if len(x) > 0]
                         except IOError:
                                 sys.exit(u'ERROR: batch file could not be read')
                 all_urls = batchurls + args
  
                 # Conflicting, missing and erroneous options
                         except IOError:
                                 sys.exit(u'ERROR: batch file could not be read')
                 all_urls = batchurls + args
  
                 # Conflicting, missing and erroneous options
-               if len(all_urls) < 1:
-                       sys.exit(u'ERROR: you must provide at least one URL')
                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
-                       sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
+                       parser.error(u'using .netrc conflicts with giving username/password')
                 if opts.password is not None and opts.username is None:
                 if opts.password is not None and opts.username is None:
-                       sys.exit(u'ERROR: account username missing')
+                       parser.error(u'account username missing')
                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
-                       sys.exit(u'ERROR: using output template conflicts with using title or literal title')
+                       parser.error(u'using output template conflicts with using title or literal title')
                 if opts.usetitle and opts.useliteral:
                 if opts.usetitle and opts.useliteral:
-                       sys.exit(u'ERROR: using title conflicts with using literal title')
+                       parser.error(u'using title conflicts with using literal title')
                 if opts.username is not None and opts.password is None:
                         opts.password = getpass.getpass(u'Type account password and press return:')
                 if opts.ratelimit is not None:
                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
                         if numeric_limit is None:
                 if opts.username is not None and opts.password is None:
                         opts.password = getpass.getpass(u'Type account password and press return:')
                 if opts.ratelimit is not None:
                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
                         if numeric_limit is None:
-                               sys.exit(u'ERROR: invalid rate limit specified')
+                               parser.error(u'invalid rate limit specified')
                         opts.ratelimit = numeric_limit
  
                 # Information extractors
                         opts.ratelimit = numeric_limit
  
                 # Information extractors
@@ -1037,18 +1221,30 @@ if __name__ == '__main__':
                         'forcetitle': opts.gettitle,
                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                         'format': opts.format,
                         'forcetitle': opts.gettitle,
                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                         'format': opts.format,
-                       'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
+                       'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                 or u'%(id)s.%(ext)s'),
                         'ignoreerrors': opts.ignoreerrors,
                         'ratelimit': opts.ratelimit,
                         'nooverwrites': opts.nooverwrites,
                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                 or u'%(id)s.%(ext)s'),
                         'ignoreerrors': opts.ignoreerrors,
                         'ratelimit': opts.ratelimit,
                         'nooverwrites': opts.nooverwrites,
+                       'continuedl': opts.continue_dl,
                         })
                 fd.add_info_extractor(youtube_search_ie)
                 fd.add_info_extractor(youtube_pl_ie)
                 fd.add_info_extractor(metacafe_ie)
                 fd.add_info_extractor(youtube_ie)
                         })
                 fd.add_info_extractor(youtube_search_ie)
                 fd.add_info_extractor(youtube_pl_ie)
                 fd.add_info_extractor(metacafe_ie)
                 fd.add_info_extractor(youtube_ie)
+
+               # Update version
+               if opts.update_self:
+                       update_self(fd, sys.argv[0])
+
+               # Maybe do nothing
+               if len(all_urls) < 1:
+                       if not opts.update_self:
+                               parser.error(u'you must provide at least one URL')
+                       else:
+                               sys.exit()
                 retcode = fd.download(all_urls)
                 sys.exit(retcode)
  
                 retcode = fd.download(all_urls)
                 sys.exit(retcode)