Handle "content too short" errors properly

[youtube-dl] / youtube-dl
diff --git a/youtube-dl b/youtube-dl

index e20e59bf8c3fda8d320803c83794d5697cbb24f0..f6e472445d8e0d9512805487a04afc88bcc68662 100755 (executable)
--- a/youtube-dl
+++ b/youtube-dl
@@ -52,6 +52,29 @@ class PostProcessingError(Exception):
         """
         pass
  
         """
         pass
  
+class UnavailableFormatError(Exception):
+       """Unavailable Format exception.
+
+       This exception will be raised when a video is requested
+       in a format that is not available for that video.
+       """
+       pass
+
+class ContentTooShortError(Exception):
+       """Content Too Short exception.
+
+       This exception may be raised by FileDownloader objects when a file they
+       download is too small for what the server announced first, indicating
+       the connection was probably interrupted.
+       """
+       # Byte counts: what was actually received vs. what the server announced
+       downloaded = None
+       expected = None
+
+       def __init__(self, downloaded, expected):
+               self.downloaded = downloaded
+               self.expected = expected
+
  class FileDownloader(object):
         """File Downloader class.
  
  class FileDownloader(object):
         """File Downloader class.
  
@@ -253,40 +276,48 @@ class FileDownloader(object):
                         return
  
                 try:
                         return
  
                 try:
-                       filename = self.params['outtmpl'] % info_dict
+                       template_dict = dict(info_dict)
+                       template_dict['epoch'] = unicode(long(time.time()))
+                       filename = self.params['outtmpl'] % template_dict
                         self.report_destination(filename)
                 except (ValueError, KeyError), err:
                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                 if self.params['nooverwrites'] and os.path.exists(filename):
                         self.to_stderr('WARNING: file exists: %s; skipping' % filename)
                         return
                         self.report_destination(filename)
                 except (ValueError, KeyError), err:
                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                 if self.params['nooverwrites'] and os.path.exists(filename):
                         self.to_stderr('WARNING: file exists: %s; skipping' % filename)
                         return
+
                 try:
                         self.pmkdir(filename)
                 except (OSError, IOError), err:
                         self.trouble('ERROR: unable to create directories: %s' % str(err))
                         return
                 try:
                         self.pmkdir(filename)
                 except (OSError, IOError), err:
                         self.trouble('ERROR: unable to create directories: %s' % str(err))
                         return
+
                 try:
                         outstream = open(filename, 'wb')
                 except (OSError, IOError), err:
                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
                         return
                 try:
                         outstream = open(filename, 'wb')
                 except (OSError, IOError), err:
                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
                         return
+
                 try:
                         self._do_download(outstream, info_dict['url'])
                         outstream.close()
                 except (OSError, IOError), err:
                 try:
                         self._do_download(outstream, info_dict['url'])
                         outstream.close()
                 except (OSError, IOError), err:
-                       self.trouble('ERROR: unable to write video data: %s' % str(err))
-                       return
+                       outstream.close()
+                       os.remove(filename)
+                       raise UnavailableFormatError
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                         self.trouble('ERROR: unable to download video data: %s' % str(err))
                         return
                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                         self.trouble('ERROR: unable to download video data: %s' % str(err))
                         return
+               except (ContentTooShortError, ), err:
+                       self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                       return
+
                 try:
                         self.post_process(filename, info_dict)
                 except (PostProcessingError), err:
                         self.trouble('ERROR: postprocessing: %s' % str(err))
                         return
  
                 try:
                         self.post_process(filename, info_dict)
                 except (PostProcessingError), err:
                         self.trouble('ERROR: postprocessing: %s' % str(err))
                         return
  
-               return
-
         def download(self, url_list):
                 """Download a given list of URLs."""
                 if len(url_list) > 1 and self.fixed_template():
         def download(self, url_list):
                 """Download a given list of URLs."""
                 if len(url_list) > 1 and self.fixed_template():
@@ -353,7 +384,7 @@ class FileDownloader(object):
  
                 self.report_finish()
                 if data_len is not None and str(byte_counter) != data_len:
  
                 self.report_finish()
                 if data_len is not None and str(byte_counter) != data_len:
-                       raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
+                       raise ContentTooShortError(byte_counter, long(data_len))
  
  class InfoExtractor(object):
         """Information Extractor class.
  
  class InfoExtractor(object):
         """Information Extractor class.
@@ -424,6 +455,13 @@ class YoutubeIE(InfoExtractor):
         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
         _NETRC_MACHINE = 'youtube'
         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
         _NETRC_MACHINE = 'youtube'
+       _available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag
+       _video_extensions = {
+               '13': '3gp',
+               '17': 'mp4',
+               '18': 'mp4',
+               '22': 'mp4',
+       }
  
         @staticmethod
         def suitable(url):
  
         @staticmethod
         def suitable(url):
@@ -476,6 +514,10 @@ class YoutubeIE(InfoExtractor):
                 """Report extracted video URL."""
                 self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
         
                 """Report extracted video URL."""
                 self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
         
+       def report_unavailable_format(self, video_id, format):
+               """Report that the requested format is not available."""
+               self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
+       
         def _real_initialize(self):
                 if self._downloader is None:
                         return
         def _real_initialize(self):
                 if self._downloader is None:
                         return
@@ -554,70 +596,91 @@ class YoutubeIE(InfoExtractor):
                 video_id = mobj.group(2)
  
                 # Downloader parameters
                 video_id = mobj.group(2)
  
                 # Downloader parameters
+               best_quality = False
                 format_param = None
                 format_param = None
+               quality_index = 0
                 if self._downloader is not None:
                         params = self._downloader.params
                         format_param = params.get('format', None)
                 if self._downloader is not None:
                         params = self._downloader.params
                         format_param = params.get('format', None)
+                       if format_param == '0':
+                               format_param = self._available_formats[quality_index]
+                               best_quality = True
  
  
-               # Extension
-               video_extension = {
-                       '17': '3gp',
-                       '18': 'mp4',
-                       '22': 'mp4',
-               }.get(format_param, 'flv')
-
-               # Normalize URL, including format
-               normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
-               if format_param is not None:
-                       normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
-               request = urllib2.Request(normalized_url, None, std_headers)
-               try:
-                       self.report_webpage_download(video_id)
-                       video_webpage = urllib2.urlopen(request).read()
-               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
-                       return
-               self.report_information_extraction(video_id)
-               
-               # "t" param
-               mobj = re.search(r', "t": "([^"]+)"', video_webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
-                       return
-               video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
-               if format_param is not None:
-                       video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-               self.report_video_url(video_id, video_real_url)
+               while True:
+                       try:
+                               # Extension
+                               video_extension = self._video_extensions.get(format_param, 'flv')
+
+                               # Normalize URL, including format
+                               normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
+                               if format_param is not None:
+                                       normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
+                               request = urllib2.Request(normalized_url, None, std_headers)
+                               try:
+                                       self.report_webpage_download(video_id)
+                                       video_webpage = urllib2.urlopen(request).read()
+                               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                                       return
+                               self.report_information_extraction(video_id)
+                               
+                               # "t" param
+                               mobj = re.search(r', "t": "([^"]+)"', video_webpage)
+                               if mobj is None:
+                                       self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
+                                       return
+                               video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
+                               if format_param is not None:
+                                       video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
+                               self.report_video_url(video_id, video_real_url)
+
+                               # uploader
+                               mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
+                               if mobj is None:
+                                       self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+                                       return
+                               video_uploader = mobj.group(1)
  
  
-               # uploader
-               mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-                       return
-               video_uploader = mobj.group(1)
+                               # title
+                               mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
+                               if mobj is None:
+                                       self._downloader.trouble(u'ERROR: unable to extract video title')
+                                       return
+                               video_title = mobj.group(1).decode('utf-8')
+                               video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
+                               video_title = video_title.replace(os.sep, u'%')
+
+                               # simplified title
+                               simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+                               simple_title = simple_title.strip(ur'_')
+
+                               # Process video information
+                               self._downloader.process_info({
+                                       'id':           video_id.decode('utf-8'),
+                                       'url':          video_real_url.decode('utf-8'),
+                                       'uploader':     video_uploader.decode('utf-8'),
+                                       'title':        video_title,
+                                       'stitle':       simple_title,
+                                       'ext':          video_extension.decode('utf-8'),
+                               })
+
+                               return
+
+                       except UnavailableFormatError, err:
+                               if best_quality:
+                                       if quality_index == len(self._available_formats) - 1:
+                                               # I don't ever expect this to happen
+                                               self._downloader.trouble(u'ERROR: no known formats available for video')
+                                               return
+                                       else:
+                                               self.report_unavailable_format(video_id, format_param)
+                                               quality_index += 1
+                                               format_param = self._available_formats[quality_index]
+                                               continue
+                               else: 
+                                       self._downloader.trouble('ERROR: format not available for video')
+                                       return
  
  
-               # title
-               mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
-               if mobj is None:
-                       self._downloader.trouble(u'ERROR: unable to extract video title')
-                       return
-               video_title = mobj.group(1).decode('utf-8')
-               video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
-               video_title = video_title.replace(os.sep, u'%')
-
-               # simplified title
-               simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
-               simple_title = simple_title.strip(ur'_')
-
-               # Process video information
-               self._downloader.process_info({
-                       'id':           video_id.decode('utf-8'),
-                       'url':          video_real_url.decode('utf-8'),
-                       'uploader':     video_uploader.decode('utf-8'),
-                       'title':        video_title,
-                       'stitle':       simple_title,
-                       'ext':          video_extension.decode('utf-8'),
-                       })
  
  class MetacafeIE(InfoExtractor):
         """Information Extractor for metacafe.com."""
  
  class MetacafeIE(InfoExtractor):
         """Information Extractor for metacafe.com."""
@@ -729,15 +792,18 @@ class MetacafeIE(InfoExtractor):
                         return
                 video_uploader = mobj.group(1)
  
                         return
                 video_uploader = mobj.group(1)
  
-               # Process video information
-               self._downloader.process_info({
-                       'id':           video_id.decode('utf-8'),
-                       'url':          video_url.decode('utf-8'),
-                       'uploader':     video_uploader.decode('utf-8'),
-                       'title':        video_title,
-                       'stitle':       simple_title,
-                       'ext':          video_extension.decode('utf-8'),
+               try:
+                       # Process video information
+                       self._downloader.process_info({
+                               'id':           video_id.decode('utf-8'),
+                               'url':          video_url.decode('utf-8'),
+                               'uploader':     video_uploader.decode('utf-8'),
+                               'title':        video_title,
+                               'stitle':       simple_title,
+                               'ext':          video_extension.decode('utf-8'),
                         })
                         })
+               except UnavailableFormatError:
+                       self._downloader.trouble(u'ERROR: format not available for video')
  
  
  class YoutubeSearchIE(InfoExtractor):
  
  
  class YoutubeSearchIE(InfoExtractor):
@@ -919,7 +985,7 @@ class PostProcessor(object):
                 """Run the PostProcessor.
  
                 The "information" argument is a dictionary like the ones
                 """Run the PostProcessor.
  
                 The "information" argument is a dictionary like the ones
-               returned by InfoExtractors. The only difference is that this
+               composed by InfoExtractors. The only difference is that this
                 one has an extra field called "filepath" that points to the
                 downloaded file.
  
                 one has an extra field called "filepath" that points to the
                 downloaded file.
  
@@ -949,48 +1015,64 @@ if __name__ == '__main__':
  
                 # Parse command line
                 parser = optparse.OptionParser(
  
                 # Parse command line
                 parser = optparse.OptionParser(
-                               usage='Usage: %prog [options] url...',
-                               version='INTERNAL',
-                               conflict_handler='resolve',
-                               )
+                       usage='Usage: %prog [options] url...',
+                       version='INTERNAL',
+                       conflict_handler='resolve',
+               )
+
                 parser.add_option('-h', '--help',
                                 action='help', help='print this help text and exit')
                 parser.add_option('-v', '--version',
                                 action='version', help='print program version and exit')
                 parser.add_option('-h', '--help',
                                 action='help', help='print this help text and exit')
                 parser.add_option('-v', '--version',
                                 action='version', help='print program version and exit')
-               parser.add_option('-u', '--username',
+               parser.add_option('-i', '--ignore-errors',
+                               action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+               parser.add_option('-r', '--rate-limit',
+                               dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
+
+               authentication = optparse.OptionGroup(parser, 'Authentication Options')
+               authentication.add_option('-u', '--username',
                                 dest='username', metavar='UN', help='account username')
                                 dest='username', metavar='UN', help='account username')
-               parser.add_option('-p', '--password',
+               authentication.add_option('-p', '--password',
                                 dest='password', metavar='PW', help='account password')
                                 dest='password', metavar='PW', help='account password')
-               parser.add_option('-o', '--output',
-                               dest='outtmpl', metavar='TPL', help='output filename template')
-               parser.add_option('-q', '--quiet',
+               authentication.add_option('-n', '--netrc',
+                               action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
+               parser.add_option_group(authentication)
+
+               video_format = optparse.OptionGroup(parser, 'Video Format Options')
+               video_format.add_option('-f', '--format',
+                               action='append', dest='format', metavar='FMT', help='video format code')
+               video_format.add_option('-b', '--best-quality',
+                               action='store_const', dest='format', help='download the best quality video possible', const='0')
+               video_format.add_option('-m', '--mobile-version',
+                               action='store_const', dest='format', help='alias for -f 17', const='17')
+               video_format.add_option('-d', '--high-def',
+                               action='store_const', dest='format', help='alias for -f 22', const='22')
+               parser.add_option_group(video_format)
+
+               verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+               verbosity.add_option('-q', '--quiet',
                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
-               parser.add_option('-s', '--simulate',
+               verbosity.add_option('-s', '--simulate',
                                 action='store_true', dest='simulate', help='do not download video', default=False)
                                 action='store_true', dest='simulate', help='do not download video', default=False)
-               parser.add_option('-t', '--title',
-                               action='store_true', dest='usetitle', help='use title in file name', default=False)
-               parser.add_option('-l', '--literal',
-                               action='store_true', dest='useliteral', help='use literal title in file name', default=False)
-               parser.add_option('-n', '--netrc',
-                               action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
-               parser.add_option('-g', '--get-url',
+               verbosity.add_option('-g', '--get-url',
                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
-               parser.add_option('-e', '--get-title',
+               verbosity.add_option('-e', '--get-title',
                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
-               parser.add_option('-f', '--format',
-                               dest='format', metavar='FMT', help='video format code')
-               parser.add_option('-m', '--mobile-version',
-                               action='store_const', dest='format', help='alias for -f 17', const='17')
-               parser.add_option('-d', '--high-def',
-                               action='store_const', dest='format', help='alias for -f 22', const='22')
-               parser.add_option('-i', '--ignore-errors',
-                               action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
-               parser.add_option('-r', '--rate-limit',
-                               dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
-               parser.add_option('-a', '--batch-file',
+               parser.add_option_group(verbosity)
+
+               filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+               filesystem.add_option('-t', '--title',
+                               action='store_true', dest='usetitle', help='use title in file name', default=False)
+               filesystem.add_option('-l', '--literal',
+                               action='store_true', dest='useliteral', help='use literal title in file name', default=False)
+               filesystem.add_option('-o', '--output',
+                               dest='outtmpl', metavar='TPL', help='output filename template')
+               filesystem.add_option('-a', '--batch-file',
                                 dest='batchfile', metavar='F', help='file containing URLs to download')
                                 dest='batchfile', metavar='F', help='file containing URLs to download')
-               parser.add_option('-w', '--no-overwrites',
+               filesystem.add_option('-w', '--no-overwrites',
                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
+               parser.add_option_group(filesystem)
+
                 (opts, args) = parser.parse_args()
  
                 # Batch file verification
                 (opts, args) = parser.parse_args()
  
                 # Batch file verification
@@ -1006,22 +1088,29 @@ if __name__ == '__main__':
  
                 # Conflicting, missing and erroneous options
                 if len(all_urls) < 1:
  
                 # Conflicting, missing and erroneous options
                 if len(all_urls) < 1:
-                       sys.exit(u'ERROR: you must provide at least one URL')
+                       parser.error(u'you must provide at least one URL')
                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
-                       sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
+                       parser.error(u'using .netrc conflicts with giving username/password')
                 if opts.password is not None and opts.username is None:
                 if opts.password is not None and opts.username is None:
-                       sys.exit(u'ERROR: account username missing')
+                       parser.error(u'account username missing')
                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
-                       sys.exit(u'ERROR: using output template conflicts with using title or literal title')
+                       parser.error(u'using output template conflicts with using title or literal title')
                 if opts.usetitle and opts.useliteral:
                 if opts.usetitle and opts.useliteral:
-                       sys.exit(u'ERROR: using title conflicts with using literal title')
+                       parser.error(u'using title conflicts with using literal title')
                 if opts.username is not None and opts.password is None:
                         opts.password = getpass.getpass(u'Type account password and press return:')
                 if opts.ratelimit is not None:
                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
                         if numeric_limit is None:
                 if opts.username is not None and opts.password is None:
                         opts.password = getpass.getpass(u'Type account password and press return:')
                 if opts.ratelimit is not None:
                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
                         if numeric_limit is None:
-                               sys.exit(u'ERROR: invalid rate limit specified')
+                               parser.error(u'invalid rate limit specified')
                         opts.ratelimit = numeric_limit
                         opts.ratelimit = numeric_limit
+               if opts.format is not None and len(opts.format) > 1:
+                       parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
+               if opts.format is None:
+                       real_format = None
+               else:
+                       real_format = opts.format[0]
+
  
                 # Information extractors
                 youtube_ie = YoutubeIE()
  
                 # Information extractors
                 youtube_ie = YoutubeIE()
@@ -1038,7 +1127,7 @@ if __name__ == '__main__':
                         'forceurl': opts.geturl,
                         'forcetitle': opts.gettitle,
                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
                         'forceurl': opts.geturl,
                         'forcetitle': opts.gettitle,
                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
-                       'format': opts.format,
+                       'format': real_format,
                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')