Merge branch 'master' of github.com:rg3/youtube-dl
[youtube-dl] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import httplib
5 import math
6 import os
7 import re
8 import socket
9 import subprocess
10 import sys
11 import time
12 import urllib2
13
14 if os.name == 'nt':
15         import ctypes
16
17 from utils import *
18
19
20 class FileDownloader(object):
21         """File Downloader class.
22
23         File downloader objects are the ones responsible for downloading the
24         actual video file and writing it to disk if the user has requested
25         it, among some other tasks. In most cases there should be one per
26         program. As, given a video URL, the downloader doesn't know how to
27         extract all the needed information, task that InfoExtractors do, it
28         has to pass the URL to one of them.
29
30         For this, file downloader objects have a method that allows
31         InfoExtractors to be registered in a given order. When it is passed
32         a URL, the file downloader hands it to the first InfoExtractor it
33         finds that reports being able to handle it. The InfoExtractor extracts
34         all the information about the video or videos the URL refers to, and
35         asks the FileDownloader to process the video information, possibly
36         downloading the video.
37
38         File downloaders accept a lot of parameters. In order not to saturate
39         the object constructor with arguments, it receives a dictionary of
40         options instead. These options are available through the params
41         attribute for the InfoExtractors to use. The FileDownloader also
42         registers itself as the downloader in charge for the InfoExtractors
43         that are added to it, so this is a "mutual registration".
44
45         Available options:
46
47         username:          Username for authentication purposes.
48         password:          Password for authentication purposes.
49         usenetrc:          Use netrc for authentication instead.
50         quiet:             Do not print messages to stdout.
51         forceurl:          Force printing final URL.
52         forcetitle:        Force printing title.
53         forcethumbnail:    Force printing thumbnail URL.
54         forcedescription:  Force printing description.
55         forcefilename:     Force printing final filename.
56         simulate:          Do not download the video files.
57         format:            Video format code.
58         format_limit:      Highest quality format to try.
59         outtmpl:           Template for output names.
60         restrictfilenames: Do not allow "&" and spaces in file names
61         ignoreerrors:      Do not stop on download errors.
62         ratelimit:         Download speed limit, in bytes/sec.
63         nooverwrites:      Prevent overwriting files.
64         retries:           Number of times to retry for HTTP error 5xx
65         buffersize:        Size of download buffer in bytes.
66         noresizebuffer:    Do not automatically resize the download buffer.
67         continuedl:        Try to continue downloads if possible.
68         noprogress:        Do not print the progress bar.
69         playliststart:     Playlist item to start at.
70         playlistend:       Playlist item to end at.
71         matchtitle:        Download only matching titles.
72         rejecttitle:       Reject downloads for matching titles.
73         logtostderr:       Log messages to stderr instead of stdout.
74         consoletitle:      Display progress in console window's titlebar.
75         nopart:            Do not use temporary .part files.
76         updatetime:        Use the Last-modified header to set output file timestamps.
77         writedescription:  Write the video description to a .description file
78         writeinfojson:     Write the video metadata to a .info.json file
79         writesubtitles:    Write the video subtitles to a .srt file
80         subtitleslang:     Language of the subtitles to download
81         """
82
        # Option dictionary; assigned from the constructor argument.
        params = None
        # Class-level defaults. __init__ rebinds every one of these on the
        # instance, so the shared lists below are not used by instances.
        _ies = []
        _pps = []
        _download_retcode = None
        _num_downloads = None
        _screen_file = None
89
90         def __init__(self, params):
91                 """Create a FileDownloader object with the given options."""
92                 self._ies = []
93                 self._pps = []
94                 self._download_retcode = 0
95                 self._num_downloads = 0
96                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
97                 self.params = params
98
99                 if '%(stitle)s' in self.params['outtmpl']:
100                         self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
101
102         @staticmethod
103         def format_bytes(bytes):
104                 if bytes is None:
105                         return 'N/A'
106                 if type(bytes) is str:
107                         bytes = float(bytes)
108                 if bytes == 0.0:
109                         exponent = 0
110                 else:
111                         exponent = int(math.log(bytes, 1024.0))
112                 suffix = 'bkMGTPEZY'[exponent]
113                 converted = float(bytes) / float(1024 ** exponent)
114                 return '%.2f%s' % (converted, suffix)
115
116         @staticmethod
117         def calc_percent(byte_counter, data_len):
118                 if data_len is None:
119                         return '---.-%'
120                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
121
122         @staticmethod
123         def calc_eta(start, now, total, current):
124                 if total is None:
125                         return '--:--'
126                 dif = now - start
127                 if current == 0 or dif < 0.001: # One millisecond
128                         return '--:--'
129                 rate = float(current) / dif
130                 eta = int((float(total) - float(current)) / rate)
131                 (eta_mins, eta_secs) = divmod(eta, 60)
132                 if eta_mins > 99:
133                         return '--:--'
134                 return '%02d:%02d' % (eta_mins, eta_secs)
135
136         @staticmethod
137         def calc_speed(start, now, bytes):
138                 dif = now - start
139                 if bytes == 0 or dif < 0.001: # One millisecond
140                         return '%10s' % '---b/s'
141                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
142
143         @staticmethod
144         def best_block_size(elapsed_time, bytes):
145                 new_min = max(bytes / 2.0, 1.0)
146                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
147                 if elapsed_time < 0.001:
148                         return int(new_max)
149                 rate = bytes / elapsed_time
150                 if rate > new_max:
151                         return int(new_max)
152                 if rate < new_min:
153                         return int(new_min)
154                 return int(rate)
155
156         @staticmethod
157         def parse_bytes(bytestr):
158                 """Parse a string indicating a byte quantity into an integer."""
159                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
160                 if matchobj is None:
161                         return None
162                 number = float(matchobj.group(1))
163                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
164                 return int(round(number * multiplier))
165
166         def add_info_extractor(self, ie):
167                 """Add an InfoExtractor object to the end of the list."""
168                 self._ies.append(ie)
169                 ie.set_downloader(self)
170
171         def add_post_processor(self, pp):
172                 """Add a PostProcessor object to the end of the chain."""
173                 self._pps.append(pp)
174                 pp.set_downloader(self)
175
176         def to_screen(self, message, skip_eol=False):
177                 """Print message to stdout if not in quiet mode."""
178                 assert type(message) == type(u'')
179                 if not self.params.get('quiet', False):
180                         terminator = [u'\n', u''][skip_eol]
181                         output = message + terminator
182                         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
183                                 output = output.encode(preferredencoding(), 'ignore')
184                         self._screen_file.write(output)
185                         self._screen_file.flush()
186
187         def to_stderr(self, message):
188                 """Print message to stderr."""
189                 assert type(message) == type(u'')
190                 sys.stderr.write((message + u'\n').encode(preferredencoding()))
191
192         def to_cons_title(self, message):
193                 """Set console/terminal window title to message."""
194                 if not self.params.get('consoletitle', False):
195                         return
196                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
197                         # c_wchar_p() might not be necessary if `message` is
198                         # already of type unicode()
199                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
200                 elif 'TERM' in os.environ:
201                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
202
203         def fixed_template(self):
204                 """Checks if the output template is fixed."""
205                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
206
207         def trouble(self, message=None):
208                 """Determine action to take when a download problem appears.
209
210                 Depending on if the downloader has been configured to ignore
211                 download errors or not, this method may throw an exception or
212                 not when errors are found, after printing the message.
213                 """
214                 if message is not None:
215                         self.to_stderr(message)
216                 if not self.params.get('ignoreerrors', False):
217                         raise DownloadError(message)
218                 self._download_retcode = 1
219
220         def slow_down(self, start_time, byte_counter):
221                 """Sleep if the download speed is over the rate limit."""
222                 rate_limit = self.params.get('ratelimit', None)
223                 if rate_limit is None or byte_counter == 0:
224                         return
225                 now = time.time()
226                 elapsed = now - start_time
227                 if elapsed <= 0.0:
228                         return
229                 speed = float(byte_counter) / elapsed
230                 if speed > rate_limit:
231                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
232
233         def temp_name(self, filename):
234                 """Returns a temporary filename for the given filename."""
235                 if self.params.get('nopart', False) or filename == u'-' or \
236                                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
237                         return filename
238                 return filename + u'.part'
239
240         def undo_temp_name(self, filename):
241                 if filename.endswith(u'.part'):
242                         return filename[:-len(u'.part')]
243                 return filename
244
245         def try_rename(self, old_filename, new_filename):
246                 try:
247                         if old_filename == new_filename:
248                                 return
249                         os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
250                 except (IOError, OSError), err:
251                         self.trouble(u'ERROR: unable to rename file')
252
253         def try_utime(self, filename, last_modified_hdr):
254                 """Try to set the last-modified time of the given file."""
255                 if last_modified_hdr is None:
256                         return
257                 if not os.path.isfile(encodeFilename(filename)):
258                         return
259                 timestr = last_modified_hdr
260                 if timestr is None:
261                         return
262                 filetime = timeconvert(timestr)
263                 if filetime is None:
264                         return filetime
265                 try:
266                         os.utime(filename, (time.time(), filetime))
267                 except:
268                         pass
269                 return filetime
270
271         def report_writedescription(self, descfn):
272                 """ Report that the description file is being written """
273                 self.to_screen(u'[info] Writing video description to: ' + descfn)
274
275         def report_writesubtitles(self, srtfn):
276                 """ Report that the subtitles file is being written """
277                 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
278
279         def report_writeinfojson(self, infofn):
280                 """ Report that the metadata file has been written """
281                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
282
283         def report_destination(self, filename):
284                 """Report destination filename."""
285                 self.to_screen(u'[download] Destination: ' + filename)
286
287         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
288                 """Report download progress."""
289                 if self.params.get('noprogress', False):
290                         return
291                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
292                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
293                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
294                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
295
296         def report_resuming_byte(self, resume_len):
297                 """Report attempt to resume at given byte."""
298                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
299
300         def report_retry(self, count, retries):
301                 """Report retry in case of HTTP error 5xx"""
302                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
303
304         def report_file_already_downloaded(self, file_name):
305                 """Report file has already been fully downloaded."""
306                 try:
307                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
308                 except (UnicodeEncodeError), err:
309                         self.to_screen(u'[download] The file has already been downloaded')
310
311         def report_unable_to_resume(self):
312                 """Report it was impossible to resume download."""
313                 self.to_screen(u'[download] Unable to resume')
314
315         def report_finish(self):
316                 """Report download finished."""
317                 if self.params.get('noprogress', False):
318                         self.to_screen(u'[download] Download completed')
319                 else:
320                         self.to_screen(u'')
321
322         def increment_downloads(self):
323                 """Increment the ordinal that assigns a number to each file."""
324                 self._num_downloads += 1
325
326         def prepare_filename(self, info_dict):
327                 """Generate the output filename."""
328                 try:
329                         template_dict = dict(info_dict)
330
331                         template_dict['epoch'] = int(time.time())
332                         template_dict['autonumber'] = u'%05d' % self._num_downloads
333
334                         template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
335                         template_dict = dict((k, sanitize_filename(u(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
336
337                         filename = self.params['outtmpl'] % template_dict
338                         return filename
339                 except (ValueError, KeyError), err:
340                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
341                         return None
342
343         def _match_entry(self, info_dict):
344                 """ Returns None iff the file should be downloaded """
345
346                 title = info_dict['title']
347                 matchtitle = self.params.get('matchtitle', False)
348                 if matchtitle:
349                         matchtitle = matchtitle.decode('utf8')
350                         if not re.search(matchtitle, title, re.IGNORECASE):
351                                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
352                 rejecttitle = self.params.get('rejecttitle', False)
353                 if rejecttitle:
354                         rejecttitle = rejecttitle.decode('utf8')
355                         if re.search(rejecttitle, title, re.IGNORECASE):
356                                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
357                 return None
358
359         def process_info(self, info_dict):
360                 """Process a single dictionary returned by an InfoExtractor."""
361
362                 # Keep for backwards compatibility
363                 info_dict['stitle'] = info_dict['title']
364
365                 if not 'format' in info_dict:
366                         info_dict['format'] = info_dict['ext']
367
368                 reason = self._match_entry(info_dict)
369                 if reason is not None:
370                         self.to_screen(u'[download] ' + reason)
371                         return
372
373                 max_downloads = self.params.get('max_downloads')
374                 if max_downloads is not None:
375                         if self._num_downloads > int(max_downloads):
376                                 raise MaxDownloadsReached()
377
378                 filename = self.prepare_filename(info_dict)
379
380                 # Forced printings
381                 if self.params.get('forcetitle', False):
382                         print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
383                 if self.params.get('forceurl', False):
384                         print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
385                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
386                         print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
387                 if self.params.get('forcedescription', False) and 'description' in info_dict:
388                         print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
389                 if self.params.get('forcefilename', False) and filename is not None:
390                         print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
391                 if self.params.get('forceformat', False):
392                         print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
393
394                 # Do nothing else if in simulate mode
395                 if self.params.get('simulate', False):
396                         return
397
398                 if filename is None:
399                         return
400
401                 try:
402                         dn = os.path.dirname(encodeFilename(filename))
403                         if dn != '' and not os.path.exists(dn): # dn is already encoded
404                                 os.makedirs(dn)
405                 except (OSError, IOError), err:
406                         self.trouble(u'ERROR: unable to create directory ' + u(err))
407                         return
408
409                 if self.params.get('writedescription', False):
410                         try:
411                                 descfn = filename + u'.description'
412                                 self.report_writedescription(descfn)
413                                 descfile = open(encodeFilename(descfn), 'wb')
414                                 try:
415                                         descfile.write(info_dict['description'].encode('utf-8'))
416                                 finally:
417                                         descfile.close()
418                         except (OSError, IOError):
419                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
420                                 return
421
422                 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
423                         # subtitles download errors are already managed as troubles in relevant IE
424                         # that way it will silently go on when used with unsupporting IE
425                         try:
426                                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
427                                 self.report_writesubtitles(srtfn)
428                                 srtfile = open(encodeFilename(srtfn), 'wb')
429                                 try:
430                                         srtfile.write(info_dict['subtitles'].encode('utf-8'))
431                                 finally:
432                                         srtfile.close()
433                         except (OSError, IOError):
434                                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
435                                 return
436
437                 if self.params.get('writeinfojson', False):
438                         infofn = filename + u'.info.json'
439                         self.report_writeinfojson(infofn)
440                         try:
441                                 json.dump
442                         except (NameError,AttributeError):
443                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
444                                 return
445                         try:
446                                 infof = open(encodeFilename(infofn), 'wb')
447                                 try:
448                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
449                                         json.dump(json_info_dict, infof)
450                                 finally:
451                                         infof.close()
452                         except (OSError, IOError):
453                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
454                                 return
455
456                 if not self.params.get('skip_download', False):
457                         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
458                                 success = True
459                         else:
460                                 try:
461                                         success = self._do_download(filename, info_dict)
462                                 except (OSError, IOError), err:
463                                         raise UnavailableVideoError
464                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
465                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
466                                         return
467                                 except (ContentTooShortError, ), err:
468                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
469                                         return
470
471                         if success:
472                                 try:
473                                         self.post_process(filename, info_dict)
474                                 except (PostProcessingError), err:
475                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
476                                         return
477
478         def download(self, url_list):
479                 """Download a given list of URLs."""
480                 if len(url_list) > 1 and self.fixed_template():
481                         raise SameFileError(self.params['outtmpl'])
482
483                 for url in url_list:
484                         suitable_found = False
485                         for ie in self._ies:
486                                 # Go to next InfoExtractor if not suitable
487                                 if not ie.suitable(url):
488                                         continue
489
490                                 # Warn if the _WORKING attribute is False
491                                 if not ie.working():
492                                         self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
493                                                          u'and will probably not work. If you want to go on, use the -i option.')
494
495                                 # Suitable InfoExtractor found
496                                 suitable_found = True
497
498                                 # Extract information from URL and process it
499                                 videos = ie.extract(url)
500                                 for video in videos or []:
501                                         video['extractor'] = ie.IE_NAME
502                                         try:
503                                                 self.increment_downloads()
504                                                 self.process_info(video)
505                                         except UnavailableVideoError:
506                                                 self.trouble(u'\nERROR: unable to download video')
507
508                                 # Suitable InfoExtractor had been found; go to next URL
509                                 break
510
511                         if not suitable_found:
512                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
513
514                 return self._download_retcode
515
516         def post_process(self, filename, ie_info):
517                 """Run the postprocessing chain on the given file."""
518                 info = dict(ie_info)
519                 info['filepath'] = filename
520                 for pp in self._pps:
521                         info = pp.run(info)
522                         if info is None:
523                                 break
524
525         def _download_with_rtmpdump(self, filename, url, player_url):
526                 self.report_destination(filename)
527                 tmpfilename = self.temp_name(filename)
528
529                 # Check for rtmpdump first
530                 try:
531                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
532                 except (OSError, IOError):
533                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
534                         return False
535
536                 # Download using rtmpdump. rtmpdump returns exit code 2 when
537                 # the connection was interrumpted and resuming appears to be
538                 # possible. This is part of rtmpdump's normal usage, AFAIK.
539                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
540                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
541                 if self.params.get('verbose', False):
542                         try:
543                                 import pipes
544                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
545                         except ImportError:
546                                 shell_quote = repr
547                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
548                 retval = subprocess.call(args)
549                 while retval == 2 or retval == 1:
550                         prevsize = os.path.getsize(encodeFilename(tmpfilename))
551                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
552                         time.sleep(5.0) # This seems to be needed
553                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
554                         cursize = os.path.getsize(encodeFilename(tmpfilename))
555                         if prevsize == cursize and retval == 1:
556                                 break
557                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
558                         if prevsize == cursize and retval == 2 and cursize > 1024:
559                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
560                                 retval = 0
561                                 break
562                 if retval == 0:
563                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
564                         self.try_rename(tmpfilename, filename)
565                         return True
566                 else:
567                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
568                         return False
569
570         def _do_download(self, filename, info_dict):
571                 url = info_dict['url']
572                 player_url = info_dict.get('player_url', None)
573
574                 # Check file already present
575                 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
576                         self.report_file_already_downloaded(filename)
577                         return True
578
579                 # Attempt to download using rtmpdump
580                 if url.startswith('rtmp'):
581                         return self._download_with_rtmpdump(filename, url, player_url)
582
583                 tmpfilename = self.temp_name(filename)
584                 stream = None
585
586                 # Do not include the Accept-Encoding header
587                 headers = {'Youtubedl-no-compression': 'True'}
588                 basic_request = urllib2.Request(url, None, headers)
589                 request = urllib2.Request(url, None, headers)
590
591                 # Establish possible resume length
592                 if os.path.isfile(encodeFilename(tmpfilename)):
593                         resume_len = os.path.getsize(encodeFilename(tmpfilename))
594                 else:
595                         resume_len = 0
596
597                 open_mode = 'wb'
598                 if resume_len != 0:
599                         if self.params.get('continuedl', False):
600                                 self.report_resuming_byte(resume_len)
601                                 request.add_header('Range','bytes=%d-' % resume_len)
602                                 open_mode = 'ab'
603                         else:
604                                 resume_len = 0
605
606                 count = 0
607                 retries = self.params.get('retries', 0)
608                 while count <= retries:
609                         # Establish connection
610                         try:
611                                 if count == 0 and 'urlhandle' in info_dict:
612                                         data = info_dict['urlhandle']
613                                 data = urllib2.urlopen(request)
614                                 break
615                         except (urllib2.HTTPError, ), err:
616                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
617                                         # Unexpected HTTP error
618                                         raise
619                                 elif err.code == 416:
620                                         # Unable to resume (requested range not satisfiable)
621                                         try:
622                                                 # Open the connection again without the range header
623                                                 data = urllib2.urlopen(basic_request)
624                                                 content_length = data.info()['Content-Length']
625                                         except (urllib2.HTTPError, ), err:
626                                                 if err.code < 500 or err.code >= 600:
627                                                         raise
628                                         else:
629                                                 # Examine the reported length
630                                                 if (content_length is not None and
631                                                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
632                                                         # The file had already been fully downloaded.
633                                                         # Explanation to the above condition: in issue #175 it was revealed that
634                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
635                                                         # changing the file size slightly and causing problems for some users. So
636                                                         # I decided to implement a suggested change and consider the file
637                                                         # completely downloaded if the file size differs less than 100 bytes from
638                                                         # the one in the hard drive.
639                                                         self.report_file_already_downloaded(filename)
640                                                         self.try_rename(tmpfilename, filename)
641                                                         return True
642                                                 else:
643                                                         # The length does not match, we start the download over
644                                                         self.report_unable_to_resume()
645                                                         open_mode = 'wb'
646                                                         break
647                         # Retry
648                         count += 1
649                         if count <= retries:
650                                 self.report_retry(count, retries)
651
652                 if count > retries:
653                         self.trouble(u'ERROR: giving up after %s retries' % retries)
654                         return False
655
656                 data_len = data.info().get('Content-length', None)
657                 if data_len is not None:
658                         data_len = int(data_len) + resume_len
659                 data_len_str = self.format_bytes(data_len)
660                 byte_counter = 0 + resume_len
661                 block_size = self.params.get('buffersize', 1024)
662                 start = time.time()
663                 while True:
664                         # Download and write
665                         before = time.time()
666                         data_block = data.read(block_size)
667                         after = time.time()
668                         if len(data_block) == 0:
669                                 break
670                         byte_counter += len(data_block)
671
672                         # Open file just in time
673                         if stream is None:
674                                 try:
675                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
676                                         assert stream is not None
677                                         filename = self.undo_temp_name(tmpfilename)
678                                         self.report_destination(filename)
679                                 except (OSError, IOError), err:
680                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
681                                         return False
682                         try:
683                                 stream.write(data_block)
684                         except (IOError, OSError), err:
685                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
686                                 return False
687                         if not self.params.get('noresizebuffer', False):
688                                 block_size = self.best_block_size(after - before, len(data_block))
689
690                         # Progress message
691                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
692                         if data_len is None:
693                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
694                         else:
695                                 percent_str = self.calc_percent(byte_counter, data_len)
696                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
697                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
698
699                         # Apply rate limit
700                         self.slow_down(start, byte_counter - resume_len)
701
702                 if stream is None:
703                         self.trouble(u'\nERROR: Did not get any data blocks')
704                         return False
705                 stream.close()
706                 self.report_finish()
707                 if data_len is not None and byte_counter != data_len:
708                         raise ContentTooShortError(byte_counter, int(data_len))
709                 self.try_rename(tmpfilename, filename)
710
711                 # Update file modification time
712                 if self.params.get('updatetime', True):
713                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
714
715                 return True