Fix printing title etc.
[youtube-dl] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import math
5 import os
6 import re
7 import socket
8 import subprocess
9 import sys
10 import time
11
12 if os.name == 'nt':
13         import ctypes
14
15 from utils import *
16
17
class FileDownloader(object):
        """File Downloader class.

        File downloader objects are the ones responsible for downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. As, given a video URL, the downloader doesn't know how to
        extract all the needed information, task that InfoExtractors do, it
        has to pass the URL to one of them.

        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader hands it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.

        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
        options instead. These options are available through the params
        attribute for the InfoExtractors to use. The FileDownloader also
        registers itself as the downloader in charge for the InfoExtractors
        that are added to it, so this is a "mutual registration".

        Available options:

        username:          Username for authentication purposes.
        password:          Password for authentication purposes.
        usenetrc:          Use netrc for authentication instead.
        quiet:             Do not print messages to stdout.
        forceurl:          Force printing final URL.
        forcetitle:        Force printing title.
        forcethumbnail:    Force printing thumbnail URL.
        forcedescription:  Force printing description.
        forcefilename:     Force printing final filename.
        forceformat:       Force printing format.
        simulate:          Do not download the video files.
        skip_download:     Skip the video download and postprocessing; the
                           description/subtitles/JSON files are still written.
        max_downloads:     Raise MaxDownloadsReached once the download counter
                           exceeds this number.
        format:            Video format code.
        format_limit:      Highest quality format to try.
        outtmpl:           Template for output names.
        restrictfilenames: Do not allow "&" and spaces in file names
        ignoreerrors:      Do not stop on download errors.
        ratelimit:         Download speed limit, in bytes/sec.
        nooverwrites:      Prevent overwriting files.
        retries:           Number of times to retry for HTTP error 5xx
        buffersize:        Size of download buffer in bytes.
        noresizebuffer:    Do not automatically resize the download buffer.
        continuedl:        Try to continue downloads if possible.
        noprogress:        Do not print the progress bar.
        playliststart:     Playlist item to start at.
        playlistend:       Playlist item to end at.
        matchtitle:        Download only matching titles.
        rejecttitle:       Reject downloads for matching titles.
        logtostderr:       Log messages to stderr instead of stdout.
        consoletitle:      Display progress in console window's titlebar.
        nopart:            Do not use temporary .part files.
        updatetime:        Use the Last-modified header to set output file timestamps.
        writedescription:  Write the video description to a .description file
        writeinfojson:     Write the video metadata to a .info.json file
        writesubtitles:    Write the video subtitles to a .srt file
        subtitleslang:     Language of the subtitles to download
        verbose:           Print the rtmpdump command line before running it.
        """

        # Class-level placeholders; __init__ rebinds every one of them on the
        # instance, so these values are never shared between downloaders.
        params = None              # option dictionary handed to the constructor
        _ies = []                  # registered InfoExtractors, in registration order
        _pps = []                  # registered PostProcessors, in chain order
        _download_retcode = None   # value returned by download(); trouble() sets it to 1
        _num_downloads = None      # counter bumped by increment_downloads()
        _screen_file = None        # stream to_screen() writes to (stdout or stderr)
87
88         def __init__(self, params):
89                 """Create a FileDownloader object with the given options."""
90                 self._ies = []
91                 self._pps = []
92                 self._download_retcode = 0
93                 self._num_downloads = 0
94                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
95                 self.params = params
96
97                 if '%(stitle)s' in self.params['outtmpl']:
98                         self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
99
100         @staticmethod
101         def format_bytes(bytes):
102                 if bytes is None:
103                         return 'N/A'
104                 if type(bytes) is str:
105                         bytes = float(bytes)
106                 if bytes == 0.0:
107                         exponent = 0
108                 else:
109                         exponent = int(math.log(bytes, 1024.0))
110                 suffix = 'bkMGTPEZY'[exponent]
111                 converted = float(bytes) / float(1024 ** exponent)
112                 return '%.2f%s' % (converted, suffix)
113
114         @staticmethod
115         def calc_percent(byte_counter, data_len):
116                 if data_len is None:
117                         return '---.-%'
118                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
119
120         @staticmethod
121         def calc_eta(start, now, total, current):
122                 if total is None:
123                         return '--:--'
124                 dif = now - start
125                 if current == 0 or dif < 0.001: # One millisecond
126                         return '--:--'
127                 rate = float(current) / dif
128                 eta = int((float(total) - float(current)) / rate)
129                 (eta_mins, eta_secs) = divmod(eta, 60)
130                 if eta_mins > 99:
131                         return '--:--'
132                 return '%02d:%02d' % (eta_mins, eta_secs)
133
134         @staticmethod
135         def calc_speed(start, now, bytes):
136                 dif = now - start
137                 if bytes == 0 or dif < 0.001: # One millisecond
138                         return '%10s' % '---b/s'
139                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
140
141         @staticmethod
142         def best_block_size(elapsed_time, bytes):
143                 new_min = max(bytes / 2.0, 1.0)
144                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
145                 if elapsed_time < 0.001:
146                         return int(new_max)
147                 rate = bytes / elapsed_time
148                 if rate > new_max:
149                         return int(new_max)
150                 if rate < new_min:
151                         return int(new_min)
152                 return int(rate)
153
154         @staticmethod
155         def parse_bytes(bytestr):
156                 """Parse a string indicating a byte quantity into an integer."""
157                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
158                 if matchobj is None:
159                         return None
160                 number = float(matchobj.group(1))
161                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
162                 return int(round(number * multiplier))
163
164         def add_info_extractor(self, ie):
165                 """Add an InfoExtractor object to the end of the list."""
166                 self._ies.append(ie)
167                 ie.set_downloader(self)
168
169         def add_post_processor(self, pp):
170                 """Add a PostProcessor object to the end of the chain."""
171                 self._pps.append(pp)
172                 pp.set_downloader(self)
173
174         def to_screen(self, message, skip_eol=False):
175                 """Print message to stdout if not in quiet mode."""
176                 assert type(message) == type(u'')
177                 if not self.params.get('quiet', False):
178                         terminator = [u'\n', u''][skip_eol]
179                         output = message + terminator
180                         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
181                                 output = output.encode(preferredencoding(), 'ignore')
182                         self._screen_file.write(output)
183                         self._screen_file.flush()
184
185         def to_stderr(self, message):
186                 """Print message to stderr."""
187                 assert type(message) == type(u'')
188                 output = message + u'\n'
189                 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
190                         output = output.encode(preferredencoding())
191                 sys.stderr.write(output)
192
193         def to_cons_title(self, message):
194                 """Set console/terminal window title to message."""
195                 if not self.params.get('consoletitle', False):
196                         return
197                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
198                         # c_wchar_p() might not be necessary if `message` is
199                         # already of type unicode()
200                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
201                 elif 'TERM' in os.environ:
202                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
203
204         def fixed_template(self):
205                 """Checks if the output template is fixed."""
206                 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
207
208         def trouble(self, message=None):
209                 """Determine action to take when a download problem appears.
210
211                 Depending on if the downloader has been configured to ignore
212                 download errors or not, this method may throw an exception or
213                 not when errors are found, after printing the message.
214                 """
215                 if message is not None:
216                         self.to_stderr(message)
217                 if not self.params.get('ignoreerrors', False):
218                         raise DownloadError(message)
219                 self._download_retcode = 1
220
221         def slow_down(self, start_time, byte_counter):
222                 """Sleep if the download speed is over the rate limit."""
223                 rate_limit = self.params.get('ratelimit', None)
224                 if rate_limit is None or byte_counter == 0:
225                         return
226                 now = time.time()
227                 elapsed = now - start_time
228                 if elapsed <= 0.0:
229                         return
230                 speed = float(byte_counter) / elapsed
231                 if speed > rate_limit:
232                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
233
234         def temp_name(self, filename):
235                 """Returns a temporary filename for the given filename."""
236                 if self.params.get('nopart', False) or filename == u'-' or \
237                                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
238                         return filename
239                 return filename + u'.part'
240
241         def undo_temp_name(self, filename):
242                 if filename.endswith(u'.part'):
243                         return filename[:-len(u'.part')]
244                 return filename
245
246         def try_rename(self, old_filename, new_filename):
247                 try:
248                         if old_filename == new_filename:
249                                 return
250                         os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
251                 except (IOError, OSError) as err:
252                         self.trouble(u'ERROR: unable to rename file')
253
254         def try_utime(self, filename, last_modified_hdr):
255                 """Try to set the last-modified time of the given file."""
256                 if last_modified_hdr is None:
257                         return
258                 if not os.path.isfile(encodeFilename(filename)):
259                         return
260                 timestr = last_modified_hdr
261                 if timestr is None:
262                         return
263                 filetime = timeconvert(timestr)
264                 if filetime is None:
265                         return filetime
266                 try:
267                         os.utime(filename, (time.time(), filetime))
268                 except:
269                         pass
270                 return filetime
271
272         def report_writedescription(self, descfn):
273                 """ Report that the description file is being written """
274                 self.to_screen(u'[info] Writing video description to: ' + descfn)
275
276         def report_writesubtitles(self, srtfn):
277                 """ Report that the subtitles file is being written """
278                 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
279
280         def report_writeinfojson(self, infofn):
281                 """ Report that the metadata file has been written """
282                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
283
284         def report_destination(self, filename):
285                 """Report destination filename."""
286                 self.to_screen(u'[download] Destination: ' + filename)
287
288         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
289                 """Report download progress."""
290                 if self.params.get('noprogress', False):
291                         return
292                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
293                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
294                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
295                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
296
297         def report_resuming_byte(self, resume_len):
298                 """Report attempt to resume at given byte."""
299                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
300
301         def report_retry(self, count, retries):
302                 """Report retry in case of HTTP error 5xx"""
303                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
304
305         def report_file_already_downloaded(self, file_name):
306                 """Report file has already been fully downloaded."""
307                 try:
308                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
309                 except (UnicodeEncodeError) as err:
310                         self.to_screen(u'[download] The file has already been downloaded')
311
312         def report_unable_to_resume(self):
313                 """Report it was impossible to resume download."""
314                 self.to_screen(u'[download] Unable to resume')
315
316         def report_finish(self):
317                 """Report download finished."""
318                 if self.params.get('noprogress', False):
319                         self.to_screen(u'[download] Download completed')
320                 else:
321                         self.to_screen(u'')
322
323         def increment_downloads(self):
324                 """Increment the ordinal that assigns a number to each file."""
325                 self._num_downloads += 1
326
327         def prepare_filename(self, info_dict):
328                 """Generate the output filename."""
329                 try:
330                         template_dict = dict(info_dict)
331
332                         template_dict['epoch'] = int(time.time())
333                         template_dict['autonumber'] = u'%05d' % self._num_downloads
334
335                         template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
336                         template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
337
338                         filename = self.params['outtmpl'] % template_dict
339                         return filename
340                 except (ValueError, KeyError) as err:
341                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
342                         return None
343
344         def _match_entry(self, info_dict):
345                 """ Returns None iff the file should be downloaded """
346
347                 title = info_dict['title']
348                 matchtitle = self.params.get('matchtitle', False)
349                 if matchtitle:
350                         matchtitle = matchtitle.decode('utf8')
351                         if not re.search(matchtitle, title, re.IGNORECASE):
352                                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
353                 rejecttitle = self.params.get('rejecttitle', False)
354                 if rejecttitle:
355                         rejecttitle = rejecttitle.decode('utf8')
356                         if re.search(rejecttitle, title, re.IGNORECASE):
357                                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
358                 return None
359
360         def process_info(self, info_dict):
361                 """Process a single dictionary returned by an InfoExtractor."""
362
363                 # Keep for backwards compatibility
364                 info_dict['stitle'] = info_dict['title']
365
366                 if not 'format' in info_dict:
367                         info_dict['format'] = info_dict['ext']
368
369                 reason = self._match_entry(info_dict)
370                 if reason is not None:
371                         self.to_screen(u'[download] ' + reason)
372                         return
373
374                 max_downloads = self.params.get('max_downloads')
375                 if max_downloads is not None:
376                         if self._num_downloads > int(max_downloads):
377                                 raise MaxDownloadsReached()
378
379                 filename = self.prepare_filename(info_dict)
380
381                 # Forced printings
382                 if self.params.get('forcetitle', False):
383                         compat_print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
384                 if self.params.get('forceurl', False):
385                         compat_print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
386                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
387                         compat_print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
388                 if self.params.get('forcedescription', False) and 'description' in info_dict:
389                         compat_print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
390                 if self.params.get('forcefilename', False) and filename is not None:
391                         compat_print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
392                 if self.params.get('forceformat', False):
393                         compat_print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
394
395                 # Do nothing else if in simulate mode
396                 if self.params.get('simulate', False):
397                         return
398
399                 if filename is None:
400                         return
401
402                 try:
403                         dn = os.path.dirname(encodeFilename(filename))
404                         if dn != '' and not os.path.exists(dn): # dn is already encoded
405                                 os.makedirs(dn)
406                 except (OSError, IOError) as err:
407                         self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
408                         return
409
410                 if self.params.get('writedescription', False):
411                         try:
412                                 descfn = filename + u'.description'
413                                 self.report_writedescription(descfn)
414                                 descfile = open(encodeFilename(descfn), 'wb')
415                                 try:
416                                         descfile.write(info_dict['description'].encode('utf-8'))
417                                 finally:
418                                         descfile.close()
419                         except (OSError, IOError):
420                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
421                                 return
422
423                 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
424                         # subtitles download errors are already managed as troubles in relevant IE
425                         # that way it will silently go on when used with unsupporting IE
426                         try:
427                                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
428                                 self.report_writesubtitles(srtfn)
429                                 srtfile = open(encodeFilename(srtfn), 'wb')
430                                 try:
431                                         srtfile.write(info_dict['subtitles'].encode('utf-8'))
432                                 finally:
433                                         srtfile.close()
434                         except (OSError, IOError):
435                                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
436                                 return
437
438                 if self.params.get('writeinfojson', False):
439                         infofn = filename + u'.info.json'
440                         self.report_writeinfojson(infofn)
441                         try:
442                                 json.dump
443                         except (NameError,AttributeError):
444                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
445                                 return
446                         try:
447                                 infof = open(encodeFilename(infofn), 'wb')
448                                 try:
449                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
450                                         json.dump(json_info_dict, infof)
451                                 finally:
452                                         infof.close()
453                         except (OSError, IOError):
454                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
455                                 return
456
457                 if not self.params.get('skip_download', False):
458                         if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
459                                 success = True
460                         else:
461                                 try:
462                                         success = self._do_download(filename, info_dict)
463                                 except (OSError, IOError) as err:
464                                         raise UnavailableVideoError
465                                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
466                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
467                                         return
468                                 except (ContentTooShortError, ) as err:
469                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
470                                         return
471
472                         if success:
473                                 try:
474                                         self.post_process(filename, info_dict)
475                                 except (PostProcessingError) as err:
476                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
477                                         return
478
479         def download(self, url_list):
480                 """Download a given list of URLs."""
481                 if len(url_list) > 1 and self.fixed_template():
482                         raise SameFileError(self.params['outtmpl'])
483
484                 for url in url_list:
485                         suitable_found = False
486                         for ie in self._ies:
487                                 # Go to next InfoExtractor if not suitable
488                                 if not ie.suitable(url):
489                                         continue
490
491                                 # Warn if the _WORKING attribute is False
492                                 if not ie.working():
493                                         self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
494                                                          u'and will probably not work. If you want to go on, use the -i option.')
495
496                                 # Suitable InfoExtractor found
497                                 suitable_found = True
498
499                                 # Extract information from URL and process it
500                                 videos = ie.extract(url)
501                                 for video in videos or []:
502                                         video['extractor'] = ie.IE_NAME
503                                         try:
504                                                 self.increment_downloads()
505                                                 self.process_info(video)
506                                         except UnavailableVideoError:
507                                                 self.trouble(u'\nERROR: unable to download video')
508
509                                 # Suitable InfoExtractor had been found; go to next URL
510                                 break
511
512                         if not suitable_found:
513                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
514
515                 return self._download_retcode
516
517         def post_process(self, filename, ie_info):
518                 """Run the postprocessing chain on the given file."""
519                 info = dict(ie_info)
520                 info['filepath'] = filename
521                 for pp in self._pps:
522                         info = pp.run(info)
523                         if info is None:
524                                 break
525
526         def _download_with_rtmpdump(self, filename, url, player_url):
527                 self.report_destination(filename)
528                 tmpfilename = self.temp_name(filename)
529
530                 # Check for rtmpdump first
531                 try:
532                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
533                 except (OSError, IOError):
534                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
535                         return False
536
537                 # Download using rtmpdump. rtmpdump returns exit code 2 when
538                 # the connection was interrumpted and resuming appears to be
539                 # possible. This is part of rtmpdump's normal usage, AFAIK.
540                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
541                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
542                 if self.params.get('verbose', False):
543                         try:
544                                 import pipes
545                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
546                         except ImportError:
547                                 shell_quote = repr
548                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
549                 retval = subprocess.call(args)
550                 while retval == 2 or retval == 1:
551                         prevsize = os.path.getsize(encodeFilename(tmpfilename))
552                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
553                         time.sleep(5.0) # This seems to be needed
554                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
555                         cursize = os.path.getsize(encodeFilename(tmpfilename))
556                         if prevsize == cursize and retval == 1:
557                                 break
558                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
559                         if prevsize == cursize and retval == 2 and cursize > 1024:
560                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
561                                 retval = 0
562                                 break
563                 if retval == 0:
564                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
565                         self.try_rename(tmpfilename, filename)
566                         return True
567                 else:
568                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
569                         return False
570
571         def _do_download(self, filename, info_dict):
572                 url = info_dict['url']
573                 player_url = info_dict.get('player_url', None)
574
575                 # Check file already present
576                 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
577                         self.report_file_already_downloaded(filename)
578                         return True
579
580                 # Attempt to download using rtmpdump
581                 if url.startswith('rtmp'):
582                         return self._download_with_rtmpdump(filename, url, player_url)
583
584                 tmpfilename = self.temp_name(filename)
585                 stream = None
586
587                 # Do not include the Accept-Encoding header
588                 headers = {'Youtubedl-no-compression': 'True'}
589                 basic_request = compat_urllib_request.Request(url, None, headers)
590                 request = compat_urllib_request.Request(url, None, headers)
591
592                 # Establish possible resume length
593                 if os.path.isfile(encodeFilename(tmpfilename)):
594                         resume_len = os.path.getsize(encodeFilename(tmpfilename))
595                 else:
596                         resume_len = 0
597
598                 open_mode = 'wb'
599                 if resume_len != 0:
600                         if self.params.get('continuedl', False):
601                                 self.report_resuming_byte(resume_len)
602                                 request.add_header('Range','bytes=%d-' % resume_len)
603                                 open_mode = 'ab'
604                         else:
605                                 resume_len = 0
606
607                 count = 0
608                 retries = self.params.get('retries', 0)
609                 while count <= retries:
610                         # Establish connection
611                         try:
612                                 if count == 0 and 'urlhandle' in info_dict:
613                                         data = info_dict['urlhandle']
614                                 data = compat_urllib_request.urlopen(request)
615                                 break
616                         except (compat_urllib_error.HTTPError, ) as err:
617                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
618                                         # Unexpected HTTP error
619                                         raise
620                                 elif err.code == 416:
621                                         # Unable to resume (requested range not satisfiable)
622                                         try:
623                                                 # Open the connection again without the range header
624                                                 data = compat_urllib_request.urlopen(basic_request)
625                                                 content_length = data.info()['Content-Length']
626                                         except (compat_urllib_error.HTTPError, ) as err:
627                                                 if err.code < 500 or err.code >= 600:
628                                                         raise
629                                         else:
630                                                 # Examine the reported length
631                                                 if (content_length is not None and
632                                                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
633                                                         # The file had already been fully downloaded.
634                                                         # Explanation to the above condition: in issue #175 it was revealed that
635                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
636                                                         # changing the file size slightly and causing problems for some users. So
637                                                         # I decided to implement a suggested change and consider the file
638                                                         # completely downloaded if the file size differs less than 100 bytes from
639                                                         # the one in the hard drive.
640                                                         self.report_file_already_downloaded(filename)
641                                                         self.try_rename(tmpfilename, filename)
642                                                         return True
643                                                 else:
644                                                         # The length does not match, we start the download over
645                                                         self.report_unable_to_resume()
646                                                         open_mode = 'wb'
647                                                         break
648                         # Retry
649                         count += 1
650                         if count <= retries:
651                                 self.report_retry(count, retries)
652
653                 if count > retries:
654                         self.trouble(u'ERROR: giving up after %s retries' % retries)
655                         return False
656
657                 data_len = data.info().get('Content-length', None)
658                 if data_len is not None:
659                         data_len = int(data_len) + resume_len
660                 data_len_str = self.format_bytes(data_len)
661                 byte_counter = 0 + resume_len
662                 block_size = self.params.get('buffersize', 1024)
663                 start = time.time()
664                 while True:
665                         # Download and write
666                         before = time.time()
667                         data_block = data.read(block_size)
668                         after = time.time()
669                         if len(data_block) == 0:
670                                 break
671                         byte_counter += len(data_block)
672
673                         # Open file just in time
674                         if stream is None:
675                                 try:
676                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
677                                         assert stream is not None
678                                         filename = self.undo_temp_name(tmpfilename)
679                                         self.report_destination(filename)
680                                 except (OSError, IOError) as err:
681                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
682                                         return False
683                         try:
684                                 stream.write(data_block)
685                         except (IOError, OSError) as err:
686                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
687                                 return False
688                         if not self.params.get('noresizebuffer', False):
689                                 block_size = self.best_block_size(after - before, len(data_block))
690
691                         # Progress message
692                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
693                         if data_len is None:
694                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
695                         else:
696                                 percent_str = self.calc_percent(byte_counter, data_len)
697                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
698                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
699
700                         # Apply rate limit
701                         self.slow_down(start, byte_counter - resume_len)
702
703                 if stream is None:
704                         self.trouble(u'\nERROR: Did not get any data blocks')
705                         return False
706                 stream.close()
707                 self.report_finish()
708                 if data_len is not None and byte_counter != data_len:
709                         raise ContentTooShortError(byte_counter, int(data_len))
710                 self.try_rename(tmpfilename, filename)
711
712                 # Update file modification time
713                 if self.params.get('updatetime', True):
714                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
715
716                 return True