2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
20 from .InfoExtractors import get_info_extractor
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    quiet:             Do not print messages to stdout.
    verbose:           Print traceback information with errors and the
                       rtmpdump command line.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    skip_download:     Skip the download step (side files are still written).
    max_downloads:     Stop (MaxDownloadsReached) after this many files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    autonumber_size:   Zero-padded width of %(autonumber)s (default 5).
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    ratelimit:         Download speed limit, in bytes/sec.
    nooverwrites:      Prevent overwriting files.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    progress_with_newline: Print every progress report on its own line.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writesubtitles:    Write the video subtitles to a file
    onlysubtitles:     Downloads only the subtitles of the video
    allsubtitles:      Downloads all the subtitles of the video
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
    subtitleslang:     Language of the subtitles to download
    test:              Download only first bytes to test the downloader.
    keepvideo:         Keep the video file after post-processing
    min_filesize:      Skip files smaller than this size
    max_filesize:      Skip files larger than this size
    """

    # Class-level placeholders; every one of them is rebound per instance
    # in __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index with the boolean: False -> stdout, True -> stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self.params = params

        if '%(stitle)s' in self.params['outtmpl']:
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    @staticmethod
    def format_bytes(bytes):
        """Format a byte count as '<value><suffix>' using powers of 1024.

        Returns 'N/A' for None. Strings are converted with float() first.
        """
        if bytes is None:
            return 'N/A'
        if isinstance(bytes, (str, type(u''))):
            bytes = float(bytes)
        suffixes = 'bkMGTPEZY'
        if bytes == 0.0:
            exponent = 0
        else:
            exponent = int(math.log(bytes, 1024.0))
            # Clamp so that tiny fractions do not wrap around to the last
            # suffix and huge values do not run off the end of the table.
            exponent = min(max(exponent, 0), len(suffixes) - 1)
        suffix = suffixes[exponent]
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a 6 character wide string.

        A placeholder is returned when the total length is unknown (None)
        or zero.
        """
        if not data_len:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Return the estimated remaining time as 'MM:SS'.

        '--:--' is returned when no estimate can be made: unknown total,
        nothing transferred yet, less than a millisecond elapsed, or more
        than 99 minutes remaining.
        """
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = int((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed as a 10 character wide '<size>/s' string."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the throughput of the last read.

        The result stays between half and double the last block size and
        never exceeds 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Returns None when the string is not a number optionally followed
        by one of the suffixes k, M, G, T, P, E, Z, Y (case-insensitive).
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix yields index 0, i.e. a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file if not in quiet mode."""
        assert type(message) == type(u'')
        if not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator
            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
                output = output.encode(preferredencoding(), 'ignore')
            self._screen_file.write(output)
            self._screen_file.flush()

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        # Decide on encoding from the stream that is actually written to.
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self.to_screen('\033]0;%s\007' % message, skip_eol=True)

    def fixed_template(self):
        """Checks if the output template is fixed (has no %(...)s fields)."""
        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the exc_info carried by the active exception, if any.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if sys.stderr.isatty():
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if sys.stderr.isatty():
            _msg_header = u'\033[0;31mERROR:\033[0m'
        else:
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep long enough for the average rate to drop to the limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip the '.part' suffix added by temp_name, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting an error on failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.report_error(u'unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted timestamp, or None when the header is
        missing, the file does not exist or the header cannot be converted.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Best effort only: failing to set the timestamp is not fatal.
            pass
        return filetime

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file is being written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        if self.params.get('progress_with_newline', False):
            self.to_screen(u'[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str))
        else:
            # Rewrite the same terminal line on every update.
            self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            # Terminate the progress line that was printed without a newline.
            self.to_screen(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns None (after reporting the error) when the output template
        cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # .get(): a dictionary without the key must not be reported as
            # a broken template unless the template really asks for it.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

            filename = self.params['outtmpl'] % template_dict
            return filename
        except KeyError:
            self.report_error(u'Erroneous output template')
            return None
        except ValueError:
            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded

        Otherwise returns the reason for skipping it; the caller adds the
        '[download] ' prefix when printing.
        """
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        return None

    def extract_info(self, url, download=True, ie_name=None):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.

        If ie_name is given, that InfoExtractor is tried before the
        registered ones. Returns None when extraction failed and the error
        was only reported, or when no suitable InfoExtractor exists.
        '''
        suitable_found = False

        # We copy the original list
        ies = list(self._ies)

        if ie_name is not None:
            # We put in the first place the given info extractor
            first_ie = get_info_extractor(ie_name)()
            first_ie.set_downloader(self)
            ies.insert(0, first_ie)

        for ie in ies:
            # Go to next InfoExtractor if not suitable
            if not ie.suitable(url):
                continue

            # Warn if the extractor is marked as not working
            if not ie.working():
                self.report_warning(u'the program functionality for this site has been marked as broken, '
                                    u'and will probably not work. If you want to go on, use the -i option.')

            # Suitable InfoExtractor found
            suitable_found = True

            # Extract information from URL and process it
            try:
                ie_results = ie.extract(url)
                results = []
                for ie_result in ie_results:
                    if 'extractor' not in ie_result:
                        # Only set the extractor name when nothing else set it
                        ie_result['extractor'] = ie.IE_NAME
                    results.append(self.process_ie_result(ie_result, download))
                return results
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        if not suitable_found:
            self.report_error(u'no suitable InfoExtractor: %s' % url)

    def process_ie_result(self, ie_result, download=True):
        '''
        Take the result of the ie and return a list of videos.
        For url elements it will search the suitable ie and get the videos
        For playlist elements it will process each of the elements of the 'entries' key

        It will also download the videos if 'download'.
        '''
        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            if 'playlist' not in ie_result:
                # It isn't part of a playlist
                ie_result['playlist'] = None
                ie_result['playlist_index'] = None
            if download:
                # Do the download:
                self.process_info(ie_result)
            return ie_result
        elif result_type == 'url':
            # We get the video pointed by the url
            result = self.extract_info(ie_result['url'], download, ie_name=ie_result['ie_key'])[0]
            return result
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            # playliststart is 1-based for the user, 0-based for slicing.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                entry_result = self.process_ie_result(entry, False)
                entry_result['playlist'] = playlist
                entry_result['playlist_index'] = i + playliststart
                # We must do the download here to correctly set the 'playlist' key
                if download:
                    self.process_info(entry_result)
                playlist_results.append(entry_result)
            result = ie_result.copy()
            result['entries'] = playlist_results
            return result

    def _write_subtitles(self, filename, subtitles):
        """Write (error, language, data) subtitle tuples next to filename.

        Returns False when a subtitles file could not be written (the
        error has been reported), True otherwise.
        """
        sub_format = self.params.get('subtitlesformat')
        for (sub_error, sub_lang, sub) in subtitles:
            if sub_error:
                self.report_warning("Some error while getting the subtitles")
                continue
            sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return False
        return True

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""

        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):  # dn is already encoded
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            try:
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            if not self._write_subtitles(filename, info_dict['subtitles'][:1]):
                return
            if self.params.get('onlysubtitles', False):
                return

        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            if not self._write_subtitles(filename, info_dict['subtitles']):
                return
            if self.params.get('onlysubtitles', False):
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # The open URL handle is not serializable; leave it out.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self._do_download(filename, info_dict)
                # NOTE(review): where URLError/socket.error derive from
                # IOError/OSError this first handler also catches network
                # errors, shadowing the handler below - confirm the order
                # is intended before changing it.
                except (OSError, IOError):
                    raise UnavailableVideoError()
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code (non-zero after ignored
        errors). Raises SameFileError when several URLs would be written
        to one fixed output name.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.trouble(u'\nERROR: unable to download video')
            except MaxDownloadsReached:
                self.to_screen(u'[info] Maximum number of downloaded files reached.')
                raise

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A wish to keep the file always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.to_stderr(u'ERROR: ' + e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')

    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
        """Download an RTMP stream by running the external rtmpdump program.

        Returns True on success, False otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['-W', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if play_path is not None:
            basic_args += ['-y', play_path]
        args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
        if self.params.get('verbose', False):
            try:
                import pipes
                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
        retval = subprocess.call(args)
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0)  # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            return False

    def _do_download(self, filename, info_dict):
        """Download info_dict['url'] to filename.

        Returns True on success (or when the file is already complete) and
        False when the download was given up or skipped. Raises
        ContentTooShortError when fewer bytes than announced were received.
        """
        url = info_dict['url']

        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
            })
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url,
                                                info_dict.get('player_url', None),
                                                info_dict.get('page_url', None),
                                                info_dict.get('play_path', None))

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never gets a Range header; it is used to reopen the
        # connection when resuming turns out to be impossible.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                # NOTE(review): the handle taken from info_dict here is
                # replaced by a fresh urlopen() on the next line, so it is
                # never actually read from - confirm the intent.
                if count == 0 and 'urlhandle' in info_dict:
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as inner_err:
                        if inner_err.code < 500 or inner_err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
            else:
                percent_str = self.calc_percent(byte_counter, data_len)
                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)