# -*- coding: utf-8 -*-
from __future__ import absolute_import

import io
import os
import re
import shutil
import socket
import sys
import time
import traceback

# NOTE(review): the class below also uses project helpers (compat_str,
# encodeFilename, sanitize_filename, DownloadError, ...) whose import is not
# visible in this view of the file -- confirm where they are brought in.
from .extractor import get_info_extractor
from .FileDownloader import FileDownloader
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing the format.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    max_downloads:     Abort (MaxDownloadsReached) once more than this many
                       videos have been processed.
    autonumber_size:   Width of the zero-padded %(autonumber)s field.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
    subtitleslang:     Language of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    params = None
    # Both lists are rebound to fresh per-instance lists in __init__.
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a YoutubeDL object (and its FileDownloader) with the given options."""
        self._ies = []
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index with the boolean: False -> stdout, True -> stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self.params = params
        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params['outtmpl']:
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file (stdout or stderr) if not in quiet mode."""
        assert type(message) == type(u'')
        if not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator
            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
                output = output.encode(preferredencoding(), 'ignore')
            self._screen_file.write(output)
            self._screen_file.flush()

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        # Inspect the stream that is actually written to (sys.stderr), not the
        # screen file, which may be a different stream with a different mode.
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def fixed_template(self):
        """Checks if the output template is fixed (contains no %(...)s field)."""
        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless the 'ignoreerrors' option is set, in
        which case the return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    # Some exceptions carry the exc_info of their original cause.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;31mERROR:\033[0m'
        else:
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep long enough for the average speed to fall back to the limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file has been written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # The file name cannot be printed; fall back to a generic message.
            self.to_screen(u'[download] The file has already been downloaded')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns the filename built from the 'outtmpl' option, or None (after
        reporting an error) when the template cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # .get: a missing key here is not a template error, so it must
            # not be reported as one by the KeyError handler below.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

            filename = self.params['outtmpl'] % template_dict
            return filename
        except KeyError:
            self.report_error(u'Erroneous output template')
            return None
        except ValueError:
            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded

        Otherwise returns a human-readable reason for skipping it. The reason
        carries no '[download] ' prefix; process_info adds it when printing.
        """

        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        return None

    def extract_info(self, url, download=True, ie_key=None, extra_info=None):
        """
        Returns the processed result of the first suitable InfoExtractor
        (see process_ie_result), or None when extraction did not complete.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        if extra_info is None:
            extra_info = {}

        if ie_key:
            ie = get_info_extractor(ie_key)()
            ie.set_downloader(self)
            ies = [ie]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    for result in ie_result:
                        result.update(extra_info)
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                else:
                    ie_result.update(extra_info)
                if 'extractor' not in ie_result:
                    ie_result['extractor'] = ie.IE_NAME
                return self.process_ie_result(ie_result, download=download)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop ended without any extractor accepting the URL.
            self.report_error(u'no suitable InfoExtractor: %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        extra_info holds values inherited from an enclosing playlist.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            # A video listed directly inside a playlist must inherit the
            # playlist values, exactly as extract_info does for 'url' entries.
            ie_result.update(extra_info)
            if 'playlist' not in ie_result:
                # It isn't part of a playlist
                ie_result['playlist'] = None
                ie_result['playlist_index'] = None
            if download:
                self.process_info(ie_result)
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                           (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                }
                if 'extractor' not in entry:
                    # We set the extractor, if it's an url it will be set then to
                    # the new extractor, but if it's already a video we must make
                    # sure it's present: see issue #877
                    entry['extractor'] = ie_result['extractor']
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                r.setdefault('extractor', ie_result['extractor'])
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download=download)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def _write_subtitle(self, filename, subtitle, sub_format):
        """Write one (sub_error, sub_lang, sub) subtitle next to filename.

        Returns False only if writing the file failed and the error was
        reported without raising; True otherwise (including when the
        subtitle itself carries an error, which only produces a warning).
        """
        (sub_error, sub_lang, sub) = subtitle
        if sub_error:
            self.report_warning(u"Some error while getting the subtitles")
            return True
        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
        try:
            self.report_writesubtitles(sub_filename)
            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                subfile.write(sub)
        except (OSError, IOError):
            self.report_error(u'Cannot write subtitles file ' + sub_filename)
            return False
        return True

    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Applies the title/date filters, prints the forced fields, writes the
        requested side files (description, subtitles, info JSON, thumbnail),
        downloads the video through the FileDownloader and runs the
        postprocessors. Raises MaxDownloadsReached or UnavailableVideoError;
        other failures go through report_error.
        """

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            try:
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            sub_format = self.params.get('subtitlesformat')
            if not self._write_subtitle(filename, info_dict['subtitles'][0], sub_format):
                return

        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            sub_format = self.params.get('subtitlesformat')
            for subtitle in info_dict['subtitles']:
                if not self._write_subtitle(filename, subtitle, sub_format):
                    return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # 'urlhandle' is left out of the JSON metadata.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if self.params.get('writethumbnail', False):
            if 'thumbnail' in info_dict:
                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
                if not thumb_format:
                    thumb_format = 'jpg'
                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # encodeFilename: same treatment as every other file written here.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self.fd._do_download(filename, info_dict)
                except (OSError, IOError):
                    raise UnavailableVideoError()
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except PostProcessingError as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code. Raises SameFileError when
        several URLs would be written to one fixed output name, and
        re-raises MaxDownloadsReached after reporting it.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error(u'unable to download video')
            except MaxDownloadsReached:
                self.to_screen(u'[info] Maximum number of downloaded files reached.')
                raise

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A wish to keep the file always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.to_stderr(u'ERROR: ' + e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')