2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
16 from .extractor import get_info_extractor, gen_extractors
17 from .FileDownloader import FileDownloader
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), so it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a
    FileDownloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    autonumber_size:   Zero-padded width of %(autonumber)s (default 5).
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    max_downloads:     Stop once this many files have been processed.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslang:     Language of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    _download_retcode = None

    def __init__(self, params):
        """Create a YoutubeDL object with the given options.

        params is the options dictionary described in the class docstring.
        It must contain 'outtmpl', which is inspected for the deprecated
        %(stitle)s field.
        """
        # NOTE(review): _ies/_pps are the registries that
        # add_info_extractor/add_post_processor append to; confirm the names.
        self._ies = []
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # A conditional (rather than indexing a list with the option value)
        # accepts any truthy value for 'logtostderr'.
        self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
        self.params = params
        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params['outtmpl']:
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractors():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file if not in quiet mode.

        message must be a unicode string. When skip_eol is true no
        trailing newline is written.
        """
        assert isinstance(message, type(u''))
        if self.params.get('quiet', False):
            return
        output = message + (u'' if skip_eol else u'\n')
        # Python 2 lies about the mode of sys.stdout/sys.stderr
        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:
            output = output.encode(preferredencoding(), 'ignore')
        self._screen_file.write(output)
        self._screen_file.flush()

    def to_stderr(self, message):
        """Print message (a unicode string) to stderr, followed by a newline."""
        assert isinstance(message, type(u''))
        output = message + u'\n'
        # The stream written to is sys.stderr, so that is the stream whose
        # mode decides whether bytes are needed.
        # Python 2 lies about the mode of sys.stdout/sys.stderr
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def fixed_template(self):
        """Checks if the output template is fixed (has no %(...)s field)."""
        return re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information. It is only
        printed in verbose mode; when omitted, one is built from the
        exception being handled or, failing that, from the current stack.

        Raises DownloadError unless 'ignoreerrors' is set, in which case
        the return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                current = sys.exc_info()
                if current[0]:  # if .trouble has been called from an except block
                    tb = u''
                    # Some exceptions carry the exc_info of their cause.
                    if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*current[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb = u''.join(traceback.format_list(traceback.extract_stack()))
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            current = sys.exc_info()
            if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                exc_info = current[1].exc_info
            else:
                exc_info = current
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if sys.stderr.isatty() and os.name != 'nt':
            _msg_header = u'\033[0;31mERROR:\033[0m'
        else:
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit.

        start_time is the time.time() value at which the transfer began and
        byte_counter the number of bytes transferred since then.
        """
        rate_limit = self.params.get('ratelimit', None)
        # A missing or zero limit means "unlimited"; a zero limit would
        # otherwise divide by zero below.
        if not rate_limit or byte_counter == 0:
            return
        elapsed = time.time() - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep for the time still needed for byte_counter bytes to
            # take byte_counter / rate_limit seconds overall.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing subtitle: ' + sub_filename)

    def report_existingsubtitles(self, sub_filename):
        """ Report that the subtitles file has been already written """
        self.to_screen(u'[info] Skipping existing subtitle: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file is being written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the 'outtmpl' template with a sanitized copy of info_dict
        plus the computed 'epoch' and 'autonumber' fields. Returns the
        filename, or None (after reporting an error) if the template
        cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # .get: a missing index must not be misreported as a template error.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            restricted = self.params.get('restrictfilenames')

            def sanitize(key, value):
                # NOTE(review): is_id is assumed to be sanitize_filename's
                # flag for the 'id' field; confirm against utils.
                return sanitize_filename(
                    u'NA' if value is None else compat_str(value),
                    restricted=restricted,
                    is_id=(key == u'id'))

            template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

            return self.params['outtmpl'] % template_dict
        except KeyError:
            self.report_error(u'Erroneous output template')
            return None
        except ValueError as err:
            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded

        Otherwise returns the reason for skipping it. The reason carries no
        '[download] ' prefix: process_info adds it when printing.
        """
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        return None

    def extract_info(self, url, download=True, ie_key=None, extra_info=None):
        '''
        Extract the information for url with the first suitable InfoExtractor.

        If 'download', also downloads the videos.
        ie_key, if given, selects the InfoExtractor to use instead of
        trying the registered ones in order.
        extra_info is a dict containing the extra values to add to each result.

        Returns what process_ie_result returns for the extracted data, or
        None if nothing was extracted or an error was reported.
        '''
        if extra_info is None:
            extra_info = {}

        if ie_key:
            ie = get_info_extractor(ie_key)()
            ie.set_downloader(self)
            ies = [ie]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # NOTE(review): assumes InfoExtractor exposes working(); confirm.
            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    for result in ie_result:
                        result.update(extra_info)
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                else:
                    ie_result.update(extra_info)
                if 'extractor' not in ie_result:
                    ie_result['extractor'] = ie.IE_NAME
                return self.process_ie_result(ie_result, download=download)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop ended without any extractor accepting the URL.
            self.report_error(u'no suitable InfoExtractor: %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        extra_info is a dict of extra values merged into video results.
        Returns the resolved ie_result.
        Raises Exception for an unknown '_type'.
        """
        if extra_info is None:
            extra_info = {}

        # If not given we suppose it's a video, support the default old system
        result_type = ie_result.get('_type', 'video')
        if result_type == 'video':
            ie_result.update(extra_info)
            if 'playlist' not in ie_result:
                # It isn't part of a playlist
                ie_result['playlist'] = None
                ie_result['playlist_index'] = None
            if download:
                self.process_info(ie_result)
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            # 'playliststart' is 1-based; convert it to a slice index.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                           (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                }
                if 'extractor' not in entry:
                    # We set the extractor, if it's an url it will be set then to
                    # the new extractor, but if it's already a video we must make
                    # sure it's present: see issue #877
                    entry['extractor'] = ie_result['extractor']
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                r.setdefault('extractor', ie_result['extractor'])
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download=download)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Applies the title/date filters, prints the forced fields, writes
        the requested side files (description, subtitles, info JSON,
        thumbnail), downloads the video and runs the postprocessors.
        Most failures are reported through report_error and end the
        processing of this video. Raises MaxDownloadsReached when the
        'max_downloads' limit is exceeded and UnavailableVideoError on
        local I/O errors during the download.
        """
        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        # Side files share the video's name minus its extension. splitext
        # only looks at the last path component, so dots in directory names
        # and extension-less names are handled.
        basename = os.path.splitext(filename)[0]

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            try:
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub'),
                                       self.params.get('allsubtitles', False)])

        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat')

            for sub_lang, sub in subtitles.items():
                if sub is None:
                    continue
                sub_filename = basename + u'.' + sub_lang + u'.' + sub_format
                try:
                    if os.path.isfile(encodeFilename(sub_filename)):
                        self.report_existingsubtitles(sub_filename)
                        continue
                    self.report_writesubtitles(sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
                except (OSError, IOError):
                    # Name the subtitle file: the description filename is
                    # unrelated here and may not even be defined.
                    self.report_error(u'Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # 'urlhandle' is left out of the dumped metadata.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if self.params.get('writethumbnail', False):
            if info_dict.get('thumbnail') is not None:
                thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
                thumb_filename = basename + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self.fd._do_download(filename, info_dict)
                # Network errors are IOError/OSError subclasses, so they must
                # be matched before the generic local I/O handler.
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (OSError, IOError) as err:
                    raise UnavailableVideoError(err)
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code (0 unless an ignored error
        occurred). Raises SameFileError if several URLs would be written to
        one fixed filename, and re-raises MaxDownloadsReached.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error(u'unable to download video')
            except MaxDownloadsReached:
                self.to_screen(u'[info] Maximum number of downloaded files reached.')
                raise

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Each postprocessor receives a copy of ie_info extended with
        'filepath'. The original file is removed afterwards when the
        postprocessors asked for it and 'keepvideo' is not set.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                # NOTE(review): new_info is not merged back into info;
                # confirm whether later postprocessors should see it.
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A wish to keep the file always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')