2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
47 UnavailableVideoError,
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
57 class YoutubeDL(object):
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object hands it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL processes the extracted information, possibly using a File
73 Downloader to download the video.
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
136 The following parameters are not used by YoutubeDL itself, they are used by
138 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
139 noresizebuffer, retries, continuedl, noprogress, consoletitle
145 _download_retcode = None
146 _num_downloads = None
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.
    """
    # Normalise the options first: everything below reads self.params, so
    # neither a missing dictionary nor a missing key may raise.
    self.params = {} if params is None else params

    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    if self.params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
def add_info_extractor(self, ie):
    """Register ie as the last InfoExtractor and become its downloader."""
    self._ies.append(ie)
    # Also index the instance by its key so get_info_extractor can find it
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    When no instance is registered yet, one is created from the class
    returned by the module-level get_info_extractor(ie_key), added with
    add_info_extractor and then returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
def add_default_info_extractors(self):
    """
    Register every InfoExtractor returned by gen_extractors, in order
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and become its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file if not in quiet mode.

    When a 'logger' param is set the message goes to logger.debug()
    instead. A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
def to_stderr(self, message):
    """Print message to stderr.

    message must be a unicode string. When a 'logger' param is set the
    message goes to logger.error() instead of being written.
    """
    assert isinstance(message, type(u''))
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    output = message + u'\n'
    # The text is written to sys.stderr, so it is the mode of sys.stderr
    # (not of the screen file) that decides whether it must be encoded.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled.

    Uses SetConsoleTitleW on a Windows console; otherwise writes an
    escape sequence to the screen file if TERM is set.
    """
    if not self.params.get('consoletitle', False):
        return
    windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Only acts when 'consoletitle' is enabled and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Only acts when 'consoletitle' is enabled and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
252 self.save_console_title()
def __exit__(self, *args):
    """Restore the console title and save cookies if a cookie file is set."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode a
    traceback is printed as well. Afterwards a DownloadError is raised
    unless the 'ignoreerrors' param is set, in which case the return
    code is set to 1 instead.

    tb, if given, is additional traceback information.
    """
    def _wrapped_exc_info():
        # Exceptions may carry the exc_info of the error that caused them
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            return current[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                wrapped = _wrapped_exc_info()
                if wrapped is not None:
                    pieces.append(u''.join(traceback.format_exception(*wrapped)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    wrapped = _wrapped_exc_info()
    exc_info = sys.exc_info() if wrapped is None else wrapped
    raise DownloadError(message, exc_info)
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:'.
    The prefix is colored red when stderr is a tty and the OS is not
    Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
def report_writedescription(self, descfn):
    """ Announce on screen that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """ Announce on screen the JSON metadata file infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_writeannotations(self, annofn):
    """ Announce on screen that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    fallback = u'[download] The file has already been downloaded'
    try:
        detailed = u'[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen(fallback)
def increment_downloads(self):
    """Advance by one the ordinal that numbers each downloaded file."""
    self._num_downloads = self._num_downloads + 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' param is filled in with a sanitized copy of info_dict,
    extended with 'epoch' and 'autonumber'. Returns None after reporting
    an error when the template cannot be expanded.
    """
    def _sanitized(key, value):
        # Missing values are rendered as NA
        text = u'NA' if value is None else compat_str(value)
        return sanitize_filename(
            text,
            restricted=self.params.get('restrictfilenames'),
            is_id=(key == u'id'))

    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        fields = dict((key, _sanitized(key, value))
                      for key, value in fields.items())

        return os.path.expanduser(self.params['outtmpl']) % fields
    except KeyError:
        self.report_error(u'Erroneous output template')
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
    return None
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a message explaining why the entry is skipped:
    title filters, upload date range, age limit or download archive.
    """
    # Entries evaluated as part of a playlist may not have a title yet, so
    # keep a label that is always defined for the messages below.
    video_title = info_dict.get('title', info_dict.get('id', u'video'))

    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            return u'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return u'%s has already been recorded in archive' % video_title
    return None
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every entry of extra_info whose key it lacks'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url with the first suitable InfoExtractor
    (or the one named by ie_key) and return the processed result.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    Returns whatever process_ie_result returns, or None when extraction
    did not produce a result or failed with a reported error.
    '''
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'webpage_url': url,
                'extractor_key': ie.ie_key(),
            })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor claiming the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of extra values to add to each video result.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style list entries inherit the extractor data of the list
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick the format matching format_spec from available_formats.

    available_formats is ordered from worst to best. format_spec is
    'best' (or None), 'worst', a file extension (mp4, flv, webm, 3gp) or
    a format_id; for the last two the best matching format is returned.
    Returns None when nothing matches or no formats are available.
    """
    # Without formats nothing can match; report "no match" instead of
    # raising IndexError for 'best'/'worst'.
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec in (u'mp4', u'flv', u'webm', u'3gp'):
        field = 'ext'
    else:
        field = 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
def process_video_result(self, info_dict, download=True):
    """Select the formats of a video result and optionally download them.

    Missing 'format_id', 'format' and 'ext' fields of every format are
    filled in, the requested format ('format' param, e.g. 34/5/best) is
    selected and, if download is true, passed to process_info.

    Returns info_dict updated with the last selected format, or None
    when the 'listformats' param is set (the formats are only listed).
    Raises ExtractorError when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first; None cannot be compared with
            # integers on Python 3, so map it to -1.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the download filters, prints the fields requested by the
    force* params and, unless simulating, writes the requested side
    files (description, annotations, subtitles, info JSON, thumbnail),
    downloads and post-processes the video and records it in the
    download archive.

    Raises MaxDownloadsReached when the 'max_downloads' limit has been
    exceeded and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """
    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        try:
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        annofn = filename + u'.annotations.xml'
        try:
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file itself; the description file
                # name is not defined unless a description was written.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                # Encode the name like every other file written here
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the download return code. Raises SameFileError when several
    URLs would be written to a fixed output name, and re-raises
    MaxDownloadsReached after reporting it.
    """
    if len(url_list) > 1:
        outtmpl = self.params['outtmpl']
        # A template without any field maps every URL to the same file
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor receives a copy of ie_info with 'filepath' set to
    filename. The original file is deleted afterwards if the
    postprocessors decided against keeping it and 'keepvideo' is unset.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    keep_video = None
    for pp in self._pps:
        try:
            wish, _new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            continue
        # A wish to keep always wins; a wish to delete only counts while
        # there is no clear decision yet
        if wish or keep_video is None:
            keep_video = wish

    if keep_video is not False or self.params.get('keepvideo', False):
        return
    try:
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
def _make_archive_id(self, info_dict):
    """Return the download archive id of a video.

    The id is the lowercased extractor key, a space and the video id.
    Returns None when info_dict lacks the video id or the extractor key.
    """
    video_id = info_dict.get('id')
    if video_id is None:
        return None  # Incomplete video information
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return None  # Incomplete video information
    return extractor.lower() + u' ' + video_id
def in_download_archive(self, info_dict):
    """Return True if the video is recorded in the download archive.

    Returns False when no archive is configured, when the video
    information is incomplete or when the archive file does not exist.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == vid_id for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of the video to the download archive file.

    Does nothing when no 'download_archive' param is set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    entry = vid_id + u'\n'
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a printable resolution for the format dictionary.

    'audio only' for formats without video, else the '_resolution'
    field if set, else WIDTHxHEIGHT or HEIGHTp; default when the height
    is unknown.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('_resolution') is not None:
        return format['_resolution']
    if format.get('height') is None:
        return default
    if format.get('width') is None:
        return u'%sp' % format['height']
    return u'%sx%s' % (format['width'], format['height'])
def list_formats(self, info_dict):
    """Print a table with the formats available for a video.

    The formats are taken from info_dict['formats'] or, when that is
    missing or None, info_dict itself is treated as the only format.
    With more than one format the first and last rows are marked as
    (worst) and (best).
    """
    def format_note(fdict):
        # Free-text note, video codec/bitrate, audio codec/bitrate, size
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # The first column is as wide as the longest format id plus one
        return u'%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    # Same fallback rule as process_video_result: None means no list
    formats = info_dict.get('formats')
    if formats is None:
        formats = [info_dict]
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Open req with this object's opener and return the response """
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Write debugging information when the 'verbose' param is set.

    Reports the youtube-dl version, the git HEAD of the source checkout
    (best effort), the Python version and platform, and the proxy map of
    the opener's handlers.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout. KeyboardInterrupt/SystemExit are not swallowed.
        try:
            sys.exc_clear()  # only exists on Python 2
        except AttributeError:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self, timeout=20):
    """Build the URL opener from the 'cookiefile', 'proxy' and
    'nocheckcertificate' params.

    Sets self.cookiejar and self._opener, installs the opener globally
    and sets the default socket timeout to timeout.
    """
    cookie_path = self.params.get('cookiefile')
    proxy_url = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    skip_cert_check = self.params.get('nocheckcertificate', False)
    https_handler = make_HTTPS_handler(skip_cert_check)

    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)