2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
46 UnavailableVideoError,
51 from .extractor import get_info_extractor, gen_extractors
52 from .FileDownloader import FileDownloader
53 from .version import __version__
56 class YoutubeDL(object):
59 YoutubeDL objects are the ones responsible for downloading the
60 actual video file and writing it to disk if the user has requested
61 it, among some other tasks. In most cases there should be one per
62 program. As, given a video URL, the downloader doesn't know how to
63 extract all the needed information, task that InfoExtractors do, it
64 has to pass the URL to one of them.
66 For this, YoutubeDL objects have a method that allows
67 InfoExtractors to be registered in a given order. When it is passed
68 a URL, the YoutubeDL object handles it to the first InfoExtractor it
69 finds that reports being able to handle it. The InfoExtractor extracts
70 all the information about the video or videos the URL refers to, and
71 YoutubeDL process the extracted information, possibly using a File
72 Downloader to download the video.
74 YoutubeDL objects accept a lot of parameters. In order not to saturate
75 the object constructor with arguments, it receives a dictionary of
76 options instead. These options are available through the params
77 attribute for the InfoExtractors to use. The YoutubeDL also
78 registers itself as the downloader in charge for the InfoExtractors
79 that are added to it, so this is a "mutual registration".
83 username: Username for authentication purposes.
84 password: Password for authentication purposes.
85 videopassword: Password for accessing a video.
86 usenetrc: Use netrc for authentication instead.
87 verbose: Print additional info to stdout.
88 quiet: Do not print messages to stdout.
89 forceurl: Force printing final URL.
90 forcetitle: Force printing title.
91 forceid: Force printing ID.
92 forcethumbnail: Force printing thumbnail URL.
93 forcedescription: Force printing description.
94 forcefilename: Force printing final filename.
95 forcejson: Force printing info_dict as JSON.
96 simulate: Do not download the video files.
97 format: Video format code.
98 format_limit: Highest quality format to try.
99 outtmpl: Template for output names.
100 restrictfilenames: Do not allow "&" and spaces in file names
101 ignoreerrors: Do not stop on download errors.
102 nooverwrites: Prevent overwriting files.
103 playliststart: Playlist item to start at.
104 playlistend: Playlist item to end at.
105 matchtitle: Download only matching titles.
106 rejecttitle: Reject downloads for matching titles.
107 logtostderr: Log messages to stderr instead of stdout.
108 writedescription: Write the video description to a .description file
109 writeinfojson: Write the video description to a .info.json file
110 writeannotations: Write the video annotations to a .annotations.xml file
111 writethumbnail: Write the thumbnail image to a file
112 writesubtitles: Write the video subtitles to a file
113 writeautomaticsub: Write the automatic subtitles to a file
114 allsubtitles: Downloads all the subtitles of the video
115 (requires writesubtitles or writeautomaticsub)
116 listsubtitles: Lists all available subtitles for the video
117 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
118 subtitleslangs: List of languages of the subtitles to download
119 keepvideo: Keep the video file after post-processing
120 daterange: A DateRange object, download only if the upload_date is in the range.
121 skip_download: Skip the actual download of the video file
122 cachedir: Location of the cache files in the filesystem.
123 None to disable filesystem cache.
124 noplaylist: Download single video instead of a playlist if in doubt.
125 age_limit: An integer representing the user's age in years.
126 Unsuitable videos for the given age are skipped.
127 download_archive: File name of a file where all downloads are recorded.
128 Videos already present in the file are not downloaded
130 cookiefile: File name where cookies should be read from and dumped to.
131 nocheckcertificate:Do not verify SSL certificates
132 proxy: URL of the proxy server to use
134 The following parameters are not used by YoutubeDL itself, they are used by
136 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
137 noresizebuffer, retries, continuedl, noprogress, consoletitle
143 _download_retcode = None
144 _num_downloads = None
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary.  It is stored (not copied) as
    self.params, and its 'restrictfilenames' entry may be switched on in
    place when the file system encoding cannot represent arbitrary names.
    """
    # NOTE(review): the `self._ies`, `self._pps` and `self.params`
    # assignments, the `self.report_warning(` call opener and the final
    # `self._setup_opener()` call were absent from the extracted source and
    # are restored from the way the other methods use them -- confirm
    # against the upstream file.
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Indexing with the flag picks stdout for False and stderr for True.
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]

    # Use .get(): a params dict without 'restrictfilenames' must not raise
    # KeyError here.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.params = params
    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params['outtmpl']:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is also indexed by its ie_key() and told that this
    object is its downloader.
    """
    # NOTE(review): the list append was absent from the extracted source;
    # restored to match the docstring -- confirm against upstream.
    self._ies.append(ie)
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Get an instance of an IE with name ie_key, it will try to get one from
    the _ies list, if there's no instance it will create a new one and add
    it to the extractor list.
    """
    ie = self._ies_instances.get(ie_key)
    # NOTE(review): the `if ie is None:` guard and the final `return ie`
    # were absent from the extracted source; restored per the docstring.
    if ie is None:
        # Module-level get_info_extractor() yields the class; instantiate it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    return ie
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors, in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
def add_post_processor(self, pp):
    """Add a PostProcessor object to the end of the chain."""
    # NOTE(review): the append was absent from the extracted source;
    # restored to match the docstring -- confirm against upstream.
    self._pps.append(pp)
    pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless the 'quiet' option is set.

    A newline is appended unless skip_eol is true.
    """
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
def to_stderr(self, message):
    """Print message (a text string) followed by a newline to stderr."""
    assert isinstance(message, type(u''))
    output = message + u'\n'
    # Inspect the stream that is actually written to (sys.stderr), not the
    # screen file.  Python 2 lies about the mode of sys.stdout/sys.stderr,
    # so always encode there.
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def to_console_title(self, message):
    """Set the console window title to message if 'consoletitle' is set."""
    if not self.params.get('consoletitle', False):
        # NOTE(review): this `return` was absent from the extracted source.
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # Escape sequence written to the screen file to set the title.
        write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current console title when 'consoletitle' is set."""
    if not self.params.get('consoletitle', False):
        # NOTE(review): this `return` was absent from the extracted source.
        return
    if 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the saved console title when 'consoletitle' is set."""
    if not self.params.get('consoletitle', False):
        # NOTE(review): this `return` was absent from the extracted source.
        return
    if 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    # NOTE(review): only the save_console_title() call survived in the
    # extracted source; the `def` header and `return self` are restored as
    # the counterpart of __exit__ -- confirm against upstream.
    self.save_console_title()
    return self
def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies.

    The cookie jar is only saved when a 'cookiefile' option was given.
    """
    self.restore_console_title()

    cookiefile = self.params.get('cookiefile')
    if cookiefile is None:
        return
    self.cookiejar.save()
def fixed_template(self):
    """Return True when the output template has no %(...)s placeholder."""
    placeholder = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return placeholder is None
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    tb, if given, is additional traceback information.

    Raises DownloadError unless the 'ignoreerrors' option is set; in that
    case the return code is set to 1 instead.
    """
    # NOTE(review): `if tb is None:`, `tb = u''`, the two `else:` lines and
    # `self.to_stderr(tb)` were absent from the extracted source and are
    # restored from the surrounding statements -- confirm against upstream.
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = u''
                # Some exceptions carry the exc_info of the error they wrap;
                # show that one first.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                # Not inside an except block: show where we were called from.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
def report_warning(self, message):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        # NOTE(review): this `else:` was absent from the extracted source.
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    """
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.
    """
    if sys.stderr.isatty() and os.name != 'nt':
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        # NOTE(review): this `else:` was absent from the extracted source.
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def report_writedescription(self, descfn):
    """ Announce on screen that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written """
    # The message lacked its verb ("Video description metadata as JSON
    # to:"); worded like the sibling report_write* messages.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
def report_writeannotations(self, annofn):
    """ Announce on screen that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    # NOTE(review): the `try:` line was absent from the extracted source;
    # the visible `except` fixes its position.
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name.
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance the download counter used to number each file."""
    self._num_downloads = self._num_downloads + 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option with the (sanitized) values of info_dict
    plus 'epoch' and 'autonumber'.  Returns the filename, or None after
    reporting an error when the template cannot be expanded.
    """
    # NOTE(review): `try:`, the `autonumber_size = 5` fallback, the
    # `is_id=(k == u'id')` argument and the three `return` lines were absent
    # from the extracted source and are restored from memory of the upstream
    # file -- confirm, in particular the default width and `is_id`.
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(key, value):
            # Missing values are rendered as the literal 'NA'.
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))

        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items())

        tmpl = os.path.expanduser(self.params['outtmpl'])
        filename = tmpl % template_dict
        return filename
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a human-readable reason for skipping it.  The reason
    carries no '[download] ' prefix: process_info prepends it when printing.
    """
    # NOTE(review): the `if matchtitle:`, `if rejecttitle:` and
    # `if date is not None:` guards, the `% info_dict)` continuation and the
    # final `return None` were absent from the extracted source; restored.

    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle:
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle:
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        date_range = self.params.get('daterange', DateRange())
        if date not in date_range:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), date_range)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        # Videos without an 'age_limit' are treated as suitable for all ages.
        if age_limit < info_dict.get('age_limit', 0):
            return u'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%(title)s has already been recorded in archive'
                % info_dict)
    return None
def add_extra_info(info_dict, extra_info):
    """Copy into info_dict every key of extra_info it does not have yet.

    NOTE(review): takes no `self` but is invoked as self.add_extra_info(...)
    elsewhere, so it is presumably decorated with @staticmethod on a line
    not visible here -- confirm.
    """
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    """
    Returns a list with a dictionary for each video we find.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    When ie_key is given only that extractor is tried; otherwise the first
    registered extractor that reports the URL as suitable is used.
    Returns None when extraction fails or no extractor is suitable.
    """
    # A fresh dict per call: a `{}` default would be shared between calls.
    if extra_info is None:
        extra_info = {}

    # NOTE(review): the `if ie_key:`/`else:` selection, the `for` loop
    # header, `continue`, `if not ie.working():`, `try:`, the `break`/`raise`
    # statements, the dict delimiters and the 'webpage_url' entry were absent
    # from the extracted source and are restored -- confirm against upstream.
    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result,
                                {
                                    'extractor': ie.IE_NAME,
                                    'webpage_url': url,
                                    'extractor_key': ie.ie_key(),
                                })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call: a `{}` default would be shared between calls.
    if extra_info is None:
        extra_info = {}

    # NOTE(review): the `download,` argument, the `else:` lines, the
    # `extra = {` / `}` delimiters, the `download=download,` and
    # `extra_info=extra)` arguments, `def _fixup(r):`, `return r` and both
    # `return ie_result` lines were absent from the extracted source and are
    # restored -- confirm against upstream.
    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        self.add_extra_info(ie_result, extra_info)
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based; convert it to a slice index.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Give each legacy entry the extractor fields of its container.
            self.add_extra_info(r,
                                {
                                    'extractor': ie_result['extractor'],
                                    'webpage_url': ie_result['webpage_url'],
                                    'extractor_key': ie_result['extractor_key'],
                                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick one format dict out of available_formats.

    format_spec may be 'best' or None (last entry), 'worst' (first entry),
    a file extension (mp4/flv/webm/3gp) or a format_id; for the last two
    the last matching entry wins.  Returns None when nothing matches or
    when available_formats is empty.
    """
    # NOTE(review): the two `else:` lines and the `if matches:` /
    # `return matches[-1]` / `return None` tail were absent from the
    # extracted source and are restored -- confirm against upstream.
    if not available_formats:
        # Indexing an empty list for 'best'/'worst' would raise IndexError.
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    elif format_spec == 'worst':
        return available_formats[0]
    else:
        extensions = [u'mp4', u'flv', u'webm', u'3gp']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
        else:
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
        if matches:
            return matches[-1]
    return None
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video and optionally download them.

    Fills in missing playlist and per-format fields, applies the
    'format_limit', 'prefer_free_formats' and 'format' options, and calls
    process_info for each selected format when download is true.

    Returns info_dict updated with the last selected format, or None when
    the 'listformats' option is set.  Raises ExtractorError when none of
    the requested formats is available.
    """
    # NOTE(review): `if download:` (twice), `return info_dict` (twice),
    # `else:` (twice), the closing brackets, the bare `return` after
    # list_formats, `if format_limit:`, the try/except around `.index()`
    # with its `ext_ord = -1` fallback, `req_format = 'best'`, `break` and
    # `expected=True)` were absent from the extracted source and are
    # restored -- confirm against upstream.
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width.
            # Missing dimensions sort first: on Python 3 None cannot be
            # ordered against an int.
            height = f.get('height')
            width = f.get('width')
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the title/date/age/archive filters, performs the forced
    printings, writes the requested side files (description, annotations,
    subtitles, info JSON, thumbnail), downloads the video unless
    'skip_download' is set, runs the postprocessors and finally records the
    video in the download archive.

    Raises MaxDownloadsReached when the 'max_downloads' limit is exceeded
    and UnavailableVideoError when the download fails with an OS/IO error.
    """
    # NOTE(review): the `try:` lines, the early `return` statements,
    # `if filename is None:`, `os.makedirs(dn)`, `if sub is None:` /
    # `continue`, `subfile.write(sub)`, `success = True`, `else:` and
    # `if success:` were absent from the extracted source and are restored
    # from the surrounding statements -- confirm against upstream.

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Report the subtitles file itself; the original referenced
                # `descfn`, which is the description file name and is not
                # even defined unless 'writedescription' was requested.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # A missing thumbnail only warrants a warning.
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Returns the accumulated return code.  Raises SameFileError when several
    URLs would be written to one fixed output name, and re-raises
    MaxDownloadsReached after announcing it.
    """
    if len(url_list) > 1 and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    # NOTE(review): the `for` header, `try:` and the `raise` after the
    # MaxDownloadsReached message were absent from the extracted source and
    # are restored -- confirm against upstream.
    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor receives a copy of ie_info extended with 'filepath'.
    The original file is deleted afterwards when a postprocessor asked for
    it, none asked to keep it, and the 'keepvideo' option is not set.
    """
    # NOTE(review): `info = dict(ie_info)`, `keep_video = None`, the `for`
    # loop header, both `try:` lines and `if keep_video_wish:` were absent
    # from the extracted source and are restored -- confirm against upstream.
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            # The returned info dict is not fed to the next postprocessor.
            keep_video_wish, new_info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
def in_download_archive(self, info_dict):
    """Return True when the video is listed in the download archive file.

    The archive is the file named by the 'download_archive' option; each
    line holds "<extractor> <id>".  Returns False when no archive is
    configured or the file does not exist yet.
    """
    fn = self.params.get('download_archive')
    # NOTE(review): this guard, the `try:` line, the `return` statements and
    # the `raise` were absent from the extracted source and are restored --
    # confirm against upstream.
    if fn is None:
        return False
    vid_id = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded so far.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append "<extractor> <id>" to the download archive file, if any."""
    fn = self.params.get('download_archive')
    # NOTE(review): this guard was absent from the extracted source; restored
    # to mirror in_download_archive -- confirm against upstream.
    if fn is None:
        return
    vid_id = info_dict['extractor'] + u' ' + info_dict['id']
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + u'\n')
def format_resolution(format, default='unknown'):
    """Return a display string for the resolution of a format dict.

    Uses '_resolution' verbatim when present, otherwise "<width>x<height>"
    or "<height>p", and falls back to default when no height is known.

    NOTE(review): takes no `self` but is invoked as
    self.format_resolution(...) elsewhere, so it is presumably decorated
    with @staticmethod on a line not visible here -- confirm.
    """
    # NOTE(review): both `else:` lines, `res = default` and `return res`
    # were absent from the extracted source and are restored.
    if format.get('_resolution') is not None:
        return format['_resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = u'%sx%s' % (format['width'], format['height'])
        else:
            res = u'%sp' % format['height']
    else:
        res = default
    return res
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    One row per entry of info_dict['formats'] (or info_dict itself when
    that key is missing), with the first and last rows tagged '(worst)' and
    '(best)' when there is more than one.
    """
    # NOTE(review): `res = u''`, `res += u'video'`, the `if res:` /
    # `res += u', '` separators, `res += 'audio'`, `return res`,
    # `def line(format):` with its first, second and fourth tuple items,
    # `if len(formats) > 1:` and `header_line = line({` were absent from the
    # extracted source and are restored from memory of the upstream file --
    # confirm, in particular the literal 'video'/'audio'/', ' strings.
    def format_note(fdict):
        # An explicit note wins; otherwise build one from codec/bitrate.
        if fdict.get('format_note') is not None:
            return fdict['format_note']
        res = u''
        if fdict.get('vcodec') is not None:
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        return res

    def line(format):
        return (u'%-20s%-10s%-12s%s' % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    formats_s = list(map(line, formats))
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    # The header is rendered through the same row formatter as the data.
    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'})
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Open req through this object's opener and return the response """
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Print version, git revision, Python and proxy details when verbose."""
    if not self.params.get('verbose'):
        # NOTE(review): this `return` was absent from the extracted source.
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    # NOTE(review): the `try:` and its handler were absent from the extracted
    # source.  Looking up the git revision is best-effort (git may be missing
    # or this may not be a checkout), so failures are ignored; the handler
    # below is a reconstruction -- confirm against upstream.
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        try:
            # Python 2 only: drop the handled exception.
            sys.exc_clear()
        except AttributeError:
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    # NOTE(review): `proxy_map = {}` was absent from the extracted source.
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self, timeout=300):
    """Build the URL opener (cookies, proxy, HTTPS) and install it globally.

    Sets self.cookiejar and self._opener, installs the opener as the
    process-wide default and sets the default socket timeout to timeout
    seconds.
    """
    # NOTE(review): the `else:` lines, the `opts_cookiefile)` and
    # `self.cookiejar)` call arguments and the empty-string proxy branch
    # (`if opts_proxy == '':` / `proxies = {}`) were absent from the
    # extracted source and are restored -- confirm against upstream.
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the file when it is readable.
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)
    if opts_proxy is not None:
        if opts_proxy == '':
            proxies = {}
        else:
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    else:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)