2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
47 UnavailableVideoError,
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
57 class YoutubeDL(object):
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object handles it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL process the extracted information, possibly using a File
73 Downloader to download the video.
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video description to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135 socket_timeout: Time to wait for unresponsive hosts, in seconds
136 bidi_workaround: Work around buggy terminals without bidirectional text
137 support, using fribidi
139 The following parameters are not used by YoutubeDL itself, they are used by
141 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
142 noresizebuffer, retries, continuedl, noprogress, consoletitle
148 _download_retcode = None
149 _num_downloads = None
152 def __init__(self, params=None):
153 """Create a FileDownloader object with the given options."""
155 self._ies_instances = {}
157 self._progress_hooks = []
158 self._download_retcode = 0
159 self._num_downloads = 0
160 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
161 self._err_file = sys.stderr
162 self.params = {} if params is None else params
164 # Pipe messages through fribidi
165 if params.get('bidi_workaround', False):
166 # fribidi does not support ungetting, so force newlines
167 params['progress_with_newline'] = True
169 for fid in ['_screen_file', '_err_file']:
170 class FribidiOut(object):
171 def __init__(self, outfile, errfile):
172 self.outfile = outfile
173 self.process = subprocess.Popen(
175 stdin=subprocess.PIPE,
180 res = self.process.stdin.write(s)
185 return self.process.stdin.flush()
188 return self.outfile.isatty()
191 vout = FribidiOut(getattr(self, fid), self._err_file)
192 setattr(self, fid, vout)
193 except OSError as ose:
195 self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
200 if (sys.version_info >= (3,) and sys.platform != 'win32' and
201 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
202 and not params['restrictfilenames']):
203 # On Python 3, the Unicode filesystem API will throw errors (#1474)
205 u'Assuming --restrict-filenames since file system encoding '
206 u'cannot encode all charactes. '
207 u'Set the LC_ALL environment variable to fix this.')
208 self.params['restrictfilenames'] = True
210 self.fd = FileDownloader(self, self.params)
212 if '%(stitle)s' in self.params.get('outtmpl', ''):
213 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
217 def add_info_extractor(self, ie):
218 """Add an InfoExtractor object to the end of the list."""
220 self._ies_instances[ie.ie_key()] = ie
221 ie.set_downloader(self)
223 def get_info_extractor(self, ie_key):
225 Get an instance of an IE with name ie_key, it will try to get one from
226 the _ies list, if there's no instance it will create a new one and add
227 it to the extractor list.
229 ie = self._ies_instances.get(ie_key)
231 ie = get_info_extractor(ie_key)()
232 self.add_info_extractor(ie)
235 def add_default_info_extractors(self):
237 Add the InfoExtractors returned by gen_extractors to the end of the list
239 for ie in gen_extractors():
240 self.add_info_extractor(ie)
242 def add_post_processor(self, pp):
243 """Add a PostProcessor object to the end of the chain."""
245 pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout(), honouring quiet mode.

    skip_eol is forwarded unchanged; the quiet check is always requested.
    Returns whatever to_stdout() returns.
    """
    return self.to_stdout(message, skip_eol=skip_eol, check_quiet=True)
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the configured logger, or write it to the screen file.

    When params['logger'] is set, the message goes to logger.debug() and
    nothing is written. Otherwise the message is written to
    self._screen_file, unless check_quiet is true and params['quiet'] is
    set. skip_eol suppresses the trailing newline.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        # Quiet mode only silences callers that asked for the check
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
def to_stderr(self, message):
    """Report message as an error: to the logger if set, else to the error file.

    message must be a unicode string (enforced with an assert). A trailing
    newline is appended when writing to self._err_file.
    """
    assert type(message) == type(u'')
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    write_string(message + u'\n', self._err_file)
def to_console_title(self, message):
    """Set the console window title to message if 'consoletitle' is enabled.

    On Windows ('nt') with a console window the Win32 SetConsoleTitleW call
    is used; otherwise, when TERM is present in the environment, a title
    escape sequence is written to the screen file.
    """
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            title = ctypes.c_wchar_p(message)
            ctypes.windll.kernel32.SetConsoleTitleW(title)
        elif 'TERM' in os.environ:
            write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title onto the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the previously saved terminal title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
294 self.save_console_title()
297 def __exit__(self, *args):
298 self.restore_console_title()
300 if self.params.get('cookiefile') is not None:
301 self.cookiejar.save()
303 def trouble(self, message=None, tb=None):
304 """Determine action to take when a download problem appears.
306 Depending on if the downloader has been configured to ignore
307 download errors or not, this method may throw an exception or
308 not when errors are found, after printing the message.
310 tb, if given, is additional traceback information.
312 if message is not None:
313 self.to_stderr(message)
314 if self.params.get('verbose'):
316 if sys.exc_info()[0]: # if .trouble has been called from an except block
318 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
319 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
320 tb += compat_str(traceback.format_exc())
322 tb_data = traceback.format_list(traceback.extract_stack())
323 tb = u''.join(tb_data)
325 if not self.params.get('ignoreerrors', False):
326 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
327 exc_info = sys.exc_info()[1].exc_info
329 exc_info = sys.exc_info()
330 raise DownloadError(message, exc_info)
331 self._download_retcode = 1
def report_warning(self, message):
    '''
    Send the message to to_stderr(), prefixed with 'WARNING:'.
    The prefix is colored when the error file is a tty and os.name is not 'nt'.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (_msg_header, message))
def report_error(self, message, tb=None):
    '''
    Hand the message to trouble(), prefixed with 'ERROR:'.
    The prefix is colored in red when the error file is a tty and os.name
    is not 'nt'. tb is passed through to trouble() unchanged.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (_msg_header, message), tb)
def report_writedescription(self, descfn):
    """ Announce on screen that the description is being written to descfn """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that subtitles are being written to sub_filename """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """ Announce on screen the JSON metadata file name infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_writeannotations(self, annofn):
    """ Announce on screen that annotations are being written to annofn """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    If reporting the message with the file name raises UnicodeEncodeError,
    a generic message without the name is printed instead.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance by one the counter that numbers each downloaded file."""
    self._num_downloads = self._num_downloads + 1
384 def prepare_filename(self, info_dict):
385 """Generate the output filename."""
387 template_dict = dict(info_dict)
389 template_dict['epoch'] = int(time.time())
390 autonumber_size = self.params.get('autonumber_size')
391 if autonumber_size is None:
393 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
394 template_dict['autonumber'] = autonumber_templ % self._num_downloads
395 if template_dict.get('playlist_index') is not None:
396 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
398 sanitize = lambda k, v: sanitize_filename(
399 u'NA' if v is None else compat_str(v),
400 restricted=self.params.get('restrictfilenames'),
402 template_dict = dict((k, sanitize(k, v))
403 for k, v in template_dict.items())
405 tmpl = os.path.expanduser(self.params['outtmpl'])
406 filename = tmpl % template_dict
408 except KeyError as err:
409 self.report_error(u'Erroneous output template')
411 except ValueError as err:
412 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
415 def _match_entry(self, info_dict):
416 """ Returns None iff the file should be downloaded """
418 if 'title' in info_dict:
419 # This can happen when we're just evaluating the playlist
420 title = info_dict['title']
421 matchtitle = self.params.get('matchtitle', False)
423 if not re.search(matchtitle, title, re.IGNORECASE):
424 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
425 rejecttitle = self.params.get('rejecttitle', False)
427 if re.search(rejecttitle, title, re.IGNORECASE):
428 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
429 date = info_dict.get('upload_date', None)
431 dateRange = self.params.get('daterange', DateRange())
432 if date not in dateRange:
433 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
434 age_limit = self.params.get('age_limit')
435 if age_limit is not None:
436 if age_limit < info_dict.get('age_limit', 0):
437 return u'Skipping "' + title + '" because it is age restricted'
438 if self.in_download_archive(info_dict):
439 return (u'%s has already been recorded in archive'
440 % info_dict.get('title', info_dict.get('id', u'video')))
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict each key of extra_info that it does not have yet'''
    # NOTE(review): takes no self; presumably declared as a static method on
    # a line not visible here. Confirm.
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
449 def extract_info(self, url, download=True, ie_key=None, extra_info={},
452 Returns a list with a dictionary for each video we find.
453 If 'download', also downloads the videos.
454 extra_info is a dict containing the extra values to add to each result
458 ies = [self.get_info_extractor(ie_key)]
463 if not ie.suitable(url):
467 self.report_warning(u'The program functionality for this site has been marked as broken, '
468 u'and will probably not work.')
471 ie_result = ie.extract(url)
472 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
474 if isinstance(ie_result, list):
475 # Backwards compatibility: old IE result format
477 '_type': 'compat_list',
478 'entries': ie_result,
480 self.add_extra_info(ie_result,
482 'extractor': ie.IE_NAME,
484 'extractor_key': ie.ie_key(),
487 return self.process_ie_result(ie_result, download, extra_info)
490 except ExtractorError as de: # An error we somewhat expected
491 self.report_error(compat_str(de), de.format_traceback())
493 except Exception as e:
494 if self.params.get('ignoreerrors', False):
495 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
500 self.report_error(u'no suitable InfoExtractor: %s' % url)
502 def process_ie_result(self, ie_result, download=True, extra_info={}):
504 Take the result of the ie(may be modified) and resolve all unresolved
505 references (URLs, playlist items).
507 It will also download the videos if 'download'.
508 Returns the resolved ie_result.
511 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
512 if result_type == 'video':
513 self.add_extra_info(ie_result, extra_info)
514 return self.process_video_result(ie_result, download=download)
515 elif result_type == 'url':
516 # We have to add extra_info to the results because it may be
517 # contained in a playlist
518 return self.extract_info(ie_result['url'],
520 ie_key=ie_result.get('ie_key'),
521 extra_info=extra_info)
522 elif result_type == 'url_transparent':
523 # Use the information from the embedding page
524 info = self.extract_info(
525 ie_result['url'], ie_key=ie_result.get('ie_key'),
526 extra_info=extra_info, download=False, process=False)
528 def make_result(embedded_info):
529 new_result = ie_result.copy()
530 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
531 'entries', 'urlhandle', 'ie_key', 'duration',
532 'subtitles', 'annotations', 'format',
533 'thumbnail', 'thumbnails'):
536 if f in embedded_info:
537 new_result[f] = embedded_info[f]
539 new_result = make_result(info)
541 assert new_result.get('_type') != 'url_transparent'
542 if new_result.get('_type') == 'compat_list':
543 new_result['entries'] = [
544 make_result(e) for e in new_result['entries']]
546 return self.process_ie_result(
547 new_result, download=download, extra_info=extra_info)
548 elif result_type == 'playlist':
549 # We process each entry in the playlist
550 playlist = ie_result.get('title', None) or ie_result.get('id', None)
551 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
553 playlist_results = []
555 n_all_entries = len(ie_result['entries'])
556 playliststart = self.params.get('playliststart', 1) - 1
557 playlistend = self.params.get('playlistend', -1)
559 if playlistend == -1:
560 entries = ie_result['entries'][playliststart:]
562 entries = ie_result['entries'][playliststart:playlistend]
564 n_entries = len(entries)
566 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
567 (ie_result['extractor'], playlist, n_all_entries, n_entries))
569 for i, entry in enumerate(entries, 1):
570 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
572 'playlist': playlist,
573 'playlist_index': i + playliststart,
574 'extractor': ie_result['extractor'],
575 'webpage_url': ie_result['webpage_url'],
576 'extractor_key': ie_result['extractor_key'],
579 reason = self._match_entry(entry)
580 if reason is not None:
581 self.to_screen(u'[download] ' + reason)
584 entry_result = self.process_ie_result(entry,
587 playlist_results.append(entry_result)
588 ie_result['entries'] = playlist_results
590 elif result_type == 'compat_list':
592 self.add_extra_info(r,
594 'extractor': ie_result['extractor'],
595 'webpage_url': ie_result['webpage_url'],
596 'extractor_key': ie_result['extractor_key'],
599 ie_result['entries'] = [
600 self.process_ie_result(_fixup(r), download, extra_info)
601 for r in ie_result['entries']
605 raise Exception('Invalid result type: %s' % result_type)
607 def select_format(self, format_spec, available_formats):
608 if format_spec == 'best' or format_spec is None:
609 return available_formats[-1]
610 elif format_spec == 'worst':
611 return available_formats[0]
613 extensions = [u'mp4', u'flv', u'webm', u'3gp']
614 if format_spec in extensions:
615 filter_f = lambda f: f['ext'] == format_spec
617 filter_f = lambda f: f['format_id'] == format_spec
618 matches = list(filter(filter_f, available_formats))
623 def process_video_result(self, info_dict, download=True):
624 assert info_dict.get('_type', 'video') == 'video'
626 if 'playlist' not in info_dict:
627 # It isn't part of a playlist
628 info_dict['playlist'] = None
629 info_dict['playlist_index'] = None
631 # These extractors handle format selection themselves
632 if info_dict['extractor'] in [u'youtube', u'Youku']:
634 self.process_info(info_dict)
637 # We now pick which formats have to be downloaded
638 if info_dict.get('formats') is None:
639 # There's only one format available
640 formats = [info_dict]
642 formats = info_dict['formats']
644 # We check that all the formats have the format and format_id fields
645 for (i, format) in enumerate(formats):
646 if format.get('format_id') is None:
647 format['format_id'] = compat_str(i)
648 if format.get('format') is None:
649 format['format'] = u'{id} - {res}{note}'.format(
650 id=format['format_id'],
651 res=self.format_resolution(format),
652 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
654 # Automatically determine file extension if missing
655 if 'ext' not in format:
656 format['ext'] = determine_ext(format['url'])
658 if self.params.get('listformats', None):
659 self.list_formats(info_dict)
662 format_limit = self.params.get('format_limit', None)
664 formats = list(takewhile_inclusive(
665 lambda f: f['format_id'] != format_limit, formats
667 if self.params.get('prefer_free_formats'):
668 def _free_formats_key(f):
670 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
673 # We only compare the extension if they have the same height and width
674 return (f.get('height'), f.get('width'), ext_ord)
675 formats = sorted(formats, key=_free_formats_key)
677 req_format = self.params.get('format', 'best')
678 if req_format is None:
680 formats_to_download = []
681 # The -1 is for supporting YoutubeIE
682 if req_format in ('-1', 'all'):
683 formats_to_download = formats
685 # We can accept formats requested in the format: 34/5/best, we pick
686 # the first that is available, starting from left
687 req_formats = req_format.split('/')
688 for rf in req_formats:
689 selected_format = self.select_format(rf, formats)
690 if selected_format is not None:
691 formats_to_download = [selected_format]
693 if not formats_to_download:
694 raise ExtractorError(u'requested format not available',
698 if len(formats_to_download) > 1:
699 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
700 for format in formats_to_download:
701 new_info = dict(info_dict)
702 new_info.update(format)
703 self.process_info(new_info)
704 # We update the info dict with the best quality format (backwards compatibility)
705 info_dict.update(formats_to_download[-1])
708 def process_info(self, info_dict):
709 """Process a single resolved IE result."""
711 assert info_dict.get('_type', 'video') == 'video'
712 # We increment the download count here to match the previous behaviour.
713 self.increment_downloads()
715 info_dict['fulltitle'] = info_dict['title']
716 if len(info_dict['title']) > 200:
717 info_dict['title'] = info_dict['title'][:197] + u'...'
719 # Keep for backwards compatibility
720 info_dict['stitle'] = info_dict['title']
722 if not 'format' in info_dict:
723 info_dict['format'] = info_dict['ext']
725 reason = self._match_entry(info_dict)
726 if reason is not None:
727 self.to_screen(u'[download] ' + reason)
730 max_downloads = self.params.get('max_downloads')
731 if max_downloads is not None:
732 if self._num_downloads > int(max_downloads):
733 raise MaxDownloadsReached()
735 filename = self.prepare_filename(info_dict)
738 if self.params.get('forcetitle', False):
739 self.to_stdout(info_dict['fulltitle'])
740 if self.params.get('forceid', False):
741 self.to_stdout(info_dict['id'])
742 if self.params.get('forceurl', False):
743 # For RTMP URLs, also include the playpath
744 self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
745 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
746 self.to_stdout(info_dict['thumbnail'])
747 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
748 self.to_stdout(info_dict['description'])
749 if self.params.get('forcefilename', False) and filename is not None:
750 self.to_stdout(filename)
751 if self.params.get('forceformat', False):
752 self.to_stdout(info_dict['format'])
753 if self.params.get('forcejson', False):
754 info_dict['_filename'] = filename
755 self.to_stdout(json.dumps(info_dict))
757 # Do nothing else if in simulate mode
758 if self.params.get('simulate', False):
765 dn = os.path.dirname(encodeFilename(filename))
766 if dn != '' and not os.path.exists(dn):
768 except (OSError, IOError) as err:
769 self.report_error(u'unable to create directory ' + compat_str(err))
772 if self.params.get('writedescription', False):
774 descfn = filename + u'.description'
775 self.report_writedescription(descfn)
776 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
777 descfile.write(info_dict['description'])
778 except (KeyError, TypeError):
779 self.report_warning(u'There\'s no description to write.')
780 except (OSError, IOError):
781 self.report_error(u'Cannot write description file ' + descfn)
784 if self.params.get('writeannotations', False):
786 annofn = filename + u'.annotations.xml'
787 self.report_writeannotations(annofn)
788 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
789 annofile.write(info_dict['annotations'])
790 except (KeyError, TypeError):
791 self.report_warning(u'There are no annotations to write.')
792 except (OSError, IOError):
793 self.report_error(u'Cannot write annotations file: ' + annofn)
796 subtitles_are_requested = any([self.params.get('writesubtitles', False),
797 self.params.get('writeautomaticsub')])
799 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
800 # subtitles download errors are already managed as troubles in relevant IE
801 # that way it will silently go on when used with unsupporting IE
802 subtitles = info_dict['subtitles']
803 sub_format = self.params.get('subtitlesformat', 'srt')
804 for sub_lang in subtitles.keys():
805 sub = subtitles[sub_lang]
809 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
810 self.report_writesubtitles(sub_filename)
811 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
813 except (OSError, IOError):
814 self.report_error(u'Cannot write subtitles file ' + descfn)
817 if self.params.get('writeinfojson', False):
818 infofn = os.path.splitext(filename)[0] + u'.info.json'
819 self.report_writeinfojson(infofn)
821 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
822 write_json_file(json_info_dict, encodeFilename(infofn))
823 except (OSError, IOError):
824 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
827 if self.params.get('writethumbnail', False):
828 if info_dict.get('thumbnail') is not None:
829 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
830 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
831 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
832 (info_dict['extractor'], info_dict['id']))
834 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
835 with open(thumb_filename, 'wb') as thumbf:
836 shutil.copyfileobj(uf, thumbf)
837 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
838 (info_dict['extractor'], info_dict['id'], thumb_filename))
839 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
840 self.report_warning(u'Unable to download thumbnail "%s": %s' %
841 (info_dict['thumbnail'], compat_str(err)))
843 if not self.params.get('skip_download', False):
844 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
848 success = self.fd._do_download(filename, info_dict)
849 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
850 self.report_error(u'unable to download video data: %s' % str(err))
852 except (OSError, IOError) as err:
853 raise UnavailableVideoError(err)
854 except (ContentTooShortError, ) as err:
855 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
860 self.post_process(filename, info_dict)
861 except (PostProcessingError) as err:
862 self.report_error(u'postprocessing: %s' % str(err))
865 self.record_download_archive(info_dict)
867 def download(self, url_list):
868 """Download a given list of URLs."""
869 if (len(url_list) > 1 and
870 '%' not in self.params['outtmpl']
871 and self.params.get('max_downloads') != 1):
872 raise SameFileError(self.params['outtmpl'])
876 #It also downloads the videos
877 self.extract_info(url)
878 except UnavailableVideoError:
879 self.report_error(u'unable to download video')
880 except MaxDownloadsReached:
881 self.to_screen(u'[info] Maximum number of downloaded files reached.')
884 return self._download_retcode
886 def post_process(self, filename, ie_info):
887 """Run all the postprocessors on the given file."""
889 info['filepath'] = filename
893 keep_video_wish, new_info = pp.run(info)
894 if keep_video_wish is not None:
896 keep_video = keep_video_wish
897 elif keep_video is None:
898 # No clear decision yet, let IE decide
899 keep_video = keep_video_wish
900 except PostProcessingError as e:
901 self.report_error(e.msg)
902 if keep_video is False and not self.params.get('keepvideo', False):
904 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
905 os.remove(encodeFilename(filename))
906 except (IOError, OSError):
907 self.report_warning(u'Unable to remove downloaded video file')
909 def _make_archive_id(self, info_dict):
910 # Future-proof against any change in case
911 # and backwards compatibility with prior versions
912 extractor = info_dict.get('extractor_key')
913 if extractor is None:
914 if 'id' in info_dict:
915 extractor = info_dict.get('ie_key') # key in a playlist
916 if extractor is None:
917 return None # Incomplete video information
918 return extractor.lower() + u' ' + info_dict['id']
920 def in_download_archive(self, info_dict):
921 fn = self.params.get('download_archive')
925 vid_id = self._make_archive_id(info_dict)
927 return False # Incomplete video information
930 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
931 for line in archive_file:
932 if line.strip() == vid_id:
934 except IOError as ioe:
935 if ioe.errno != errno.ENOENT:
939 def record_download_archive(self, info_dict):
940 fn = self.params.get('download_archive')
943 vid_id = self._make_archive_id(info_dict)
945 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
946 archive_file.write(vid_id + u'\n')
949 def format_resolution(format, default='unknown'):
950 if format.get('vcodec') == 'none':
952 if format.get('_resolution') is not None:
953 return format['_resolution']
954 if format.get('height') is not None:
955 if format.get('width') is not None:
956 res = u'%sx%s' % (format['width'], format['height'])
958 res = u'%sp' % format['height']
963 def list_formats(self, info_dict):
964 def format_note(fdict):
966 if fdict.get('format_note') is not None:
967 res += fdict['format_note'] + u' '
968 if (fdict.get('vcodec') is not None and
969 fdict.get('vcodec') != 'none'):
970 res += u'%-5s' % fdict['vcodec']
971 elif fdict.get('vbr') is not None:
973 if fdict.get('vbr') is not None:
974 res += u'@%4dk' % fdict['vbr']
975 if fdict.get('acodec') is not None:
978 res += u'%-5s' % fdict['acodec']
979 elif fdict.get('abr') is not None:
983 if fdict.get('abr') is not None:
984 res += u'@%3dk' % fdict['abr']
985 if fdict.get('filesize') is not None:
988 res += format_bytes(fdict['filesize'])
991 def line(format, idlen=20):
992 return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
995 self.format_resolution(format),
999 formats = info_dict.get('formats', [info_dict])
1000 idlen = max(len(u'format code'),
1001 max(len(f['format_id']) for f in formats))
1002 formats_s = [line(f, idlen) for f in formats]
1003 if len(formats) > 1:
1004 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
1005 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
1007 header_line = line({
1008 'format_id': u'format code', 'ext': u'extension',
1009 '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
1010 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
1011 (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Open req with this object's opener and return the result """
    opener = self._opener
    return opener.open(req)
1017 def print_debug_header(self):
1018 if not self.params.get('verbose'):
1020 write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
1022 sp = subprocess.Popen(
1023 ['git', 'rev-parse', '--short', 'HEAD'],
1024 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1025 cwd=os.path.dirname(os.path.abspath(__file__)))
1026 out, err = sp.communicate()
1027 out = out.decode().strip()
1028 if re.match('[0-9a-f]+', out):
1029 write_string(u'[debug] Git HEAD: ' + out + u'\n')
1035 write_string(u'[debug] Python version %s - %s' %
1036 (platform.python_version(), platform_name()) + u'\n')
1039 for handler in self._opener.handlers:
1040 if hasattr(handler, 'proxies'):
1041 proxy_map.update(handler.proxies)
1042 write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1044 def _setup_opener(self):
1045 timeout_val = self.params.get('socket_timeout')
1046 timeout = 600 if timeout_val is None else float(timeout_val)
1048 opts_cookiefile = self.params.get('cookiefile')
1049 opts_proxy = self.params.get('proxy')
1051 if opts_cookiefile is None:
1052 self.cookiejar = compat_cookiejar.CookieJar()
1054 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1056 if os.access(opts_cookiefile, os.R_OK):
1057 self.cookiejar.load()
1059 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1061 if opts_proxy is not None:
1062 if opts_proxy == '':
1065 proxies = {'http': opts_proxy, 'https': opts_proxy}
1067 proxies = compat_urllib_request.getproxies()
1068 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1069 if 'http' in proxies and 'https' not in proxies:
1070 proxies['https'] = proxies['http']
1071 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1072 https_handler = make_HTTPS_handler(
1073 self.params.get('nocheckcertificate', False))
1074 opener = compat_urllib_request.build_opener(
1075 https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1076 # Delete the default user-agent header, which would otherwise apply in
1077 # cases where our custom HTTP handler doesn't come into play
1078 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1079 opener.addheaders = []
1080 self._opener = opener
1082 # TODO remove this global modification
1083 compat_urllib_request.install_opener(opener)
1084 socket.setdefaulttimeout(timeout)