2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_get_terminal_size,
38 compat_urllib_request,
66 UnavailableVideoError,
76 from .cache import Cache
77 from .extractor import get_info_extractor, gen_extractors
78 from .downloader import get_suitable_downloader
79 from .downloader.rtmp import rtmpdump_version
80 from .postprocessor import (
82 FFmpegFixupStretchedPP,
87 from .version import __version__
90 class YoutubeDL(object):
93 YoutubeDL objects are the ones responsible of downloading the
94 actual video file and writing it to disk if the user has requested
95 it, among some other tasks. In most cases there should be one per
96 program. As, given a video URL, the downloader doesn't know how to
97 extract all the needed information, task that InfoExtractors do, it
98 has to pass the URL to one of them.
100 For this, YoutubeDL objects have a method that allows
101 InfoExtractors to be registered in a given order. When it is passed
102 a URL, the YoutubeDL object hands it to the first InfoExtractor it
103 finds that reports being able to handle it. The InfoExtractor extracts
104 all the information about the video or videos the URL refers to, and
105 YoutubeDL processes the extracted information, possibly using a File
106 Downloader to download the video.
108 YoutubeDL objects accept a lot of parameters. In order not to saturate
109 the object constructor with arguments, it receives a dictionary of
110 options instead. These options are available through the params
111 attribute for the InfoExtractors to use. The YoutubeDL also
112 registers itself as the downloader in charge for the InfoExtractors
113 that are added to it, so this is a "mutual registration".
117 username: Username for authentication purposes.
118 password: Password for authentication purposes.
119 videopassword:   Password for accessing a video.
120 usenetrc: Use netrc for authentication instead.
121 verbose: Print additional info to stdout.
122 quiet: Do not print messages to stdout.
123 no_warnings: Do not print out anything for warnings.
124 forceurl: Force printing final URL.
125 forcetitle: Force printing title.
126 forceid: Force printing ID.
127 forcethumbnail: Force printing thumbnail URL.
128 forcedescription: Force printing description.
129 forcefilename: Force printing final filename.
130 forceduration: Force printing duration.
131 forcejson: Force printing info_dict as JSON.
132 dump_single_json: Force printing the info_dict of the whole playlist
133 (or video) as a single JSON line.
134 simulate: Do not download the video files.
135 format: Video format code. See options.py for more information.
136 format_limit: Highest quality format to try.
137 outtmpl: Template for output names.
138 restrictfilenames: Do not allow "&" and spaces in file names
139 ignoreerrors: Do not stop on download errors.
140 nooverwrites: Prevent overwriting files.
141 playliststart: Playlist item to start at.
142 playlistend: Playlist item to end at.
143 playlist_items: Specific indices of playlist to download.
144 playlistreverse: Download playlist items in reverse order.
145 matchtitle: Download only matching titles.
146 rejecttitle: Reject downloads for matching titles.
147 logger: Log messages to a logging.Logger instance.
148 logtostderr: Log messages to stderr instead of stdout.
149 writedescription: Write the video description to a .description file
150 writeinfojson: Write the video description to a .info.json file
151 writeannotations: Write the video annotations to a .annotations.xml file
152 writethumbnail: Write the thumbnail image to a file
153 write_all_thumbnails: Write all thumbnail formats to files
154 writesubtitles: Write the video subtitles to a file
155 writeautomaticsub: Write the automatic subtitles to a file
156 allsubtitles: Downloads all the subtitles of the video
157 (requires writesubtitles or writeautomaticsub)
158 listsubtitles: Lists all available subtitles for the video
159 subtitlesformat: The format code for subtitles
160 subtitleslangs: List of languages of the subtitles to download
161 keepvideo: Keep the video file after post-processing
162 daterange: A DateRange object, download only if the upload_date is in the range.
163 skip_download: Skip the actual download of the video file
164 cachedir: Location of the cache files in the filesystem.
165 False to disable filesystem cache.
166 noplaylist: Download single video instead of a playlist if in doubt.
167 age_limit: An integer representing the user's age in years.
168 Unsuitable videos for the given age are skipped.
169 min_views: An integer representing the minimum view count the video
170 must have in order to not be skipped.
171 Videos without view count information are always
172 downloaded. None for no limit.
173 max_views: An integer representing the maximum view count.
174 Videos that are more popular than that are not
176 Videos without view count information are always
177 downloaded. None for no limit.
178 download_archive: File name of a file where all downloads are recorded.
179 Videos already present in the file are not downloaded
181 cookiefile: File name where cookies should be read from and dumped to.
182 nocheckcertificate:Do not verify SSL certificates
183 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
184 At the moment, this is only supported by YouTube.
185 proxy: URL of the proxy server to use
186 socket_timeout: Time to wait for unresponsive hosts, in seconds
187 bidi_workaround: Work around buggy terminals without bidirectional text
188 support, using fribidi
189 debug_printtraffic:Print out sent and received HTTP traffic
190 include_ads: Download ads as well
191 default_search: Prepend this string if an input url is not valid.
192 'auto' for elaborate guessing
193 encoding: Use this encoding instead of the system-specified.
194 extract_flat: Do not resolve URLs, return the immediate result.
195 Pass in 'in_playlist' to only show this behavior for
197 postprocessors: A list of dictionaries, each with an entry
198 * key: The name of the postprocessor. See
199 youtube_dl/postprocessor/__init__.py for a list.
200 as well as any further keyword arguments for the
202 progress_hooks: A list of functions that get called on download
203 progress, with a dictionary with the entries
204 * status: One of "downloading", "error", or "finished".
205 Check this first and ignore unknown values.
207 If status is one of "downloading", or "finished", the
208 following properties may also be present:
209 * filename: The final filename (always present)
210 * tmpfilename: The filename we're currently writing to
211 * downloaded_bytes: Bytes on disk
212 * total_bytes: Size of the whole file, None if unknown
213 * total_bytes_estimate: Guess of the eventual file size,
215 * elapsed: The number of seconds since download started.
216 * eta: The estimated time in seconds, None if unknown
217 * speed: The download speed in bytes/second, None if
219 * fragment_index: The counter of the currently
220 downloaded video fragment.
221 * fragment_count: The number of fragments (= individual
222 files that will be merged)
224 Progress hooks are guaranteed to be called at least once
225 (with status "finished") if the download is successful.
226 merge_output_format: Extension to use when merging formats.
227 fixup: Automatically correct known faults of the file.
229 - "never": do nothing
230 - "warn": only emit a warning
231 - "detect_or_warn": check whether we can do anything
232 about it, warn otherwise (default)
233 source_address: (Experimental) Client-side IP address to bind to.
234 call_home: Boolean, true iff we are allowed to contact the
235 youtube-dl servers for debugging.
236 sleep_interval: Number of seconds to sleep before each download.
237 listformats: Print an overview of available video formats and exit.
238 list_thumbnails: Print a table of all thumbnails and exit.
239 match_filter: A function that gets called with the info_dict of
241 If it returns a message, the video is ignored.
242 If it returns None, the video is downloaded.
243 match_filter_func in utils.py is one example for this.
244 no_color: Do not emit color codes in output.
246 The following options determine which downloader is picked:
247 external_downloader: Executable of the external downloader to call.
248 None or unset for standard (built-in) downloader.
249 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
251 The following parameters are not used by YoutubeDL itself, they are used by
253 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
254 noresizebuffer, retries, continuedl, noprogress, consoletitle,
257 The following options are used by the post processors:
258 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
259 otherwise prefer avconv.
260 exec_cmd: Arbitrary command to run after downloading
266 _download_retcode = None
267 _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        # NOTE(review): this listing is a non-contiguous excerpt — the original
        # line numbers jump, so several statements are missing (the params
        # default handling, sp_kwargs construction, the try: around the bidi
        # subprocess, the self.params assignment, the auto_init guard).
        # Restore from upstream before relying on this code.
        self._ies_instances = {}  # InfoExtractor instances, keyed by ie_key
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)
        if params.get('bidi_workaround', False):
            # Pipe screen output through an external bidi filter (bidiv or
            # fribidi) so right-to-left text renders correctly on buggy
            # terminals; a pty carries the filtered output back.
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # Fallback filter when 'bidiv' is unavailable.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
        # Force --restrict-filenames when the filesystem encoding cannot
        # represent arbitrary Unicode titles (Python 3 non-Windows only).
        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True
        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
        self.print_debug_header()
        self.add_default_info_extractors()
        # Instantiate configured postprocessors: 'key' selects the PP class,
        # the remaining dict entries become keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)
        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        # NOTE(review): excerpt — the head of this list comprehension
        # ("idxs = [") and the surrounding "if idxs:" / warning call are
        # missing from this listing; restore from upstream.
        i for i, a in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        # Suggested corrected command line: dash-prefixed IDs moved after '--'.
        [a for i, a in enumerate(argv) if i not in idxs] +
        ['--'] + [argv[i] for i in idxs]
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # NOTE(review): the line appending ie to self._ies appears to be
        # missing from this excerpt — only the instance cache is updated here.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the extractor gets a back-reference to us.
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        # NOTE(review): excerpt — the docstring delimiters, the
        # "if ie is None:" guard and the final "return ie" are missing.
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        ie = self._ies_instances.get(ie_key)
        # Lazily instantiate and register the extractor on a cache miss.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
    def add_default_info_extractors(self):
        # NOTE(review): the docstring delimiters are missing in this excerpt.
        Add the InfoExtractors returned by gen_extractors to the end of the list
        for ie in gen_extractors():
            self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # NOTE(review): the line appending pp to self._pps appears to be
        # missing from this excerpt.
        # Mutual registration: the postprocessor gets a back-reference to us.
        pp.set_downloader(self)
387 def add_progress_hook(self, ph):
388 """Add the progress hook (currently only for the file downloader)"""
389 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        # Push the message through the external bidi subprocess started in
        # __init__ and read the reordered text back from the pty channel.
        if not hasattr(self, '_output_channel'):
            # NOTE(review): the early "return message" for the
            # workaround-disabled case is missing from this excerpt.
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as we wrote.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Drop the trailing newline we appended above.
        return res[:-len('\n')]
404 def to_screen(self, message, skip_eol=False):
405 """Print message to stdout if not in quiet mode."""
406 return self.to_stdout(message, skip_eol, check_quiet=True)
408 def _write_string(self, s, out=None):
409 write_string(s, out=out, encoding=self.params.get('encoding'))
411 def to_stdout(self, message, skip_eol=False, check_quiet=False):
412 """Print message to stdout if not in quiet mode."""
413 if self.params.get('logger'):
414 self.params['logger'].debug(message)
415 elif not check_quiet or not self.params.get('quiet', False):
416 message = self._bidi_workaround(message)
417 terminator = ['\n', ''][skip_eol]
418 output = message + terminator
420 self._write_string(output, self._screen_file)
422 def to_stderr(self, message):
423 """Print message to stderr."""
424 assert isinstance(message, compat_str)
425 if self.params.get('logger'):
426 self.params['logger'].error(message)
428 message = self._bidi_workaround(message)
429 output = message + '\n'
430 self._write_string(output, self._err_file)
432 def to_console_title(self, message):
433 if not self.params.get('consoletitle', False):
435 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
436 # c_wchar_p() might not be necessary if `message` is
437 # already of type unicode()
438 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
439 elif 'TERM' in os.environ:
440 self._write_string('\033]0;%s\007' % message, self._screen_file)
442 def save_console_title(self):
443 if not self.params.get('consoletitle', False):
445 if 'TERM' in os.environ:
446 # Save the title on stack
447 self._write_string('\033[22;0t', self._screen_file)
449 def restore_console_title(self):
450 if not self.params.get('consoletitle', False):
452 if 'TERM' in os.environ:
453 # Restore the title from stack
454 self._write_string('\033[23;0t', self._screen_file)
        # NOTE(review): stray line — the enclosing "def __enter__(self):"
        # header is missing from this excerpt; on context entry the current
        # console title is saved so __exit__ can restore it.
        self.save_console_title()
460 def __exit__(self, *args):
461 self.restore_console_title()
463 if self.params.get('cookiefile') is not None:
464 self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        # NOTE(review): excerpt — the closing docstring delimiter and several
        # control-flow lines ("if tb is None:", "tb = ''", two "else:"
        # branches, "self.to_stderr(tb)") are missing from this listing.
        """Determine action to take when a download problem appears.
        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        tb, if given, is additional traceback information.
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the traceback of a wrapped exception when present
                # (ExtractorError keeps it on .exc_info).
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                # Outside an except block: dump the current stack instead.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, carrying the most specific exc_info.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure and continue.
        self._download_retcode = 1
496 def report_warning(self, message):
498 Print the message to stderr, it will be prefixed with 'WARNING:'
499 If stderr is a tty file the 'WARNING:' will be colored
501 if self.params.get('logger') is not None:
502 self.params['logger'].warning(message)
504 if self.params.get('no_warnings'):
506 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
507 _msg_header = '\033[0;33mWARNING:\033[0m'
509 _msg_header = 'WARNING:'
510 warning_message = '%s %s' % (_msg_header, message)
511 self.to_stderr(warning_message)
513 def report_error(self, message, tb=None):
515 Do the same as trouble, but prefixes the message with 'ERROR:', colored
516 in red if stderr is a tty file.
518 if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
519 _msg_header = '\033[0;31mERROR:\033[0m'
521 _msg_header = 'ERROR:'
522 error_message = '%s %s' % (_msg_header, message)
523 self.trouble(error_message, tb)
525 def report_file_already_downloaded(self, file_name):
526 """Report file has already been fully downloaded."""
528 self.to_screen('[download] %s has already been downloaded' % file_name)
529 except UnicodeEncodeError:
530 self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        # NOTE(review): excerpt — the enclosing try:, the autonumber_size
        # default, parts of the sanitize lambda/dict-comprehension and the
        # final "return filename" are missing from this listing.
        template_dict = dict(info_dict)
        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the playlist index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            # Synthesize a human-readable resolution string from width/height.
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '?x%d' % template_dict['width']
        # Sanitize every template value so it is safe in a file name.
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Unknown template fields render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """
        # NOTE(review): excerpt — the "if matchtitle:" / "if rejecttitle:" /
        # "if date is not None:" guards and the tail that returns the
        # match_filter result are missing from this listing.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            # Videos with no view-count information are never filtered here.
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        # User-supplied filter callback: a returned string is the skip reason.
        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            ret = match_filter(info_dict)
618 def add_extra_info(info_dict, extra_info):
619 '''Set the keys from extra_info in info dict if they are missing'''
620 for key, value in extra_info.items():
621 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        # NOTE(review): excerpt — the rest of the signature (process=True),
        # docstring delimiters, the loop over self._ies, several return
        # statements and the try: wrapping extraction are missing.
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        # With an explicit ie_key only that extractor is considered.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        self.add_default_extra_info(ie_result, ie, url)
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
        except Exception as e:
            # Unexpected error: only swallowed when ignoreerrors is set.
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        # Attach standard bookkeeping fields without overwriting values the
        # extractor already set.
        # NOTE(review): excerpt — the 'webpage_url': url entry and the
        # closing "})" appear to be missing from this listing.
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        # NOTE(review): excerpt — docstring delimiters, several "return"
        # lines, the "if playlistitems_str..." branches' else lines, the
        # to_screen( call heads, the entry-template dict head and the
        # compat_list _fixup definition are missing from this listing.
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).
        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        result_type = ie_result.get('_type', 'video')
        if result_type in ('url', 'url_transparent'):
            # extract_flat: keep unresolved URL results as-is instead of
            # recursing into them.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)
            # Non-None fields of the embedding result override the target's,
            # except for '_type' and 'url' themselves.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)
            assert new_result.get('_type') != 'url_transparent'
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)
            playlist_results = []
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
            playlistitems_str = self.params.get('playlist_items', None)
            if playlistitems_str is not None:
                # Expand "1-3,7" style specs into individual 1-based indices.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                        yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)
            ie_entries = ie_result['entries']
            # Three entry sources: a plain list, a PagedList, or any iterable.
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                entries = [ie_entries[i - 1] for i in playlistitems]
                entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                entries = ie_entries.getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
                # Generic iterable: materialize (fully for playlist_items,
                # lazily sliced otherwise).
                entry_list = list(ie_entries)
                entries = [entry_list[i - 1] for i in playlistitems]
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
            if self.params.get('playlistreverse', False):
                entries = entries[::-1]
            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry extra info propagated into each video result.
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            # Deprecated legacy result format produced by very old extractors.
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        raise Exception('Invalid result type: %s' % result_type)
    def _apply_format_filter(self, format_spec, available_formats):
        " Returns a tuple of the remaining format_spec and filtered formats "
        # NOTE(review): excerpt — the OPERATORS and STR_OPERATORS mapping
        # definitions, the "if m:" branches, the "def _filter(f):" header and
        # the error raises around invalid values are missing from this
        # listing.
        # Numeric filter, e.g. "[height<=720]" or "[filesize>10M]".
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        comparison_value = int(m.group('value'))
        # Fall back to parsing the value as a human-readable file size,
        # trying again with an appended 'B' suffix.
        comparison_value = parse_filesize(m.group('value'))
        if comparison_value is None:
            comparison_value = parse_filesize(m.group('value') + 'B')
        if comparison_value is None:
            'Invalid value %r in format specification %r' % (
                m.group('value'), format_spec))
        op = OPERATORS[m.group('op')]
        # String filter, e.g. "[ext=mp4]" or "[vcodec!=none]".
        str_operator_rex = re.compile(r'''(?x)\s*\[
            \s*(?P<key>ext|acodec|vcodec|container|protocol)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9_-]+)
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(format_spec)
        comparison_value = m.group('value')
        op = STR_OPERATORS[m.group('op')]
        raise ValueError('Invalid format specification %r' % format_spec)
        # A trailing '?' in the spec includes formats lacking the field.
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]
        # Strip the consumed "[...]" suffix; an empty remainder means 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'
        return (new_format_spec, new_formats)
    def select_format(self, format_spec, available_formats):
        # NOTE(review): excerpt — the "return None" after the empty-formats
        # check, the list-comprehension heads (audio_formats = [ /
        # video_formats = [), the "if audio_formats:" guards, the else before
        # the format_id filter and the final return are missing from this
        # listing.
        # Consume trailing "[...]" filters first; each pass narrows the
        # candidate formats.
        while format_spec.endswith(']'):
            format_spec, available_formats = self._apply_format_filter(
                format_spec, available_formats)
            if not available_formats:
        # Formats are sorted worst-to-best, so [-1] is the best candidate.
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
            f for f in available_formats
            if f.get('vcodec') == 'none']
            return audio_formats[-1]
        elif format_spec == 'worstaudio':
            f for f in available_formats
            if f.get('vcodec') == 'none']
            return audio_formats[0]
        elif format_spec == 'bestvideo':
            f for f in available_formats
            if f.get('acodec') == 'none']
            return video_formats[-1]
        elif format_spec == 'worstvideo':
            f for f in available_formats
            if f.get('acodec') == 'none']
            return video_formats[0]
        # Otherwise the spec is either a bare extension or a format_id.
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
        if format_spec in extensions:
            filter_f = lambda f: f['ext'] == format_spec
            filter_f = lambda f: f['format_id'] == format_spec
        matches = list(filter(filter_f, available_formats))
    def _calc_headers(self, info_dict):
        # Build the HTTP headers for a format: global std_headers, plus any
        # extractor-provided http_headers, plus the cookies for the URL.
        # NOTE(review): excerpt — the "if add_headers:" / "if cookies:"
        # guards and the final "return res" are missing from this listing.
        res = std_headers.copy()
        add_headers = info_dict.get('http_headers')
        res.update(add_headers)
        cookies = self._calc_cookies(info_dict)
        res['Cookie'] = cookies
965 def _calc_cookies(self, info_dict):
966 pr = compat_urllib_request.Request(info_dict['url'])
967 self.cookiejar.add_cookie_header(pr)
968 return pr.get_header('Cookie')
def process_video_result(self, info_dict, download=True):
    """Validate and normalise a single extracted video result, select the
    format(s) to download according to ``self.params['format']``, and hand
    each selected format to ``process_info``.

    NOTE(review): this copy of the source has individual lines elided
    throughout; the significant gaps are marked with ``[elided]`` comments.
    """
    assert info_dict.get('_type', 'video') == 'video'

    # Hard requirements on every extractor result.
    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # Normalise a lone 'thumbnail' entry into the 'thumbnails' list.
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        # [elided] presumably guarded by `if thumbnail:` -- confirm
        info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    # Sort worst-first so thumbnails[-1] below is the best candidate.
    thumbnails.sort(key=lambda t: (
        t.get('preference'), t.get('width'), t.get('height'),
        t.get('id'), t.get('url')))
    for i, t in enumerate(thumbnails):
        if 'width' in t and 'height' in t:
            t['resolution'] = '%dx%d' % (t['width'], t['height'])
        if t.get('id') is None:
            # [elided] body missing in this copy (presumably assigns t['id'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
        # [elided] an early `return` presumably follows -- confirm
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], info_dict.get('subtitles'),
        info_dict.get('automatic_captions'))

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        # [elided] presumably `if download:` guard here
        self.process_info(info_dict)
        # [elided] presumably returns info_dict here

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    # [elided] `else:` branch header missing in this copy
        formats = info_dict['formats']
    # [elided] `if not formats:` guard missing in this copy
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        if 'url' not in format:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)
        if format.get('format_id') is None:
            # Sanitize the format field, fall back to the positional index.
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
        # [elided] closing paren of the .format(...) call missing in this copy
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url']).lower()
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)

    format_limit = self.params.get('format_limit', None)
    # [elided] presumably guarded by `if format_limit:` -- confirm
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' field if the original info_dict lists them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        # [elided] an early `return` presumably follows -- confirm
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        # [elided] an early `return` presumably follows -- confirm

    req_format = self.params.get('format')
    if req_format is None:
        # [elided] default assignment missing in this copy
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    # [elided] `else:` branch header missing in this copy
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'
                    format_1, format_2 = rf.split('+')
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                        # [elided] `output_ext = (` opener missing in this copy
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        # [elided] `selected_format = {` opener missing in this copy
                            'requested_formats': formats_info,
                            'format': '%s+%s' % (formats_info[0].get('format'),
                                                 formats_info[1].get('format')),
                            'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                    formats_info[1].get('format_id')),
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'stretched_ratio': formats_info[0].get('stretched_ratio'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                    # [elided] `else:` branch header missing in this copy
                        selected_format = None
                # [elided] `else:` (single-format request) header missing
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    # [elided] a `break` presumably follows -- confirm

    if not formats_to_download:
        raise ExtractorError('requested format not available',
    # [elided] `if download:` guard presumably wraps the next lines -- confirm
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Merges normal subtitles with automatic captions (normal take
    precedence), filters by the 'subtitleslangs'/'allsubtitles' params,
    and picks one format per language from 'subtitlesformat'.

    NOTE(review): lines are elided in this copy (e.g. the
    ``available_subs = {}`` initializer and the final return); gaps are
    marked below.
    """
    # [elided] `available_subs = {}` initializer missing in this copy
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Automatic captions never override real subtitles.
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if (not self.params.get('writesubtitles') and not
            self.params.get('writeautomaticsub') or not
    # [elided] condition tail and early `return` missing in this copy

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    # [elided] `else:` branch header missing in this copy
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
        # [elided] `else:` branch header missing; fall back to first language
            requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # [elided] presumably guarded by `if formats is None:` -- confirm
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
        for ext in formats_preference:
            # [elided] lines missing ('best'/'worst' handling) in this copy
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            # [elided] lines missing (match selection / fallback) in this copy
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
    # [elided] the dict of selected subtitles is presumably returned here
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies filters, prints any forced fields, writes side files
    (description, annotations, subtitles, info JSON, thumbnails),
    downloads the media (merging multi-format requests), schedules
    fixup postprocessors, and records the download in the archive.

    NOTE(review): individual lines are elided throughout this copy;
    significant gaps are marked with ``[elided]`` comments.
    """
    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    # Cap absurdly long titles so the output filename stays usable.
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        # [elided] an early `return` presumably follows -- confirm

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings (the --get-* family of options).
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        # [elided] `else:` branch header missing in this copy
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        # [elided] an early `return` presumably follows -- confirm

    if filename is None:
        # [elided] an early `return` presumably follows -- confirm

    # [elided] a `try:` presumably wraps the directory creation -- confirm
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            # [elided] `os.makedirs(dn)` missing in this copy
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        # [elided] an early `return` presumably follows -- confirm

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        # [elided] `else:` / `try:` headers missing in this copy
            self.to_screen('[info] Writing video description to: ' + descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error('Cannot write description file ' + descfn)
            # [elided] an early `return` presumably follows -- confirm

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        # [elided] `else:` / `try:` headers missing in this copy
            self.to_screen('[info] Writing video annotations to: ' + annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            # 'annotations' absent or not a string -- treated as best-effort.
            self.report_warning('There are no annotations to write.')
        except (OSError, IOError):
            self.report_error('Cannot write annotations file: ' + annofn)
            # [elided] an early `return` presumably follows -- confirm

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            if sub_info.get('data') is not None:
                sub_data = sub_info['data']
            # [elided] `else:` / `try:` headers missing in this copy
                sub_data = ie._download_webpage(
                    sub_info['url'], info_dict['id'], note=False)
            except ExtractorError as err:
                self.report_warning('Unable to download subtitle for "%s": %s' %
                                    (sub_lang, compat_str(err.cause)))
                # [elided] a `continue` presumably follows -- confirm
            # [elided] a `try:` presumably wraps the write below -- confirm
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                # [elided] `else:` branch header missing in this copy
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub_data)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                # [elided] an early `return` presumably follows -- confirm

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        # [elided] `else:` branch header missing in this copy
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            # [elided] a `try:` presumably wraps the write -- confirm
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                # [elided] an early `return` presumably follows -- confirm

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        # [elided] `try:` and the `def dl(name, info):` helper header are
        # missing in this copy -- the next lines are that helper's body.
            fd = get_suitable_downloader(info, self.params)(self, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
            return fd.download(name, info)

        if info_dict.get('requested_formats') is not None:
            # Multiple formats requested (e.g. '137+141'): download each
            # part and merge them with ffmpeg/avconv afterwards.
            merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
            if not merger.available:
                # [elided] lines missing (dropping the merger) in this copy
                self.report_warning('You have requested multiple '
                                    'formats but ffmpeg or avconv are not installed.'
                                    ' The formats won\'t be merged')
            # [elided] `else:` branch header missing in this copy
                postprocessors = [merger]
            for f in info_dict['requested_formats']:
                new_info = dict(info_dict)
                # [elided] `new_info.update(f)` missing in this copy
                fname = self.prepare_filename(new_info)
                fname = prepend_extension(fname, 'f%s' % f['format_id'])
                downloaded.append(fname)
                partial_success = dl(fname, new_info)
                success = success and partial_success
            info_dict['__postprocessors'] = postprocessors
            info_dict['__files_to_merge'] = downloaded
        # [elided] `else:` branch header missing in this copy
            # Just a single file
            success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % str(err))
            # [elided] an early `return` presumably follows -- confirm
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            # [elided] an early `return` presumably follows -- confirm

        # [elided] `if success:` guard presumably wraps the fixups -- confirm
        # Fixup steps: repair known container/aspect-ratio defects.
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    # [elided] `else:` branch header missing in this copy
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                # [elided] `else:` branch header missing in this copy
                    assert fixup_policy in ('ignore', 'never')

            if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                if fixup_policy == 'warn':
                    self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    # [elided] `else:` branch header missing in this copy
                        self.report_warning(
                            '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                # [elided] `else:` branch header missing in this copy
                    assert fixup_policy in ('ignore', 'never')

            # [elided] a `try:` presumably wraps post_process -- confirm
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                # [elided] an early `return` presumably follows -- confirm

    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Refuses to run when several URLs would be written to one fixed output
    template; returns the accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    # Multiple URLs with a template that has no variable part would all
    # collide on the same output file.
    if (len(url_list) > 1 and
            '%' not in outtmpl and
            self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        # [elided] a `try:` presumably wraps extract_info -- confirm
            # It also downloads the videos
            res = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            # [elided] a re-`raise` presumably follows -- confirm
        # [elided] `else:` branch header missing in this copy
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download using a previously dumped .info.json file instead of
    re-extracting; on failure, retry from the recorded webpage URL."""
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = json.loads('\n'.join(f))
    # [elided] a `try:` presumably wraps process_ie_result -- confirm
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is not None:
            self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
            return self.download([webpage_url])
        # [elided] `else:` / re-`raise` lines missing in this copy
    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    # [elided] `pps_chain = []` initializer missing in this copy
    # Per-download postprocessors (e.g. merger) run before the global ones.
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        # [elided] `keep_video = None` initializer missing in this copy
        old_filename = info['filepath']
        # [elided] a `try:` presumably wraps pp.run -- confirm
            keep_video_wish, info = pp.run(info)
            if keep_video_wish is not None:
                # [elided] `if keep_video_wish:` line missing in this copy
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            # [elided] a `try:` presumably wraps the removal -- confirm
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded video file')
1501 def _make_archive_id(self, info_dict):
1502 # Future-proof against any change in case
1503 # and backwards compatibility with prior versions
1504 extractor = info_dict.get('extractor_key')
1505 if extractor is None:
1506 if 'id' in info_dict:
1507 extractor = info_dict.get('ie_key') # key in a playlist
1508 if extractor is None:
1509 return None # Incomplete video information
1510 return extractor.lower() + ' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return whether this video's archive id is already recorded in the
    download-archive file (False when no archive is configured or the
    information is incomplete)."""
    fn = self.params.get('download_archive')
    # [elided] `if fn is None: return False` guard missing in this copy

    vid_id = self._make_archive_id(info_dict)
    # [elided] presumably guarded by `if vid_id is None:` -- confirm
        return False  # Incomplete video information

    # [elided] a `try:` presumably wraps the file scan -- confirm
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    # [elided] `return True` missing in this copy
    except IOError as ioe:
        # A missing archive file simply means nothing is archived yet.
        if ioe.errno != errno.ENOENT:
            # [elided] re-`raise` and final `return False` missing in this copy
def record_download_archive(self, info_dict):
    """Append this video's archive id to the download-archive file
    (no-op when no archive file is configured)."""
    fn = self.params.get('download_archive')
    # [elided] `if fn is None: return` guard missing in this copy
    vid_id = self._make_archive_id(info_dict)
    # [elided] an `assert vid_id` presumably follows -- confirm
    # locked_file serialises concurrent writers to the archive.
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict:
    explicit 'resolution', else 'WxH', 'Hp' or '?xW' from the available
    dimensions, falling back to *default*.

    NOTE(review): no `self` parameter -- presumably a @staticmethod whose
    decorator line is elided in this copy.
    """
    if format.get('vcodec') == 'none':
        # [elided] the 'audio only' return is missing in this copy
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        # [elided] `else:` branch header missing in this copy
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        res = '?x%d' % format['width']
    # [elided] final `else: res = default` and `return res` missing
def _format_note(self, fdict):
    """Build the human-readable 'note' column for a format: codecs,
    bitrates, fps, sample rate and (approximate) filesize.

    NOTE(review): the ``res = ''`` initializer, the ``if res: res += ', ''``
    separator lines and the final ``return res`` are elided in this copy.
    """
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        # [elided] separator lines missing in this copy
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None and
            fdict.get('vcodec') != 'none'):
        # [elided] separator lines missing in this copy
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
            # [elided] `res += '@'` missing in this copy
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        # [elided] `res += 'video@'` missing in this copy
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        res += ', %sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        # [elided] separator lines missing in this copy
        if fdict['acodec'] == 'none':
            # [elided] 'video only' branch / `else:` missing in this copy
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        # [elided] separator / `res += 'audio'` lines missing in this copy
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    if fdict.get('filesize') is not None:
        # [elided] separator lines missing in this copy
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        # [elided] separator lines missing in this copy
        res += '~' + format_bytes(fdict['filesize_approx'])
    # [elided] `return res` missing in this copy
def list_formats(self, info_dict):
    """Print a table of the available formats for this video
    (used by --list-formats)."""
    formats = info_dict.get('formats', [info_dict])
    # [elided] `table = [` opener and `for f in formats` of the list
    # comprehension are missing in this copy.
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        # Formats with a very low preference are internal-only; hide them.
        if f.get('preference') is None or f['preference'] >= -1000]
    if len(formats) > 1:
        # Formats are assumed sorted worst-to-best; tag the last as best.
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    # [elided] `self.to_screen(` opener missing in this copy
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
    """Print a table of the available thumbnails for this video
    (used by --list-thumbnails)."""
    thumbnails = info_dict.get('thumbnails')
    # [elided] `if not thumbnails:` guard missing in this copy
        tn_url = info_dict.get('thumbnail')
        # [elided] `if tn_url:` guard missing in this copy
            thumbnails = [{'id': '0', 'url': tn_url}]
        # [elided] `else:` / `self.to_screen(` opener missing in this copy
                '[info] No thumbnails present for %s' % info_dict['id'])
            # [elided] an early `return` presumably follows -- confirm

    # [elided] `self.to_screen(` opener missing in this copy
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitle languages and formats
    (used by --list-subs); *name* distinguishes real subtitles from
    automatic captions."""
    # [elided] `if not subtitles:` guard missing in this copy
        self.to_screen('%s has no %s' % (video_id, name))
        # [elided] an early `return` / `self.to_screen(` opener missing
        'Available %s for %s:' % (name, video_id))
    # Best format last in the extractor's list -- show best first.
    self.to_screen(render_table(
        ['Language', 'formats'],
        [[lang, ', '.join(f['ext'] for f in reversed(formats))]
            for lang, formats in subtitles.items()]))
def urlopen(self, req):
    """Start an HTTP download for *req* (a URL string or a Request),
    percent-escaping non-ASCII characters in the URL first."""
    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    req_is_string = isinstance(req, compat_basestring)
    url = req if req_is_string else req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute URL if any change after escaping
    if url != url_escaped:
        # [elided] the string branch (`if req_is_string: req = url_escaped`
        # / `else:`) is missing in this copy.
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self):
    """Print the [debug] header: encodings, version, git HEAD, Python and
    helper-binary versions, proxy map, and optionally phone-home info.
    Only active with the 'verbose' param."""
    if not self.params.get('verbose'):
        # [elided] an early `return` presumably follows -- confirm

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    # [elided] `encoding_str = (` opener missing in this copy
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            # [elided] stdout_encoding argument line missing in this copy
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    # [elided] a best-effort `try:` presumably wraps the git probe -- confirm
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    # [elided] the matching `except` lines are missing in this copy
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        # [elided] the '%s %s' formatting line is missing in this copy
        for exe, v in sorted(exe_versions.items())
        # [elided] filter line and 'none' fallback missing in this copy
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # [elided] `proxy_map = {}` initializer missing in this copy
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        # NOTE: performs network requests to yt-dl.org.
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                # [elided] `latest_version)` argument line missing in this copy
def _setup_opener(self):
    """Create self._opener: a urllib opener wired with the cookie jar,
    proxy handling, the custom HTTPS handler and the YoutubeDLHandler.
    Also sets self._socket_timeout and self.cookiejar."""
    timeout_val = self.params.get('socket_timeout')
    # Default socket timeout of 600 seconds when not configured.
    self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    # [elided] `else:` branch header missing in this copy
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
        # [elided] `opts_cookiefile)` argument line missing in this copy
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
    # [elided] `self.cookiejar)` argument line missing in this copy
    if opts_proxy is not None:
        if opts_proxy == '':
            # [elided] empty-string disables proxies; `proxies = {}` missing
            proxies = {'http': opts_proxy, 'https': opts_proxy}
    # [elided] `else:` branch header missing in this copy
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
def encode(self, s):
    """Encode text *s* to bytes with the configured output encoding;
    bytes pass through unchanged. On failure, augments the error with a
    hint about --encoding before re-raising (re-raise line elided)."""
    if isinstance(s, bytes):
        return s  # Already encoded

    # [elided] a `try:` presumably wraps the encode -- confirm
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        # [elided] a re-`raise` presumably follows -- confirm
1788 def get_encoding(self):
1789 encoding = self.params.get('encoding')
1790 if encoding is None:
1791 encoding = preferredencoding()
def _write_thumbnails(self, info_dict, filename):
    """Download thumbnail image(s) next to the media file: the best one
    for 'writethumbnail', all of them for 'write_all_thumbnails'."""
    if self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails')
        # [elided] presumably guarded by `if thumbnails:` -- confirm
            # List is sorted worst-to-best, so the last entry is the best.
            thumbnails = [thumbnails[-1]]
    elif self.params.get('write_all_thumbnails', False):
        thumbnails = info_dict.get('thumbnails')
    # [elided] `else: return` branch missing in this copy

    # [elided] `if not thumbnails:` guard missing in this copy
        # No thumbnails present, so return immediately
        # [elided] the `return` itself is missing in this copy

    for t in thumbnails:
        thumb_ext = determine_ext(t['url'], 'jpg')
        # Disambiguate filenames only when writing several thumbnails.
        suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
        thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
        thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        # [elided] `else:` branch header missing in this copy
            self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            # [elided] a `try:` presumably wraps the download -- confirm
                uf = self.urlopen(t['url'])
                with open(thumb_filename, 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], compat_str(err)))