8 compat_subprocess_get_DEVNULL,
class PostProcessor(object):
    """Base class for post-download processing steps.

    A downloader keeps an ordered chain of PostProcessor objects, which are
    attached through its add_post_processor() method. After a successful
    download the downloader invokes run() on each member of the chain in
    turn: the first call receives an initial argument, and every later call
    receives the value returned by the processor before it.

    The chain stops early as soon as a processor returns None; otherwise it
    runs until its last member has been called.

    Registration is "mutual", similar to InfoExtractor objects: besides being
    added to the downloader, the processor keeps a reference to that
    downloader in self._downloader.
    """

    def __init__(self, downloader=None):
        """Create the processor, optionally bound to a downloader."""
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Bind this post processor to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Run the post processor on one downloaded item.

        "information" is a dictionary like the ones composed by
        InfoExtractors, extended with a "filepath" field (the subclasses in
        this module read it as the path of the file to process).

        Returns a 2-tuple. The first element says whether the original file
        should be kept, i.e. not deleted (None means "no preference"); the
        second element is the updated information dictionary.

        Implementations may raise PostProcessingError when post processing
        fails.
        """
        # The base class does nothing: no preference about the original
        # file, and the information is handed back untouched.
        keep_original = None
        return keep_original, information
61 class FFmpegPostProcessorError(PostProcessingError):
64 class AudioConversionError(PostProcessingError):
68 class FFmpegPostProcessor(PostProcessor):
def __init__(self, downloader=None):
    """Initialise the base post processor and probe for the external tools.

    The mapping returned by detect_executables() is cached in self._exes;
    the other methods of this class look up 'ffmpeg', 'avconv', 'ffprobe'
    and 'avprobe' in it.
    """
    PostProcessor.__init__(self, downloader)
    available = self.detect_executables()
    self._exes = available
74 def detect_executables():
77 subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
81 programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
82 return dict((program, executable(program)) for program in programs)
84 def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
85 if not self._exes['ffmpeg'] and not self._exes['avconv']:
86 raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
89 for path in input_paths:
90 files_cmd.extend(['-i', encodeFilename(path, True)])
91 cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
93 [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
95 if self._downloader.params.get('verbose', False):
96 self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
97 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
98 stdout,stderr = p.communicate()
100 stderr = stderr.decode('utf-8', 'replace')
101 msg = stderr.strip().split('\n')[-1]
102 raise FFmpegPostProcessorError(msg)
def run_ffmpeg(self, path, out_path, opts):
    """Run the converter on a single input file.

    Thin convenience wrapper: wraps path in a one-element list and passes
    it, together with out_path and opts, to run_ffmpeg_multiple_files().
    Any exception raised there propagates unchanged.
    """
    single_input = [path]
    self.run_ffmpeg_multiple_files(single_input, out_path, opts)
107 def _ffmpeg_filename_argument(self, fn):
108 # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
109 if fn.startswith(u'-'):
114 class FFmpegExtractAudioPP(FFmpegPostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
    """Store the audio extraction preferences.

    downloader        -- passed through to FFmpegPostProcessor.__init__
    preferredcodec    -- wanted audio codec; None is treated as 'best'
    preferredquality  -- wanted quality/bitrate value, or None for no
                         explicit quality option
    nopostoverwrites  -- stored as self._nopostoverwrites
    """
    FFmpegPostProcessor.__init__(self, downloader)
    self._preferredcodec = 'best' if preferredcodec is None else preferredcodec
    self._preferredquality = preferredquality
    self._nopostoverwrites = nopostoverwrites
123 def get_audio_codec(self, path):
124 if not self._exes['ffprobe'] and not self._exes['avprobe']:
125 raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
128 self._exes['avprobe'] or self._exes['ffprobe'],
130 encodeFilename(self._ffmpeg_filename_argument(path), True)]
131 handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
132 output = handle.communicate()[0]
133 if handle.wait() != 0:
135 except (IOError, OSError):
138 for line in output.decode('ascii', 'ignore').split('\n'):
139 if line.startswith('codec_name='):
140 audio_codec = line.split('=')[1].strip()
141 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
145 def run_ffmpeg(self, path, out_path, codec, more_opts):
146 if not self._exes['ffmpeg'] and not self._exes['avconv']:
147 raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
151 acodec_opts = ['-acodec', codec]
152 opts = ['-vn'] + acodec_opts + more_opts
154 FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
155 except FFmpegPostProcessorError as err:
156 raise AudioConversionError(err.msg)
158 def run(self, information):
159 path = information['filepath']
161 filecodec = self.get_audio_codec(path)
162 if filecodec is None:
163 raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
166 if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
167 if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
168 # Lossless, but in another container
171 more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
172 elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
173 # Lossless if possible
175 extension = filecodec
176 if filecodec == 'aac':
177 more_opts = ['-f', 'adts']
178 if filecodec == 'vorbis':
182 acodec = 'libmp3lame'
185 if self._preferredquality is not None:
186 if int(self._preferredquality) < 10:
187 more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
189 more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
191 # We convert the audio (lossy)
192 acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
193 extension = self._preferredcodec
195 if self._preferredquality is not None:
196 # The opus codec doesn't support the -aq option
197 if int(self._preferredquality) < 10 and extension != 'opus':
198 more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
200 more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
201 if self._preferredcodec == 'aac':
202 more_opts += ['-f', 'adts']
203 if self._preferredcodec == 'm4a':
204 more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
205 if self._preferredcodec == 'vorbis':
207 if self._preferredcodec == 'wav':
209 more_opts += ['-f', 'wav']
211 prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
212 new_path = prefix + sep + extension
214 # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
216 self._nopostoverwrites = True
219 if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
220 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
222 self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
223 self.run_ffmpeg(path, new_path, acodec, more_opts)
225 etype,e,tb = sys.exc_info()
226 if isinstance(e, AudioConversionError):
227 msg = u'audio conversion failed: ' + e.msg
229 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
230 raise PostProcessingError(msg)
232 # Try to update the date time for extracted audio file.
233 if information.get('filetime') is not None:
235 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
237 self._downloader.report_warning(u'Cannot update utime of audio file')
239 information['filepath'] = new_path
240 return self._nopostoverwrites,information
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts a downloaded video to another format.

    The conversion itself is delegated to run_ffmpeg() with no extra
    options. The target format is used as the new file extension and as the
    new value of the 'format' and 'ext' fields of the information dictionary.
    """

    def __init__(self, downloader=None,preferedformat=None):
        """Store the target format.

        downloader      -- passed through to the parent constructor
        preferedformat  -- extension of the wanted output format (the
                           misspelt parameter name is kept so that existing
                           keyword callers keep working)
        """
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    @staticmethod
    def _replace_extension(path, new_ext):
        """Return path with its final extension replaced by new_ext.

        If the last path component has no extension, '.' + new_ext is
        appended instead, so the file name is never lost.
        """
        # str.rpartition is kept (rather than os.path.splitext) to stay with
        # the string operation the original implementation used.
        prefix, sep, ext = path.rpartition(u'.')
        separators = [s for s in (os.sep, os.altsep) if s]
        dot_in_last_component = not any(s in ext for s in separators)
        if sep and dot_in_last_component:
            return prefix + sep + new_ext
        # No dot at all (rpartition returned ('', '', path)), or the last dot
        # belongs to a directory name: nothing to replace, so append.
        return path + u'.' + new_ext

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns (True, information) without touching anything when
        information['ext'] already equals the preferred format. Otherwise
        runs the conversion, updates 'filepath', 'format' and 'ext' in the
        information dictionary and returns (False, information); the first
        element of the tuple tells the caller whether the original file
        should be kept.

        Exceptions raised by run_ffmpeg() propagate to the caller.
        """
        path = information['filepath']
        target = self._preferedformat
        if information['ext'] == target:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, target))
            return True, information

        outpath = self._replace_extension(path, target)
        self._downloader.to_screen(u'[ffmpeg] Converting video from %s to %s, Destination: ' % (information['ext'], target) + outpath)
        self.run_ffmpeg(path, outpath, [])
        information['filepath'] = outpath
        information['format'] = target
        information['ext'] = target
        return False, information
263 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
264 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
def __init__(self, downloader=None, subtitlesformat='srt'):
    """Remember which subtitle file format to embed.

    downloader       -- passed through to the parent constructor
    subtitlesformat  -- format of the subtitle files; run() passes it to
                        subtitles_filename() to build their names
                        (default 'srt')
    """
    parent = super(FFmpegEmbedSubtitlePP, self)
    parent.__init__(downloader)
    self._subformat = subtitlesformat
457 def _conver_lang_code(cls, code):
458 """Convert language code from ISO 639-1 to ISO 639-2/T"""
459 return cls._lang_map.get(code[:2])
def run(self, information):
    """Embed the subtitle files of a download into its mp4 video.

    Nothing is done (apart from a screen message) when information['ext']
    is not 'mp4' or when information has no subtitles. Otherwise one input
    file per subtitle language is passed to the converter together with the
    video, the result is written to a temporary file next to the video and
    then moved over the original.

    Always returns (True, information). Exceptions raised by
    run_ffmpeg_multiple_files() or by the file operations propagate.
    """
    if information['ext'] != u'mp4':
        self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
        return True, information
    if not information.get('subtitles'):
        self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
        return True, information

    sub_langs = list(information['subtitles'])
    filename = information['filepath']

    # Input 0 is the video itself; inputs 1..n are the subtitle files.
    input_files = [filename]
    for lang in sub_langs:
        input_files.append(subtitles_filename(filename, lang, self._subformat))

    # Copy streams 0 and 1 of the video as they are.
    opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
    for index, lang in enumerate(sub_langs):
        # Subtitle input numbers are shifted by one because of the video.
        opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
        lang_code = self._conver_lang_code(lang)
        if lang_code is not None:
            opts += ['-metadata:s:s:%d' % index, 'language=%s' % lang_code]
    # The temporary name does not end in .mp4, so the format is forced.
    opts += ['-f', 'mp4']

    temp_filename = filename + u'.temp'
    self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
    self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)

    # Replace the original file with the freshly written one.
    os.remove(encodeFilename(filename))
    os.rename(encodeFilename(temp_filename), encodeFilename(filename))

    return True, information
490 class FFmpegMetadataPP(FFmpegPostProcessor):
493 if info.get('title') is not None:
494 metadata['title'] = info['title']
495 if info.get('upload_date') is not None:
496 metadata['date'] = info['upload_date']
497 if info.get('uploader') is not None:
498 metadata['artist'] = info['uploader']
499 elif info.get('uploader_id') is not None:
500 metadata['artist'] = info['uploader_id']
503 self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
506 filename = info['filepath']
507 temp_filename = prepend_extension(filename, 'temp')
509 options = ['-c', 'copy']
510 for (name, value) in metadata.items():
511 options.extend(['-metadata', '%s=%s' % (name, value)])
513 self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
514 self.run_ffmpeg(filename, temp_filename, options)
515 os.remove(encodeFilename(filename))
516 os.rename(encodeFilename(temp_filename), encodeFilename(filename))
520 class FFmpegMergerPP(FFmpegPostProcessor):
522 filename = info['filepath']
523 args = ['-c', 'copy']
524 self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
528 class XAttrMetadataPP(PostProcessor):
531 # More info about extended attributes for media:
532 # http://freedesktop.org/wiki/CommonExtendedAttributes/
533 # http://www.freedesktop.org/wiki/PhreedomDraft/
534 # http://dublincore.org/documents/usageguide/elements.shtml
537 # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
538 # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
542 """ Set extended attributes on downloaded file (if xattr support is found). """
544 # This mess below finds the best xattr tool for the job and creates a
545 # "write_xattr" function.
547 # try the pyxattr module...
549 def write_xattr(path, key, value):
550 return xattr.setxattr(path, key, value)
554 if os.name == 'posix':
556 for dir in os.environ["PATH"].split(":"):
557 path = os.path.join(dir, bin)
558 if os.path.exists(path):
561 user_has_setfattr = which("setfattr")
562 user_has_xattr = which("xattr")
564 if user_has_setfattr or user_has_xattr:
566 def write_xattr(path, key, value):
569 # setfattr: /tmp/blah: Operation not supported
570 "Operation not supported": errno.EOPNOTSUPP,
571 # setfattr: ~/blah: No such file or directory
572 # xattr: No such file: ~/blah
573 "No such file": errno.ENOENT,
576 if user_has_setfattr:
577 cmd = ['setfattr', '-n', key, '-v', value, path]
579 cmd = ['xattr', '-w', key, value, path]
582 output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
583 except subprocess.CalledProcessError as e:
584 errorstr = e.output.strip().decode()
585 for potential_errorstr, potential_errno in potential_errors.items():
586 if errorstr.find(potential_errorstr) > -1:
587 e = OSError(potential_errno, potential_errorstr)
590 raise # Reraise unhandled error
593 # On Unix, and can't find pyxattr, setfattr, or xattr.
594 if sys.platform.startswith('linux'):
595 self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool).")
596 elif sys.platform == 'darwin':
597 self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'xattr' binary.")
599 # Write xattrs to NTFS Alternate Data Streams: http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
600 def write_xattr(path, key, value):
601 assert(key.find(":") < 0)
602 assert(path.find(":") < 0)
603 assert(os.path.exists(path))
605 ads_fn = path + ":" + key
606 with open(ads_fn, "w") as f:
609 # Write the metadata to the file's xattrs
610 self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs...')
612 filename = info['filepath']
616 'user.xdg.referrer.url': 'webpage_url',
617 # 'user.xdg.comment': 'description',
618 'user.dublincore.title': 'title',
619 'user.dublincore.date': 'upload_date',
620 'user.dublincore.description': 'description',
621 'user.dublincore.contributor': 'uploader',
622 'user.dublincore.format': 'format',
625 for xattrname, infoname in xattr_mapping.items():
627 value = info.get(infoname)
630 if infoname == "upload_date":
631 value = hyphenate_date(value)
633 write_xattr(filename, xattrname, value)
638 self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")