Don't use bare 'except:'
[youtube-dl] / youtube_dl / postprocessor / ffmpeg.py
index 4b93556af7604faf44dd95e28417ce4f89a8799f..55adf9685b3e1407d6cb1fe1e1205e290203fa95 100644 (file)
@@ -1,17 +1,20 @@
+from __future__ import unicode_literals
+
+import io
 import os
-import re
 import subprocess
-import sys
 import time
 
 
 from .common import AudioConversionError, PostProcessor
 
-from ..utils import (
-    check_executable,
+from ..compat import (
     compat_subprocess_get_DEVNULL,
+)
+from ..utils import (
     encodeArgument,
     encodeFilename,
+    get_exe_version,
     is_outdated_version,
     PostProcessingError,
     prepend_extension,
@@ -20,23 +23,6 @@ from ..utils import (
 )
 
 
-def get_version(executable):
-    """ Returns the version of the specified executable,
-    or False if the executable is not present """
-    try:
-        out, err = subprocess.Popen(
-            [executable, '-version'],
-            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
-    except OSError:
-        return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline)
-    if not m:
-        return u'present'
-    else:
-        return m.group(1)
-
-
 class FFmpegPostProcessorError(PostProcessingError):
     pass
 
@@ -44,58 +30,119 @@ class FFmpegPostProcessorError(PostProcessingError):
 class FFmpegPostProcessor(PostProcessor):
     def __init__(self, downloader=None, deletetempfiles=False):
+        """Initialise the postprocessor and look up the ffmpeg/avconv programs.
+
+        downloader is handed to PostProcessor.__init__; deletetempfiles is
+        stored as self._deletetempfiles.
+        """
         PostProcessor.__init__(self, downloader)
-        self._versions = self.get_versions()
         self._deletetempfiles = deletetempfiles
+        self._determine_executables()
 
     def check_version(self):
+        """Check that a usable ffmpeg/avconv was found.
+
+        Raises FFmpegPostProcessorError when no executable is available.
+        When is_outdated_version() reports the detected version as older than
+        the required one, a warning is sent to the downloader (if one is set).
+        """
-        if not self._executable:
-            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+        if not self.available:
+            raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
 
-        REQUIRED_VERSION = '1.0'
+        # avconv and ffmpeg are compared against different minimum versions.
+        required_version = '10-0' if self.basename == 'avconv' else '1.0'
         if is_outdated_version(
-                self._versions[self._executable], REQUIRED_VERSION):
-            warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
-                self._executable, self._executable, REQUIRED_VERSION)
+                self._versions[self.basename], required_version):
+            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+                self.basename, self.basename, required_version)
             if self._downloader:
                 self._downloader.report_warning(warning)
 
     @staticmethod
-    def get_versions():
-        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
-        return dict((program, get_version(program)) for program in programs)
+    def get_versions(downloader=None):
+        """Return the _versions mapping of a newly constructed FFmpegPostProcessor.
+
+        The constructor runs _determine_executables(), which fills _versions.
+        """
+        return FFmpegPostProcessor(downloader)._versions
 
-    @property
-    def _executable(self):
-        if self._downloader.params.get('prefer_ffmpeg', False):
+    def _determine_executables(self):
+        """Work out which ffmpeg/avconv and ffprobe/avprobe programs to use.
+
+        Sets self._paths (program name -> path to run), self._versions
+        (program name -> result of get_exe_version), self.basename and
+        self.probe_basename.  basename/probe_basename stay None when no
+        matching program reports a version.
+
+        If the downloader provides an 'ffmpeg_location' parameter, programs
+        are looked up in that directory (or in the directory containing that
+        file).  An unusable location produces a warning, leaves _versions
+        empty and returns early.
+        """
+        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+        # The downloader is optional (get_versions() defaults it to None), so
+        # its params may only be read inside the `if self._downloader` guard.
+        prefer_ffmpeg = False
+
+        self.basename = None
+        self.probe_basename = None
+
+        self._paths = None
+        self._versions = None
+        if self._downloader:
+            prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
+            location = self._downloader.params.get('ffmpeg_location')
+            if location is not None:
+                if not os.path.exists(location):
+                    self._downloader.report_warning(
+                        'ffmpeg-location %s does not exist! '
+                        'Continuing without avconv/ffmpeg.' % (location))
+                    self._versions = {}
+                    return
+                elif not os.path.isdir(location):
+                    basename = os.path.splitext(os.path.basename(location))[0]
+                    if basename not in programs:
+                        self._downloader.report_warning(
+                            'Cannot identify executable %s, its basename should be one of %s. '
+                            'Continuing without avconv/ffmpeg.' %
+                            (location, ', '.join(programs)))
+                        self._versions = {}
+                        return
+                    location = os.path.dirname(os.path.abspath(location))
+                    # Pointing at an ffmpeg/ffprobe binary implies preferring it.
+                    if basename in ('ffmpeg', 'ffprobe'):
+                        prefer_ffmpeg = True
+
+                self._paths = dict(
+                    (p, os.path.join(location, p)) for p in programs)
+                self._versions = dict(
+                    (p, get_exe_version(self._paths[p], args=['-version']))
+                    for p in programs)
+        # No explicit location: run the programs by their bare names.
+        if self._versions is None:
+            self._versions = dict(
+                (p, get_exe_version(p, args=['-version'])) for p in programs)
+            self._paths = dict((p, p) for p in programs)
+
+        if prefer_ffmpeg:
             prefs = ('ffmpeg', 'avconv')
         else:
             prefs = ('avconv', 'ffmpeg')
         for p in prefs:
             if self._versions[p]:
-                return p
-        return None
+                self.basename = p
+                break
+
+        if prefer_ffmpeg:
+            prefs = ('ffprobe', 'avprobe')
+        else:
+            prefs = ('avprobe', 'ffprobe')
+        for p in prefs:
+            if self._versions[p]:
+                self.probe_basename = p
+                break
 
-    def _uses_avconv(self):
-        return self._executable == 'avconv'
+    @property
+    def available(self):
+        """True when self.basename is set, i.e. an ffmpeg/avconv was selected."""
+        return self.basename is not None
+
+    @property
+    def executable(self):
+        """The self._paths entry for the selected converter (self.basename)."""
+        return self._paths[self.basename]
+
+    @property
+    def probe_executable(self):
+        """The self._paths entry for the selected probe tool, or None.
+
+        Returns None when no ffprobe/avprobe was selected (probe_basename is
+        None), so that callers testing `if not self.probe_executable` get a
+        falsy value instead of a lookup error.
+        """
+        if self.probe_basename is None:
+            return None
+        return self._paths[self.probe_basename]
 
     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
         self.check_version()
 
+        oldest_mtime = min(
+            os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+
         files_cmd = []
         for path in input_paths:
-            files_cmd.extend(['-i', encodeFilename(path, True)])
-        cmd = ([self._executable, '-y'] + files_cmd
-               + [encodeArgument(o) for o in opts] +
+            files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
+        cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
+               files_cmd +
+               [encodeArgument(o) for o in opts] +
                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
 
         if self._downloader.params.get('verbose', False):
-            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
         stdout, stderr = p.communicate()
         if p.returncode != 0:
             stderr = stderr.decode('utf-8', 'replace')
             msg = stderr.strip().split('\n')[-1]
             raise FFmpegPostProcessorError(msg)
+        os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime))
         if self._deletetempfiles:
             for ipath in input_paths:
                 os.remove(ipath)
@@ -105,8 +152,8 @@ class FFmpegPostProcessor(PostProcessor):
 
     def _ffmpeg_filename_argument(self, fn):
+        """Return fn, prefixed with './' when it starts with '-'."""
         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
-        if fn.startswith(u'-'):
-            return u'./' + fn
+        if fn.startswith('-'):
+            return './' + fn
         return fn
 
 
@@ -120,14 +167,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         self._nopostoverwrites = nopostoverwrites
 
     def get_audio_codec(self, path):
-        if not self._exes['ffprobe'] and not self._exes['avprobe']:
-            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
+
+        if not self.probe_executable:
+            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
         try:
             cmd = [
-                self._exes['avprobe'] or self._exes['ffprobe'],
-                '-show_streams',
+                encodeFilename(self.probe_executable, True),
+                encodeArgument('-show_streams'),
                 encodeFilename(self._ffmpeg_filename_argument(path), True)]
-            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
+            if self._downloader.params.get('verbose', False):
+                self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
             output = handle.communicate()[0]
             if handle.wait() != 0:
                 return None
@@ -157,16 +207,15 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 
         filecodec = self.get_audio_codec(path)
         if filecodec is None:
-            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
+            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
 
-        uses_avconv = self._uses_avconv()
         more_opts = []
         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                 # Lossless, but in another container
                 acodec = 'copy'
                 extension = 'm4a'
-                more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+                more_opts = ['-bsf:a', 'aac_adtstoasc']
             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                 # Lossless if possible
                 acodec = 'copy'
@@ -182,9 +231,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
                 more_opts = []
                 if self._preferredquality is not None:
                     if int(self._preferredquality) < 10:
-                        more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+                        more_opts += ['-q:a', self._preferredquality]
                     else:
-                        more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+                        more_opts += ['-b:a', self._preferredquality + 'k']
         else:
             # We convert the audio (lossy)
             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
@@ -193,20 +242,20 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             if self._preferredquality is not None:
                 # The opus codec doesn't support the -aq option
                 if int(self._preferredquality) < 10 and extension != 'opus':
-                    more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+                    more_opts += ['-q:a', self._preferredquality]
                 else:
-                    more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+                    more_opts += ['-b:a', self._preferredquality + 'k']
             if self._preferredcodec == 'aac':
                 more_opts += ['-f', 'adts']
             if self._preferredcodec == 'm4a':
-                more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+                more_opts += ['-bsf:a', 'aac_adtstoasc']
             if self._preferredcodec == 'vorbis':
                 extension = 'ogg'
             if self._preferredcodec == 'wav':
                 extension = 'wav'
                 more_opts += ['-f', 'wav']
 
-        prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
+        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
         new_path = prefix + sep + extension
 
         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
@@ -215,47 +264,45 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 
         try:
             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
-                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
+                self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
             else:
-                self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)
+                self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
                 self.run_ffmpeg(path, new_path, acodec, more_opts)
-        except:
-            etype,e,tb = sys.exc_info()
-            if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.msg
-            else:
-                msg = u'error running ' + self._executable
-            raise PostProcessingError(msg)
+        except AudioConversionError as e:
+            raise PostProcessingError(
+                'audio conversion failed: ' + e.msg)
+        except Exception:
+            raise PostProcessingError('error running ' + self.basename)
 
         # Try to update the date time for extracted audio file.
         if information.get('filetime') is not None:
             try:
                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
-            except:
-                self._downloader.report_warning(u'Cannot update utime of audio file')
+            except Exception:
+                self._downloader.report_warning('Cannot update utime of audio file')
 
         information['filepath'] = new_path
-        return self._nopostoverwrites,information
+        return self._nopostoverwrites, information
 
 
-class FFmpegVideoConvertor(FFmpegPostProcessor):
-    def __init__(self, downloader=None,preferedformat=None):
-        super(FFmpegVideoConvertor, self).__init__(downloader)
-        self._preferedformat=preferedformat
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
+    """Postprocessor that converts the file to the format in self._preferedformat."""
+    def __init__(self, downloader=None, preferedformat=None):
+        super(FFmpegVideoConvertorPP, self).__init__(downloader)
+        self._preferedformat = preferedformat
 
     def run(self, information):
+        """Convert information['filepath'] to the preferred format.
+
+        Returns (True, information) unchanged when information['ext'] already
+        equals the preferred format.  Otherwise runs ffmpeg, updates the
+        'filepath', 'format' and 'ext' entries and returns (False, information).
+        """
         path = information['filepath']
-        prefix, sep, ext = path.rpartition(u'.')
+        prefix, sep, ext = path.rpartition('.')
+        # Output path: same name with the extension replaced by the target format.
         outpath = prefix + sep + self._preferedformat
         if information['ext'] == self._preferedformat:
-            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
-            return True,information
-        self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
+            self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+            return True, information
+        self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
         self.run_ffmpeg(path, outpath, [])
         information['filepath'] = outpath
         information['format'] = self._preferedformat
         information['ext'] = self._preferedformat
-        return False,information
+        return False, information
 
 
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
@@ -447,37 +494,40 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         'zu': 'zul',
     }
 
-    def __init__(self, downloader=None, subtitlesformat='srt'):
-        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
-        self._subformat = subtitlesformat
-
     @classmethod
     def _conver_lang_code(cls, code):
         """Convert language code from ISO 639-1 to ISO 639-2/T"""
         return cls._lang_map.get(code[:2])
 
     def run(self, information):
-        if information['ext'] != u'mp4':
-            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
+        if information['ext'] != 'mp4':
+            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
-        if not information.get('subtitles'):
-            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed') 
+        subtitles = information.get('requested_subtitles')
+        if not subtitles:
+            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return True, information
 
-        sub_langs = [key for key in information['subtitles']]
+        sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
-
-        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
+        input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
+
+        opts = [
+            '-map', '0',
+            '-c', 'copy',
+            # Don't copy the existing subtitles, we may be running the
+            # postprocessor a second time
+            '-map', '-0:s',
+            '-c:s', 'mov_text',
+        ]
         for (i, lang) in enumerate(sub_langs):
-            opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
+            opts.extend(['-map', '%d:0' % (i + 1)])
             lang_code = self._conver_lang_code(lang)
             if lang_code is not None:
                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
-        opts.extend(['-f', 'mp4'])
 
-        temp_filename = filename + u'.temp'
-        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+        temp_filename = prepend_extension(filename, 'temp')
+        self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -492,19 +542,28 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             metadata['title'] = info['title']
         if info.get('upload_date') is not None:
             metadata['date'] = info['upload_date']
-        if info.get('uploader') is not None:
+        if info.get('artist') is not None:
+            metadata['artist'] = info['artist']
+        elif info.get('uploader') is not None:
             metadata['artist'] = info['uploader']
         elif info.get('uploader_id') is not None:
             metadata['artist'] = info['uploader_id']
+        if info.get('description') is not None:
+            metadata['description'] = info['description']
+            metadata['comment'] = info['description']
+        if info.get('webpage_url') is not None:
+            metadata['purl'] = info['webpage_url']
+        if info.get('album') is not None:
+            metadata['album'] = info['album']
 
         if not metadata:
-            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
+            self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
             return True, info
 
         filename = info['filepath']
         temp_filename = prepend_extension(filename, 'temp')
 
-        if info['ext'] == u'm4a':
+        if info['ext'] == 'm4a':
             options = ['-vn', '-acodec', 'copy']
         else:
             options = ['-c', 'copy']
@@ -512,7 +571,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         for (name, value) in metadata.items():
             options.extend(['-metadata', '%s=%s' % (name, value)])
 
-        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
+        self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
@@ -522,8 +581,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 class FFmpegMergerPP(FFmpegPostProcessor):
+    """Merge the files in info['__files_to_merge'] into info['filepath']."""
     def run(self, info):
         filename = info['filepath']
-        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
-        self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
+        # Stream copy: first video stream of input 0, first audio stream of input 1.
+        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
+        self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
         return True, info
 
@@ -534,10 +593,84 @@ class FFmpegAudioFixPP(FFmpegPostProcessor):
         temp_filename = prepend_extension(filename, 'temp')
 
         options = ['-vn', '-acodec', 'copy']
-        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+        self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
+
+
+class FFmpegFixupStretchedPP(FFmpegPostProcessor):
+    """Rewrite the file with the aspect ratio given in info['stretched_ratio']."""
+    def run(self, info):
+        """Apply '-aspect' with a stream copy; no-op when the ratio is missing or 1.
+
+        Always returns (True, info).
+        """
+        stretched_ratio = info.get('stretched_ratio')
+        if stretched_ratio is None or stretched_ratio == 1:
+            return True, info
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
+        self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
         self.run_ffmpeg(filename, temp_filename, options)
 
+        # Replace the original file with the temporary output.
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
+
+
+class FFmpegFixupM4aPP(FFmpegPostProcessor):
+    """Remux files whose info['container'] is 'm4a_dash' using '-f mp4'."""
+    def run(self, info):
+        """Stream-copy the file into an mp4 container; no-op for other containers.
+
+        Always returns (True, info).
+        """
+        if info.get('container') != 'm4a_dash':
+            return True, info
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-c', 'copy', '-f', 'mp4']
+        self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        # Replace the original file with the temporary output.
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+    """Convert the requested subtitle files to another format with ffmpeg."""
+
+    def __init__(self, downloader=None, format=None):
+        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+        self.format = format
+
+    def run(self, info):
+        """Convert every entry of info['requested_subtitles'] to self.format.
+
+        For each language whose subtitle is not already in the target format,
+        ffmpeg writes a new subtitle file and the entry is replaced by the
+        new extension plus the converted text.  Always returns (True, info).
+        """
+        subs = info.get('requested_subtitles')
+        filename = info['filepath']
+        new_ext = self.format
+        new_format = new_ext
+        # The 'vtt' extension is passed to ffmpeg's '-f' as 'webvtt'.
+        if new_format == 'vtt':
+            new_format = 'webvtt'
+        if subs is None:
+            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+            return True, info
+        self._downloader.to_screen('[ffmpeg] Converting subtitles')
+        for lang, sub in subs.items():
+            ext = sub['ext']
+            if ext == new_ext:
+                self._downloader.to_screen(
+                    '[ffmpeg] Subtitle file for %s is already in the requested '
+                    'format' % new_ext)
+                continue
+            new_file = subtitles_filename(filename, lang, new_ext)
+            self.run_ffmpeg(
+                subtitles_filename(filename, lang, ext),
+                new_file, ['-f', new_format])
+
+            with io.open(new_file, 'rt', encoding='utf-8') as f:
+                # Record the converted format: the data now comes from
+                # new_file, so the entry's extension must be new_ext.
+                subs[lang] = {
+                    'ext': new_ext,
+                    'data': f.read(),
+                }
+
+        return True, info