_ Git - youtube-dl/blob - youtube_dl/PostProcessor.py

   1 import os
   2 import subprocess
   3 import sys
   4 import time
   5
   6
   7 from .utils import (
   8     compat_subprocess_get_DEVNULL,
   9     encodeFilename,
  10     PostProcessingError,
  11     shell_quote,
  12     subtitles_filename,
  13     prepend_extension,
  14 )
  15
  16
  17 class PostProcessor(object):
  18     """Post Processor class.
  19
  20     PostProcessor objects can be added to downloaders with their
  21     add_post_processor() method. When the downloader has finished a
  22     successful download, it will take its internal chain of PostProcessors
  23     and start calling the run() method on each one of them, first with
  24     an initial argument and then with the returned value of the previous
  25     PostProcessor.
  26
  27     The chain will be stopped if one of them ever returns None or the end
  28     of the chain is reached.
  29
  30     PostProcessor objects follow a "mutual registration" process similar
  31     to InfoExtractor objects.
  32     """
  33
  34     _downloader = None
  35
  36     def __init__(self, downloader=None):
  37         self._downloader = downloader
  38
  39     def set_downloader(self, downloader):
  40         """Sets the downloader for this PP."""
  41         self._downloader = downloader
  42
  43     def run(self, information):
  44         """Run the PostProcessor.
  45
  46         The "information" argument is a dictionary like the ones
  47         composed by InfoExtractors. The only difference is that this
  48         one has an extra field called "filepath" that points to the
  49         downloaded file.
  50
  51         This method returns a tuple, the first element of which describes
  52         whether the original file should be kept (i.e. not deleted - None for
  53         no preference), and the second of which is the updated information.
  54
  55         In addition, this method may raise a PostProcessingError
  56         exception if post processing fails.
  57         """
  58         return None, information # by default, keep file and do nothing
  59
  60 class FFmpegPostProcessorError(PostProcessingError):
  61     pass
  62
  63 class AudioConversionError(PostProcessingError):
  64     pass
  65
  66 class FFmpegPostProcessor(PostProcessor):
  67     def __init__(self,downloader=None):
  68         PostProcessor.__init__(self, downloader)
  69         self._exes = self.detect_executables()
  70
  71     @staticmethod
  72     def detect_executables():
  73         def executable(exe):
  74             try:
  75                 subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
  76             except OSError:
  77                 return False
  78             return exe
  79         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
  80         return dict((program, executable(program)) for program in programs)
  81
  82     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
  83         if not self._exes['ffmpeg'] and not self._exes['avconv']:
  84             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
  85
  86         files_cmd = []
  87         for path in input_paths:
  88             files_cmd.extend(['-i', encodeFilename(path, True)])
  89         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
  90                + opts +
  91                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
  92
  93         if self._downloader.params.get('verbose', False):
  94             self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
  95         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  96         stdout,stderr = p.communicate()
  97         if p.returncode != 0:
  98             stderr = stderr.decode('utf-8', 'replace')
  99             msg = stderr.strip().split('\n')[-1]
 100             raise FFmpegPostProcessorError(msg)
 101
 102     def run_ffmpeg(self, path, out_path, opts):
 103         self.run_ffmpeg_multiple_files([path], out_path, opts)
 104
 105     def _ffmpeg_filename_argument(self, fn):
 106         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
 107         if fn.startswith(u'-'):
 108             return u'./' + fn
 109         return fn
 110
 111 class FFmpegExtractAudioPP(FFmpegPostProcessor):
 112     def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
 113         FFmpegPostProcessor.__init__(self, downloader)
 114         if preferredcodec is None:
 115             preferredcodec = 'best'
 116         self._preferredcodec = preferredcodec
 117         self._preferredquality = preferredquality
 118         self._nopostoverwrites = nopostoverwrites
 119
 120     def get_audio_codec(self, path):
 121         if not self._exes['ffprobe'] and not self._exes['avprobe']:
 122             raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
 123         try:
 124             cmd = [
 125                 self._exes['avprobe'] or self._exes['ffprobe'],
 126                 '-show_streams',
 127                 encodeFilename(self._ffmpeg_filename_argument(path), True)]
 128             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
 129             output = handle.communicate()[0]
 130             if handle.wait() != 0:
 131                 return None
 132         except (IOError, OSError):
 133             return None
 134         audio_codec = None
 135         for line in output.decode('ascii', 'ignore').split('\n'):
 136             if line.startswith('codec_name='):
 137                 audio_codec = line.split('=')[1].strip()
 138             elif line.strip() == 'codec_type=audio' and audio_codec is not None:
 139                 return audio_codec
 140         return None
 141
 142     def run_ffmpeg(self, path, out_path, codec, more_opts):
 143         if not self._exes['ffmpeg'] and not self._exes['avconv']:
 144             raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
 145         if codec is None:
 146             acodec_opts = []
 147         else:
 148             acodec_opts = ['-acodec', codec]
 149         opts = ['-vn'] + acodec_opts + more_opts
 150         try:
 151             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
 152         except FFmpegPostProcessorError as err:
 153             raise AudioConversionError(err.msg)
 154
 155     def run(self, information):
 156         path = information['filepath']
 157
 158         filecodec = self.get_audio_codec(path)
 159         if filecodec is None:
 160             raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
 161
 162         more_opts = []
 163         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
 164             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
 165                 # Lossless, but in another container
 166                 acodec = 'copy'
 167                 extension = 'm4a'
 168                 more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 169             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
 170                 # Lossless if possible
 171                 acodec = 'copy'
 172                 extension = filecodec
 173                 if filecodec == 'aac':
 174                     more_opts = ['-f', 'adts']
 175                 if filecodec == 'vorbis':
 176                     extension = 'ogg'
 177             else:
 178                 # MP3 otherwise.
 179                 acodec = 'libmp3lame'
 180                 extension = 'mp3'
 181                 more_opts = []
 182                 if self._preferredquality is not None:
 183                     if int(self._preferredquality) < 10:
 184                         more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 185                     else:
 186                         more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 187         else:
 188             # We convert the audio (lossy)
 189             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
 190             extension = self._preferredcodec
 191             more_opts = []
 192             if self._preferredquality is not None:
 193                 # The opus codec doesn't support the -aq option
 194                 if int(self._preferredquality) < 10 and extension != 'opus':
 195                     more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 196                 else:
 197                     more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 198             if self._preferredcodec == 'aac':
 199                 more_opts += ['-f', 'adts']
 200             if self._preferredcodec == 'm4a':
 201                 more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 202             if self._preferredcodec == 'vorbis':
 203                 extension = 'ogg'
 204             if self._preferredcodec == 'wav':
 205                 extension = 'wav'
 206                 more_opts += ['-f', 'wav']
 207
 208         prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
 209         new_path = prefix + sep + extension
 210
 211         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
 212         if new_path == path:
 213             self._nopostoverwrites = True
 214
 215         try:
 216             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
 217                 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
 218             else:
 219                 self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
 220                 self.run_ffmpeg(path, new_path, acodec, more_opts)
 221         except:
 222             etype,e,tb = sys.exc_info()
 223             if isinstance(e, AudioConversionError):
 224                 msg = u'audio conversion failed: ' + e.msg
 225             else:
 226                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
 227             raise PostProcessingError(msg)
 228
 229         # Try to update the date time for extracted audio file.
 230         if information.get('filetime') is not None:
 231             try:
 232                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
 233             except:
 234                 self._downloader.report_warning(u'Cannot update utime of audio file')
 235
 236         information['filepath'] = new_path
 237         return self._nopostoverwrites,information
 238
 239 class FFmpegVideoConvertor(FFmpegPostProcessor):
 240     def __init__(self, downloader=None,preferedformat=None):
 241         super(FFmpegVideoConvertor, self).__init__(downloader)
 242         self._preferedformat=preferedformat
 243
 244     def run(self, information):
 245         path = information['filepath']
 246         prefix, sep, ext = path.rpartition(u'.')
 247         outpath = prefix + sep + self._preferedformat
 248         if information['ext'] == self._preferedformat:
 249             self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
 250             return True,information
 251         self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
 252         self.run_ffmpeg(path, outpath, [])
 253         information['filepath'] = outpath
 254         information['format'] = self._preferedformat
 255         information['ext'] = self._preferedformat
 256         return False,information
 257
 258
 259 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 260     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 261     _lang_map = {
 262         'aa': 'aar',
 263         'ab': 'abk',
 264         'ae': 'ave',
 265         'af': 'afr',
 266         'ak': 'aka',
 267         'am': 'amh',
 268         'an': 'arg',
 269         'ar': 'ara',
 270         'as': 'asm',
 271         'av': 'ava',
 272         'ay': 'aym',
 273         'az': 'aze',
 274         'ba': 'bak',
 275         'be': 'bel',
 276         'bg': 'bul',
 277         'bh': 'bih',
 278         'bi': 'bis',
 279         'bm': 'bam',
 280         'bn': 'ben',
 281         'bo': 'bod',
 282         'br': 'bre',
 283         'bs': 'bos',
 284         'ca': 'cat',
 285         'ce': 'che',
 286         'ch': 'cha',
 287         'co': 'cos',
 288         'cr': 'cre',
 289         'cs': 'ces',
 290         'cu': 'chu',
 291         'cv': 'chv',
 292         'cy': 'cym',
 293         'da': 'dan',
 294         'de': 'deu',
 295         'dv': 'div',
 296         'dz': 'dzo',
 297         'ee': 'ewe',
 298         'el': 'ell',
 299         'en': 'eng',
 300         'eo': 'epo',
 301         'es': 'spa',
 302         'et': 'est',
 303         'eu': 'eus',
 304         'fa': 'fas',
 305         'ff': 'ful',
 306         'fi': 'fin',
 307         'fj': 'fij',
 308         'fo': 'fao',
 309         'fr': 'fra',
 310         'fy': 'fry',
 311         'ga': 'gle',
 312         'gd': 'gla',
 313         'gl': 'glg',
 314         'gn': 'grn',
 315         'gu': 'guj',
 316         'gv': 'glv',
 317         'ha': 'hau',
 318         'he': 'heb',
 319         'hi': 'hin',
 320         'ho': 'hmo',
 321         'hr': 'hrv',
 322         'ht': 'hat',
 323         'hu': 'hun',
 324         'hy': 'hye',
 325         'hz': 'her',
 326         'ia': 'ina',
 327         'id': 'ind',
 328         'ie': 'ile',
 329         'ig': 'ibo',
 330         'ii': 'iii',
 331         'ik': 'ipk',
 332         'io': 'ido',
 333         'is': 'isl',
 334         'it': 'ita',
 335         'iu': 'iku',
 336         'ja': 'jpn',
 337         'jv': 'jav',
 338         'ka': 'kat',
 339         'kg': 'kon',
 340         'ki': 'kik',
 341         'kj': 'kua',
 342         'kk': 'kaz',
 343         'kl': 'kal',
 344         'km': 'khm',
 345         'kn': 'kan',
 346         'ko': 'kor',
 347         'kr': 'kau',
 348         'ks': 'kas',
 349         'ku': 'kur',
 350         'kv': 'kom',
 351         'kw': 'cor',
 352         'ky': 'kir',
 353         'la': 'lat',
 354         'lb': 'ltz',
 355         'lg': 'lug',
 356         'li': 'lim',
 357         'ln': 'lin',
 358         'lo': 'lao',
 359         'lt': 'lit',
 360         'lu': 'lub',
 361         'lv': 'lav',
 362         'mg': 'mlg',
 363         'mh': 'mah',
 364         'mi': 'mri',
 365         'mk': 'mkd',
 366         'ml': 'mal',
 367         'mn': 'mon',
 368         'mr': 'mar',
 369         'ms': 'msa',
 370         'mt': 'mlt',
 371         'my': 'mya',
 372         'na': 'nau',
 373         'nb': 'nob',
 374         'nd': 'nde',
 375         'ne': 'nep',
 376         'ng': 'ndo',
 377         'nl': 'nld',
 378         'nn': 'nno',
 379         'no': 'nor',
 380         'nr': 'nbl',
 381         'nv': 'nav',
 382         'ny': 'nya',
 383         'oc': 'oci',
 384         'oj': 'oji',
 385         'om': 'orm',
 386         'or': 'ori',
 387         'os': 'oss',
 388         'pa': 'pan',
 389         'pi': 'pli',
 390         'pl': 'pol',
 391         'ps': 'pus',
 392         'pt': 'por',
 393         'qu': 'que',
 394         'rm': 'roh',
 395         'rn': 'run',
 396         'ro': 'ron',
 397         'ru': 'rus',
 398         'rw': 'kin',
 399         'sa': 'san',
 400         'sc': 'srd',
 401         'sd': 'snd',
 402         'se': 'sme',
 403         'sg': 'sag',
 404         'si': 'sin',
 405         'sk': 'slk',
 406         'sl': 'slv',
 407         'sm': 'smo',
 408         'sn': 'sna',
 409         'so': 'som',
 410         'sq': 'sqi',
 411         'sr': 'srp',
 412         'ss': 'ssw',
 413         'st': 'sot',
 414         'su': 'sun',
 415         'sv': 'swe',
 416         'sw': 'swa',
 417         'ta': 'tam',
 418         'te': 'tel',
 419         'tg': 'tgk',
 420         'th': 'tha',
 421         'ti': 'tir',
 422         'tk': 'tuk',
 423         'tl': 'tgl',
 424         'tn': 'tsn',
 425         'to': 'ton',
 426         'tr': 'tur',
 427         'ts': 'tso',
 428         'tt': 'tat',
 429         'tw': 'twi',
 430         'ty': 'tah',
 431         'ug': 'uig',
 432         'uk': 'ukr',
 433         'ur': 'urd',
 434         'uz': 'uzb',
 435         've': 'ven',
 436         'vi': 'vie',
 437         'vo': 'vol',
 438         'wa': 'wln',
 439         'wo': 'wol',
 440         'xh': 'xho',
 441         'yi': 'yid',
 442         'yo': 'yor',
 443         'za': 'zha',
 444         'zh': 'zho',
 445         'zu': 'zul',
 446     }
 447
 448     def __init__(self, downloader=None, subtitlesformat='srt'):
 449         super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
 450         self._subformat = subtitlesformat
 451
 452     @classmethod
 453     def _conver_lang_code(cls, code):
 454         """Convert language code from ISO 639-1 to ISO 639-2/T"""
 455         return cls._lang_map.get(code[:2])
 456
 457     def run(self, information):
 458         if information['ext'] != u'mp4':
 459             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
 460             return True, information
 461         if not information.get('subtitles'):
 462             self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
 463             return True, information
 464
 465         sub_langs = [key for key in information['subtitles']]
 466         filename = information['filepath']
 467         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
 468
 469         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
 470         for (i, lang) in enumerate(sub_langs):
 471             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
 472             lang_code = self._conver_lang_code(lang)
 473             if lang_code is not None:
 474                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 475         opts.extend(['-f', 'mp4'])
 476
 477         temp_filename = filename + u'.temp'
 478         self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
 479         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
 480         os.remove(encodeFilename(filename))
 481         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 482
 483         return True, information
 484
 485
 486 class FFmpegMetadataPP(FFmpegPostProcessor):
 487     def run(self, info):
 488         metadata = {}
 489         if info.get('title') is not None:
 490             metadata['title'] = info['title']
 491         if info.get('upload_date') is not None:
 492             metadata['date'] = info['upload_date']
 493         if info.get('uploader') is not None:
 494             metadata['artist'] = info['uploader']
 495         elif info.get('uploader_id') is not None:
 496             metadata['artist'] = info['uploader_id']
 497
 498         if not metadata:
 499             self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
 500             return True, info
 501
 502         filename = info['filepath']
 503         temp_filename = prepend_extension(filename, 'temp')
 504
 505         options = ['-c', 'copy']
 506         for (name, value) in metadata.items():
 507             options.extend(['-metadata', '%s=%s' % (name, value)])
 508
 509         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
 510         self.run_ffmpeg(filename, temp_filename, options)
 511         os.remove(encodeFilename(filename))
 512         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 513         return True, info
 514
 515
 516 class FFmpegMergerPP(FFmpegPostProcessor):
 517     def run(self, info):
 518         filename = info['filepath']
 519         args = ['-c', 'copy']
 520         self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
 521         return True, info