_ Git - youtube-dl/blob - youtube_dl/PostProcessor.py

   1 import os
   2 import subprocess
   3 import sys
   4 import time
   5
   6
   7 from .utils import (
   8     compat_subprocess_get_DEVNULL,
   9     encodeFilename,
  10     PostProcessingError,
  11     shell_quote,
  12     subtitles_filename,
  13     prepend_extension,
  14 )
  15
  16
  17 class PostProcessor(object):
  18     """Post Processor class.
  19
  20     PostProcessor objects can be added to downloaders with their
  21     add_post_processor() method. When the downloader has finished a
  22     successful download, it will take its internal chain of PostProcessors
  23     and start calling the run() method on each one of them, first with
  24     an initial argument and then with the returned value of the previous
  25     PostProcessor.
  26
  27     The chain will be stopped if one of them ever returns None or the end
  28     of the chain is reached.
  29
  30     PostProcessor objects follow a "mutual registration" process similar
  31     to InfoExtractor objects.
  32     """
  33
  34     _downloader = None
  35
  36     def __init__(self, downloader=None):
  37         self._downloader = downloader
  38
  39     def set_downloader(self, downloader):
  40         """Sets the downloader for this PP."""
  41         self._downloader = downloader
  42
  43     def run(self, information):
  44         """Run the PostProcessor.
  45
  46         The "information" argument is a dictionary like the ones
  47         composed by InfoExtractors. The only difference is that this
  48         one has an extra field called "filepath" that points to the
  49         downloaded file.
  50
  51         This method returns a tuple, the first element of which describes
  52         whether the original file should be kept (i.e. not deleted - None for
  53         no preference), and the second of which is the updated information.
  54
  55         In addition, this method may raise a PostProcessingError
  56         exception if post processing fails.
  57         """
  58         return None, information # by default, keep file and do nothing
  59
  60 class FFmpegPostProcessorError(PostProcessingError):
  61     pass
  62
  63 class AudioConversionError(PostProcessingError):
  64     pass
  65
  66 class FFmpegPostProcessor(PostProcessor):
  67     def __init__(self,downloader=None):
  68         PostProcessor.__init__(self, downloader)
  69         self._exes = self.detect_executables()
  70
  71     @staticmethod
  72     def detect_executables():
  73         def executable(exe):
  74             try:
  75                 subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
  76             except OSError:
  77                 return False
  78             return exe
  79         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
  80         return dict((program, executable(program)) for program in programs)
  81
  82     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
  83         if not self._exes['ffmpeg'] and not self._exes['avconv']:
  84             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
  85
  86         files_cmd = []
  87         for path in input_paths:
  88             files_cmd.extend(['-i', encodeFilename(path)])
  89         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
  90                + opts +
  91                [encodeFilename(self._ffmpeg_filename_argument(out_path))])
  92
  93         if self._downloader.params.get('verbose', False):
  94             self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
  95         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  96         stdout,stderr = p.communicate()
  97         if p.returncode != 0:
  98             stderr = stderr.decode('utf-8', 'replace')
  99             msg = stderr.strip().split('\n')[-1]
 100             raise FFmpegPostProcessorError(msg)
 101
 102     def run_ffmpeg(self, path, out_path, opts):
 103         self.run_ffmpeg_multiple_files([path], out_path, opts)
 104
 105     def _ffmpeg_filename_argument(self, fn):
 106         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
 107         if fn.startswith(u'-'):
 108             return u'./' + fn
 109         return fn
 110
 111 class FFmpegExtractAudioPP(FFmpegPostProcessor):
 112     def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
 113         FFmpegPostProcessor.__init__(self, downloader)
 114         if preferredcodec is None:
 115             preferredcodec = 'best'
 116         self._preferredcodec = preferredcodec
 117         self._preferredquality = preferredquality
 118         self._nopostoverwrites = nopostoverwrites
 119
 120     def get_audio_codec(self, path):
 121         if not self._exes['ffprobe'] and not self._exes['avprobe']:
 122             raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
 123         try:
 124             cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
 125             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
 126             output = handle.communicate()[0]
 127             if handle.wait() != 0:
 128                 return None
 129         except (IOError, OSError):
 130             return None
 131         audio_codec = None
 132         for line in output.decode('ascii', 'ignore').split('\n'):
 133             if line.startswith('codec_name='):
 134                 audio_codec = line.split('=')[1].strip()
 135             elif line.strip() == 'codec_type=audio' and audio_codec is not None:
 136                 return audio_codec
 137         return None
 138
 139     def run_ffmpeg(self, path, out_path, codec, more_opts):
 140         if not self._exes['ffmpeg'] and not self._exes['avconv']:
 141             raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
 142         if codec is None:
 143             acodec_opts = []
 144         else:
 145             acodec_opts = ['-acodec', codec]
 146         opts = ['-vn'] + acodec_opts + more_opts
 147         try:
 148             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
 149         except FFmpegPostProcessorError as err:
 150             raise AudioConversionError(err.msg)
 151
 152     def run(self, information):
 153         path = information['filepath']
 154
 155         filecodec = self.get_audio_codec(path)
 156         if filecodec is None:
 157             raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
 158
 159         more_opts = []
 160         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
 161             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
 162                 # Lossless, but in another container
 163                 acodec = 'copy'
 164                 extension = 'm4a'
 165                 more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 166             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
 167                 # Lossless if possible
 168                 acodec = 'copy'
 169                 extension = filecodec
 170                 if filecodec == 'aac':
 171                     more_opts = ['-f', 'adts']
 172                 if filecodec == 'vorbis':
 173                     extension = 'ogg'
 174             else:
 175                 # MP3 otherwise.
 176                 acodec = 'libmp3lame'
 177                 extension = 'mp3'
 178                 more_opts = []
 179                 if self._preferredquality is not None:
 180                     if int(self._preferredquality) < 10:
 181                         more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 182                     else:
 183                         more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 184         else:
 185             # We convert the audio (lossy)
 186             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
 187             extension = self._preferredcodec
 188             more_opts = []
 189             if self._preferredquality is not None:
 190                 # The opus codec doesn't support the -aq option
 191                 if int(self._preferredquality) < 10 and extension != 'opus':
 192                     more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 193                 else:
 194                     more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 195             if self._preferredcodec == 'aac':
 196                 more_opts += ['-f', 'adts']
 197             if self._preferredcodec == 'm4a':
 198                 more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 199             if self._preferredcodec == 'vorbis':
 200                 extension = 'ogg'
 201             if self._preferredcodec == 'wav':
 202                 extension = 'wav'
 203                 more_opts += ['-f', 'wav']
 204
 205         prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
 206         new_path = prefix + sep + extension
 207
 208         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
 209         if new_path == path:
 210             self._nopostoverwrites = True
 211
 212         try:
 213             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
 214                 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
 215             else:
 216                 self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
 217                 self.run_ffmpeg(path, new_path, acodec, more_opts)
 218         except:
 219             etype,e,tb = sys.exc_info()
 220             if isinstance(e, AudioConversionError):
 221                 msg = u'audio conversion failed: ' + e.msg
 222             else:
 223                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
 224             raise PostProcessingError(msg)
 225
 226         # Try to update the date time for extracted audio file.
 227         if information.get('filetime') is not None:
 228             try:
 229                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
 230             except:
 231                 self._downloader.report_warning(u'Cannot update utime of audio file')
 232
 233         information['filepath'] = new_path
 234         return self._nopostoverwrites,information
 235
 236 class FFmpegVideoConvertor(FFmpegPostProcessor):
 237     def __init__(self, downloader=None,preferedformat=None):
 238         super(FFmpegVideoConvertor, self).__init__(downloader)
 239         self._preferedformat=preferedformat
 240
 241     def run(self, information):
 242         path = information['filepath']
 243         prefix, sep, ext = path.rpartition(u'.')
 244         outpath = prefix + sep + self._preferedformat
 245         if information['ext'] == self._preferedformat:
 246             self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
 247             return True,information
 248         self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
 249         self.run_ffmpeg(path, outpath, [])
 250         information['filepath'] = outpath
 251         information['format'] = self._preferedformat
 252         information['ext'] = self._preferedformat
 253         return False,information
 254
 255
 256 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 257     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 258     _lang_map = {
 259         'aa': 'aar',
 260         'ab': 'abk',
 261         'ae': 'ave',
 262         'af': 'afr',
 263         'ak': 'aka',
 264         'am': 'amh',
 265         'an': 'arg',
 266         'ar': 'ara',
 267         'as': 'asm',
 268         'av': 'ava',
 269         'ay': 'aym',
 270         'az': 'aze',
 271         'ba': 'bak',
 272         'be': 'bel',
 273         'bg': 'bul',
 274         'bh': 'bih',
 275         'bi': 'bis',
 276         'bm': 'bam',
 277         'bn': 'ben',
 278         'bo': 'bod',
 279         'br': 'bre',
 280         'bs': 'bos',
 281         'ca': 'cat',
 282         'ce': 'che',
 283         'ch': 'cha',
 284         'co': 'cos',
 285         'cr': 'cre',
 286         'cs': 'ces',
 287         'cu': 'chu',
 288         'cv': 'chv',
 289         'cy': 'cym',
 290         'da': 'dan',
 291         'de': 'deu',
 292         'dv': 'div',
 293         'dz': 'dzo',
 294         'ee': 'ewe',
 295         'el': 'ell',
 296         'en': 'eng',
 297         'eo': 'epo',
 298         'es': 'spa',
 299         'et': 'est',
 300         'eu': 'eus',
 301         'fa': 'fas',
 302         'ff': 'ful',
 303         'fi': 'fin',
 304         'fj': 'fij',
 305         'fo': 'fao',
 306         'fr': 'fra',
 307         'fy': 'fry',
 308         'ga': 'gle',
 309         'gd': 'gla',
 310         'gl': 'glg',
 311         'gn': 'grn',
 312         'gu': 'guj',
 313         'gv': 'glv',
 314         'ha': 'hau',
 315         'he': 'heb',
 316         'hi': 'hin',
 317         'ho': 'hmo',
 318         'hr': 'hrv',
 319         'ht': 'hat',
 320         'hu': 'hun',
 321         'hy': 'hye',
 322         'hz': 'her',
 323         'ia': 'ina',
 324         'id': 'ind',
 325         'ie': 'ile',
 326         'ig': 'ibo',
 327         'ii': 'iii',
 328         'ik': 'ipk',
 329         'io': 'ido',
 330         'is': 'isl',
 331         'it': 'ita',
 332         'iu': 'iku',
 333         'ja': 'jpn',
 334         'jv': 'jav',
 335         'ka': 'kat',
 336         'kg': 'kon',
 337         'ki': 'kik',
 338         'kj': 'kua',
 339         'kk': 'kaz',
 340         'kl': 'kal',
 341         'km': 'khm',
 342         'kn': 'kan',
 343         'ko': 'kor',
 344         'kr': 'kau',
 345         'ks': 'kas',
 346         'ku': 'kur',
 347         'kv': 'kom',
 348         'kw': 'cor',
 349         'ky': 'kir',
 350         'la': 'lat',
 351         'lb': 'ltz',
 352         'lg': 'lug',
 353         'li': 'lim',
 354         'ln': 'lin',
 355         'lo': 'lao',
 356         'lt': 'lit',
 357         'lu': 'lub',
 358         'lv': 'lav',
 359         'mg': 'mlg',
 360         'mh': 'mah',
 361         'mi': 'mri',
 362         'mk': 'mkd',
 363         'ml': 'mal',
 364         'mn': 'mon',
 365         'mr': 'mar',
 366         'ms': 'msa',
 367         'mt': 'mlt',
 368         'my': 'mya',
 369         'na': 'nau',
 370         'nb': 'nob',
 371         'nd': 'nde',
 372         'ne': 'nep',
 373         'ng': 'ndo',
 374         'nl': 'nld',
 375         'nn': 'nno',
 376         'no': 'nor',
 377         'nr': 'nbl',
 378         'nv': 'nav',
 379         'ny': 'nya',
 380         'oc': 'oci',
 381         'oj': 'oji',
 382         'om': 'orm',
 383         'or': 'ori',
 384         'os': 'oss',
 385         'pa': 'pan',
 386         'pi': 'pli',
 387         'pl': 'pol',
 388         'ps': 'pus',
 389         'pt': 'por',
 390         'qu': 'que',
 391         'rm': 'roh',
 392         'rn': 'run',
 393         'ro': 'ron',
 394         'ru': 'rus',
 395         'rw': 'kin',
 396         'sa': 'san',
 397         'sc': 'srd',
 398         'sd': 'snd',
 399         'se': 'sme',
 400         'sg': 'sag',
 401         'si': 'sin',
 402         'sk': 'slk',
 403         'sl': 'slv',
 404         'sm': 'smo',
 405         'sn': 'sna',
 406         'so': 'som',
 407         'sq': 'sqi',
 408         'sr': 'srp',
 409         'ss': 'ssw',
 410         'st': 'sot',
 411         'su': 'sun',
 412         'sv': 'swe',
 413         'sw': 'swa',
 414         'ta': 'tam',
 415         'te': 'tel',
 416         'tg': 'tgk',
 417         'th': 'tha',
 418         'ti': 'tir',
 419         'tk': 'tuk',
 420         'tl': 'tgl',
 421         'tn': 'tsn',
 422         'to': 'ton',
 423         'tr': 'tur',
 424         'ts': 'tso',
 425         'tt': 'tat',
 426         'tw': 'twi',
 427         'ty': 'tah',
 428         'ug': 'uig',
 429         'uk': 'ukr',
 430         'ur': 'urd',
 431         'uz': 'uzb',
 432         've': 'ven',
 433         'vi': 'vie',
 434         'vo': 'vol',
 435         'wa': 'wln',
 436         'wo': 'wol',
 437         'xh': 'xho',
 438         'yi': 'yid',
 439         'yo': 'yor',
 440         'za': 'zha',
 441         'zh': 'zho',
 442         'zu': 'zul',
 443     }
 444
 445     def __init__(self, downloader=None, subtitlesformat='srt'):
 446         super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
 447         self._subformat = subtitlesformat
 448
 449     @classmethod
 450     def _conver_lang_code(cls, code):
 451         """Convert language code from ISO 639-1 to ISO 639-2/T"""
 452         return cls._lang_map.get(code[:2])
 453
 454     def run(self, information):
 455         if information['ext'] != u'mp4':
 456             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
 457             return True, information
 458         if not information.get('subtitles'):
 459             self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
 460             return True, information
 461
 462         sub_langs = [key for key in information['subtitles']]
 463         filename = information['filepath']
 464         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
 465
 466         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
 467         for (i, lang) in enumerate(sub_langs):
 468             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
 469             lang_code = self._conver_lang_code(lang)
 470             if lang_code is not None:
 471                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 472         opts.extend(['-f', 'mp4'])
 473
 474         temp_filename = filename + u'.temp'
 475         self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
 476         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
 477         os.remove(encodeFilename(filename))
 478         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 479
 480         return True, information
 481
 482
 483 class FFmpegMetadataPP(FFmpegPostProcessor):
 484     def run(self, info):
 485         metadata = {}
 486         if info.get('title') is not None:
 487             metadata['title'] = info['title']
 488         if info.get('upload_date') is not None:
 489             metadata['date'] = info['upload_date']
 490         if info.get('uploader') is not None:
 491             metadata['artist'] = info['uploader']
 492         elif info.get('uploader_id') is not None:
 493             metadata['artist'] = info['uploader_id']
 494
 495         if not metadata:
 496             self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
 497             return True, info
 498
 499         filename = info['filepath']
 500         temp_filename = prepend_extension(filename, 'temp')
 501
 502         options = ['-c', 'copy']
 503         for (name, value) in metadata.items():
 504             options.extend(['-metadata', '%s=%s' % (name, value)])
 505
 506         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
 507         self.run_ffmpeg(filename, temp_filename, options)
 508         os.remove(encodeFilename(filename))
 509         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 510         return True, info