_ Git - youtube-dl/blob - youtube_dl/PostProcessor.py

   1 import os
   2 import subprocess
   3 import sys
   4 import time
   5 import datetime
   6
   7 from .utils import *
   8
   9
  10 class PostProcessor(object):
  11     """Post Processor class.
  12
  13     PostProcessor objects can be added to downloaders with their
  14     add_post_processor() method. When the downloader has finished a
  15     successful download, it will take its internal chain of PostProcessors
  16     and start calling the run() method on each one of them, first with
  17     an initial argument and then with the returned value of the previous
  18     PostProcessor.
  19
  20     The chain will be stopped if one of them ever returns None or the end
  21     of the chain is reached.
  22
  23     PostProcessor objects follow a "mutual registration" process similar
  24     to InfoExtractor objects.
  25     """
  26
  27     _downloader = None
  28
  29     def __init__(self, downloader=None):
  30         self._downloader = downloader
  31
  32     def set_downloader(self, downloader):
  33         """Sets the downloader for this PP."""
  34         self._downloader = downloader
  35
  36     def run(self, information):
  37         """Run the PostProcessor.
  38
  39         The "information" argument is a dictionary like the ones
  40         composed by InfoExtractors. The only difference is that this
  41         one has an extra field called "filepath" that points to the
  42         downloaded file.
  43
  44         This method returns a tuple, the first element of which describes
  45         whether the original file should be kept (i.e. not deleted - None for
  46         no preference), and the second of which is the updated information.
  47
  48         In addition, this method may raise a PostProcessingError
  49         exception if post processing fails.
  50         """
  51         return None, information # by default, keep file and do nothing
  52
  53 class FFmpegPostProcessorError(PostProcessingError):
  54     pass
  55
  56 class AudioConversionError(PostProcessingError):
  57     pass
  58
  59 class FFmpegPostProcessor(PostProcessor):
  60     def __init__(self,downloader=None):
  61         PostProcessor.__init__(self, downloader)
  62         self._exes = self.detect_executables()
  63
  64     @staticmethod
  65     def detect_executables():
  66         def executable(exe):
  67             try:
  68                 subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
  69             except OSError:
  70                 return False
  71             return exe
  72         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
  73         return dict((program, executable(program)) for program in programs)
  74
  75     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
  76         if not self._exes['ffmpeg'] and not self._exes['avconv']:
  77             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
  78
  79         files_cmd = []
  80         for path in input_paths:
  81             files_cmd.extend(['-i', encodeFilename(path)])
  82         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
  83                + opts +
  84                [encodeFilename(self._ffmpeg_filename_argument(out_path))])
  85
  86         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  87         stdout,stderr = p.communicate()
  88         if p.returncode != 0:
  89             stderr = stderr.decode('utf-8', 'replace')
  90             msg = stderr.strip().split('\n')[-1]
  91             raise FFmpegPostProcessorError(msg)
  92
  93     def run_ffmpeg(self, path, out_path, opts):
  94         self.run_ffmpeg_multiple_files([path], out_path, opts)
  95
  96     def _ffmpeg_filename_argument(self, fn):
  97         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
  98         if fn.startswith(u'-'):
  99             return u'./' + fn
 100         return fn
 101
 102 class FFmpegExtractAudioPP(FFmpegPostProcessor):
 103     def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
 104         FFmpegPostProcessor.__init__(self, downloader)
 105         if preferredcodec is None:
 106             preferredcodec = 'best'
 107         self._preferredcodec = preferredcodec
 108         self._preferredquality = preferredquality
 109         self._nopostoverwrites = nopostoverwrites
 110
 111     def get_audio_codec(self, path):
 112         if not self._exes['ffprobe'] and not self._exes['avprobe']:
 113             raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
 114         try:
 115             cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
 116             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
 117             output = handle.communicate()[0]
 118             if handle.wait() != 0:
 119                 return None
 120         except (IOError, OSError):
 121             return None
 122         audio_codec = None
 123         for line in output.decode('ascii', 'ignore').split('\n'):
 124             if line.startswith('codec_name='):
 125                 audio_codec = line.split('=')[1].strip()
 126             elif line.strip() == 'codec_type=audio' and audio_codec is not None:
 127                 return audio_codec
 128         return None
 129
 130     def run_ffmpeg(self, path, out_path, codec, more_opts):
 131         if not self._exes['ffmpeg'] and not self._exes['avconv']:
 132             raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
 133         if codec is None:
 134             acodec_opts = []
 135         else:
 136             acodec_opts = ['-acodec', codec]
 137         opts = ['-vn'] + acodec_opts + more_opts
 138         try:
 139             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
 140         except FFmpegPostProcessorError as err:
 141             raise AudioConversionError(err.msg)
 142
 143     def run(self, information):
 144         path = information['filepath']
 145
 146         filecodec = self.get_audio_codec(path)
 147         if filecodec is None:
 148             raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
 149
 150         more_opts = []
 151         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
 152             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
 153                 # Lossless, but in another container
 154                 acodec = 'copy'
 155                 extension = 'm4a'
 156                 more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 157             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
 158                 # Lossless if possible
 159                 acodec = 'copy'
 160                 extension = filecodec
 161                 if filecodec == 'aac':
 162                     more_opts = ['-f', 'adts']
 163                 if filecodec == 'vorbis':
 164                     extension = 'ogg'
 165             else:
 166                 # MP3 otherwise.
 167                 acodec = 'libmp3lame'
 168                 extension = 'mp3'
 169                 more_opts = []
 170                 if self._preferredquality is not None:
 171                     if int(self._preferredquality) < 10:
 172                         more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 173                     else:
 174                         more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 175         else:
 176             # We convert the audio (lossy)
 177             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
 178             extension = self._preferredcodec
 179             more_opts = []
 180             if self._preferredquality is not None:
 181                 # The opus codec doesn't support the -aq option
 182                 if int(self._preferredquality) < 10 and extension != 'opus':
 183                     more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 184                 else:
 185                     more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 186             if self._preferredcodec == 'aac':
 187                 more_opts += ['-f', 'adts']
 188             if self._preferredcodec == 'm4a':
 189                 more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 190             if self._preferredcodec == 'vorbis':
 191                 extension = 'ogg'
 192             if self._preferredcodec == 'wav':
 193                 extension = 'wav'
 194                 more_opts += ['-f', 'wav']
 195
 196         prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
 197         new_path = prefix + sep + extension
 198
 199         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
 200         if new_path == path:
 201             self._nopostoverwrites = True
 202
 203         try:
 204             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
 205                 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
 206             else:
 207                 self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
 208                 self.run_ffmpeg(path, new_path, acodec, more_opts)
 209         except:
 210             etype,e,tb = sys.exc_info()
 211             if isinstance(e, AudioConversionError):
 212                 msg = u'audio conversion failed: ' + e.msg
 213             else:
 214                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
 215             raise PostProcessingError(msg)
 216
 217         # Try to update the date time for extracted audio file.
 218         if information.get('filetime') is not None:
 219             try:
 220                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
 221             except:
 222                 self._downloader.report_warning(u'Cannot update utime of audio file')
 223
 224         information['filepath'] = new_path
 225         return self._nopostoverwrites,information
 226
 227 class FFmpegVideoConvertor(FFmpegPostProcessor):
 228     def __init__(self, downloader=None,preferedformat=None):
 229         super(FFmpegVideoConvertor, self).__init__(downloader)
 230         self._preferedformat=preferedformat
 231
 232     def run(self, information):
 233         path = information['filepath']
 234         prefix, sep, ext = path.rpartition(u'.')
 235         outpath = prefix + sep + self._preferedformat
 236         if information['ext'] == self._preferedformat:
 237             self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
 238             return True,information
 239         self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
 240         self.run_ffmpeg(path, outpath, [])
 241         information['filepath'] = outpath
 242         information['format'] = self._preferedformat
 243         information['ext'] = self._preferedformat
 244         return False,information
 245
 246
 247 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 248     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 249     _lang_map = {
 250         'aa': 'aar',
 251         'ab': 'abk',
 252         'ae': 'ave',
 253         'af': 'afr',
 254         'ak': 'aka',
 255         'am': 'amh',
 256         'an': 'arg',
 257         'ar': 'ara',
 258         'as': 'asm',
 259         'av': 'ava',
 260         'ay': 'aym',
 261         'az': 'aze',
 262         'ba': 'bak',
 263         'be': 'bel',
 264         'bg': 'bul',
 265         'bh': 'bih',
 266         'bi': 'bis',
 267         'bm': 'bam',
 268         'bn': 'ben',
 269         'bo': 'bod',
 270         'br': 'bre',
 271         'bs': 'bos',
 272         'ca': 'cat',
 273         'ce': 'che',
 274         'ch': 'cha',
 275         'co': 'cos',
 276         'cr': 'cre',
 277         'cs': 'ces',
 278         'cu': 'chu',
 279         'cv': 'chv',
 280         'cy': 'cym',
 281         'da': 'dan',
 282         'de': 'deu',
 283         'dv': 'div',
 284         'dz': 'dzo',
 285         'ee': 'ewe',
 286         'el': 'ell',
 287         'en': 'eng',
 288         'eo': 'epo',
 289         'es': 'spa',
 290         'et': 'est',
 291         'eu': 'eus',
 292         'fa': 'fas',
 293         'ff': 'ful',
 294         'fi': 'fin',
 295         'fj': 'fij',
 296         'fo': 'fao',
 297         'fr': 'fra',
 298         'fy': 'fry',
 299         'ga': 'gle',
 300         'gd': 'gla',
 301         'gl': 'glg',
 302         'gn': 'grn',
 303         'gu': 'guj',
 304         'gv': 'glv',
 305         'ha': 'hau',
 306         'he': 'heb',
 307         'hi': 'hin',
 308         'ho': 'hmo',
 309         'hr': 'hrv',
 310         'ht': 'hat',
 311         'hu': 'hun',
 312         'hy': 'hye',
 313         'hz': 'her',
 314         'ia': 'ina',
 315         'id': 'ind',
 316         'ie': 'ile',
 317         'ig': 'ibo',
 318         'ii': 'iii',
 319         'ik': 'ipk',
 320         'io': 'ido',
 321         'is': 'isl',
 322         'it': 'ita',
 323         'iu': 'iku',
 324         'ja': 'jpn',
 325         'jv': 'jav',
 326         'ka': 'kat',
 327         'kg': 'kon',
 328         'ki': 'kik',
 329         'kj': 'kua',
 330         'kk': 'kaz',
 331         'kl': 'kal',
 332         'km': 'khm',
 333         'kn': 'kan',
 334         'ko': 'kor',
 335         'kr': 'kau',
 336         'ks': 'kas',
 337         'ku': 'kur',
 338         'kv': 'kom',
 339         'kw': 'cor',
 340         'ky': 'kir',
 341         'la': 'lat',
 342         'lb': 'ltz',
 343         'lg': 'lug',
 344         'li': 'lim',
 345         'ln': 'lin',
 346         'lo': 'lao',
 347         'lt': 'lit',
 348         'lu': 'lub',
 349         'lv': 'lav',
 350         'mg': 'mlg',
 351         'mh': 'mah',
 352         'mi': 'mri',
 353         'mk': 'mkd',
 354         'ml': 'mal',
 355         'mn': 'mon',
 356         'mr': 'mar',
 357         'ms': 'msa',
 358         'mt': 'mlt',
 359         'my': 'mya',
 360         'na': 'nau',
 361         'nb': 'nob',
 362         'nd': 'nde',
 363         'ne': 'nep',
 364         'ng': 'ndo',
 365         'nl': 'nld',
 366         'nn': 'nno',
 367         'no': 'nor',
 368         'nr': 'nbl',
 369         'nv': 'nav',
 370         'ny': 'nya',
 371         'oc': 'oci',
 372         'oj': 'oji',
 373         'om': 'orm',
 374         'or': 'ori',
 375         'os': 'oss',
 376         'pa': 'pan',
 377         'pi': 'pli',
 378         'pl': 'pol',
 379         'ps': 'pus',
 380         'pt': 'por',
 381         'qu': 'que',
 382         'rm': 'roh',
 383         'rn': 'run',
 384         'ro': 'ron',
 385         'ru': 'rus',
 386         'rw': 'kin',
 387         'sa': 'san',
 388         'sc': 'srd',
 389         'sd': 'snd',
 390         'se': 'sme',
 391         'sg': 'sag',
 392         'si': 'sin',
 393         'sk': 'slk',
 394         'sl': 'slv',
 395         'sm': 'smo',
 396         'sn': 'sna',
 397         'so': 'som',
 398         'sq': 'sqi',
 399         'sr': 'srp',
 400         'ss': 'ssw',
 401         'st': 'sot',
 402         'su': 'sun',
 403         'sv': 'swe',
 404         'sw': 'swa',
 405         'ta': 'tam',
 406         'te': 'tel',
 407         'tg': 'tgk',
 408         'th': 'tha',
 409         'ti': 'tir',
 410         'tk': 'tuk',
 411         'tl': 'tgl',
 412         'tn': 'tsn',
 413         'to': 'ton',
 414         'tr': 'tur',
 415         'ts': 'tso',
 416         'tt': 'tat',
 417         'tw': 'twi',
 418         'ty': 'tah',
 419         'ug': 'uig',
 420         'uk': 'ukr',
 421         'ur': 'urd',
 422         'uz': 'uzb',
 423         've': 'ven',
 424         'vi': 'vie',
 425         'vo': 'vol',
 426         'wa': 'wln',
 427         'wo': 'wol',
 428         'xh': 'xho',
 429         'yi': 'yid',
 430         'yo': 'yor',
 431         'za': 'zha',
 432         'zh': 'zho',
 433         'zu': 'zul',
 434     }
 435
 436     def __init__(self, downloader=None, subtitlesformat='srt'):
 437         super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
 438         self._subformat = subtitlesformat
 439
 440     @classmethod
 441     def _conver_lang_code(cls, code):
 442         """Convert language code from ISO 639-1 to ISO 639-2/T"""
 443         return cls._lang_map.get(code[:2])
 444
 445     def run(self, information):
 446         if information['ext'] != u'mp4':
 447             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
 448             return True, information
 449         if not information.get('subtitles'):
 450             self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
 451             return True, information
 452
 453         sub_langs = [key for key in information['subtitles']]
 454         filename = information['filepath']
 455         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
 456
 457         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
 458         for (i, lang) in enumerate(sub_langs):
 459             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
 460             lang_code = self._conver_lang_code(lang)
 461             if lang_code is not None:
 462                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 463         opts.extend(['-f', 'mp4'])
 464
 465         temp_filename = filename + u'.temp'
 466         self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
 467         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
 468         os.remove(encodeFilename(filename))
 469         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 470
 471         return True, information
 472
 473
 474 class FFmpegMetadataPP(FFmpegPostProcessor):
 475     def run(self, info):
 476         metadata = {}
 477         if info.get('title') is not None:
 478             metadata['title'] = info['title']
 479         if info.get('upload_date') is not None:
 480             metadata['date'] = info['upload_date']
 481         if info.get('uploader') is not None:
 482             metadata['artist'] = info['uploader']
 483         elif info.get('uploader_id') is not None:
 484             metadata['artist'] = info['uploader_id']
 485
 486         if not metadata:
 487             self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
 488             return True, info
 489
 490         filename = info['filepath']
 491         ext = os.path.splitext(filename)[1][1:]
 492         temp_filename = filename + u'.temp'
 493
 494         options = ['-c', 'copy']
 495         for (name, value) in metadata.items():
 496             options.extend(['-metadata', '%s="%s"' % (name, value)])
 497         options.extend(['-f', ext])
 498
 499         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
 500         self.run_ffmpeg(filename, temp_filename, options)
 501         os.remove(encodeFilename(filename))
 502         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 503         return True, info