[youtube] Fix extraction.
[youtube-dl] / youtube_dl / postprocessor / embedthumbnail.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4
5 import os
6 import subprocess
7
8 from .ffmpeg import FFmpegPostProcessor
9
10 from ..utils import (
11     check_executable,
12     encodeArgument,
13     encodeFilename,
14     PostProcessingError,
15     prepend_extension,
16     replace_extension,
17     shell_quote
18 )
19
20
class EmbedThumbnailPPError(PostProcessingError):
    """Error raised by EmbedThumbnailPP when a thumbnail cannot be embedded."""
23
24
class EmbedThumbnailPP(FFmpegPostProcessor):
    """Post-processor that embeds the last listed thumbnail into the media file.

    mp3 files are handled with ffmpeg and m4a/mp4 files with the external
    AtomicParsley executable; any other extension raises
    EmbedThumbnailPPError.  Before embedding, a WebP thumbnail carrying a
    wrong extension is renamed to ``.webp`` and any thumbnail that is not
    jpg/png is converted to JPEG with ffmpeg.
    """

    def __init__(self, downloader=None, already_have_thumbnail=False):
        """Set up the post-processor.

        downloader is forwarded to FFmpegPostProcessor.  When
        already_have_thumbnail is true, the thumbnail file is left on disk
        after it has been embedded; otherwise it is removed.
        """
        super(EmbedThumbnailPP, self).__init__(downloader)
        self._already_have_thumbnail = already_have_thumbnail

    @staticmethod
    def _is_webp(path):
        """Return True if the file at path starts with a RIFF/WEBP header."""
        with open(encodeFilename(path), 'rb') as f:
            header = f.read(12)
        return header[0:4] == b'RIFF' and header[8:] == b'WEBP'

    def _correct_webp_extension(self, thumbnail_filename):
        """Rename a WebP thumbnail that carries a non-webp extension.

        Returns a (filename, extension) pair describing the thumbnail after
        the possible rename; the extension is lower-cased and has no leading
        dot (it is empty when the file name has no extension at all).
        """
        # Correct extension for WebP file with wrong extension (see #25687, #25717)
        _, thumbnail_ext = os.path.splitext(thumbnail_filename)
        if not thumbnail_ext:
            return thumbnail_filename, thumbnail_ext

        thumbnail_ext = thumbnail_ext[1:].lower()
        if thumbnail_ext == 'webp' or not self._is_webp(thumbnail_filename):
            return thumbnail_filename, thumbnail_ext

        self._downloader.to_screen(
            '[ffmpeg] Correcting extension to webp and escaping path for thumbnail "%s"' % thumbnail_filename)
        thumbnail_webp_filename = replace_extension(thumbnail_filename, 'webp')
        os.rename(encodeFilename(thumbnail_filename), encodeFilename(thumbnail_webp_filename))
        return thumbnail_webp_filename, 'webp'

    def _convert_thumbnail_to_jpeg(self, thumbnail_filename):
        """Convert the thumbnail to JPEG with ffmpeg and return the new name.

        The source file is removed once the conversion has succeeded.  If
        ffmpeg fails, the thumbnail is renamed back to its original name
        before the error is re-raised.
        """
        # NB: % is supposed to be escaped with %% but this does not work
        # for input files so working around with standard substitution
        escaped_thumbnail_filename = thumbnail_filename.replace('%', '#')
        escaped_thumbnail_jpg_filename = replace_extension(escaped_thumbnail_filename, 'jpg')
        os.rename(encodeFilename(thumbnail_filename), encodeFilename(escaped_thumbnail_filename))
        self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % escaped_thumbnail_filename)
        try:
            self.run_ffmpeg(escaped_thumbnail_filename, escaped_thumbnail_jpg_filename, ['-bsf:v', 'mjpeg2jpeg'])
        except Exception:
            # Do not leave the thumbnail behind under its escaped name
            os.rename(encodeFilename(escaped_thumbnail_filename), encodeFilename(thumbnail_filename))
            raise
        os.remove(encodeFilename(escaped_thumbnail_filename))
        thumbnail_jpg_filename = replace_extension(thumbnail_filename, 'jpg')
        # Rename back to unescaped for further processing
        os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
        return thumbnail_jpg_filename

    def _embed_with_ffmpeg(self, filename, thumbnail_filename, temp_filename):
        """Embed the thumbnail into an mp3 file using ffmpeg.

        The result is written to temp_filename, which then replaces filename.
        """
        options = [
            '-c', 'copy', '-map', '0', '-map', '1',
            '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']

        self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)

        self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)

        if not self._already_have_thumbnail:
            os.remove(encodeFilename(thumbnail_filename))
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

    def _embed_with_atomicparsley(self, filename, thumbnail_filename, temp_filename):
        """Embed the thumbnail into an m4a/mp4 file using AtomicParsley.

        Raises EmbedThumbnailPPError if AtomicParsley is not available or
        exits with a non-zero status.
        """
        if not check_executable('AtomicParsley', ['-v']):
            raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')

        cmd = [encodeFilename('AtomicParsley', True),
               encodeFilename(filename, True),
               encodeArgument('--artwork'),
               encodeFilename(thumbnail_filename, True),
               encodeArgument('-o'),
               encodeFilename(temp_filename, True)]

        self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()

        if p.returncode != 0:
            msg = stderr.decode('utf-8', 'replace').strip()
            raise EmbedThumbnailPPError(msg)

        if not self._already_have_thumbnail:
            os.remove(encodeFilename(thumbnail_filename))
        # for formats that don't support thumbnails (like 3gp) AtomicParsley
        # won't create the temporary file
        if b'No changes' in stdout:
            self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
        else:
            os.remove(encodeFilename(filename))
            os.rename(encodeFilename(temp_filename), encodeFilename(filename))

    def run(self, info):
        """Embed the last thumbnail listed in info into info['filepath'].

        Returns the tuple ``([], info)``.  Nothing is embedded (and only a
        message or warning is emitted) when info lists no thumbnails or the
        thumbnail file is not available on disk.

        Raises EmbedThumbnailPPError when info['ext'] is not mp3, m4a or mp4,
        or when AtomicParsley is missing or fails.
        """
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

        if not info.get('thumbnails'):
            self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
            return [], info

        # An entry without a recorded filename is treated like a missing file
        # instead of failing with a KeyError
        thumbnail_filename = info['thumbnails'][-1].get('filename')

        if not thumbnail_filename or not os.path.exists(encodeFilename(thumbnail_filename)):
            self._downloader.report_warning(
                'Skipping embedding the thumbnail because the file is missing.')
            return [], info

        thumbnail_filename, thumbnail_ext = self._correct_webp_extension(thumbnail_filename)

        # Convert unsupported thumbnail formats to JPEG (see #25687, #25717)
        if thumbnail_ext not in ['jpg', 'png']:
            thumbnail_filename = self._convert_thumbnail_to_jpeg(thumbnail_filename)

        if info['ext'] == 'mp3':
            self._embed_with_ffmpeg(filename, thumbnail_filename, temp_filename)
        elif info['ext'] in ['m4a', 'mp4']:
            self._embed_with_atomicparsley(filename, thumbnail_filename, temp_filename)
        else:
            raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')

        return [], info