X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=3dff723b81fff6947ac8cf08c62a275843f359f9;hb=78149a962b9aa0daa09b16a57234064f11cbc3a8;hp=ae0ec49f84ae0c4765230def95ea2d2020a888d4;hpb=98acdc895bb6795efa1b5890a7af1b6e662051bd;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py old mode 100644 new mode 100755 index ae0ec49f8..3dff723b8 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -8,6 +8,7 @@ import datetime import errno import io import json +import locale import os import platform import re @@ -30,6 +31,7 @@ from .utils import ( ContentTooShortError, date_from_str, DateRange, + DEFAULT_OUTTMPL, determine_ext, DownloadError, encodeFilename, @@ -159,6 +161,7 @@ class YoutubeDL(object): include_ads: Download ads as well default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing + encoding: Use this encoding instead of the system-specified. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -284,6 +287,9 @@ class YoutubeDL(object): """Print message to stdout if not in quiet mode.""" return self.to_stdout(message, skip_eol, check_quiet=True) + def _write_string(self, s, out=None): + write_string(s, out=out, encoding=self.params.get('encoding')) + def to_stdout(self, message, skip_eol=False, check_quiet=False): """Print message to stdout if not in quiet mode.""" if self.params.get('logger'): @@ -293,7 +299,7 @@ class YoutubeDL(object): terminator = ['\n', ''][skip_eol] output = message + terminator - write_string(output, self._screen_file) + self._write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" @@ -303,7 +309,7 @@ class YoutubeDL(object): else: message = self._bidi_workaround(message) output = message + '\n' - write_string(output, self._err_file) + self._write_string(output, self._err_file) def to_console_title(self, message): if not self.params.get('consoletitle', False): @@ -313,21 +319,21 @@ class YoutubeDL(object): # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) elif 'TERM' in os.environ: - write_string('\033]0;%s\007' % message, self._screen_file) + self._write_string('\033]0;%s\007' % message, self._screen_file) def save_console_title(self): if not self.params.get('consoletitle', False): return if 'TERM' in os.environ: # Save the title on stack - write_string('\033[22;0t', self._screen_file) + self._write_string('\033[22;0t', self._screen_file) def restore_console_title(self): if not self.params.get('consoletitle', False): return if 'TERM' in os.environ: # Restore the title from stack - write_string('\033[23;0t', self._screen_file) + self._write_string('\033[23;0t', self._screen_file) def __enter__(self): self.save_console_title() @@ -435,7 +441,8 @@ class YoutubeDL(object): if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - tmpl = os.path.expanduser(self.params['outtmpl']) + outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + tmpl = os.path.expanduser(outtmpl) filename = tmpl % template_dict return filename except ValueError as err: @@ -700,11 +707,27 @@ class YoutubeDL(object): def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' + if 'id' not in info_dict: + raise ExtractorError('Missing "id" field in extractor result') + if 'title' not in info_dict: + raise ExtractorError('Missing "title" field in extractor result') + if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None info_dict['playlist_index'] = None + thumbnails = info_dict.get('thumbnails') + if thumbnails: + thumbnails.sort(key=lambda t: ( + t.get('width'), t.get('height'), t.get('url'))) + for t in thumbnails: + if 'width' in t and 'height' in t: + t['resolution'] = '%dx%d' % (t['width'], t['height']) + + if thumbnails and 'thumbnail' not in info_dict: + info_dict['thumbnail'] = thumbnails[-1]['url'] + if 'display_id' not in info_dict and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] @@ -731,6 +754,9 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): + if 'url' not in format: + raise ExtractorError('Missing "url" key in result (index %d)' % i) + if format.get('format_id') is None: format['format_id'] = compat_str(i) if format.get('format') is None: @@ -741,7 +767,7 @@ class YoutubeDL(object): ) # Automatically determine file extension if missing if 'ext' not in format: - format['ext'] = determine_ext(format['url']) + format['ext'] = determine_ext(format['url']).lower() format_limit = self.params.get('format_limit', None) if format_limit: @@ -866,7 +892,7 @@ class YoutubeDL(object): try: dn = os.path.dirname(encodeFilename(filename)) - if dn != '' and not os.path.exists(dn): + if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: self.report_error('unable to create directory ' + compat_str(err)) @@ -923,7 +949,7 @@ class YoutubeDL(object): with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: subfile.write(sub) except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + descfn) + self.report_error('Cannot write subtitles file ' + sub_filename) return if self.params.get('writeinfojson', False): @@ -967,6 +993,8 @@ class YoutubeDL(object): fd = get_suitable_downloader(info)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) if info_dict.get('requested_formats') is not None: downloaded = [] @@ -1012,10 +1040,11 @@ class YoutubeDL(object): def download(self, url_list): """Download a given list of URLs.""" + outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and - '%' not in self.params['outtmpl'] + '%' not in outtmpl and self.params.get('max_downloads') != 1): - raise SameFileError(self.params['outtmpl']) + raise SameFileError(outtmpl) for url in url_list: try: @@ -1126,57 +1155,57 @@ class YoutubeDL(object): res = default return res - def list_formats(self, info_dict): - def format_note(fdict): - res = '' - if fdict.get('ext') in ['f4f', 'f4m']: - res += '(unsupported) ' - if fdict.get('format_note') is not None: - res += fdict['format_note'] + ' ' - if fdict.get('tbr') is not None: - res += '%4dk ' % fdict['tbr'] - if fdict.get('container') is not None: - if res: - res += ', ' - res += '%s container' % fdict['container'] - if (fdict.get('vcodec') is not None and - fdict.get('vcodec') != 'none'): - if res: - res += ', ' - res += fdict['vcodec'] - if fdict.get('vbr') is not None: - res += '@' - elif fdict.get('vbr') is not None and fdict.get('abr') is not None: - res += 'video@' + def _format_note(self, fdict): + res = '' + if fdict.get('ext') in ['f4f', 'f4m']: + res += '(unsupported) ' + if fdict.get('format_note') is not None: + res += fdict['format_note'] + ' ' + if fdict.get('tbr') is not None: + res += '%4dk ' % fdict['tbr'] + if fdict.get('container') is not None: + if res: + res += ', ' + res += '%s container' % fdict['container'] + if (fdict.get('vcodec') is not None and + fdict.get('vcodec') != 'none'): + if res: + res += ', ' + res += fdict['vcodec'] if fdict.get('vbr') is not None: - res += '%4dk' % fdict['vbr'] - if fdict.get('acodec') is not None: - if res: - res += ', ' - if fdict['acodec'] == 'none': - res += 'video only' - else: - res += '%-5s' % fdict['acodec'] - elif fdict.get('abr') is not None: - if res: - res += ', ' - res += 'audio' - if fdict.get('abr') is not None: - res += '@%3dk' % fdict['abr'] - if fdict.get('asr') is not None: - res += ' (%5dHz)' % fdict['asr'] - if fdict.get('filesize') is not None: - if res: - res += ', ' - res += format_bytes(fdict['filesize']) - return res + res += '@' + elif fdict.get('vbr') is not None and fdict.get('abr') is not None: + res += 'video@' + if fdict.get('vbr') is not None: + res += '%4dk' % fdict['vbr'] + if fdict.get('acodec') is not None: + if res: + res += ', ' + if fdict['acodec'] == 'none': + res += 'video only' + else: + res += '%-5s' % fdict['acodec'] + elif fdict.get('abr') is not None: + if res: + res += ', ' + res += 'audio' + if fdict.get('abr') is not None: + res += '@%3dk' % fdict['abr'] + if fdict.get('asr') is not None: + res += ' (%5dHz)' % fdict['asr'] + if fdict.get('filesize') is not None: + if res: + res += ', ' + res += format_bytes(fdict['filesize']) + return res + def list_formats(self, info_dict): def line(format, idlen=20): return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( format['format_id'], format['ext'], self.format_resolution(format), - format_note(format), + self._format_note(format), )) formats = info_dict.get('formats', [info_dict]) @@ -1184,8 +1213,8 @@ class YoutubeDL(object): max(len(f['format_id']) for f in formats)) formats_s = [line(f, idlen) for f in formats] if len(formats) > 1: - formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' - formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' + formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' + formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' header_line = line({ 'format_id': 'format code', 'ext': 'extension', @@ -1200,7 +1229,17 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return - write_string('[debug] youtube-dl version ' + __version__ + '\n') + + write_string( + '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( + locale.getpreferredencoding(), + sys.getfilesystemencoding(), + sys.stdout.encoding, + self.get_encoding()), + encoding=None + ) + + self._write_string('[debug] youtube-dl version ' + __version__ + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -1209,20 +1248,20 @@ class YoutubeDL(object): out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): - write_string('[debug] Git HEAD: ' + out + '\n') + self._write_string('[debug] Git HEAD: ' + out + '\n') except: try: sys.exc_clear() except: pass - write_string('[debug] Python version %s - %s' % + self._write_string('[debug] Python version %s - %s' % (platform.python_version(), platform_name()) + '\n') proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') def _setup_opener(self): timeout_val = self.params.get('socket_timeout') @@ -1264,3 +1303,19 @@ class YoutubeDL(object): # (See https://github.com/rg3/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener + + def encode(self, s): + if isinstance(s, bytes): + return s # Already encoded + + try: + return s.encode(self.get_encoding()) + except UnicodeEncodeError as err: + err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' + raise + + def get_encoding(self): + encoding = self.params.get('encoding') + if encoding is None: + encoding = preferredencoding() + return encoding