X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=76fc394bcff44f30ae6fa383ea54621a654a0864;hb=bfc993cc9183d5f001e30267551bcdf9f0a98be9;hp=36c71f9475233731f44f5ea44dec5fec7a83588f;hpb=e4a8eae701f22395dae607ed000d39f7a57e80a0;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 36c71f947..76fc394bc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -10,6 +10,7 @@ import io import itertools import json import locale +import operator import os import platform import re @@ -24,6 +25,7 @@ if os.name == 'nt': import ctypes from .compat import ( + compat_basestring, compat_cookiejar, compat_expanduser, compat_http_client, @@ -49,15 +51,19 @@ from .utils import ( make_HTTPS_handler, MaxDownloadsReached, PagedList, + parse_filesize, PostProcessingError, platform_name, preferredencoding, + render_table, SameFileError, sanitize_filename, + std_headers, subtitles_filename, takewhile_inclusive, UnavailableVideoError, url_basename, + version_tuple, write_json_file, write_string, YoutubeDLHandler, @@ -70,6 +76,8 @@ from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, FFmpegMergerPP, FFmpegPostProcessor, get_postprocessor, @@ -130,6 +138,7 @@ class YoutubeDL(object): nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. @@ -139,12 +148,13 @@ class YoutubeDL(object): writeinfojson: Write the video description to a .info.json file writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file + write_all_thumbnails: Write all thumbnail formats to files writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video - subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) + subtitlesformat: The format code for subtitles subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. @@ -189,26 +199,58 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * filename: The final filename - * status: One of "downloading" and "finished" - - The dict may also have some of the following entries: + * status: One of "downloading", "error", or "finished". + Check this first and ignore unknown values. + If status is one of "downloading", or "finished", the + following properties may also be present: + * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown - * tmpfilename: The filename we're currently writing to + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown + * fragment_index: The counter of the currently + downloaded video fragment. + * fragment_count: The number of fragments (= individual + files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. - + merge_output_format: Extension to use when merging formats. + fixup: Automatically correct known faults of the file. + One of: + - "never": do nothing + - "warn": only emit a warning + - "detect_or_warn": check whether we can do anything + about it, warn otherwise (default) + source_address: (Experimental) Client-side IP address to bind to. + call_home: Boolean, true iff we are allowed to contact the + youtube-dl servers for debugging. + sleep_interval: Number of seconds to sleep before each download. + listformats: Print an overview of available video formats and exit. + list_thumbnails: Print a table of all thumbnails and exit. + match_filter: A function that gets called with the info_dict of + every video. + If it returns a message, the video is ignored. + If it returns None, the video is downloaded. + match_filter_func in utils.py is one example for this. + no_color: Do not emit color codes in output. + + The following options determine which downloader is picked: + external_downloader: Executable of the external downloader to call. + None or unset for standard (built-in) downloader. + hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, - noresizebuffer, retries, continuedl, noprogress, consoletitle + noresizebuffer, retries, continuedl, noprogress, consoletitle, + xattr_set_filesize. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -266,8 +308,8 @@ class YoutubeDL(object): raise if (sys.version_info >= (3,) and sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] - and not params.get('restrictfilenames', False)): + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and + not params.get('restrictfilenames', False)): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -459,7 +501,7 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' @@ -471,7 +513,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' @@ -518,12 +560,17 @@ class YoutubeDL(object): outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict + # Temporary fix for #4787 + # 'Treat' all problem characters by passing filename through preferredencoding + # to workaround encoding issues with subprocess on python2 @ Windows + if sys.version_info < (3, 0) and sys.platform == 'win32': + filename = encodeFilename(filename, True).decode(preferredencoding()) return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None - def _match_entry(self, info_dict): + def _match_entry(self, info_dict, incomplete): """ Returns None iff the file should be downloaded """ video_title = info_dict.get('title', info_dict.get('id', 'video')) @@ -552,9 +599,17 @@ class YoutubeDL(object): if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): - return 'Skipping "%s" because it is age restricted' % title + return 'Skipping "%s" because it is age restricted' % video_title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title + + if not incomplete: + match_filter = self.params.get('match_filter') + if match_filter is not None: + ret = match_filter(info_dict) + if ret is not None: + return ret + return None @staticmethod @@ -681,24 +736,51 @@ class YoutubeDL(object): if playlistend == -1: playlistend = None + playlistitems_str = self.params.get('playlist_items', None) + playlistitems = None + if playlistitems_str is not None: + def iter_playlistitems(format): + for string_segment in format.split(','): + if '-' in string_segment: + start, end = string_segment.split('-') + for item in range(int(start), int(end) + 1): + yield int(item) + else: + yield int(string_segment) + playlistitems = iter_playlistitems(playlistitems_str) + ie_entries = ie_result['entries'] if isinstance(ie_entries, list): n_all_entries = len(ie_entries) - entries = ie_entries[playliststart:playlistend] + if playlistitems: + entries = [ie_entries[i - 1] for i in playlistitems] + else: + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): - entries = ie_entries.getslice( - playliststart, playlistend) + if playlistitems: + entries = [] + for item in playlistitems: + entries.extend(ie_entries.getslice( + item - 1, item + )) + else: + entries = ie_entries.getslice( + playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) else: # iterable - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) + if playlistitems: + entry_list = list(ie_entries) + entries = [entry_list[i - 1] for i in playlistitems] + else: + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % @@ -721,7 +803,7 @@ class YoutubeDL(object): 'extractor_key': ie_result['extractor_key'], } - reason = self._match_entry(entry) + reason = self._match_entry(entry, incomplete=True) if reason is not None: self.to_screen('[download] ' + reason) continue @@ -756,7 +838,76 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def _apply_format_filter(self, format_spec, available_formats): + " Returns a tuple of the remaining format_spec and filtered formats " + + OPERATORS = { + '<': operator.lt, + '<=': operator.le, + '>': operator.gt, + '>=': operator.ge, + '=': operator.eq, + '!=': operator.ne, + } + operator_rex = re.compile(r'''(?x)\s*\[ + (?Pwidth|height|tbr|abr|vbr|asr|filesize|fps) + \s*(?P%s)(?P\s*\?)?\s* + (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) + \]$ + ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + m = operator_rex.search(format_spec) + if m: + try: + comparison_value = int(m.group('value')) + except ValueError: + comparison_value = parse_filesize(m.group('value')) + if comparison_value is None: + comparison_value = parse_filesize(m.group('value') + 'B') + if comparison_value is None: + raise ValueError( + 'Invalid value %r in format specification %r' % ( + m.group('value'), format_spec)) + op = OPERATORS[m.group('op')] + + if not m: + STR_OPERATORS = { + '=': operator.eq, + '!=': operator.ne, + } + str_operator_rex = re.compile(r'''(?x)\s*\[ + \s*(?Pext|acodec|vcodec|container|protocol) + \s*(?P%s)(?P\s*\?)? + \s*(?P[a-zA-Z0-9_-]+) + \s*\]$ + ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + m = str_operator_rex.search(format_spec) + if m: + comparison_value = m.group('value') + op = STR_OPERATORS[m.group('op')] + + if not m: + raise ValueError('Invalid format specification %r' % format_spec) + + def _filter(f): + actual_value = f.get(m.group('key')) + if actual_value is None: + return m.group('none_inclusive') + return op(actual_value, comparison_value) + new_formats = [f for f in available_formats if _filter(f)] + + new_format_spec = format_spec[:-len(m.group(0))] + if not new_format_spec: + new_format_spec = 'best' + + return (new_format_spec, new_formats) + def select_format(self, format_spec, available_formats): + while format_spec.endswith(']'): + format_spec, available_formats = self._apply_format_filter( + format_spec, available_formats) + if not available_formats: + return None + if format_spec == 'best' or format_spec is None: return available_formats[-1] elif format_spec == 'worst': @@ -786,7 +937,7 @@ class YoutubeDL(object): if video_formats: return video_formats[0] else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a'] + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: @@ -796,6 +947,24 @@ class YoutubeDL(object): return matches[-1] return None + def _calc_headers(self, info_dict): + res = std_headers.copy() + + add_headers = info_dict.get('http_headers') + if add_headers: + res.update(add_headers) + + cookies = self._calc_cookies(info_dict) + if cookies: + res['Cookie'] = cookies + + return res + + def _calc_cookies(self, info_dict): + pr = compat_urllib_request.Request(info_dict['url']) + self.cookiejar.add_cookie_header(pr) + return pr.get_header('Cookie') + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -810,12 +979,19 @@ class YoutubeDL(object): info_dict['playlist_index'] = None thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('width'), t.get('height'), t.get('url'))) - for t in thumbnails: + t.get('preference'), t.get('width'), t.get('height'), + t.get('id'), t.get('url'))) + for i, t in enumerate(thumbnails): if 'width' in t and 'height' in t: t['resolution'] = '%dx%d' % (t['width'], t['height']) + if t.get('id') is None: + t['id'] = '%d' % i if thumbnails and 'thumbnail' not in info_dict: info_dict['thumbnail'] = thumbnails[-1]['url'] @@ -832,6 +1008,15 @@ class YoutubeDL(object): info_dict['timestamp']) info_dict['upload_date'] = upload_date.strftime('%Y%m%d') + if self.params.get('listsubtitles', False): + if 'automatic_captions' in info_dict: + self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') + self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') + return + info_dict['requested_subtitles'] = self.process_subtitles( + info_dict['id'], info_dict.get('subtitles'), + info_dict.get('automatic_captions')) + # This extractors handle format selection themselves if info_dict['extractor'] in ['Youku']: if download: @@ -864,6 +1049,11 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Add HTTP headers, so that external programs can use them from the + # json output + full_format_info = info_dict.copy() + full_format_info.update(format) + format['http_headers'] = self._calc_headers(full_format_info) format_limit = self.params.get('format_limit', None) if format_limit: @@ -879,9 +1069,12 @@ class YoutubeDL(object): # element in the 'formats' field in info_dict is info_dict itself, # wich can't be exported to json info_dict['formats'] = formats - if self.params.get('listformats', None): + if self.params.get('listformats'): self.list_formats(info_dict) return + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return req_format = self.params.get('format') if req_format is None: @@ -909,10 +1102,26 @@ class YoutubeDL(object): 'contain the video, try using ' '"-f %s+%s"' % (format_2, format_1)) return + output_ext = ( + formats_info[0]['ext'] + if self.params.get('merge_output_format') is None + else self.params['merge_output_format']) selected_format = { 'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], + 'format': '%s+%s' % (formats_info[0].get('format'), + formats_info[1].get('format')), + 'format_id': '%s+%s' % (formats_info[0].get('format_id'), + formats_info[1].get('format_id')), + 'width': formats_info[0].get('width'), + 'height': formats_info[0].get('height'), + 'resolution': formats_info[0].get('resolution'), + 'fps': formats_info[0].get('fps'), + 'vcodec': formats_info[0].get('vcodec'), + 'vbr': formats_info[0].get('vbr'), + 'stretched_ratio': formats_info[0].get('stretched_ratio'), + 'acodec': formats_info[1].get('acodec'), + 'abr': formats_info[1].get('abr'), + 'ext': output_ext, } else: selected_format = None @@ -936,6 +1145,55 @@ class YoutubeDL(object): info_dict.update(formats_to_download[-1]) return info_dict + def process_subtitles(self, video_id, normal_subtitles, automatic_captions): + """Select the requested subtitles and their format""" + available_subs = {} + if normal_subtitles and self.params.get('writesubtitles'): + available_subs.update(normal_subtitles) + if automatic_captions and self.params.get('writeautomaticsub'): + for lang, cap_info in automatic_captions.items(): + if lang not in available_subs: + available_subs[lang] = cap_info + + if (not self.params.get('writesubtitles') and not + self.params.get('writeautomaticsub') or not + available_subs): + return None + + if self.params.get('allsubtitles', False): + requested_langs = available_subs.keys() + else: + if self.params.get('subtitleslangs', False): + requested_langs = self.params.get('subtitleslangs') + elif 'en' in available_subs: + requested_langs = ['en'] + else: + requested_langs = [list(available_subs.keys())[0]] + + formats_query = self.params.get('subtitlesformat', 'best') + formats_preference = formats_query.split('/') if formats_query else [] + subs = {} + for lang in requested_langs: + formats = available_subs.get(lang) + if formats is None: + self.report_warning('%s subtitles not available for %s' % (lang, video_id)) + continue + for ext in formats_preference: + if ext == 'best': + f = formats[-1] + break + matches = list(filter(lambda f: f['ext'] == ext, formats)) + if matches: + f = matches[-1] + break + else: + f = formats[-1] + self.report_warning( + 'No subtitle format found matching "%s" for language %s, ' + 'using %s' % (formats_query, lang, f['ext'])) + subs[lang] = f + return subs + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -956,14 +1214,14 @@ class YoutubeDL(object): if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] - reason = self._match_entry(info_dict) + reason = self._match_entry(info_dict, incomplete=False) if reason is not None: self.to_screen('[download] ' + reason) return self._num_downloads += 1 - filename = self.prepare_filename(info_dict) + info_dict['_filename'] = filename = self.prepare_filename(info_dict) # Forced printings if self.params.get('forcetitle', False): @@ -988,10 +1246,7 @@ class YoutubeDL(object): if self.params.get('forceformat', False): self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): - info_dict['_filename'] = filename self.to_stdout(json.dumps(info_dict)) - if self.params.get('dump_single_json', False): - info_dict['_filename'] = filename # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -1041,15 +1296,22 @@ class YoutubeDL(object): subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) - if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: + if subtitles_are_requested and info_dict.get('requested_subtitles'): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE - subtitles = info_dict['subtitles'] - sub_format = self.params.get('subtitlesformat', 'srt') - for sub_lang in subtitles.keys(): - sub = subtitles[sub_lang] - if sub is None: - continue + subtitles = info_dict['requested_subtitles'] + for sub_lang, sub_info in subtitles.items(): + sub_format = sub_info['ext'] + if sub_info.get('data') is not None: + sub_data = sub_info['data'] + else: + try: + uf = self.urlopen(sub_info['url']) + sub_data = uf.read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning('Unable to download subtitle for "%s": %s' % + (sub_lang, compat_str(err))) + continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): @@ -1057,7 +1319,7 @@ class YoutubeDL(object): else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) + subfile.write(sub_data) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) return @@ -1074,72 +1336,90 @@ class YoutubeDL(object): self.report_error('Cannot write metadata to JSON file ' + infofn) return - if self.params.get('writethumbnail', False): - if info_dict.get('thumbnail') is not None: - thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') - thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen('[%s] %s: Thumbnail is already present' % - (info_dict['extractor'], info_dict['id'])) - else: - self.to_screen('[%s] %s: Downloading thumbnail ...' % - (info_dict['extractor'], info_dict['id'])) - try: - uf = self.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen('[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning('Unable to download thumbnail "%s": %s' % - (info_dict['thumbnail'], compat_str(err))) + self._write_thumbnails(info_dict, filename) if not self.params.get('skip_download', False): - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): - success = True - else: - try: - def dl(name, info): - fd = get_suitable_downloader(info)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - if self.params.get('verbose'): - self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) - if info_dict.get('requested_formats') is not None: - downloaded = [] - success = True - merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) - if not merger._executable: - postprocessors = [] - self.report_warning('You have requested multiple ' - 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged') - else: - postprocessors = [merger] - for f in info_dict['requested_formats']: - new_info = dict(info_dict) - new_info.update(f) - fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id']) - downloaded.append(fname) - partial_success = dl(fname, new_info) - success = success and partial_success - info_dict['__postprocessors'] = postprocessors - info_dict['__files_to_merge'] = downloaded + try: + def dl(name, info): + fd = get_suitable_downloader(info, self.params)(self, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) + return fd.download(name, info) + + if info_dict.get('requested_formats') is not None: + downloaded = [] + success = True + merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) + if not merger.available: + postprocessors = [] + self.report_warning('You have requested multiple ' + 'formats but ffmpeg or avconv are not installed.' + ' The formats won\'t be merged') else: - # Just a single file - success = dl(filename, info_dict) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_error('unable to download video data: %s' % str(err)) - return - except (OSError, IOError) as err: - raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: - self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) - return + postprocessors = [merger] + for f in info_dict['requested_formats']: + new_info = dict(info_dict) + new_info.update(f) + fname = self.prepare_filename(new_info) + fname = prepend_extension(fname, 'f%s' % f['format_id']) + downloaded.append(fname) + partial_success = dl(fname, new_info) + success = success and partial_success + info_dict['__postprocessors'] = postprocessors + info_dict['__files_to_merge'] = downloaded + else: + # Just a single file + success = dl(filename, info_dict) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_error('unable to download video data: %s' % str(err)) + return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) + except (ContentTooShortError, ) as err: + self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return if success: + # Fixup content + fixup_policy = self.params.get('fixup') + if fixup_policy is None: + fixup_policy = 'detect_or_warn' + + stretched_ratio = info_dict.get('stretched_ratio') + if stretched_ratio is not None and stretched_ratio != 1: + if fixup_policy == 'warn': + self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( + info_dict['id'], stretched_ratio)) + elif fixup_policy == 'detect_or_warn': + stretched_pp = FFmpegFixupStretchedPP(self) + if stretched_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(stretched_pp) + else: + self.report_warning( + '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( + info_dict['id'], stretched_ratio)) + else: + assert fixup_policy in ('ignore', 'never') + + if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if fixup_policy == 'warn': + self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM4aPP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( + info_dict['id'])) + else: + assert fixup_policy in ('ignore', 'never') + try: self.post_process(filename, info_dict) except (PostProcessingError) as err: @@ -1151,8 +1431,8 @@ class YoutubeDL(object): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and - '%' not in outtmpl - and self.params.get('max_downloads') != 1): + '%' not in outtmpl and + self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: @@ -1188,14 +1468,15 @@ class YoutubeDL(object): """Run all the postprocessors on the given file.""" info = dict(ie_info) info['filepath'] = filename - keep_video = None pps_chain = [] if ie_info.get('__postprocessors') is not None: pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: + keep_video = None + old_filename = info['filepath'] try: - keep_video_wish, new_info = pp.run(info) + keep_video_wish, info = pp.run(info) if keep_video_wish is not None: if keep_video_wish: keep_video = keep_video_wish @@ -1204,12 +1485,12 @@ class YoutubeDL(object): keep_video = keep_video_wish except PostProcessingError as e: self.report_error(e.msg) - if keep_video is False and not self.params.get('keepvideo', False): - try: - self.to_screen('Deleting original file %s (pass -k to keep)' % filename) - os.remove(encodeFilename(filename)) - except (IOError, OSError): - self.report_warning('Unable to remove downloaded video file') + if keep_video is False and not self.params.get('keepvideo', False): + try: + self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) + os.remove(encodeFilename(old_filename)) + except (IOError, OSError): + self.report_warning('Unable to remove downloaded video file') def _make_archive_id(self, info_dict): # Future-proof against any change in case @@ -1318,29 +1599,46 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - def line(format, idlen=20): - return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( - format['format_id'], - format['ext'], - self.format_resolution(format), - self._format_note(format), - )) - formats = info_dict.get('formats', [info_dict]) - idlen = max(len('format code'), - max(len(f['format_id']) for f in formats)) - formats_s = [ - line(f, idlen) for f in formats + table = [ + [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] + for f in formats if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: - formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' - formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' + table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - header_line = line({ - 'format_id': 'format code', 'ext': 'extension', - 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) - self.to_screen('[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + header_line = ['format code', 'extension', 'resolution', 'note'] + self.to_screen( + '[info] Available formats for %s:\n%s' % + (info_dict['id'], render_table(header_line, table))) + + def list_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if not thumbnails: + tn_url = info_dict.get('thumbnail') + if tn_url: + thumbnails = [{'id': '0', 'url': tn_url}] + else: + self.to_screen( + '[info] No thumbnails present for %s' % info_dict['id']) + return + + self.to_screen( + '[info] Thumbnails for %s:' % info_dict['id']) + self.to_screen(render_table( + ['ID', 'width', 'height', 'URL'], + [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) + + def list_subtitles(self, video_id, subtitles, name='subtitles'): + if not subtitles: + self.to_screen('%s has no %s' % (video_id, name)) + return + self.to_screen( + 'Available %s for %s:' % (name, video_id)) + self.to_screen(render_table( + ['Language', 'formats'], + [[lang, ', '.join(f['ext'] for f in reversed(formats))] + for lang, formats in subtitles.items()])) def urlopen(self, req): """ Start an HTTP download """ @@ -1351,7 +1649,7 @@ class YoutubeDL(object): # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) # To work around aforementioned issue we will replace request's original URL with # percent-encoded one - req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) + req_is_string = isinstance(req, compat_basestring) url = req if req_is_string else req.get_full_url() url_escaped = escape_url(url) @@ -1403,7 +1701,7 @@ class YoutubeDL(object): self._write_string('[debug] Python version %s - %s\n' % ( platform.python_version(), platform_name())) - exe_versions = FFmpegPostProcessor.get_versions() + exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() exe_str = ', '.join( '%s %s' % (exe, v) @@ -1420,6 +1718,17 @@ class YoutubeDL(object): proxy_map.update(handler.proxies) self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + if self.params.get('call_home', False): + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') + self._write_string('[debug] Public IP address: %s\n' % ipaddr) + latest_version = self.urlopen( + 'https://yt-dl.org/latest/version').read().decode('utf-8') + if version_tuple(latest_version) > version_tuple(__version__): + self.report_warning( + 'You are using an outdated version (newest version: %s)! ' + 'See https://yt-dl.org/update if you need help updating.' % + latest_version) + def _setup_opener(self): timeout_val = self.params.get('socket_timeout') self._socket_timeout = 600 if timeout_val is None else float(timeout_val) @@ -1450,9 +1759,8 @@ class YoutubeDL(object): proxy_handler = compat_urllib_request.ProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 - https_handler = make_HTTPS_handler( - self.params.get('nocheckcertificate', False), debuglevel=debuglevel) - ydlh = YoutubeDLHandler(debuglevel=debuglevel) + https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) + ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( https_handler, proxy_handler, cookie_processor, ydlh) # Delete the default user-agent header, which would otherwise apply in @@ -1476,3 +1784,39 @@ class YoutubeDL(object): if encoding is None: encoding = preferredencoding() return encoding + + def _write_thumbnails(self, info_dict, filename): + if self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') + if thumbnails: + thumbnails = [thumbnails[-1]] + elif self.params.get('write_all_thumbnails', False): + thumbnails = info_dict.get('thumbnails') + else: + return + + if not thumbnails: + # No thumbnails present, so return immediately + return + + for t in thumbnails: + thumb_ext = determine_ext(t['url'], 'jpg') + suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' + thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' + thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): + self.to_screen('[%s] %s: Thumbnail %sis already present' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + else: + self.to_screen('[%s] %s: Downloading thumbnail %s...' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + try: + uf = self.urlopen(t['url']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % + (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning('Unable to download thumbnail "%s": %s' % + (t['url'], compat_str(err)))