X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=8f34b17b4135ff26263e52fc8e9e0badc197680d;hp=14a1d06ab1ed3350547822cac71501745a14842a;hb=5f0d813d9395848e92a1c6d83335360652d654c1;hpb=ca7a9c1bf7c57d1a5da9a24dd7618d95cb93102a diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 14a1d06ab..8f34b17b4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -7,8 +7,10 @@ import collections import datetime import errno import io +import itertools import json import locale +import operator import os import platform import re @@ -22,12 +24,17 @@ import traceback if os.name == 'nt': import ctypes -from .utils import ( +from .compat import ( compat_cookiejar, + compat_expanduser, compat_http_client, + compat_kwargs, compat_str, compat_urllib_error, compat_urllib_request, +) +from .utils import ( + escape_url, ContentTooShortError, date_from_str, DateRange, @@ -43,6 +50,7 @@ from .utils import ( make_HTTPS_handler, MaxDownloadsReached, PagedList, + parse_filesize, PostProcessingError, platform_name, preferredencoding, @@ -52,14 +60,24 @@ from .utils import ( takewhile_inclusive, UnavailableVideoError, url_basename, + version_tuple, write_json_file, write_string, YoutubeDLHandler, prepend_extension, + args_to_str, + age_restricted, ) +from .cache import Cache from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader -from .postprocessor import FFmpegMergerPP +from .downloader.rtmp import rtmpdump_version +from .postprocessor import ( + FFmpegFixupStretchedPP, + FFmpegMergerPP, + FFmpegPostProcessor, + get_postprocessor, +) from .version import __version__ @@ -105,8 +123,10 @@ class YoutubeDL(object): forcefilename: Force printing final filename. forceduration: Force printing duration. forcejson: Force printing info_dict as JSON. + dump_single_json: Force printing the info_dict of the whole playlist + (or video) as a single JSON line. simulate: Do not download the video files. - format: Video format code. + format: Video format code. See options.py for more information. format_limit: Highest quality format to try. outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names @@ -114,6 +134,7 @@ class YoutubeDL(object): nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. + playlistreverse: Download playlist items in reverse order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. @@ -133,7 +154,7 @@ class YoutubeDL(object): daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. - None to disable filesystem cache. + False to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. @@ -162,6 +183,42 @@ class YoutubeDL(object): default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing encoding: Use this encoding instead of the system-specified. + extract_flat: Do not resolve URLs, return the immediate result. + Pass in 'in_playlist' to only show this behavior for + playlist items. + postprocessors: A list of dictionaries, each with an entry + * key: The name of the postprocessor. See + youtube_dl/postprocessor/__init__.py for a list. + as well as any further keyword arguments for the + postprocessor. + progress_hooks: A list of functions that get called on download + progress, with a dictionary with the entries + * filename: The final filename + * status: One of "downloading" and "finished" + + The dict may also have some of the following entries: + + * downloaded_bytes: Bytes on disk + * total_bytes: Size of the whole file, None if unknown + * tmpfilename: The filename we're currently writing to + * eta: The estimated time in seconds, None if unknown + * speed: The download speed in bytes/second, None if + unknown + + Progress hooks are guaranteed to be called at least once + (with status "finished") if the download is successful. + merge_output_format: Extension to use when merging formats. + fixup: Automatically correct known faults of the file. + One of: + - "never": do nothing + - "warn": only emit a warning + - "detect_or_warn": check whether we can do anything + about it, warn otherwise + source_address: (Experimental) Client-side IP address to bind to. + call_home: Boolean, true iff we are allowed to contact the + youtube-dl servers for debugging. + sleep_interval: Number of seconds to sleep before each download. + The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -171,6 +228,7 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. + exec_cmd: Arbitrary command to run after downloading """ params = None @@ -180,7 +238,7 @@ class YoutubeDL(object): _num_downloads = None _screen_file = None - def __init__(self, params=None): + def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options.""" if params is None: params = {} @@ -193,6 +251,7 @@ class YoutubeDL(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr self.params = params + self.cache = Cache(self) if params.get('bidi_workaround', False): try: @@ -223,11 +282,11 @@ class YoutubeDL(object): if (sys.version_info >= (3,) and sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] - and not params['restrictfilenames']): + and not params.get('restrictfilenames', False)): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' - 'cannot encode all charactes. ' + 'cannot encode all characters. ' 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True @@ -236,6 +295,36 @@ class YoutubeDL(object): self._setup_opener() + if auto_init: + self.print_debug_header() + self.add_default_info_extractors() + + for pp_def_raw in self.params.get('postprocessors', []): + pp_class = get_postprocessor(pp_def_raw['key']) + pp_def = dict(pp_def_raw) + del pp_def['key'] + pp = pp_class(self, **compat_kwargs(pp_def)) + self.add_post_processor(pp) + + for ph in self.params.get('progress_hooks', []): + self.add_progress_hook(ph) + + def warn_if_short_id(self, argv): + # short YouTube ID starting with dash? + idxs = [ + i for i, a in enumerate(argv) + if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] + if idxs: + correct_argv = ( + ['youtube-dl'] + + [a for i, a in enumerate(argv) if i not in idxs] + + ['--'] + [argv[i] for i in idxs] + ) + self.report_warning( + 'Long argument string detected. ' + 'Use -- to separate parameters and URLs, like this:\n%s\n' % + args_to_str(correct_argv)) + def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -280,7 +369,7 @@ class YoutubeDL(object): self._output_process.stdin.write((message + '\n').encode('utf-8')) self._output_process.stdin.flush() res = ''.join(self._output_channel.readline().decode('utf-8') - for _ in range(line_count)) + for _ in range(line_count)) return res[:-len('\n')] def to_screen(self, message, skip_eol=False): @@ -423,7 +512,7 @@ class YoutubeDL(object): autonumber_templ = '%0' + str(autonumber_size) + 'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads if template_dict.get('playlist_index') is not None: - template_dict['playlist_index'] = '%05d' % template_dict['playlist_index'] + template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) if template_dict.get('resolution') is None: if template_dict.get('width') and template_dict.get('height'): template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) @@ -442,7 +531,7 @@ class YoutubeDL(object): template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) - tmpl = os.path.expanduser(outtmpl) + tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict return filename except ValueError as err: @@ -477,10 +566,8 @@ class YoutubeDL(object): max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) - age_limit = self.params.get('age_limit') - if age_limit is not None: - if age_limit < info_dict.get('age_limit', 0): - return 'Skipping "' + title + '" because it is age restricted' + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % title if self.in_download_archive(info_dict): return '%s has already been recorded in archive' % video_title return None @@ -514,7 +601,7 @@ class YoutubeDL(object): try: ie_result = ie.extract(url) - if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) + if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) break if isinstance(ie_result, list): # Backwards compatibility: old IE result format @@ -527,7 +614,7 @@ class YoutubeDL(object): return self.process_ie_result(ie_result, download, extra_info) else: return ie_result - except ExtractorError as de: # An error we somewhat expected + except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break except MaxDownloadsReached: @@ -558,7 +645,16 @@ class YoutubeDL(object): Returns the resolved ie_result. """ - result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system + result_type = ie_result.get('_type', 'video') + + if result_type in ('url', 'url_transparent'): + extract_flat = self.params.get('extract_flat', False) + if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or + extract_flat is True): + if self.params.get('forcejson', False): + self.to_stdout(json.dumps(ie_result)) + return ie_result + if result_type == 'video': self.add_extra_info(ie_result, extra_info) return self.process_video_result(ie_result, download=download) @@ -575,27 +671,19 @@ class YoutubeDL(object): ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) - def make_result(embedded_info): - new_result = ie_result.copy() - for f in ('_type', 'url', 'ext', 'player_url', 'formats', - 'entries', 'ie_key', 'duration', - 'subtitles', 'annotations', 'format', - 'thumbnail', 'thumbnails'): - if f in new_result: - del new_result[f] - if f in embedded_info: - new_result[f] = embedded_info[f] - return new_result - new_result = make_result(info) + force_properties = dict( + (k, v) for k, v in ie_result.items() if v is not None) + for f in ('_type', 'url'): + if f in force_properties: + del force_properties[f] + new_result = info.copy() + new_result.update(force_properties) assert new_result.get('_type') != 'url_transparent' - if new_result.get('_type') == 'compat_list': - new_result['entries'] = [ - make_result(e) for e in new_result['entries']] return self.process_ie_result( new_result, download=download, extra_info=extra_info) - elif result_type == 'playlist': + elif result_type == 'playlist' or result_type == 'multi_video': # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen('[download] Downloading playlist: %s' % playlist) @@ -608,26 +696,39 @@ class YoutubeDL(object): if playlistend == -1: playlistend = None - if isinstance(ie_result['entries'], list): - n_all_entries = len(ie_result['entries']) - entries = ie_result['entries'][playliststart:playlistend] + ie_entries = ie_result['entries'] + if isinstance(ie_entries, list): + n_all_entries = len(ie_entries) + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) - else: - assert isinstance(ie_result['entries'], PagedList) - entries = ie_result['entries'].getslice( + elif isinstance(ie_entries, PagedList): + entries = ie_entries.getslice( playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) + else: # iterable + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) + n_entries = len(entries) + self.to_screen( + "[%s] playlist %s: Downloading %d videos" % + (ie_result['extractor'], playlist, n_entries)) + + if self.params.get('playlistreverse', False): + entries = entries[::-1] for i, entry in enumerate(entries, 1): - self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) + self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) extra = { + 'n_entries': n_entries, 'playlist': playlist, + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], @@ -647,14 +748,20 @@ class YoutubeDL(object): ie_result['entries'] = playlist_results return ie_result elif result_type == 'compat_list': + self.report_warning( + 'Extractor %s returned a compat_list result. ' + 'It needs to be updated.' % ie_result.get('extractor')) + def _fixup(r): - self.add_extra_info(r, + self.add_extra_info( + r, { 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], - }) + } + ) return r ie_result['entries'] = [ self.process_ie_result(_fixup(r), download, extra_info) @@ -664,7 +771,59 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def _apply_format_filter(self, format_spec, available_formats): + " Returns a tuple of the remaining format_spec and filtered formats " + + OPERATORS = { + '<': operator.lt, + '<=': operator.le, + '>': operator.gt, + '>=': operator.ge, + '=': operator.eq, + '!=': operator.ne, + } + operator_rex = re.compile(r'''(?x)\s*\[ + (?Pwidth|height|tbr|abr|vbr|filesize) + \s*(?P%s)(?P\s*\?)?\s* + (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) + \]$ + ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + m = operator_rex.search(format_spec) + if not m: + raise ValueError('Invalid format specification %r' % format_spec) + + try: + comparison_value = int(m.group('value')) + except ValueError: + comparison_value = parse_filesize(m.group('value')) + if comparison_value is None: + comparison_value = parse_filesize(m.group('value') + 'B') + if comparison_value is None: + raise ValueError( + 'Invalid value %r in format specification %r' % ( + m.group('value'), format_spec)) + op = OPERATORS[m.group('op')] + + def _filter(f): + actual_value = f.get(m.group('key')) + if actual_value is None: + return m.group('none_inclusive') + return op(actual_value, comparison_value) + new_formats = [f for f in available_formats if _filter(f)] + + new_format_spec = format_spec[:-len(m.group(0))] + if not new_format_spec: + new_format_spec = 'best' + + return (new_format_spec, new_formats) + def select_format(self, format_spec, available_formats): + while format_spec.endswith(']'): + format_spec, available_formats = self._apply_format_filter( + format_spec, available_formats) + if not available_formats: + return None + if format_spec == 'best' or format_spec is None: return available_formats[-1] elif format_spec == 'worst': @@ -694,7 +853,7 @@ class YoutubeDL(object): if video_formats: return video_formats[0] else: - extensions = ['mp4', 'flv', 'webm', '3gp'] + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] if format_spec in extensions: filter_f = lambda f: f['ext'] == format_spec else: @@ -732,6 +891,10 @@ class YoutubeDL(object): info_dict['display_id'] = info_dict['id'] if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: + # Working around negative timestamps in Windows + # (see http://bugs.python.org/issue1646728) + if info_dict['timestamp'] < 0 and os.name == 'nt': + info_dict['timestamp'] = 0 upload_date = datetime.datetime.utcfromtimestamp( info_dict['timestamp']) info_dict['upload_date'] = upload_date.strftime('%Y%m%d') @@ -795,28 +958,50 @@ class YoutubeDL(object): if req_format in ('-1', 'all'): formats_to_download = formats else: - # We can accept formats requested in the format: 34/5/best, we pick - # the first that is available, starting from left - req_formats = req_format.split('/') - for rf in req_formats: - if re.match(r'.+?\+.+?', rf) is not None: - # Two formats have been requested like '137+139' - format_1, format_2 = rf.split('+') - formats_info = (self.select_format(format_1, formats), - self.select_format(format_2, formats)) - if all(formats_info): - selected_format = { - 'requested_formats': formats_info, - 'format': rf, - 'ext': formats_info[0]['ext'], - } + for rfstr in req_format.split(','): + # We can accept formats requested in the format: 34/5/best, we pick + # the first that is available, starting from left + req_formats = rfstr.split('/') + for rf in req_formats: + if re.match(r'.+?\+.+?', rf) is not None: + # Two formats have been requested like '137+139' + format_1, format_2 = rf.split('+') + formats_info = (self.select_format(format_1, formats), + self.select_format(format_2, formats)) + if all(formats_info): + # The first format must contain the video and the + # second the audio + if formats_info[0].get('vcodec') == 'none': + self.report_error('The first format must ' + 'contain the video, try using ' + '"-f %s+%s"' % (format_2, format_1)) + return + output_ext = ( + formats_info[0]['ext'] + if self.params.get('merge_output_format') is None + else self.params['merge_output_format']) + selected_format = { + 'requested_formats': formats_info, + 'format': rf, + 'ext': formats_info[0]['ext'], + 'width': formats_info[0].get('width'), + 'height': formats_info[0].get('height'), + 'resolution': formats_info[0].get('resolution'), + 'fps': formats_info[0].get('fps'), + 'vcodec': formats_info[0].get('vcodec'), + 'vbr': formats_info[0].get('vbr'), + 'stretched_ratio': formats_info[0].get('stretched_ratio'), + 'acodec': formats_info[1].get('acodec'), + 'abr': formats_info[1].get('abr'), + 'ext': output_ext, + } + else: + selected_format = None else: - selected_format = None - else: - selected_format = self.select_format(rf, formats) - if selected_format is not None: - formats_to_download = [selected_format] - break + selected_format = self.select_format(rf, formats) + if selected_format is not None: + formats_to_download.append(selected_format) + break if not formats_to_download: raise ExtractorError('requested format not available', expected=True) @@ -867,8 +1052,12 @@ class YoutubeDL(object): if self.params.get('forceid', False): self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): - # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + if info_dict.get('requested_formats') is not None: + for f in info_dict['requested_formats']: + self.to_stdout(f['url'] + f.get('play_path', '')) + else: + # For RTMP URLs, also include the playpath + self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: @@ -882,6 +1071,8 @@ class YoutubeDL(object): if self.params.get('forcejson', False): info_dict['_filename'] = filename self.to_stdout(json.dumps(info_dict)) + if self.params.get('dump_single_json', False): + info_dict['_filename'] = filename # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -902,13 +1093,13 @@ class YoutubeDL(object): descfn = filename + '.description' if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): self.to_screen('[info] Video description is already present') + elif info_dict.get('description') is None: + self.report_warning('There\'s no description to write.') else: try: self.to_screen('[info] Writing video description to: ' + descfn) with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) - except (KeyError, TypeError): - self.report_warning('There\'s no description to write.') except (OSError, IOError): self.report_error('Cannot write description file ' + descfn) return @@ -947,7 +1138,7 @@ class YoutubeDL(object): else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) + subfile.write(sub) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) return @@ -959,7 +1150,7 @@ class YoutubeDL(object): else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) try: - write_json_file(info_dict, encodeFilename(infofn)) + write_json_file(info_dict, infofn) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return @@ -979,64 +1170,81 @@ class YoutubeDL(object): with open(thumb_filename, 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) self.to_screen('[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) + (info_dict['extractor'], info_dict['id'], thumb_filename)) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download thumbnail "%s": %s' % - (info_dict['thumbnail'], compat_str(err))) + (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): - success = True - else: - try: - def dl(name, info): - fd = get_suitable_downloader(info)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - if self.params.get('verbose'): - self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) - if info_dict.get('requested_formats') is not None: - downloaded = [] - success = True - merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) - if not merger._get_executable(): - postprocessors = [] - self.report_warning('You have requested multiple ' - 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged') - else: - postprocessors = [merger] - for f in info_dict['requested_formats']: - new_info = dict(info_dict) - new_info.update(f) - fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id']) - downloaded.append(fname) - partial_success = dl(fname, new_info) - success = success and partial_success - info_dict['__postprocessors'] = postprocessors - info_dict['__files_to_merge'] = downloaded + try: + def dl(name, info): + fd = get_suitable_downloader(info)(self, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) + return fd.download(name, info) + if info_dict.get('requested_formats') is not None: + downloaded = [] + success = True + merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) + if not merger._executable: + postprocessors = [] + self.report_warning('You have requested multiple ' + 'formats but ffmpeg or avconv are not installed.' + ' The formats won\'t be merged') else: - # Just a single file - success = dl(filename, info_dict) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_error('unable to download video data: %s' % str(err)) - return - except (OSError, IOError) as err: - raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: - self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) - return + postprocessors = [merger] + for f in info_dict['requested_formats']: + new_info = dict(info_dict) + new_info.update(f) + fname = self.prepare_filename(new_info) + fname = prepend_extension(fname, 'f%s' % f['format_id']) + downloaded.append(fname) + partial_success = dl(fname, new_info) + success = success and partial_success + info_dict['__postprocessors'] = postprocessors + info_dict['__files_to_merge'] = downloaded + else: + # Just a single file + success = dl(filename, info_dict) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_error('unable to download video data: %s' % str(err)) + return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) + except (ContentTooShortError, ) as err: + self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return if success: + # Fixup content + stretched_ratio = info_dict.get('stretched_ratio') + if stretched_ratio is not None and stretched_ratio != 1: + fixup_policy = self.params.get('fixup') + if fixup_policy is None: + fixup_policy = 'detect_or_warn' + if fixup_policy == 'warn': + self.report_warning('%s: Non-uniform pixel ratio (%s)' % ( + info_dict['id'], stretched_ratio)) + elif fixup_policy == 'detect_or_warn': + stretched_pp = FFmpegFixupStretchedPP(self) + if stretched_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(stretched_pp) + else: + self.report_warning( + '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( + info_dict['id'], stretched_ratio)) + else: + assert fixup_policy == 'ignore' + try: self.post_process(filename, info_dict) except (PostProcessingError) as err: self.report_error('postprocessing: %s' % str(err)) return - - self.record_download_archive(info_dict) + self.record_download_archive(info_dict) def download(self, url_list): """Download a given list of URLs.""" @@ -1048,13 +1256,16 @@ class YoutubeDL(object): for url in url_list: try: - #It also downloads the videos - self.extract_info(url) + # It also downloads the videos + res = self.extract_info(url) except UnavailableVideoError: self.report_error('unable to download video') except MaxDownloadsReached: self.to_screen('[info] Maximum number of downloaded files reached.') raise + else: + if self.params.get('dump_single_json', False): + self.to_stdout(json.dumps(res)) return self._download_retcode @@ -1076,14 +1287,15 @@ class YoutubeDL(object): """Run all the postprocessors on the given file.""" info = dict(ie_info) info['filepath'] = filename - keep_video = None pps_chain = [] if ie_info.get('__postprocessors') is not None: pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: + keep_video = None + old_filename = info['filepath'] try: - keep_video_wish, new_info = pp.run(info) + keep_video_wish, info = pp.run(info) if keep_video_wish is not None: if keep_video_wish: keep_video = keep_video_wish @@ -1092,12 +1304,12 @@ class YoutubeDL(object): keep_video = keep_video_wish except PostProcessingError as e: self.report_error(e.msg) - if keep_video is False and not self.params.get('keepvideo', False): - try: - self.to_screen('Deleting original file %s (pass -k to keep)' % filename) - os.remove(encodeFilename(filename)) - except (IOError, OSError): - self.report_warning('Unable to remove downloaded video file') + if keep_video is False and not self.params.get('keepvideo', False): + try: + self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) + os.remove(encodeFilename(old_filename)) + except (IOError, OSError): + self.report_warning('Unable to remove downloaded video file') def _make_archive_id(self, info_dict): # Future-proof against any change in case @@ -1178,6 +1390,8 @@ class YoutubeDL(object): res += 'video@' if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] + if fdict.get('fps') is not None: + res += ', %sfps' % fdict['fps'] if fdict.get('acodec') is not None: if res: res += ', ' @@ -1215,7 +1429,9 @@ class YoutubeDL(object): formats = info_dict.get('formats', [info_dict]) idlen = max(len('format code'), max(len(f['format_id']) for f in formats)) - formats_s = [line(f, idlen) for f in formats] + formats_s = [ + line(f, idlen) for f in formats + if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' @@ -1228,6 +1444,26 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ + + # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not + # always respected by websites, some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one + req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) + url = req if req_is_string else req.get_full_url() + url_escaped = escape_url(url) + + # Substitute URL if any change after escaping + if url != url_escaped: + if req_is_string: + req = url_escaped + else: + req = compat_urllib_request.Request( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): @@ -1239,11 +1475,13 @@ class YoutubeDL(object): self.report_warning( 'Your Python is broken! Update to a newer and supported version') + stdout_encoding = getattr( + sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__) encoding_str = ( '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), - sys.stdout.encoding, + stdout_encoding, self.get_encoding())) write_string(encoding_str, encoding=None) @@ -1262,8 +1500,19 @@ class YoutubeDL(object): sys.exc_clear() except: pass - self._write_string('[debug] Python version %s - %s' % - (platform.python_version(), platform_name()) + '\n') + self._write_string('[debug] Python version %s - %s\n' % ( + platform.python_version(), platform_name())) + + exe_versions = FFmpegPostProcessor.get_versions() + exe_versions['rtmpdump'] = rtmpdump_version() + exe_str = ', '.join( + '%s %s' % (exe, v) + for exe, v in sorted(exe_versions.items()) + if v + ) + if not exe_str: + exe_str = 'none' + self._write_string('[debug] exe versions: %s\n' % exe_str) proxy_map = {} for handler in self._opener.handlers: @@ -1271,6 +1520,17 @@ class YoutubeDL(object): proxy_map.update(handler.proxies) self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + if self.params.get('call_home', False): + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') + self._write_string('[debug] Public IP address: %s\n' % ipaddr) + latest_version = self.urlopen( + 'https://yt-dl.org/latest/version').read().decode('utf-8') + if version_tuple(latest_version) > version_tuple(__version__): + self.report_warning( + 'You are using an outdated version (newest version: %s)! ' + 'See https://yt-dl.org/update if you need help updating.' % + latest_version) + def _setup_opener(self): timeout_val = self.params.get('socket_timeout') self._socket_timeout = 600 if timeout_val is None else float(timeout_val) @@ -1301,9 +1561,8 @@ class YoutubeDL(object): proxy_handler = compat_urllib_request.ProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 - https_handler = make_HTTPS_handler( - self.params.get('nocheckcertificate', False), debuglevel=debuglevel) - ydlh = YoutubeDLHandler(debuglevel=debuglevel) + https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) + ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( https_handler, proxy_handler, cookie_processor, ydlh) # Delete the default user-agent header, which would otherwise apply in