X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=414aa5a80cb575642ee4ff20e393c7c96afb4e14;hb=3bc2ddccc8622379ec11e802dff30a635285a9c8;hp=50f750593d1dbe54c97bad9946ad36aa5d798bf1;hpb=8b134b106251cae4b8cf478fc2466c6976ae8e51;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 50f750593..414aa5a80 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import collections import errno import io import json @@ -22,7 +23,6 @@ if os.name == 'nt': from .utils import ( compat_cookiejar, compat_http_client, - compat_print, compat_str, compat_urllib_error, compat_urllib_request, @@ -34,6 +34,7 @@ from .utils import ( encodeFilename, ExtractorError, format_bytes, + get_term_width, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -50,7 +51,7 @@ from .utils import ( YoutubeDLHandler, ) from .extractor import get_info_extractor, gen_extractors -from .FileDownloader import FileDownloader +from .downloader import get_suitable_downloader from .version import __version__ @@ -126,12 +127,15 @@ class YoutubeDL(object): noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. - downloadarchive: File name of a file where all downloads are recorded. + download_archive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. cookiefile: File name where cookies should be read from and dumped to. nocheckcertificate:Do not verify SSL certificates proxy: URL of the proxy server to use + socket_timeout: Time to wait for unresponsive hosts, in seconds + bidi_workaround: Work around buggy terminals without bidirectional text + support, using fridibi The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -146,15 +150,38 @@ class YoutubeDL(object): _num_downloads = None _screen_file = None - def __init__(self, params): + def __init__(self, params=None): """Create a FileDownloader object with the given options.""" self._ies = [] self._ies_instances = {} self._pps = [] - self._progress_hooks = [] + self._fd_progress_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + self._err_file = sys.stderr + self.params = {} if params is None else params + + if params.get('bidi_workaround', False): + try: + import pty + master, slave = pty.openpty() + width = get_term_width() + if width is None: + width_args = [] + else: + width_args = ['-w', str(width)] + self._fribidi = subprocess.Popen( + ['fribidi', '-c', 'UTF-8'] + width_args, + stdin=subprocess.PIPE, + stdout=slave, + stderr=self._err_file) + self._fribidi_channel = os.fdopen(master, 'rb') + except OSError as ose: + if ose.errno == 2: + self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') + else: + raise if (sys.version_info >= (3,) and sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] @@ -164,12 +191,9 @@ class YoutubeDL(object): u'Assuming --restrict-filenames since file system encoding ' u'cannot encode all charactes. ' u'Set the LC_ALL environment variable to fix this.') - params['restrictfilenames'] = True - - self.params = params - self.fd = FileDownloader(self, self.params) + self.params['restrictfilenames'] = True - if '%(stitle)s' in self.params['outtmpl']: + if '%(stitle)s' in self.params.get('outtmpl', ''): self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') self._setup_opener() @@ -204,13 +228,35 @@ class YoutubeDL(object): self._pps.append(pp) pp.set_downloader(self) + def add_downloader_progress_hook(self, ph): + """Add the progress hook to the file downloader""" + self._fd_progress_hooks.append(ph) + + def _bidi_workaround(self, message): + if not hasattr(self, '_fribidi_channel'): + return message + + assert type(message) == type(u'') + line_count = message.count(u'\n') + 1 + self._fribidi.stdin.write((message + u'\n').encode('utf-8')) + self._fribidi.stdin.flush() + res = u''.join(self._fribidi_channel.readline().decode('utf-8') + for _ in range(line_count)) + return res[:-len(u'\n')] + def to_screen(self, message, skip_eol=False): + """Print message to stdout if not in quiet mode.""" + return self.to_stdout(message, skip_eol, check_quiet=True) + + def to_stdout(self, message, skip_eol=False, check_quiet=False): """Print message to stdout if not in quiet mode.""" if self.params.get('logger'): self.params['logger'].debug(message) - elif not self.params.get('quiet', False): + elif not check_quiet or not self.params.get('quiet', False): + message = self._bidi_workaround(message) terminator = [u'\n', u''][skip_eol] output = message + terminator + write_string(output, self._screen_file) def to_stderr(self, message): @@ -219,10 +265,9 @@ class YoutubeDL(object): if self.params.get('logger'): self.params['logger'].error(message) else: + message = self._bidi_workaround(message) output = message + u'\n' - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding()) - sys.stderr.write(output) + write_string(output, self._err_file) def to_console_title(self, message): if not self.params.get('consoletitle', False): @@ -293,7 +338,7 @@ class YoutubeDL(object): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' - if sys.stderr.isatty() and os.name != 'nt': + if self._err_file.isatty() and os.name != 'nt': _msg_header = u'\033[0;33mWARNING:\033[0m' else: _msg_header = u'WARNING:' @@ -305,7 +350,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if sys.stderr.isatty() and os.name != 'nt': + if self._err_file.isatty() and os.name != 'nt': _msg_header = u'\033[0;31mERROR:\033[0m' else: _msg_header = u'ERROR:' @@ -354,18 +399,17 @@ class YoutubeDL(object): template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] sanitize = lambda k, v: sanitize_filename( - u'NA' if v is None else compat_str(v), + compat_str(v), restricted=self.params.get('restrictfilenames'), is_id=(k == u'id')) template_dict = dict((k, sanitize(k, v)) - for k, v in template_dict.items()) + for k, v in template_dict.items() + if v is not None) + template_dict = collections.defaultdict(lambda: u'NA', template_dict) tmpl = os.path.expanduser(self.params['outtmpl']) filename = tmpl % template_dict return filename - except KeyError as err: - self.report_error(u'Erroneous output template') - return None except ValueError as err: self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')') return None @@ -404,7 +448,8 @@ class YoutubeDL(object): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}): + def extract_info(self, url, download=True, ie_key=None, extra_info={}, + process=True): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. @@ -440,7 +485,10 @@ class YoutubeDL(object): 'webpage_url': url, 'extractor_key': ie.ie_key(), }) - return self.process_ie_result(ie_result, download, extra_info) + if process: + return self.process_ie_result(ie_result, download, extra_info) + else: + return ie_result except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break @@ -473,8 +521,33 @@ class YoutubeDL(object): download, ie_key=ie_result.get('ie_key'), extra_info=extra_info) + elif result_type == 'url_transparent': + # Use the information from the embedding page + info = self.extract_info( + ie_result['url'], ie_key=ie_result.get('ie_key'), + extra_info=extra_info, download=False, process=False) + + def make_result(embedded_info): + new_result = ie_result.copy() + for f in ('_type', 'url', 'ext', 'player_url', 'formats', + 'entries', 'urlhandle', 'ie_key', 'duration', + 'subtitles', 'annotations', 'format', + 'thumbnail', 'thumbnails'): + if f in new_result: + del new_result[f] + if f in embedded_info: + new_result[f] = embedded_info[f] + return new_result + new_result = make_result(info) + + assert new_result.get('_type') != 'url_transparent' + if new_result.get('_type') == 'compat_list': + new_result['entries'] = [ + make_result(e) for e in new_result['entries']] + + return self.process_ie_result( + new_result, download=download, extra_info=extra_info) elif result_type == 'playlist': - # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -665,22 +738,23 @@ class YoutubeDL(object): # Forced printings if self.params.get('forcetitle', False): - compat_print(info_dict['fulltitle']) + self.to_stdout(info_dict['fulltitle']) if self.params.get('forceid', False): - compat_print(info_dict['id']) + self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath - compat_print(info_dict['url'] + info_dict.get('play_path', u'')) + self.to_stdout(info_dict['url'] + info_dict.get('play_path', u'')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: - compat_print(info_dict['thumbnail']) + self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: - compat_print(info_dict['description']) + self.to_stdout(info_dict['description']) if self.params.get('forcefilename', False) and filename is not None: - compat_print(filename) + self.to_stdout(filename) if self.params.get('forceformat', False): - compat_print(info_dict['format']) + self.to_stdout(info_dict['format']) if self.params.get('forcejson', False): - compat_print(json.dumps(info_dict)) + info_dict['_filename'] = filename + self.to_stdout(json.dumps(info_dict)) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -755,7 +829,7 @@ class YoutubeDL(object): if self.params.get('writethumbnail', False): if info_dict.get('thumbnail') is not None: thumb_format = determine_ext(info_dict['thumbnail'], u'jpg') - thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format + thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format self.to_screen(u'[%s] %s: Downloading thumbnail ...' % (info_dict['extractor'], info_dict['id'])) try: @@ -773,7 +847,10 @@ class YoutubeDL(object): success = True else: try: - success = self.fd._do_download(filename, info_dict) + fd = get_suitable_downloader(info_dict)(self, self.params) + for ph in self._fd_progress_hooks: + fd.add_progress_hook(ph) + success = fd.download(filename, info_dict) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error(u'unable to download video data: %s' % str(err)) return @@ -811,6 +888,20 @@ class YoutubeDL(object): return self._download_retcode + def download_with_info_file(self, info_filename): + with io.open(info_filename, 'r', encoding='utf-8') as f: + info = json.load(f) + try: + self.process_ie_result(info, download=True) + except DownloadError: + webpage_url = info.get('webpage_url') + if webpage_url is not None: + self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url) + return self.download([webpage_url]) + else: + raise + return self._download_retcode + def post_process(self, filename, ie_info): """Run all the postprocessors on the given file.""" info = dict(ie_info) @@ -837,7 +928,7 @@ class YoutubeDL(object): def _make_archive_id(self, info_dict): # Future-proof against any change in case # and backwards compatibility with prior versions - extractor = info_dict.get('extractor') + extractor = info_dict.get('extractor_key') if extractor is None: if 'id' in info_dict: extractor = info_dict.get('ie_key') # key in a playlist @@ -875,6 +966,8 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): + if format.get('vcodec') == 'none': + return 'audio only' if format.get('_resolution') is not None: return format['_resolution'] if format.get('height') is not None: @@ -891,7 +984,8 @@ class YoutubeDL(object): res = u'' if fdict.get('format_note') is not None: res += fdict['format_note'] + u' ' - if fdict.get('vcodec') is not None: + if (fdict.get('vcodec') is not None and + fdict.get('vcodec') != 'none'): res += u'%-5s' % fdict['vcodec'] elif fdict.get('vbr') is not None: res += u'video' @@ -966,7 +1060,10 @@ class YoutubeDL(object): proxy_map.update(handler.proxies) write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') - def _setup_opener(self, timeout=20): + def _setup_opener(self): + timeout_val = self.params.get('socket_timeout') + timeout = 600 if timeout_val is None else float(timeout_val) + opts_cookiefile = self.params.get('cookiefile') opts_proxy = self.params.get('proxy')