X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=640b8c99d75d6215b232c0d2d678dd903f41e8bb;hb=70a1165b32acf253905109e9b4f245295d67af1f;hp=088b111eb835f684bf934b4ba9a2334a9822f43a;hpb=4d1718481755dde078678b6e55d457fc6351fcdd;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 088b111eb..640b8c99d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -4,8 +4,10 @@ from __future__ import absolute_import, unicode_literals import collections +import contextlib import datetime import errno +import fileinput import io import itertools import json @@ -28,6 +30,7 @@ from .compat import ( compat_basestring, compat_cookiejar, compat_expanduser, + compat_get_terminal_size, compat_http_client, compat_kwargs, compat_str, @@ -46,18 +49,19 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, - get_term_width, locked_file, make_HTTPS_handler, MaxDownloadsReached, PagedList, parse_filesize, + PerRequestProxyHandler, PostProcessingError, platform_name, preferredencoding, render_table, SameFileError, sanitize_filename, + sanitize_path, std_headers, subtitles_filename, takewhile_inclusive, @@ -181,6 +185,8 @@ class YoutubeDL(object): prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use + cn_verification_proxy: URL of the proxy to use for IP address verification + on Chinese sites. (Experimental) socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -199,18 +205,25 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * status: One of "downloading" and "finished". + * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. - If status is one of "downloading" or "finished", the + If status is one of "downloading", or "finished", the following properties may also be present: * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown - * tmpfilename: The filename we're currently writing to + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown + * fragment_index: The counter of the currently + downloaded video fragment. + * fragment_count: The number of fragments (= individual + files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. @@ -225,7 +238,6 @@ class YoutubeDL(object): call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. - external_downloader: Executable of the external downloader to call. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. match_filter: A function that gets called with the info_dict of @@ -235,12 +247,16 @@ class YoutubeDL(object): match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. + The following options determine which downloader is picked: + external_downloader: Executable of the external downloader to call. + None or unset for standard (built-in) downloader. + hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. The following parameters are not used by YoutubeDL itself, they are used by - the FileDownloader: + the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize. + xattr_set_filesize, external_downloader_args. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -274,7 +290,7 @@ class YoutubeDL(object): try: import pty master, slave = pty.openpty() - width = get_term_width() + width = compat_get_terminal_size().columns if width is None: width_args = [] else: @@ -298,8 +314,8 @@ class YoutubeDL(object): raise if (sys.version_info >= (3,) and sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] - and not params.get('restrictfilenames', False)): + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and + not params.get('restrictfilenames', False)): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -307,8 +323,10 @@ class YoutubeDL(object): 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True - if '%(stitle)s' in self.params.get('outtmpl', ''): - self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + if isinstance(params.get('outtmpl'), bytes): + self.report_warning( + 'Parameter outtmpl is bytes, but should be a unicode string. ' + 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') self._setup_opener() @@ -547,7 +565,7 @@ class YoutubeDL(object): if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 @@ -614,7 +632,7 @@ class YoutubeDL(object): Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. extra_info is a dict containing the extra values to add to each result - ''' + ''' if ie_key: ies = [self.get_info_extractor(ie_key)] @@ -951,30 +969,9 @@ class YoutubeDL(object): return res def _calc_cookies(self, info_dict): - class _PseudoRequest(object): - def __init__(self, url): - self.url = url - self.headers = {} - self.unverifiable = False - - def add_unredirected_header(self, k, v): - self.headers[k] = v - - def get_full_url(self): - return self.url - - def is_unverifiable(self): - return self.unverifiable - - def has_header(self, h): - return h in self.headers - - def get_header(self, h, default=None): - return self.headers.get(h, default) - - pr = _PseudoRequest(info_dict['url']) + pr = compat_urllib_request.Request(info_dict['url']) self.cookiejar.add_cookie_header(pr) - return pr.headers.get('Cookie') + return pr.get_header('Cookie') def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -1091,8 +1088,7 @@ class YoutubeDL(object): if req_format is None: req_format = 'best' formats_to_download = [] - # The -1 is for supporting YoutubeIE - if req_format in ('-1', 'all'): + if req_format == 'all': formats_to_download = formats else: for rfstr in req_format.split(','): @@ -1156,11 +1152,13 @@ class YoutubeDL(object): info_dict.update(formats_to_download[-1]) return info_dict - def process_subtitles(self, video_id, available_subs, available_autocaps): + def process_subtitles(self, video_id, normal_subtitles, automatic_captions): """Select the requested subtitles and their format""" - if available_autocaps and self.params.get('writeautomaticsub'): - available_subs = available_subs.copy() - for lang, cap_info in available_autocaps.items(): + available_subs = {} + if normal_subtitles and self.params.get('writesubtitles'): + available_subs.update(normal_subtitles) + if automatic_captions and self.params.get('writeautomaticsub'): + for lang, cap_info in automatic_captions.items(): if lang not in available_subs: available_subs[lang] = cap_info @@ -1187,14 +1185,6 @@ class YoutubeDL(object): if formats is None: self.report_warning('%s subtitles not available for %s' % (lang, video_id)) continue - if isinstance(formats, compat_str): - # TODO: convert all IE with subtitles support to the new format - # and remove this - subs[lang] = { - 'ext': formats_preference[0], - 'data': formats, - } - continue for ext in formats_preference: if ext == 'best': f = formats[-1] @@ -1225,9 +1215,6 @@ class YoutubeDL(object): if len(info_dict['title']) > 200: info_dict['title'] = info_dict['title'][:197] + '...' - # Keep for backwards compatibility - info_dict['stitle'] = info_dict['title'] - if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] @@ -1273,7 +1260,7 @@ class YoutubeDL(object): return try: - dn = os.path.dirname(encodeFilename(filename)) + dn = os.path.dirname(sanitize_path(encodeFilename(filename))) if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: @@ -1317,17 +1304,18 @@ class YoutubeDL(object): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] + ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] if sub_info.get('data') is not None: sub_data = sub_info['data'] else: try: - uf = self.urlopen(sub_info['url']) - sub_data = uf.read().decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + sub_data = ie._download_webpage( + sub_info['url'], info_dict['id'], note=False) + except ExtractorError as err: self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, compat_str(err))) + (sub_lang, compat_str(err.cause))) continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -1369,7 +1357,7 @@ class YoutubeDL(object): downloaded = [] success = True merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) - if not merger._executable: + if not merger.available: postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' @@ -1448,8 +1436,8 @@ class YoutubeDL(object): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and - '%' not in outtmpl - and self.params.get('max_downloads') != 1): + '%' not in outtmpl and + self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: @@ -1468,8 +1456,11 @@ class YoutubeDL(object): return self._download_retcode def download_with_info_file(self, info_filename): - with io.open(info_filename, 'r', encoding='utf-8') as f: - info = json.load(f) + with contextlib.closing(fileinput.FileInput( + [info_filename], mode='r', + openhook=fileinput.hook_encoded('utf-8'))) as f: + # FileInput doesn't have a read method, we can't call json.load + info = json.loads('\n'.join(f)) try: self.process_ie_result(info, download=True) except DownloadError: @@ -1616,29 +1607,18 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - def line(format, idlen=20): - return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( - format['format_id'], - format['ext'], - self.format_resolution(format), - self._format_note(format), - )) - formats = info_dict.get('formats', [info_dict]) - idlen = max(len('format code'), - max(len(f['format_id']) for f in formats)) - formats_s = [ - line(f, idlen) for f in formats + table = [ + [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] + for f in formats if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: - formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' + table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' - header_line = line({ - 'format_id': 'format code', 'ext': 'extension', - 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) + header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( - '[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + '[info] Available formats for %s:\n%s' % + (info_dict['id'], render_table(header_line, table))) def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') @@ -1721,15 +1701,15 @@ class YoutubeDL(object): out = out.decode().strip() if re.match('[0-9a-f]+', out): self._write_string('[debug] Git HEAD: ' + out + '\n') - except: + except Exception: try: sys.exc_clear() - except: + except Exception: pass self._write_string('[debug] Python version %s - %s\n' % ( platform.python_version(), platform_name())) - exe_versions = FFmpegPostProcessor.get_versions() + exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() exe_str = ', '.join( '%s %s' % (exe, v) @@ -1784,13 +1764,14 @@ class YoutubeDL(object): # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] - proxy_handler = compat_urllib_request.ProxyHandler(proxies) + proxy_handler = PerRequestProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( - https_handler, proxy_handler, cookie_processor, ydlh) + proxy_handler, https_handler, cookie_processor, ydlh) + # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details)