X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=30ba94666a642c45bfc03af75eb09019c04ae9ad;hb=31cb6d8fefaa42aff293b03e37471bb05d5b8391;hp=19dabef2d012a9e9f678cc2bf1a649ab8714ec11;hpb=78a3a9f89ef4a9918c0e6dc854b99df9c2a94e4e;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 19dabef2d..30ba94666 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -5,17 +5,53 @@ from __future__ import absolute_import import errno import io +import json import os +import platform import re import shutil +import subprocess import socket import sys import time import traceback -from .utils import * +if os.name == 'nt': + import ctypes + +from .utils import ( + compat_cookiejar, + compat_http_client, + compat_print, + compat_str, + compat_urllib_error, + compat_urllib_request, + ContentTooShortError, + date_from_str, + DateRange, + determine_ext, + DownloadError, + encodeFilename, + ExtractorError, + format_bytes, + locked_file, + make_HTTPS_handler, + MaxDownloadsReached, + PostProcessingError, + platform_name, + preferredencoding, + SameFileError, + sanitize_filename, + subtitles_filename, + takewhile_inclusive, + UnavailableVideoError, + write_json_file, + write_string, + YoutubeDLHandler, +) from .extractor import get_info_extractor, gen_extractors from .FileDownloader import FileDownloader +from .version import __version__ class YoutubeDL(object): @@ -57,6 +93,7 @@ class YoutubeDL(object): forcethumbnail: Force printing thumbnail URL. forcedescription: Force printing description. forcefilename: Force printing final filename. + forcejson: Force printing info_dict as JSON. simulate: Do not download the video files. format: Video format code. format_limit: Highest quality format to try. @@ -68,6 +105,7 @@ class YoutubeDL(object): playlistend: Playlist item to end at. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. + logger: Log messages to a logging.Logger instance. logtostderr: Log messages to stderr instead of stdout. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file @@ -91,6 +129,9 @@ class YoutubeDL(object): downloadarchive: File name of a file where all downloads are recorded. Videos already present in the file are not downloaded again. + cookiefile: File name where cookies should be read from and dumped to. + nocheckcertificate:Do not verify SSL certificates + proxy: URL of the proxy server to use The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -131,6 +172,8 @@ class YoutubeDL(object): if '%(stitle)s' in self.params['outtmpl']: self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + self._setup_opener() + def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -163,7 +206,9 @@ class YoutubeDL(object): def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - if not self.params.get('quiet', False): + if self.params.get('logger'): + self.params['logger'].debug(message) + elif not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator write_string(output, self._screen_file) @@ -171,10 +216,47 @@ class YoutubeDL(object): def to_stderr(self, message): """Print message to stderr.""" assert type(message) == type(u'') - output = message + u'\n' - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding()) - sys.stderr.write(output) + if self.params.get('logger'): + self.params['logger'].error(message) + else: + output = message + u'\n' + if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr + output = output.encode(preferredencoding()) + sys.stderr.write(output) + + def to_console_title(self, message): + if not self.params.get('consoletitle', False): + return + if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + # c_wchar_p() might not be necessary if `message` is + # already of type unicode() + ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) + elif 'TERM' in os.environ: + write_string(u'\033]0;%s\007' % message, self._screen_file) + + def save_console_title(self): + if not self.params.get('consoletitle', False): + return + if 'TERM' in os.environ: + # Save the title on stack + write_string(u'\033[22;0t', self._screen_file) + + def restore_console_title(self): + if not self.params.get('consoletitle', False): + return + if 'TERM' in os.environ: + # Restore the title from stack + write_string(u'\033[23;0t', self._screen_file) + + def __enter__(self): + self.save_console_title() + return self + + def __exit__(self, *args): + self.restore_console_title() + + if self.params.get('cookiefile') is not None: + self.cookiejar.save() def fixed_template(self): """Checks if the output template is fixed.""" @@ -254,7 +336,7 @@ class YoutubeDL(object): """Report file has already been fully downloaded.""" try: self.to_screen(u'[download] %s has already been downloaded' % file_name) - except (UnicodeEncodeError) as err: + except UnicodeEncodeError: self.to_screen(u'[download] The file has already been downloaded') def increment_downloads(self): @@ -272,7 +354,7 @@ class YoutubeDL(object): autonumber_size = 5 autonumber_templ = u'%0' + str(autonumber_size) + u'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads - if template_dict['playlist_index'] is not None: + if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] sanitize = lambda k, v: sanitize_filename( @@ -295,15 +377,17 @@ class YoutubeDL(object): def _match_entry(self, info_dict): """ Returns None iff the file should be downloaded """ - title = info_dict['title'] - matchtitle = self.params.get('matchtitle', False) - if matchtitle: - if not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' - rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle: - if re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + if 'title' in info_dict: + # This can happen when we're just evaluating the playlist + title = info_dict['title'] + matchtitle = self.params.get('matchtitle', False) + if matchtitle: + if not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + rejecttitle = self.params.get('rejecttitle', False) + if rejecttitle: + if re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' date = info_dict.get('upload_date', None) if date is not None: dateRange = self.params.get('daterange', DateRange()) @@ -314,10 +398,16 @@ class YoutubeDL(object): if age_limit < info_dict.get('age_limit', 0): return u'Skipping "' + title + '" because it is age restricted' if self.in_download_archive(info_dict): - return (u'%(title)s has already been recorded in archive' - % info_dict) + return (u'%s has already been recorded in archive' + % info_dict.get('title', info_dict.get('id', u'video'))) return None + @staticmethod + def add_extra_info(info_dict, extra_info): + '''Set the keys from extra_info in info dict if they are missing''' + for key, value in extra_info.items(): + info_dict.setdefault(key, value) + def extract_info(self, url, download=True, ie_key=None, extra_info={}): ''' Returns a list with a dictionary for each video we find. @@ -344,17 +434,17 @@ class YoutubeDL(object): break if isinstance(ie_result, list): # Backwards compatibility: old IE result format - for result in ie_result: - result.update(extra_info) ie_result = { '_type': 'compat_list', 'entries': ie_result, } - else: - ie_result.update(extra_info) - if 'extractor' not in ie_result: - ie_result['extractor'] = ie.IE_NAME - return self.process_ie_result(ie_result, download=download) + self.add_extra_info(ie_result, + { + 'extractor': ie.IE_NAME, + 'webpage_url': url, + 'extractor_key': ie.ie_key(), + }) + return self.process_ie_result(ie_result, download, extra_info) except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break @@ -378,8 +468,8 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': - ie_result.update(extra_info) - return self.process_video_result(ie_result) + self.add_extra_info(ie_result, extra_info) + return self.process_video_result(ie_result, download=download) elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist @@ -388,6 +478,7 @@ class YoutubeDL(object): ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'playlist': + # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -413,12 +504,16 @@ class YoutubeDL(object): extra = { 'playlist': playlist, 'playlist_index': i + playliststart, + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + 'extractor_key': ie_result['extractor_key'], } - if not 'extractor' in entry: - # We set the extractor, if it's an url it will be set then to - # the new extractor, but if it's already a video we must make - # sure it's present: see issue #877 - entry['extractor'] = ie_result['extractor'] + + reason = self._match_entry(entry) + if reason is not None: + self.to_screen(u'[download] ' + reason) + continue + entry_result = self.process_ie_result(entry, download=download, extra_info=extra) @@ -427,10 +522,15 @@ class YoutubeDL(object): return ie_result elif result_type == 'compat_list': def _fixup(r): - r.setdefault('extractor', ie_result['extractor']) + self.add_extra_info(r, + { + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + 'extractor_key': ie_result['extractor_key'], + }) return r ie_result['entries'] = [ - self.process_ie_result(_fixup(r), download=download) + self.process_ie_result(_fixup(r), download, extra_info) for r in ie_result['entries'] ] return ie_result @@ -482,7 +582,7 @@ class YoutubeDL(object): format['format'] = u'{id} - {res}{note}'.format( id=format['format_id'], res=self.format_resolution(format), - note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', + note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing if 'ext' not in format: @@ -569,20 +669,22 @@ class YoutubeDL(object): # Forced printings if self.params.get('forcetitle', False): - compat_print(info_dict['title']) + compat_print(info_dict['fulltitle']) if self.params.get('forceid', False): compat_print(info_dict['id']) if self.params.get('forceurl', False): # For RTMP URLs, also include the playpath compat_print(info_dict['url'] + info_dict.get('play_path', u'')) - if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: + if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: compat_print(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and 'description' in info_dict: + if self.params.get('forcedescription', False) and info_dict.get('description') is not None: compat_print(info_dict['description']) if self.params.get('forcefilename', False) and filename is not None: compat_print(filename) if self.params.get('forceformat', False): compat_print(info_dict['format']) + if self.params.get('forcejson', False): + compat_print(json.dumps(info_dict)) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -630,7 +732,7 @@ class YoutubeDL(object): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['subtitles'] - sub_format = self.params.get('subtitlesformat') + sub_format = self.params.get('subtitlesformat', 'srt') for sub_lang in subtitles.keys(): sub = subtitles[sub_lang] if sub is None: @@ -645,7 +747,7 @@ class YoutubeDL(object): return if self.params.get('writeinfojson', False): - infofn = filename + u'.info.json' + infofn = os.path.splitext(filename)[0] + u'.info.json' self.report_writeinfojson(infofn) try: json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle']) @@ -702,7 +804,7 @@ class YoutubeDL(object): for url in url_list: try: #It also downloads the videos - videos = self.extract_info(url) + self.extract_info(url) except UnavailableVideoError: self.report_error(u'unable to download video') except MaxDownloadsReached: @@ -738,7 +840,16 @@ class YoutubeDL(object): fn = self.params.get('download_archive') if fn is None: return False - vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + extractor = info_dict.get('extractor_id') + if extractor is None: + if 'id' in info_dict: + extractor = info_dict.get('ie_key') # key in a playlist + if extractor is None: + return False # Incomplete video information + # Future-proof against any change in case + # and backwards compatibility with prior versions + extractor = extractor.lower() + vid_id = extractor + u' ' + info_dict['id'] try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: @@ -759,6 +870,8 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): + if format.get('_resolution') is not None: + return format['_resolution'] if format.get('height') is not None: if format.get('width') is not None: res = u'%sx%s' % (format['width'], format['height']) @@ -769,19 +882,120 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - formats_s = [] - for format in info_dict.get('formats', [info_dict]): - formats_s.append(u'%-15s%-7s %-15s%s' % ( + def format_note(fdict): + res = u'' + if fdict.get('format_note') is not None: + res += fdict['format_note'] + u' ' + if fdict.get('vcodec') is not None: + res += u'%-5s' % fdict['vcodec'] + elif fdict.get('vbr') is not None: + res += u'video' + if fdict.get('vbr') is not None: + res += u'@%4dk' % fdict['vbr'] + if fdict.get('acodec') is not None: + if res: + res += u', ' + res += u'%-5s' % fdict['acodec'] + elif fdict.get('abr') is not None: + if res: + res += u', ' + res += 'audio' + if fdict.get('abr') is not None: + res += u'@%3dk' % fdict['abr'] + if fdict.get('filesize') is not None: + if res: + res += u', ' + res += format_bytes(fdict['filesize']) + return res + + def line(format, idlen=20): + return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % ( format['format_id'], format['ext'], - format.get('format_note', ''), self.format_resolution(format), - ) - ) - if len(formats_s) != 1: - formats_s[0] += ' (worst)' - formats_s[-1] += ' (best)' - formats_s = "\n".join(formats_s) - self.to_screen(u'[info] Available formats for %s:\n' - u'format code extension note resolution\n%s' % ( - info_dict['id'], formats_s)) + format_note(format), + )) + + formats = info_dict.get('formats', [info_dict]) + idlen = max(len(u'format code'), + max(len(f['format_id']) for f in formats)) + formats_s = [line(f, idlen) for f in formats] + if len(formats) > 1: + formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' + formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' + + header_line = line({ + 'format_id': u'format code', 'ext': u'extension', + '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen) + self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, u"\n".join(formats_s))) + + def urlopen(self, req): + """ Start an HTTP download """ + return self._opener.open(req) + + def print_debug_header(self): + if not self.params.get('verbose'): + return + write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') + try: + sp = subprocess.Popen( + ['git', 'rev-parse', '--short', 'HEAD'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__))) + out, err = sp.communicate() + out = out.decode().strip() + if re.match('[0-9a-f]+', out): + write_string(u'[debug] Git HEAD: ' + out + u'\n') + except: + try: + sys.exc_clear() + except: + pass + write_string(u'[debug] Python version %s - %s' % + (platform.python_version(), platform_name()) + u'\n') + + proxy_map = {} + for handler in self._opener.handlers: + if hasattr(handler, 'proxies'): + proxy_map.update(handler.proxies) + write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n') + + def _setup_opener(self, timeout=300): + opts_cookiefile = self.params.get('cookiefile') + opts_proxy = self.params.get('proxy') + + if opts_cookiefile is None: + self.cookiejar = compat_cookiejar.CookieJar() + else: + self.cookiejar = compat_cookiejar.MozillaCookieJar( + opts_cookiefile) + if os.access(opts_cookiefile, os.R_OK): + self.cookiejar.load() + + cookie_processor = compat_urllib_request.HTTPCookieProcessor( + self.cookiejar) + if opts_proxy is not None: + if opts_proxy == '': + proxies = {} + else: + proxies = {'http': opts_proxy, 'https': opts_proxy} + else: + proxies = compat_urllib_request.getproxies() + # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) + if 'http' in proxies and 'https' not in proxies: + proxies['https'] = proxies['http'] + proxy_handler = compat_urllib_request.ProxyHandler(proxies) + https_handler = make_HTTPS_handler( + self.params.get('nocheckcertificate', False)) + opener = compat_urllib_request.build_opener( + https_handler, proxy_handler, cookie_processor, YoutubeDLHandler()) + # Delete the default user-agent header, which would otherwise apply in + # cases where our custom HTTP handler doesn't come into play + # (See https://github.com/rg3/youtube-dl/issues/1309 for details) + opener.addheaders = [] + self._opener = opener + + # TODO remove this global modification + compat_urllib_request.install_opener(opener) + socket.setdefaulttimeout(timeout)