X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=09d2b18f262690023598dcc0aacf0628729a9c79;hb=24114fee747c5cec4aae41f93581561fcb902e10;hp=6afc1b73070cd410992733a59c0cd8bc9b9bff0e;hpb=36a0e46c39ea4f211dea9944177976e8f8364736;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6afc1b730..09d2b18f2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -46,6 +46,7 @@ from .utils import ( DateRange, DEFAULT_OUTTMPL, determine_ext, + determine_protocol, DownloadError, encode_compat_str, encodeFilename, @@ -241,7 +242,7 @@ class YoutubeDL(object): - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: (Experimental) Client-side IP address to bind to. - call_home: Boolean, true if we are allowed to contact the + call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. listformats: Print an overview of available video formats and exit. @@ -590,7 +591,7 @@ class YoutubeDL(object): return None def _match_entry(self, info_dict, incomplete): - """ Returns None if the file should be downloaded """ + """ Returns None iff the file should be downloaded """ video_title = info_dict.get('title', info_dict.get('id', 'video')) if 'title' in info_dict: @@ -898,6 +899,9 @@ class YoutubeDL(object): STR_OPERATORS = { '=': operator.eq, '!=': operator.ne, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), + '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?P<key>ext|acodec|vcodec|container|protocol) @@ -1244,6 +1248,12 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. 
+ for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + subtitles = info_dict.get('subtitles') if subtitles: for _, subtitle in subtitles.items(): @@ -1300,6 +1310,10 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Automatically determine protocol if missing (useful for format + # selection purposes) + if 'protocol' not in format: + format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -1986,8 +2000,19 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') + file_handler.file_open = file_open + opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler) + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play