X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=09d2b18f262690023598dcc0aacf0628729a9c79;hb=ab180fc648d331643aaf340c3cf7e92bcbb10bce;hp=57df6a2795971929166f191b705bbfdc55e85835;hpb=8900ab4d9bc6bf8a05a1a2608efaff08561945e8;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 57df6a279..09d2b18f2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -46,9 +46,11 @@ from .utils import ( DateRange, DEFAULT_OUTTMPL, determine_ext, + determine_protocol, DownloadError, + encode_compat_str, encodeFilename, - error_to_str, + error_to_compat_str, ExtractorError, format_bytes, formatSeconds, @@ -496,7 +498,7 @@ class YoutubeDL(object): tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += compat_str(traceback.format_exc()) + tb += encode_compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) @@ -676,13 +678,13 @@ class YoutubeDL(object): else: return ie_result except ExtractorError as e: # An error we somewhat expected - self.report_error(error_to_str(e), e.format_traceback()) + self.report_error(compat_str(e), e.format_traceback()) break except MaxDownloadsReached: raise except Exception as e: if self.params.get('ignoreerrors', False): - self.report_error(error_to_str(e), tb=compat_str(traceback.format_exc())) + self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) break else: raise @@ -897,6 +899,9 @@ class YoutubeDL(object): STR_OPERATORS = { '=': operator.eq, '!=': operator.ne, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), + '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?Pext|acodec|vcodec|container|protocol) @@ -1243,6 +1248,12 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. + for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + subtitles = info_dict.get('subtitles') if subtitles: for _, subtitle in subtitles.items(): @@ -1299,6 +1310,10 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Automatically determine protocol if missing (useful for format + # selection purposes) + if 'protocol' not in format: + format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -1311,7 +1326,7 @@ class YoutubeDL(object): # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, - # wich can't be exported to json + # which can't be exported to json info_dict['formats'] = formats if self.params.get('listformats'): self.list_formats(info_dict) @@ -1460,7 +1475,7 @@ class YoutubeDL(object): if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: - self.report_error('unable to create directory ' + error_to_str(err)) + self.report_error('unable to create directory ' + error_to_compat_str(err)) return if self.params.get('writedescription', False): @@ -1511,7 +1526,7 @@ class YoutubeDL(object): sub_info['url'], info_dict['id'], note=False) except ExtractorError as err: self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_str(err.cause))) + (sub_lang, error_to_compat_str(err.cause))) continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -1790,6 +1805,10 @@ class YoutubeDL(object): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) ' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s]' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: @@ -1981,8 +2000,19 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') + file_handler.file_open = file_open + opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler) + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play @@ -2040,4 +2070,4 @@ class YoutubeDL(object): (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download thumbnail "%s": %s' % - (t['url'], error_to_str(err))) + (t['url'], error_to_compat_str(err)))