X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=09d2b18f262690023598dcc0aacf0628729a9c79;hb=ed7cd1e859cf97e975a28a5e8c58a1d1aca819fe;hp=50425b8d7ea4855ddf1c67ce17df715b19cc0fc2;hpb=b0eeaf4f40b12d9a6a3b45918b0ec531db4d310c;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 50425b8d7..09d2b18f2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -46,6 +46,7 @@ from .utils import ( DateRange, DEFAULT_OUTTMPL, determine_ext, + determine_protocol, DownloadError, encode_compat_str, encodeFilename, @@ -898,6 +899,9 @@ class YoutubeDL(object): STR_OPERATORS = { '=': operator.eq, '!=': operator.ne, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), + '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?Pext|acodec|vcodec|container|protocol) @@ -1244,6 +1248,12 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. + for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + subtitles = info_dict.get('subtitles') if subtitles: for _, subtitle in subtitles.items(): @@ -1300,6 +1310,10 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Automatically determine protocol if missing (useful for format + # selection purposes) + if 'protocol' not in format: + format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -1312,7 +1326,7 @@ class YoutubeDL(object): # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, - # wich can't be exported to json + # which can't be exported to json info_dict['formats'] = formats if self.params.get('listformats'): self.list_formats(info_dict) @@ -1791,6 +1805,10 @@ class YoutubeDL(object): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) ' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s]' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: @@ -1982,8 +2000,19 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') + file_handler.file_open = file_open + opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler) + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play