X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=422f3ffca3ac583366fd6546ca950b967d1a5954;hb=536a55dabd7bcc2f34195beb84211028c934ed7a;hp=17a5407b983fbadc00453d081ccd567f10f3dade;hpb=67134eaba1a56cec4117000acb2fc9284c9cdd9a;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 17a5407b9..cd0805303 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,9 +24,6 @@ import time import tokenize import traceback -if os.name == 'nt': - import ctypes - from .compat import ( compat_basestring, compat_cookiejar, @@ -34,37 +31,46 @@ from .compat import ( compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_os_name, compat_str, compat_tokenize_tokenize, compat_urllib_error, compat_urllib_request, + compat_urllib_request_DataHandler, ) from .utils import ( - escape_url, + age_restricted, + args_to_str, ContentTooShortError, date_from_str, DateRange, DEFAULT_OUTTMPL, determine_ext, + determine_protocol, DownloadError, + encode_compat_str, encodeFilename, + error_to_compat_str, ExtractorError, format_bytes, formatSeconds, - HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, PagedList, parse_filesize, PerRequestProxyHandler, - PostProcessingError, platform_name, + PostProcessingError, preferredencoding, + prepend_extension, render_table, + replace_extension, SameFileError, sanitize_filename, sanitize_path, + sanitize_url, + sanitized_Request, std_headers, subtitles_filename, UnavailableVideoError, @@ -72,17 +78,15 @@ from .utils import ( version_tuple, write_json_file, write_string, + YoutubeDLCookieProcessor, YoutubeDLHandler, - prepend_extension, - replace_extension, - args_to_str, - age_restricted, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM3u8PP, FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegMergerPP, @@ -91,6 +95,9 @@ from .postprocessor import ( ) from .version import __version__ +if compat_os_name == 'nt': + import ctypes + class YoutubeDL(object): """YoutubeDL class. @@ -157,7 +164,7 @@ class YoutubeDL(object): writethumbnail: Write the thumbnail image to a file write_all_thumbnails: Write all thumbnail formats to files writesubtitles: Write the video subtitles to a file - writeautomaticsub: Write the automatic subtitles to a file + writeautomaticsub: Write the automatically generated subtitles to a file allsubtitles: Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video @@ -259,11 +266,13 @@ class YoutubeDL(object): the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args. + xattr_set_filesize, external_downloader_args, hls_use_mpegts. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. + postprocessor_args: A list of additional command-line arguments for the + postprocessor. """ params = None @@ -285,7 +294,11 @@ class YoutubeDL(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self.params = params + self.params = { + # Default parameters + 'nocheckcertificate': False, + } + self.params.update(params) self.cache = Cache(self) if params.get('bidi_workaround', False): @@ -440,7 +453,7 @@ class YoutubeDL(object): def to_console_title(self, message): if not self.params.get('consoletitle', False): return - if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) @@ -488,7 +501,7 @@ class YoutubeDL(object): tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += compat_str(traceback.format_exc()) + tb += encode_compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) @@ -511,7 +524,7 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' @@ -523,7 +536,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' @@ -556,7 +569,7 @@ class YoutubeDL(object): elif template_dict.get('height'): template_dict['resolution'] = '%sp' % template_dict['height'] elif template_dict.get('width'): - template_dict['resolution'] = '?x%d' % template_dict['width'] + template_dict['resolution'] = '%dx?' % template_dict['width'] sanitize = lambda k, v: sanitize_filename( compat_str(v), @@ -567,7 +580,7 @@ class YoutubeDL(object): if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) + outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 @@ -575,7 +588,7 @@ class YoutubeDL(object): # to workaround encoding issues with subprocess on python2 @ Windows if sys.version_info < (3, 0) and sys.platform == 'win32': filename = encodeFilename(filename, True).decode(preferredencoding()) - return filename + return sanitize_path(filename) except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None @@ -595,12 +608,12 @@ class YoutubeDL(object): if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' - date = info_dict.get('upload_date', None) + date = info_dict.get('upload_date') if date is not None: dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) - view_count = info_dict.get('view_count', None) + view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') if min_views is not None and view_count < min_views: @@ -667,14 +680,14 @@ class YoutubeDL(object): return self.process_ie_result(ie_result, download, extra_info) else: return ie_result - except ExtractorError as de: # An error we somewhat expected - self.report_error(compat_str(de), de.format_traceback()) + except ExtractorError as e: # An error we somewhat expected + self.report_error(compat_str(e), e.format_traceback()) break except MaxDownloadsReached: raise except Exception as e: if self.params.get('ignoreerrors', False): - self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) + self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) break else: raise @@ -697,7 +710,6 @@ class YoutubeDL(object): It will also download the videos if 'download'. Returns the resolved ie_result. """ - result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): @@ -726,7 +738,7 @@ class YoutubeDL(object): force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) - for f in ('_type', 'url'): + for f in ('_type', 'url', 'ie_key'): if f in force_properties: del force_properties[f] new_result = info.copy() @@ -738,18 +750,18 @@ class YoutubeDL(object): new_result, download=download, extra_info=extra_info) elif result_type == 'playlist' or result_type == 'multi_video': # We process each entry in the playlist - playlist = ie_result.get('title', None) or ie_result.get('id', None) + playlist = ie_result.get('title') or ie_result.get('id') self.to_screen('[download] Downloading playlist: %s' % playlist) playlist_results = [] playliststart = self.params.get('playliststart', 1) - 1 - playlistend = self.params.get('playlistend', None) + playlistend = self.params.get('playlistend') # For backwards compatibility, interpret -1 as whole list if playlistend == -1: playlistend = None - playlistitems_str = self.params.get('playlist_items', None) + playlistitems_str = self.params.get('playlist_items') playlistitems = None if playlistitems_str is not None: def iter_playlistitems(format): @@ -773,7 +785,7 @@ class YoutubeDL(object): entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( - "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % + '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % (ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): if playlistitems: @@ -787,7 +799,7 @@ class YoutubeDL(object): playliststart, playlistend) n_entries = len(entries) self.to_screen( - "[%s] playlist %s: Downloading %d videos" % + '[%s] playlist %s: Downloading %d videos' % (ie_result['extractor'], playlist, n_entries)) else: # iterable if playlistitems: @@ -798,7 +810,7 @@ class YoutubeDL(object): ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( - "[%s] playlist %s: Downloading %d videos" % + '[%s] playlist %s: Downloading %d videos' % (ie_result['extractor'], playlist, n_entries)) if self.params.get('playlistreverse', False): @@ -828,6 +840,7 @@ class YoutubeDL(object): extra_info=extra) playlist_results.append(entry_result) ie_result['entries'] = playlist_results + self.to_screen('[download] Finished downloading playlist: %s' % playlist) return ie_result elif result_type == 'compat_list': self.report_warning( @@ -888,11 +901,14 @@ class YoutubeDL(object): STR_OPERATORS = { '=': operator.eq, '!=': operator.ne, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), + '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) - \s*(?Pext|acodec|vcodec|container|protocol) + \s*(?Pext|acodec|vcodec|container|protocol|format_id) \s*(?P%s)(?P\s*\?)? - \s*(?P[a-zA-Z0-9_-]+) + \s*(?P[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) @@ -920,6 +936,7 @@ class YoutubeDL(object): PICKFIRST = 'PICKFIRST' MERGE = 'MERGE' SINGLE = 'SINGLE' + GROUP = 'GROUP' FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) def _parse_filter(tokens): @@ -930,7 +947,38 @@ class YoutubeDL(object): else: filter_parts.append(string) - def _parse_format_selection(tokens, endwith=[]): + def _remove_unused_ops(tokens): + # Remove operators that we don't use and join them with the surrounding strings + # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string, start, end, line in tokens: + if type == tokenize.OP and string == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string, start, end, line in tokens: + yield type, string, start, end, line + if type == tokenize.OP and string == ']': + break + elif type == tokenize.OP and string in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string + last_start = start + last_end = end + else: + last_string += string + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None for type, string, start, _, _ in tokens: @@ -940,26 +988,44 @@ class YoutubeDL(object): elif type in [tokenize.NAME, tokenize.NUMBER]: current_selector = FormatSelector(SINGLE, string, []) elif type == tokenize.OP: - if string in endwith: + if string == ')': + if not inside_group: + # ')' will be handled by the parentheses group + tokens.restore_last_token() break - if string == ',': + elif inside_merge and string in ['/', ',']: + tokens.restore_last_token() + break + elif inside_choice and string == ',': + tokens.restore_last_token() + break + elif string == ',': + if not current_selector: + raise syntax_error('"," must follow a format selector', start) selectors.append(current_selector) current_selector = None elif string == '/': + if not current_selector: + raise syntax_error('"/" must follow a format selector', start) first_choice = current_selector - second_choice = _parse_format_selection(tokens, [',']) - current_selector = None - selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), [])) + second_choice = _parse_format_selection(tokens, inside_choice=True) + current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) elif string == '[': if not current_selector: current_selector = FormatSelector(SINGLE, 'best', []) format_filter = _parse_filter(tokens) current_selector.filters.append(format_filter) + elif string == '(': + if current_selector: + raise syntax_error('Unexpected "("', start) + group = _parse_format_selection(tokens, inside_group=True) + current_selector = FormatSelector(GROUP, group, []) elif string == '+': video_selector = current_selector - audio_selector = _parse_format_selection(tokens, [',']) - current_selector = None - selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), [])) + audio_selector = _parse_format_selection(tokens, inside_merge=True) + if not video_selector or not audio_selector: + raise syntax_error('"+" must be between two format selectors', start) + current_selector = FormatSelector(MERGE, (video_selector, audio_selector), []) else: raise syntax_error('Operator not recognized: "{0}"'.format(string), start) elif type == tokenize.ENDMARKER: @@ -977,6 +1043,8 @@ class YoutubeDL(object): for format in f(formats): yield format return selector_function + elif selector.type == GROUP: + selector_function = _build_selector_function(selector.selector) elif selector.type == PICKFIRST: fs = [_build_selector_function(s) for s in selector.selector] @@ -990,7 +1058,13 @@ class YoutubeDL(object): format_spec = selector.selector def selector_function(formats): - if format_spec in ['best', 'worst', None]: + formats = list(formats) + if not formats: + return + if format_spec == 'all': + for f in formats: + yield f + elif format_spec in ['best', 'worst', None]: format_idx = 0 if format_spec == 'worst' else -1 audiovideo_formats = [ f for f in formats @@ -1044,6 +1118,12 @@ class YoutubeDL(object): 'contain the video, try using ' '"-f %s+%s"' % (format_2, format_1)) return + # Formats must be opposite (video+audio) + if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none': + self.report_error( + 'Both formats %s and %s are video-only, you must specify "-f video+audio"' + % (format_1, format_2)) + return output_ext = ( formats_info[0]['ext'] if self.params.get('merge_output_format') is None @@ -1081,8 +1161,32 @@ class YoutubeDL(object): return final_selector stream = io.BytesIO(format_spec.encode('utf-8')) - tokens = compat_tokenize_tokenize(stream.readline) - parsed_selector = _parse_format_selection(tokens) + try: + tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) + except tokenize.TokenError: + raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) + + class TokenIterator(object): + def __init__(self, tokens): + self.tokens = tokens + self.counter = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.counter >= len(self.tokens): + raise StopIteration() + value = self.tokens[self.counter] + self.counter += 1 + return value + + next = __next__ + + def restore_last_token(self): + self.counter -= 1 + + parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) return _build_selector_function(parsed_selector) def _calc_headers(self, info_dict): @@ -1099,7 +1203,7 @@ class YoutubeDL(object): return res def _calc_cookies(self, info_dict): - pr = compat_urllib_request.Request(info_dict['url']) + pr = sanitized_Request(info_dict['url']) self.cookiejar.add_cookie_header(pr) return pr.get_header('Cookie') @@ -1126,12 +1230,20 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): - if 'width' in t and 'height' in t: + t['url'] = sanitize_url(t['url']) + if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: t['id'] = '%d' % i - if thumbnails and 'thumbnail' not in info_dict: + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return + + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if 'display_id' not in info_dict and 'id' in info_dict: @@ -1146,13 +1258,28 @@ class YoutubeDL(object): except (ValueError, OverflowError, OSError): pass + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. + for field in ('chapter', 'season', 'episode'): + if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + + subtitles = info_dict.get('subtitles') + if subtitles: + for _, subtitle in subtitles.items(): + for subtitle_format in subtitle: + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if 'ext' not in subtitle_format: + subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() + if self.params.get('listsubtitles', False): if 'automatic_captions' in info_dict: self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') - self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') + self.list_subtitles(info_dict['id'], subtitles, 'subtitles') return info_dict['requested_subtitles'] = self.process_subtitles( - info_dict['id'], info_dict.get('subtitles'), + info_dict['id'], subtitles, info_dict.get('automatic_captions')) # We now pick which formats have to be downloaded @@ -1172,8 +1299,13 @@ class YoutubeDL(object): if 'url' not in format: raise ExtractorError('Missing "url" key in result (index %d)' % i) + format['url'] = sanitize_url(format['url']) + if format.get('format_id') is None: format['format_id'] = compat_str(i) + else: + # Sanitize format_id from characters used in format selector expression + format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id']) format_id = format['format_id'] if format_id not in formats_dict: formats_dict[format_id] = [] @@ -1195,6 +1327,10 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = determine_ext(format['url']).lower() + # Automatically determine protocol if missing (useful for format + # selection purposes) + if 'protocol' not in format: + format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -1207,31 +1343,24 @@ class YoutubeDL(object): # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, - # wich can't be exported to json + # which can't be exported to json info_dict['formats'] = formats if self.params.get('listformats'): self.list_formats(info_dict) return - if self.params.get('list_thumbnails'): - self.list_thumbnails(info_dict) - return req_format = self.params.get('format') if req_format is None: req_format_list = [] if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - info_dict['extractor'] in ['youtube', 'ted']): + not info_dict.get('is_live')): merger = FFmpegMergerPP(self) if merger.available and merger.can_merge(): req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) - formats_to_download = [] - if req_format == 'all': - formats_to_download = formats - else: - format_selector = self.build_format_selector(req_format) - formats_to_download = list(format_selector(formats)) + format_selector = self.build_format_selector(req_format) + formats_to_download = list(format_selector(formats)) if not formats_to_download: raise ExtractorError('requested format not available', expected=True) @@ -1359,7 +1488,7 @@ class YoutubeDL(object): if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: - self.report_error('unable to create directory ' + compat_str(err)) + self.report_error('unable to create directory ' + error_to_compat_str(err)) return if self.params.get('writedescription', False): @@ -1410,7 +1539,7 @@ class YoutubeDL(object): sub_info['url'], info_dict['id'], note=False) except ExtractorError as err: self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, compat_str(err.cause))) + (sub_lang, error_to_compat_str(err.cause))) continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -1514,12 +1643,14 @@ class YoutubeDL(object): self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - if success: + if success and filename != '-': # Fixup content fixup_policy = self.params.get('fixup') if fixup_policy is None: fixup_policy = 'detect_or_warn' + INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.' + stretched_ratio = info_dict.get('stretched_ratio') if stretched_ratio is not None and stretched_ratio != 1: if fixup_policy == 'warn': @@ -1532,15 +1663,18 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(stretched_pp) else: self.report_warning( - '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'], stretched_ratio)) + '%s: Non-uniform pixel ratio (%s). %s' + % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') - if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if (info_dict.get('requested_formats') is None and + info_dict.get('container') == 'm4a_dash'): if fixup_policy == 'warn': - self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( - info_dict['id'])) + self.report_warning( + '%s: writing DASH m4a. ' + 'Only some players support this container.' + % info_dict['id']) elif fixup_policy == 'detect_or_warn': fixup_pp = FFmpegFixupM4aPP(self) if fixup_pp.available: @@ -1548,8 +1682,27 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(fixup_pp) else: self.report_warning( - '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'])) + '%s: writing DASH m4a. ' + 'Only some players support this container. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) + else: + assert fixup_policy in ('ignore', 'never') + + if (info_dict.get('protocol') == 'm3u8_native' or + info_dict.get('protocol') == 'm3u8' and + self.params.get('hls_prefer_native')): + if fixup_policy == 'warn': + self.report_warning('%s: malformated aac bitstream.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM3u8PP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: malformated aac bitstream. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') @@ -1680,7 +1833,7 @@ class YoutubeDL(object): else: res = '%sp' % format['height'] elif format.get('width') is not None: - res = '?x%d' % format['width'] + res = '%dx?' % format['width'] else: res = default return res @@ -1689,6 +1842,10 @@ class YoutubeDL(object): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) ' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s] ' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: @@ -1709,7 +1866,9 @@ class YoutubeDL(object): if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('fps') is not None: - res += ', %sfps' % fdict['fps'] + if res: + res += ', ' + res += '%sfps' % fdict['fps'] if fdict.get('acodec') is not None: if res: res += ', ' @@ -1752,13 +1911,8 @@ class YoutubeDL(object): def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if not thumbnails: - tn_url = info_dict.get('thumbnail') - if tn_url: - thumbnails = [{'id': '0', 'url': tn_url}] - else: - self.to_screen( - '[info] No thumbnails present for %s' % info_dict['id']) - return + self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) + return self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) @@ -1779,27 +1933,8 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ - - # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not - # always respected by websites, some tend to give out URLs with non percent-encoded - # non-ASCII characters (see telemb.py, ard.py [#3412]) - # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) - # To work around aforementioned issue we will replace request's original URL with - # percent-encoded one - req_is_string = isinstance(req, compat_basestring) - url = req if req_is_string else req.get_full_url() - url_escaped = escape_url(url) - - # Substitute URL if any change after escaping - if url != url_escaped: - if req_is_string: - req = url_escaped - else: - req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request - req = req_type( - url_escaped, data=req.data, headers=req.headers, - origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) - + if isinstance(req, compat_basestring): + req = sanitized_Request(req) return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): @@ -1882,8 +2017,7 @@ class YoutubeDL(object): if os.access(opts_cookiefile, os.R_OK): self.cookiejar.load() - cookie_processor = compat_urllib_request.HTTPCookieProcessor( - self.cookiejar) + cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: if opts_proxy == '': proxies = {} @@ -1899,8 +2033,20 @@ class YoutubeDL(object): debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) + data_handler = compat_urllib_request_DataHandler() + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') + file_handler.file_open = file_open + opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh) + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play @@ -1952,10 +2098,10 @@ class YoutubeDL(object): (info_dict['extractor'], info_dict['id'], thumb_display_id)) try: uf = self.urlopen(t['url']) - with open(thumb_filename, 'wb') as thumbf: + with open(encodeFilename(thumb_filename), 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download thumbnail "%s": %s' % - (t['url'], compat_str(err))) + (t['url'], error_to_compat_str(err)))