X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=702a6ad50b6c6bf2d3f3bfbd8c873cb3a64c8e7b;hb=c3124c3085e6a9a83ee31ace3a7d528a324c42da;hp=584dbf8a6e1c8b686c223a46dce856801a2f6118;hpb=df8301fef55f9144f06337c10b8570b6560caa24;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 584dbf8a6..702a6ad50 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -49,6 +49,7 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, + HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -71,6 +72,7 @@ from .utils import ( write_string, YoutubeDLHandler, prepend_extension, + replace_extension, args_to_str, age_restricted, ) @@ -117,7 +119,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. - videopassword: Password for acces a video. + videopassword: Password for accessing a video. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. @@ -137,6 +139,7 @@ class YoutubeDL(object): outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names ignoreerrors: Do not stop on download errors. + force_generic_extractor: Force downloader to use the generic extractor nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. @@ -259,7 +262,8 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. - exec_cmd: Arbitrary command to run after downloading + postprocessor_args: A list of additional command-line arguments for the + postprocessor. """ params = None @@ -625,13 +629,16 @@ class YoutubeDL(object): info_dict.setdefault(key, value) def extract_info(self, url, download=True, ie_key=None, extra_info={}, - process=True): + process=True, force_generic_extractor=False): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. extra_info is a dict containing the extra values to add to each result ''' + if not ie_key and force_generic_extractor: + ie_key = 'Generic' + if ie_key: ies = [self.get_info_extractor(ie_key)] else: @@ -759,7 +766,9 @@ class YoutubeDL(object): if isinstance(ie_entries, list): n_all_entries = len(ie_entries) if playlistitems: - entries = [ie_entries[i - 1] for i in playlistitems] + entries = [ + ie_entries[i - 1] for i in playlistitems + if -n_all_entries <= i - 1 < n_all_entries] else: entries = ie_entries[playliststart:playlistend] n_entries = len(entries) @@ -921,8 +930,9 @@ class YoutubeDL(object): if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: return audiovideo_formats[format_idx] - # for audio only urls, select the best/worst audio format - elif all(f.get('acodec') != 'none' for f in available_formats): + # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format + elif (all(f.get('acodec') != 'none' for f in available_formats) or + all(f.get('vcodec') != 'none' for f in available_formats)): return available_formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ @@ -1000,7 +1010,7 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): - if 'width' in t and 'height' in t: + if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: t['id'] = '%d' % i @@ -1012,13 +1022,13 @@ class YoutubeDL(object): info_dict['display_id'] = info_dict['id'] if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: - # Working around negative timestamps in Windows - # (see http://bugs.python.org/issue1646728) - if info_dict['timestamp'] < 0 and os.name == 'nt': - info_dict['timestamp'] = 0 - upload_date = datetime.datetime.utcfromtimestamp( - info_dict['timestamp']) - info_dict['upload_date'] = upload_date.strftime('%Y%m%d') + # Working around out-of-range timestamp values (e.g. negative ones on Windows, + # see http://bugs.python.org/issue1646728) + try: + upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp']) + info_dict['upload_date'] = upload_date.strftime('%Y%m%d') + except (ValueError, OverflowError, OSError): + pass if self.params.get('listsubtitles', False): if 'automatic_captions' in info_dict: @@ -1029,12 +1039,6 @@ class YoutubeDL(object): info_dict['id'], info_dict.get('subtitles'), info_dict.get('automatic_captions')) - # This extractors handle format selection themselves - if info_dict['extractor'] in ['Youku']: - if download: - self.process_info(info_dict) - return info_dict - # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available @@ -1045,6 +1049,8 @@ class YoutubeDL(object): if not formats: raise ExtractorError('No video formats found!') + formats_dict = {} + # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): if 'url' not in format: @@ -1052,6 +1058,18 @@ class YoutubeDL(object): if format.get('format_id') is None: format['format_id'] = compat_str(i) + format_id = format['format_id'] + if format_id not in formats_dict: + formats_dict[format_id] = [] + formats_dict[format_id].append(format) + + # Make sure all formats have unique format_id + for format_id, ambiguous_formats in formats_dict.items(): + if len(ambiguous_formats) > 1: + for i, format in enumerate(ambiguous_formats): + format['format_id'] = '%s-%d' % (format_id, i) + + for i, format in enumerate(formats): if format.get('format') is None: format['format'] = '{id} - {res}{note}'.format( id=format['format_id'], @@ -1085,8 +1103,12 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: req_format_list = [] - if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available: - req_format_list.append('bestvideo+bestaudio') + if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and + info_dict['extractor'] in ['youtube', 'ted'] and + not info_dict.get('is_live')): + merger = FFmpegMergerPP(self) + if merger.available and merger.can_merge(): + req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) formats_to_download = [] @@ -1270,7 +1292,7 @@ class YoutubeDL(object): return if self.params.get('writedescription', False): - descfn = filename + '.description' + descfn = replace_extension(filename, 'description', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): self.to_screen('[info] Video description is already present') elif info_dict.get('description') is None: @@ -1285,7 +1307,7 @@ class YoutubeDL(object): return if self.params.get('writeannotations', False): - annofn = filename + '.annotations.xml' + annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') else: @@ -1332,16 +1354,13 @@ class YoutubeDL(object): return if self.params.get('writeinfojson', False): - infofn = os.path.splitext(filename)[0] + '.info.json' + infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): self.to_screen('[info] Video description metadata is already present') else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) - filtered_info_dict = dict( - (k, v) for k, v in info_dict.items() - if k not in ['requested_formats', 'requested_subtitles']) try: - write_json_file(filtered_info_dict, infofn) + write_json_file(self.filter_requested_info(info_dict), infofn) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return @@ -1366,7 +1385,7 @@ class YoutubeDL(object): postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged') + ' The formats won\'t be merged.') else: postprocessors = [merger] @@ -1385,11 +1404,18 @@ class YoutubeDL(object): # TODO: Check acodec/vcodec return False + filename_real_ext = os.path.splitext(filename)[1][1:] + filename_wo_ext = ( + os.path.splitext(filename)[0] + if filename_real_ext == info_dict['ext'] + else filename) requested_formats = info_dict['requested_formats'] if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): - filename = os.path.splitext(filename)[0] + '.mkv' - self.report_warning('You have requested formats incompatible for merge. ' - 'The formats will be merged into mkv') + info_dict['ext'] = 'mkv' + self.report_warning( + 'Requested formats are incompatible for merge and will be merged into mkv.') + # Ensure filename always has a correct extension for successful merge + filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) if os.path.exists(encodeFilename(filename)): self.to_screen( '[download] %s has already been downloaded and ' @@ -1399,7 +1425,7 @@ class YoutubeDL(object): new_info = dict(info_dict) new_info.update(f) fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id']) + fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext']) downloaded.append(fname) partial_success = dl(fname, new_info) success = success and partial_success @@ -1474,7 +1500,8 @@ class YoutubeDL(object): for url in url_list: try: # It also downloads the videos - res = self.extract_info(url) + res = self.extract_info( + url, force_generic_extractor=self.params.get('force_generic_extractor', False)) except UnavailableVideoError: self.report_error('unable to download video') except MaxDownloadsReached: @@ -1491,7 +1518,7 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = json.loads('\n'.join(f)) + info = self.filter_requested_info(json.loads('\n'.join(f))) try: self.process_ie_result(info, download=True) except DownloadError: @@ -1503,6 +1530,12 @@ class YoutubeDL(object): raise return self._download_retcode + @staticmethod + def filter_requested_info(info_dict): + return dict( + (k, v) for k, v in info_dict.items() + if k not in ['requested_formats', 'requested_subtitles']) + def post_process(self, filename, ie_info): """Run all the postprocessors on the given file.""" info = dict(ie_info) @@ -1512,6 +1545,7 @@ class YoutubeDL(object): pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: + files_to_delete = [] try: files_to_delete, info = pp.run(info) except PostProcessingError as e: @@ -1690,7 +1724,8 @@ class YoutubeDL(object): if req_is_string: req = url_escaped else: - req = compat_urllib_request.Request( + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + req = req_type( url_escaped, data=req.data, headers=req.headers, origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) @@ -1836,7 +1871,7 @@ class YoutubeDL(object): thumb_ext = determine_ext(t['url'], 'jpg') suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' - thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): self.to_screen('[%s] %s: Thumbnail %sis already present' %