X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=f22a8bd0e044b9c10ecad56187a15a310a4c1d7d;hb=1c1218fefcbabb49a0d06fc45d64a299920460ec;hp=b289bd9e26bbc9993e6f1295a31d20b3275f5f48;hpb=591078babff1d783bed872c5b441dc570d354448;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b289bd9e2..f22a8bd0e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -70,16 +71,26 @@ class YoutubeDL(object): logtostderr: Log messages to stderr instead of stdout. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file + writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file + cachedir: Location of the cache files in the filesystem. + None to disable filesystem cache. + noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. + downloadarchive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -103,6 +114,17 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + + if (sys.version_info >= (3,) and sys.platform != 'win32' and + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not params['restrictfilenames']): + # On Python 3, the Unicode filesystem API will throw errors (#1474) + self.report_warning( + u'Assuming --restrict-filenames since file system encoding ' + u'cannot encode all charactes. ' + u'Set the LC_ALL environment variable to fix this.') + params['restrictfilenames'] = True + self.params = params self.fd = FileDownloader(self, self.params) @@ -141,14 +163,10 @@ class YoutubeDL(object): def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - assert type(message) == type(u'') if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding(), 'ignore') - self._screen_file.write(output) - self._screen_file.flush() + write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" @@ -241,6 +259,10 @@ class YoutubeDL(object): """ Report that the metadata file has been written """ self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) + def report_writeannotations(self, annofn): + """ Report that the annotations file has been written. """ + self.to_screen(u'[info] Writing video annotations to: ' + annofn) + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: @@ -298,6 +320,13 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_limit', 0): + return u'Skipping "' + title + '" because it is age restricted' + if self.in_download_archive(info_dict): + return (u'%(title)s has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -361,13 +390,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': ie_result.update(extra_info) - if 'playlist' not in ie_result: - # It isn't part of a playlist - ie_result['playlist'] = None - ie_result['playlist_index'] = None - if download: - self.process_info(ie_result) - return ie_result + return self.process_video_result(ie_result) elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist @@ -425,6 +448,89 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def process_video_result(self, info_dict, download=True): + assert info_dict.get('_type', 'video') == 'video' + + if 'playlist' not in info_dict: + # It isn't part of a playlist + info_dict['playlist'] = None + info_dict['playlist_index'] = None + + # This extractors handle format selection themselves + if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']: + self.process_info(info_dict) + return info_dict + + # We now pick which formats have to be downloaded + if info_dict.get('formats') is None: + # There's only one format available + formats = [info_dict] + else: + formats = info_dict['formats'] + + # We check that all the formats have the format and format_id fields + for (i, format) in enumerate(formats): + if format.get('format') is None: + if format.get('height') is not None: + if format.get('width') is not None: + format_desc = u'%sx%s' % (format['width'], format['height']) + else: + format_desc = u'%sp' % format['height'] + else: + format_desc = '???' + format['format'] = format_desc + if format.get('format_id') is None: + format['format_id'] = compat_str(i) + + if self.params.get('listformats', None): + self.list_formats(info_dict) + return + + format_limit = self.params.get('format_limit', None) + if format_limit: + formats = [f for f in formats if f['format_id'] <= format_limit] + if self.params.get('prefer_free_formats'): + def _free_formats_key(f): + try: + ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext']) + except ValueError: + ext_ord = -1 + # We only compare the extension if they have the same height and width + return (f.get('height'), f.get('width'), ext_ord) + formats = sorted(formats, key=_free_formats_key) + + req_format = self.params.get('format', 'best') + formats_to_download = [] + if req_format == 'best' or req_format is None: + formats_to_download = [formats[-1]] + elif req_format == 'worst': + formats_to_download = [formats[0]] + # The -1 is for supporting YoutubeIE + elif req_format in ('-1', 'all'): + formats_to_download = formats + else: + # We can accept formats requestd in the format: 34/10/5, we pick + # the first that is availble, starting from left + req_formats = req_format.split('/') + for rf in req_formats: + matches = filter(lambda f:f['format_id'] == rf ,formats) + if matches: + formats_to_download = [matches[0]] + break + if not formats_to_download: + raise ExtractorError(u'requested format not available') + + if download: + if len(formats_to_download) > 1: + self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) + for format in formats_to_download: + new_info = dict(info_dict) + new_info.update(format) + self.process_info(new_info) + # We update the info dict with the best quality format (backwards compatibility) + info_dict.update(formats_to_download[-1]) + return info_dict + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -492,13 +598,26 @@ class YoutubeDL(object): self.report_writedescription(descfn) with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) + except (KeyError, TypeError): + self.report_warning(u'There\'s no description to write.') except (OSError, IOError): self.report_error(u'Cannot write description file ' + descfn) return + if self.params.get('writeannotations', False): + try: + annofn = filename + u'.annotations.xml' + self.report_writeannotations(annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) + except (KeyError, TypeError): + self.report_warning(u'There are no annotations to write.') + except (OSError, IOError): + self.report_error(u'Cannot write annotations file: ' + annofn) + return + subtitles_are_requested = any([self.params.get('writesubtitles', False), - self.params.get('writeautomaticsub'), - self.params.get('allsubtitles', False)]) + self.params.get('writeautomaticsub')]) if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE @@ -534,11 +653,15 @@ class YoutubeDL(object): thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format self.to_screen(u'[%s] %s: Downloading thumbnail ...' % (info_dict['extractor'], info_dict['id'])) - uf = compat_urllib_request.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) + try: + uf = compat_urllib_request.urlopen(info_dict['thumbnail']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % + (info_dict['extractor'], info_dict['id'], thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning(u'Unable to download thumbnail "%s": %s' % + (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): @@ -546,11 +669,11 @@ class YoutubeDL(object): else: try: success = self.fd._do_download(filename, info_dict) - except (OSError, IOError) as err: - raise UnavailableVideoError(err) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error(u'unable to download video data: %s' % str(err)) return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return @@ -562,6 +685,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -601,3 +726,40 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n') + + def list_formats(self, info_dict): + formats_s = [] + for format in info_dict.get('formats', [info_dict]): + formats_s.append("%s\t:\t%s\t[%s]" % (format['format_id'], + format['ext'], + format.get('format', '???'), + ) + ) + if len(formats_s) != 1: + formats_s[0] += ' (worst)' + formats_s[-1] += ' (best)' + formats_s = "\n".join(formats_s) + self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s))