X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=c8054544a60db20f041d22c5ee6e5405d935061a;hb=f219743e33a9a640bfc3845d74282774e51e1ad4;hp=e53a2b8ad3ea353d17bf31b387bc17e9632623bc;hpb=cdbccafed9e5852a8d6ceb9b09058520078d37f5;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e53a2b8ad..c8054544a 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -70,6 +71,7 @@ class YoutubeDL(object): logtostderr: Log messages to stderr instead of stdout. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file + writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file @@ -81,6 +83,14 @@ class YoutubeDL(object): keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file + cachedir: Location of the cache files in the filesystem. + None to disable filesystem cache. + noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. + download_archive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. 
The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -104,6 +114,17 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + + if (sys.version_info >= (3,) and sys.platform != 'win32' and + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not params['restrictfilenames']): + # On Python 3, the Unicode filesystem API will throw errors (#1474) + self.report_warning( + u'Assuming --restrict-filenames since file system encoding ' + u'cannot encode all characters. ' + u'Set the LC_ALL environment variable to fix this.') + params['restrictfilenames'] = True + self.params = params self.fd = FileDownloader(self, self.params) @@ -142,14 +163,10 @@ class YoutubeDL(object): def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - assert type(message) == type(u'') if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding(), 'ignore') - self._screen_file.write(output) - self._screen_file.flush() + write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" @@ -242,6 +259,10 @@ class YoutubeDL(object): """ Report that the metadata file has been written """ self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) + def report_writeannotations(self, annofn): + """ Report that the annotations file has been written. 
""" + self.to_screen(u'[info] Writing video annotations to: ' + annofn) + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: @@ -299,6 +320,13 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_limit', 0): + return u'Skipping "' + title + '" because it is age restricted' + if self.in_download_archive(info_dict): + return (u'%(title)s has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -499,6 +527,18 @@ class YoutubeDL(object): self.report_error(u'Cannot write description file ' + descfn) return + if self.params.get('writeannotations', False): + try: + annofn = filename + u'.annotations.xml' + self.report_writeannotations(annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) + except (KeyError, TypeError): + self.report_warning(u'There are no annotations to write.') + except (OSError, IOError): + self.report_error(u'Cannot write annotations file: ' + annofn) + return + subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) @@ -536,11 +576,15 @@ class YoutubeDL(object): thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format self.to_screen(u'[%s] %s: Downloading thumbnail ...' 
% (info_dict['extractor'], info_dict['id'])) - uf = compat_urllib_request.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) + try: + uf = compat_urllib_request.urlopen(info_dict['thumbnail']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % + (info_dict['extractor'], info_dict['id'], thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning(u'Unable to download thumbnail "%s": %s' % + (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): @@ -548,11 +592,11 @@ class YoutubeDL(object): else: try: success = self.fd._do_download(filename, info_dict) - except (OSError, IOError) as err: - raise UnavailableVideoError(err) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error(u'unable to download video data: %s' % str(err)) return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return @@ -564,6 +608,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -603,3 +649,26 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = 
self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n')