X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2FYoutubeDL.py;h=c8054544a60db20f041d22c5ee6e5405d935061a;hb=f219743e33a9a640bfc3845d74282774e51e1ad4;hp=cd3d6ea7b5b196abc4fc4ba4ed2b4077cbe899b6;hpb=56c7366547462ecec0536df58971249a8a870ddd;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index cd3d6ea7b..c8054544a 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -70,16 +71,26 @@ class YoutubeDL(object): logtostderr: Log messages to stderr instead of stdout. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file + writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) - subtitleslang: Language of the subtitles to download + subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file + cachedir: Location of the cache files in the filesystem. + None to disable filesystem cache. + noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Unsuitable videos for the given age are skipped. + downloadarchive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -103,6 +114,17 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + + if (sys.version_info >= (3,) and sys.platform != 'win32' and + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not params['restrictfilenames']): + # On Python 3, the Unicode filesystem API will throw errors (#1474) + self.report_warning( + u'Assuming --restrict-filenames since file system encoding ' + u'cannot encode all charactes. ' + u'Set the LC_ALL environment variable to fix this.') + params['restrictfilenames'] = True + self.params = params self.fd = FileDownloader(self, self.params) @@ -141,14 +163,10 @@ class YoutubeDL(object): def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - assert type(message) == type(u'') if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding(), 'ignore') - self._screen_file.write(output) - self._screen_file.flush() + write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" @@ -241,6 +259,10 @@ class YoutubeDL(object): """ Report that the metadata file has been written """ self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) + def report_writeannotations(self, annofn): + """ Report that the annotations file has been written. """ + self.to_screen(u'[info] Writing video annotations to: ' + annofn) + def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: @@ -278,7 +300,7 @@ class YoutubeDL(object): self.report_error(u'Erroneous output template') return None except ValueError as err: - self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) + self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')') return None def _match_entry(self, info_dict): @@ -298,6 +320,13 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + age_limit = self.params.get('age_limit') + if age_limit is not None: + if age_limit < info_dict.get('age_limit', 0): + return u'Skipping "' + title + '" because it is age restricted' + if self.in_download_archive(info_dict): + return (u'%(title)s has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -360,6 +389,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': + ie_result.update(extra_info) if 'playlist' not in ie_result: # It isn't part of a playlist ie_result['playlist'] = None @@ -459,7 +489,8 @@ class YoutubeDL(object): if self.params.get('forceid', False): compat_print(info_dict['id']) if self.params.get('forceurl', False): - compat_print(info_dict['url']) + # For RTMP URLs, also include the playpath + compat_print(info_dict['url'] + info_dict.get('play_path', u'')) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: compat_print(info_dict['thumbnail']) if self.params.get('forcedescription', False) and 'description' in info_dict: @@ -490,45 +521,45 @@ class YoutubeDL(object): self.report_writedescription(descfn) with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(info_dict['description']) + except (KeyError, TypeError): + self.report_warning(u'There\'s no description to write.') except (OSError, IOError): self.report_error(u'Cannot write description file ' + descfn) return - if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: + if self.params.get('writeannotations', False): + try: + annofn = filename + u'.annotations.xml' + self.report_writeannotations(annofn) + with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) + except (KeyError, TypeError): + self.report_warning(u'There are no annotations to write.') + except (OSError, IOError): + self.report_error(u'Cannot write annotations file: ' + annofn) + return + + subtitles_are_requested = any([self.params.get('writesubtitles', False), + self.params.get('writeautomaticsub')]) + + if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE - subtitle = info_dict['subtitles'][0] - (sub_error, sub_lang, sub) = subtitle + subtitles = info_dict['subtitles'] sub_format = self.params.get('subtitlesformat') - if sub_error: - self.report_warning("Some error while getting the subtitles") - else: + for sub_lang in subtitles.keys(): + sub = subtitles[sub_lang] + if sub is None: + continue try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format + sub_filename = subtitles_filename(filename, sub_lang, sub_format) self.report_writesubtitles(sub_filename) with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) + subfile.write(sub) except (OSError, IOError): self.report_error(u'Cannot write subtitles file ' + descfn) return - if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: - subtitles = info_dict['subtitles'] - sub_format = self.params.get('subtitlesformat') - for subtitle in subtitles: - (sub_error, sub_lang, sub) = subtitle - if sub_error: - self.report_warning("Some error while getting the subtitles") - else: - try: - sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format - self.report_writesubtitles(sub_filename) - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: - subfile.write(sub) - except (OSError, IOError): - self.report_error(u'Cannot write subtitles file ' + descfn) - return - if self.params.get('writeinfojson', False): infofn = filename + u'.info.json' self.report_writeinfojson(infofn) @@ -540,18 +571,20 @@ class YoutubeDL(object): return if self.params.get('writethumbnail', False): - if 'thumbnail' in info_dict: - thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2] - if not thumb_format: - thumb_format = 'jpg' + if info_dict.get('thumbnail') is not None: + thumb_format = determine_ext(info_dict['thumbnail'], u'jpg') thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format self.to_screen(u'[%s] %s: Downloading thumbnail ...' % (info_dict['extractor'], info_dict['id'])) - uf = compat_urllib_request.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) + try: + uf = compat_urllib_request.urlopen(info_dict['thumbnail']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % + (info_dict['extractor'], info_dict['id'], thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning(u'Unable to download thumbnail "%s": %s' % + (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): @@ -559,11 +592,11 @@ class YoutubeDL(object): else: try: success = self.fd._do_download(filename, info_dict) - except (OSError, IOError) as err: - raise UnavailableVideoError() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error(u'unable to download video data: %s' % str(err)) return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return @@ -575,6 +608,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -607,10 +642,33 @@ class YoutubeDL(object): # No clear decision yet, let IE decide keep_video = keep_video_wish except PostProcessingError as e: - self.to_stderr(u'ERROR: ' + e.msg) + self.report_error(e.msg) if keep_video is False and not self.params.get('keepvideo', False): try: self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n')