Merge branch 'download-archive'

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 9ada01bcc64ad00520c33d8fa86329197711db0d..073a3837c2c233535a2b5207f6ee4605952f6924 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -3,6 +3,7 @@
  
  from __future__ import absolute_import
  
+import errno
  import io
  import os
  import re
@@ -86,6 +87,9 @@ class YoutubeDL(object):
      noplaylist:        Download single video instead of a playlist if in doubt.
      age_limit:         An integer representing the user's age in years.
                         Unsuitable videos for the given age are skipped.
+    download_archive:  File name of a file where all downloads are recorded.
+                       Videos already present in the file are not downloaded
+                       again.
      
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
@@ -315,6 +319,9 @@ class YoutubeDL(object):
          if age_limit is not None:
              if age_limit < info_dict.get('age_limit', 0):
                  return u'Skipping "' + title + '" because it is age restricted'
+        if self.in_download_archive(info_dict):
+            return (u'%(title)s has already been recorded in archive'
+                    % info_dict)
          return None
          
      def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -584,6 +591,8 @@ class YoutubeDL(object):
                      self.report_error(u'postprocessing: %s' % str(err))
                      return
  
+        self.record_download_archive(info_dict)
+
      def download(self, url_list):
          """Download a given list of URLs."""
          if len(url_list) > 1 and self.fixed_template():
@@ -623,3 +632,26 @@ class YoutubeDL(object):
                  os.remove(encodeFilename(filename))
              except (IOError, OSError):
                  self.report_warning(u'Unable to remove downloaded video file')
+
+    def in_download_archive(self, info_dict):
+        """Return True if info_dict's video is listed in the download archive.
+
+        The archive file name is read from the 'download_archive' parameter;
+        when that parameter is not set, nothing counts as archived.  Each line
+        of the archive is compared (after stripping whitespace) against the
+        entry "<extractor> <id>" built from info_dict.
+
+        A missing archive file (ENOENT) is treated as an empty archive; any
+        other IOError is re-raised.
+        """
+        archive_fn = self.params.get('download_archive')
+        if archive_fn is None:
+            return False
+        entry = info_dict['extractor'] + u' ' + info_dict['id']
+        try:
+            with locked_file(archive_fn, 'r', encoding='utf-8') as archive:
+                # Stops reading at the first matching line.
+                return any(line.strip() == entry for line in archive)
+        except IOError as err:
+            # Only a not-yet-existing archive is tolerated.
+            if err.errno != errno.ENOENT:
+                raise
+        return False
+
+    def record_download_archive(self, info_dict):
+        """Append info_dict's video to the download archive file.
+
+        The archive file name is read from the 'download_archive' parameter;
+        when that parameter is not set, nothing is written.  The recorded
+        entry is one line of the form "<extractor> <id>".
+        """
+        archive_fn = self.params.get('download_archive')
+        if archive_fn is not None:
+            # Build the full line first so a missing key fails before the
+            # archive file is opened.
+            entry_line = info_dict['extractor'] + u' ' + info_dict['id'] + u'\n'
+            with locked_file(archive_fn, 'a', encoding='utf-8') as archive:
+                archive.write(entry_line)