Add basic --download-archive option
authorPhilipp Hagemeister <phihag@phihag.de>
Sun, 6 Oct 2013 02:27:09 +0000 (04:27 +0200)
committerPhilipp Hagemeister <phihag@phihag.de>
Sun, 6 Oct 2013 02:27:10 +0000 (04:27 +0200)
Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/utils.py

index 2503fd09b976b86da60c9a82fd31495266106c97..1f5f75e309393caf39626259b65b4919c0011adf 100644 (file)
@@ -3,6 +3,7 @@
 
 from __future__ import absolute_import
 
+import errno
 import io
 import os
 import re
@@ -84,6 +85,9 @@ class YoutubeDL(object):
     cachedir:          Location of the cache files in the filesystem.
                        None to disable filesystem cache.
     noplaylist:        Download single video instead of a playlist if in doubt.
+    downloadarchive:   File name of a file where all downloads are recorded.
+                       Videos already present in the file are not downloaded
+                       again.
     
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -309,6 +313,9 @@ class YoutubeDL(object):
             dateRange = self.params.get('daterange', DateRange())
             if date not in dateRange:
                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+        if self.in_download_archive(info_dict):
+            return (u'%(title)s) has already been recorded in archive'
+                    % info_dict)
         return None
         
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
@@ -578,6 +585,8 @@ class YoutubeDL(object):
                     self.report_error(u'postprocessing: %s' % str(err))
                     return
 
+        self.record_download_archive(info_dict)
+
     def download(self, url_list):
         """Download a given list of URLs."""
         if len(url_list) > 1 and self.fixed_template():
@@ -617,3 +626,26 @@ class YoutubeDL(object):
                 os.remove(encodeFilename(filename))
             except (IOError, OSError):
                 self.report_warning(u'Unable to remove downloaded video file')
+
+    def in_download_archive(self, info_dict):
+        fn = self.params.get('download_archive')
+        if fn is None:
+            return False
+        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        try:
+            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+                for line in archive_file:
+                    if line.strip() == vid_id:
+                        return True
+        except IOError as ioe:
+            if ioe.errno != errno.ENOENT:
+                raise
+        return False
+
+    def record_download_archive(self, info_dict):
+        fn = self.params.get('download_archive')
+        if fn is None:
+            return
+        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+            archive_file.write(vid_id + u'\n')
index 03df835f23ebe58bd1048d336ba68019e4a2e33d..a680d7c55757a17f05f21a4f418990877b377503 100644 (file)
@@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
     selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--download-archive', metavar='FILE',
+                         dest='download_archive',
+                         help='Download only videos not present in the archive file. Record all downloaded videos in it.')
 
 
     authentication.add_option('-u', '--username',
@@ -631,6 +634,7 @@ def _real_main(argv=None):
         'daterange': date,
         'cachedir': opts.cachedir,
         'youtube_print_sig_code': opts.youtube_print_sig_code,
+        'download_archive': opts.download_archive,
         })
 
     if opts.verbose:
index f5f9cde99b0c65363a098923e4c350182896f7f1..a463049a4d189f3d2b065f7a5955c5c079a28ca5 100644 (file)
@@ -830,3 +830,99 @@ def get_cachedir(params={}):
     cache_root = os.environ.get('XDG_CACHE_HOME',
                                 os.path.expanduser('~/.cache'))
     return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+    import ctypes.wintypes
+    import msvcrt
+
+    class OVERLAPPED(ctypes.Structure):
+        _fields_ = [
+            ('Internal', ctypes.wintypes.LPVOID),
+            ('InternalHigh', ctypes.wintypes.LPVOID),
+            ('Offset', ctypes.wintypes.DWORD),
+            ('OffsetHigh', ctypes.wintypes.DWORD),
+            ('hEvent', ctypes.wintypes.HANDLE),
+        ]
+
+    kernel32 = ctypes.windll.kernel32
+    LockFileEx = kernel32.LockFileEx
+    LockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwFlags
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    LockFileEx.restype = ctypes.wintypes.BOOL
+    UnlockFileEx = kernel32.UnlockFileEx
+    UnlockFileEx.argtypes = [
+        ctypes.wintypes.HANDLE,     # hFile
+        ctypes.wintypes.DWORD,      # dwReserved
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+        ctypes.POINTER(OVERLAPPED)  # Overlapped
+    ]
+    UnlockFileEx.restype = ctypes.wintypes.BOOL
+    whole_low = 0xffffffff
+    whole_high = 0x7fffffff
+
+    def _lock_file(f, exclusive):
+        overlapped = OVERLAPPED()
+        overlapped.Offset = 0
+        overlapped.OffsetHigh = 0
+        overlapped.hEvent = 0
+        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+                          whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+    def _unlock_file(f):
+        assert f._lock_file_overlapped_p
+        handle = msvcrt.get_osfhandle(f.fileno())
+        if not UnlockFileEx(handle, 0,
+                            whole_low, whole_high, f._lock_file_overlapped_p):
+            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+    import fcntl
+
+    def _lock_file(f, exclusive):
+        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+    def _unlock_file(f):
+        fcntl.lockf(f, fcntl.LOCK_UN)
+
+
+class locked_file(object):
+    def __init__(self, filename, mode, encoding=None):
+        assert mode in ['r', 'a', 'w']
+        self.f = io.open(filename, mode, encoding=encoding)
+        self.mode = mode
+
+    def __enter__(self):
+        exclusive = self.mode != 'r'
+        try:
+            _lock_file(self.f, exclusive)
+        except IOError:
+            self.f.close()
+            raise
+        return self
+
+    def __exit__(self, etype, value, traceback):
+        try:
+            _unlock_file(self.f)
+        finally:
+            self.f.close()
+
+    def __iter__(self):
+        return iter(self.f)
+
+    def write(self, *args):
+        return self.f.write(*args)
+
+    def read(self, *args):
+        return self.f.read(*args)