Merge branch 'download-archive'
author Philipp Hagemeister <phihag@phihag.de>
Sun, 6 Oct 2013 14:30:26 +0000 (16:30 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 6 Oct 2013 14:30:26 +0000 (16:30 +0200)
Conflicts:
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py

1  2 
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/utils.py

diff --combined youtube_dl/YoutubeDL.py
index 9ada01bcc64ad00520c33d8fa86329197711db0d,856e9ac929eb3512bedf6d0daf3f10f9558e25ad..073a3837c2c233535a2b5207f6ee4605952f6924
@@@ -3,6 -3,7 +3,7 @@@
  
  from __future__ import absolute_import
  
+ import errno
  import io
  import os
  import re
@@@ -84,8 -85,9 +85,11 @@@ class YoutubeDL(object)
      cachedir:          Location of the cache files in the filesystem.
                         None to disable filesystem cache.
      noplaylist:        Download single video instead of a playlist if in doubt.
 +    age_limit:         An integer representing the user's age in years.
 +                       Unsuitable videos for the given age are skipped.
+     download_archive:  File name of a file where all downloads are recorded.
+                        Videos already present in the file are not downloaded
+                        again.
      
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
              dateRange = self.params.get('daterange', DateRange())
              if date not in dateRange:
                  return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 +        age_limit = self.params.get('age_limit')
 +        if age_limit is not None:
 +            if age_limit < info_dict.get('age_limit', 0):
 +                return u'Skipping "' + title + '" because it is age restricted'
+         if self.in_download_archive(info_dict):
+             return (u'%(title)s has already been recorded in archive'
+                     % info_dict)
          return None
          
      def extract_info(self, url, download=True, ie_key=None, extra_info={}):
                      self.report_error(u'postprocessing: %s' % str(err))
                      return
  
+         self.record_download_archive(info_dict)
      def download(self, url_list):
          """Download a given list of URLs."""
          if len(url_list) > 1 and self.fixed_template():
                  os.remove(encodeFilename(filename))
              except (IOError, OSError):
                  self.report_warning(u'Unable to remove downloaded video file')
+     def in_download_archive(self, info_dict):
+         """Return True if info_dict's video is listed in the download archive.
+         The archive is the file named by the 'download_archive' param and is
+         read line by line, looking for the entry "<extractor> <id>".  Returns
+         False when the param is unset or the file does not exist; any other
+         IOError propagates to the caller.
+         """
+         archive_path = self.params.get('download_archive')
+         if archive_path is None:
+             return False
+         wanted = info_dict['extractor'] + u' ' + info_dict['id']
+         try:
+             with locked_file(archive_path, 'r', encoding='utf-8') as archive:
+                 # Lines are compared after stripping surrounding whitespace.
+                 return any(entry.strip() == wanted for entry in archive)
+         except IOError as err:
+             # A missing archive file simply means nothing was recorded yet.
+             if err.errno != errno.ENOENT:
+                 raise
+         return False
+     def record_download_archive(self, info_dict):
+         """Append info_dict's "<extractor> <id>" line to the download archive.
+         Does nothing when the 'download_archive' param is unset.  The file is
+         opened in append mode through locked_file.
+         """
+         archive_path = self.params.get('download_archive')
+         if archive_path is not None:
+             entry = info_dict['extractor'] + u' ' + info_dict['id'] + u'\n'
+             with locked_file(archive_path, 'a', encoding='utf-8') as archive:
+                 archive.write(entry)
diff --combined youtube_dl/__init__.py
index 7a399273a0401fbdc36c8921146d9f09c720e146,a680d7c55757a17f05f21a4f418990877b377503..ba5206387a8b24e38a5594e3b411e6a6095f797a
@@@ -188,9 -188,9 +188,12 @@@ def parseOpts(overrideArguments=None)
      selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
      selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
      selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
 +    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
 +                         help='download only videos suitable for the given age',
 +                         default=None, type=int)
+     selection.add_option('--download-archive', metavar='FILE',
+                          dest='download_archive',
+                          help='Download only videos not present in the archive file. Record all downloaded videos in it.')
  
  
      authentication.add_option('-u', '--username',
@@@ -634,7 -634,7 +637,8 @@@ def _real_main(argv=None)
          'daterange': date,
          'cachedir': opts.cachedir,
          'youtube_print_sig_code': opts.youtube_print_sig_code,
 +        'age_limit': opts.age_limit,
+         'download_archive': opts.download_archive,
          })
  
      if opts.verbose:
diff --combined youtube_dl/utils.py
index e3feb12bfd05673d4c09797144afd4a14c0b6364,a463049a4d189f3d2b065f7a5955c5c079a28ca5..de26547621b2172b8bea45083606c407882346d2
@@@ -175,7 -175,7 +175,7 @@@ def compat_ord(c)
  compiled_regex_type = type(re.compile(''))
  
  std_headers = {
 -    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
 +    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
      'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Encoding': 'gzip, deflate',
@@@ -830,3 -830,99 +830,99 @@@ def get_cachedir(params={})
      cache_root = os.environ.get('XDG_CACHE_HOME',
                                  os.path.expanduser('~/.cache'))
      return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
+ # Cross-platform file locking
+ # Define _lock_file(f, exclusive) and _unlock_file(f) for the current
+ # platform: LockFileEx/UnlockFileEx via ctypes on win32, fcntl.lockf elsewhere.
+ if sys.platform == 'win32':
+     import ctypes.wintypes
+     import msvcrt
+     # ctypes layout of the structure passed as the "Overlapped" argument of
+     # LockFileEx/UnlockFileEx below.
+     class OVERLAPPED(ctypes.Structure):
+         _fields_ = [
+             ('Internal', ctypes.wintypes.LPVOID),
+             ('InternalHigh', ctypes.wintypes.LPVOID),
+             ('Offset', ctypes.wintypes.DWORD),
+             ('OffsetHigh', ctypes.wintypes.DWORD),
+             ('hEvent', ctypes.wintypes.HANDLE),
+         ]
+     kernel32 = ctypes.windll.kernel32
+     # Declare argument and return types so ctypes converts the values
+     # correctly when the functions are called.
+     LockFileEx = kernel32.LockFileEx
+     LockFileEx.argtypes = [
+         ctypes.wintypes.HANDLE,     # hFile
+         ctypes.wintypes.DWORD,      # dwFlags
+         ctypes.wintypes.DWORD,      # dwReserved
+         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+         ctypes.POINTER(OVERLAPPED)  # Overlapped
+     ]
+     LockFileEx.restype = ctypes.wintypes.BOOL
+     UnlockFileEx = kernel32.UnlockFileEx
+     UnlockFileEx.argtypes = [
+         ctypes.wintypes.HANDLE,     # hFile
+         ctypes.wintypes.DWORD,      # dwReserved
+         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
+         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
+         ctypes.POINTER(OVERLAPPED)  # Overlapped
+     ]
+     UnlockFileEx.restype = ctypes.wintypes.BOOL
+     # Low and high DWORD of the byte count handed to LockFileEx/UnlockFileEx;
+     # presumably chosen large enough to cover any file in full.
+     whole_low = 0xffffffff
+     whole_high = 0x7fffffff
+     def _lock_file(f, exclusive):
+         """Lock the open file f with LockFileEx; raise OSError on failure."""
+         overlapped = OVERLAPPED()
+         overlapped.Offset = 0
+         overlapped.OffsetHigh = 0
+         overlapped.hEvent = 0
+         # Stored on the file object so _unlock_file can pass the same pointer.
+         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+         handle = msvcrt.get_osfhandle(f.fileno())
+         # 0x2 is LOCKFILE_EXCLUSIVE_LOCK in the Win32 API; 0x0 sets no flags
+         # (a shared lock, per the LockFileEx documentation).
+         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+                           whole_low, whole_high, f._lock_file_overlapped_p):
+             raise OSError('Locking file failed: %r' % ctypes.FormatError())
+     def _unlock_file(f):
+         """Release the lock taken by _lock_file(f); raise OSError on failure."""
+         # Only valid after _lock_file has attached the OVERLAPPED pointer.
+         assert f._lock_file_overlapped_p
+         handle = msvcrt.get_osfhandle(f.fileno())
+         if not UnlockFileEx(handle, 0,
+                             whole_low, whole_high, f._lock_file_overlapped_p):
+             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+ else:
+     import fcntl
+     def _lock_file(f, exclusive):
+         """Lock f via fcntl.lockf: LOCK_EX if exclusive, else LOCK_SH."""
+         fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+     def _unlock_file(f):
+         """Release the fcntl.lockf lock held on f."""
+         fcntl.lockf(f, fcntl.LOCK_UN)
+ class locked_file(object):
+     """Context manager around a file that stays locked inside the with block.
+     __init__ opens filename with io.open (mode must be 'r', 'a' or 'w').
+     Entering the context locks the file through _lock_file -- a shared lock
+     for mode 'r', an exclusive lock otherwise -- and leaving it unlocks and
+     closes the file.  Iteration, write() and read() are forwarded to the
+     underlying file object.
+     """
+     def __init__(self, filename, mode, encoding=None):
+         assert mode in ['r', 'a', 'w']
+         self.f = io.open(filename, mode, encoding=encoding)
+         self.mode = mode
+     def __enter__(self):
+         exclusive = self.mode != 'r'
+         try:
+             _lock_file(self.f, exclusive)
+         except (IOError, OSError):
+             # The win32 _lock_file raises OSError, which is not an IOError on
+             # Python 2; catch both so the file is closed whenever locking
+             # fails, then let the error propagate unchanged.
+             self.f.close()
+             raise
+         return self
+     def __exit__(self, etype, value, traceback):
+         # Close the file even if releasing the lock fails.
+         try:
+             _unlock_file(self.f)
+         finally:
+             self.f.close()
+     def __iter__(self):
+         return iter(self.f)
+     def write(self, *args):
+         return self.f.write(*args)
+     def read(self, *args):
+         return self.f.read(*args)