Match --download-archive during playlist processing (Fixes #1745)

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index d8d55d7d7d416fb307af5d5b98a0560f74791b49..beb7d0cd1970021509f704a79cab8b9b61781413 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import
  
  import errno
  import io
+import json
  import os
  import re
  import shutil
@@ -84,7 +85,7 @@ class YoutubeDL(object):
      forcethumbnail:    Force printing thumbnail URL.
      forcedescription:  Force printing description.
      forcefilename:     Force printing final filename.
-    forcejson:         Force printing json information.
+    forcejson:         Force printing info_dict as JSON.
      simulate:          Do not download the video files.
      format:            Video format code.
      format_limit:      Highest quality format to try.
@@ -354,15 +355,17 @@ class YoutubeDL(object):
      def _match_entry(self, info_dict):
          """ Returns None iff the file should be downloaded """
  
-        title = info_dict['title']
-        matchtitle = self.params.get('matchtitle', False)
-        if matchtitle:
-            if not re.search(matchtitle, title, re.IGNORECASE):
-                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
-        rejecttitle = self.params.get('rejecttitle', False)
-        if rejecttitle:
-            if re.search(rejecttitle, title, re.IGNORECASE):
-                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+        if 'title' in info_dict:
+            # The title can be missing when we're just evaluating the playlist
+            title = info_dict['title']
+            matchtitle = self.params.get('matchtitle', False)
+            if matchtitle:
+                if not re.search(matchtitle, title, re.IGNORECASE):
+                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+            rejecttitle = self.params.get('rejecttitle', False)
+            if rejecttitle:
+                if re.search(rejecttitle, title, re.IGNORECASE):
+                    return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
          date = info_dict.get('upload_date', None)
          if date is not None:
              dateRange = self.params.get('daterange', DateRange())
@@ -373,8 +376,8 @@ class YoutubeDL(object):
              if age_limit < info_dict.get('age_limit', 0):
                  return u'Skipping "' + title + '" because it is age restricted'
          if self.in_download_archive(info_dict):
-            return (u'%(title)s has already been recorded in archive'
-                    % info_dict)
+            return (u'%s has already been recorded in archive'
+                    % info_dict.get('title', info_dict.get('id', u'video')))
          return None
  
      @staticmethod
@@ -453,7 +456,7 @@ class YoutubeDL(object):
                                       ie_key=ie_result.get('ie_key'),
                                       extra_info=extra_info)
          elif result_type == 'playlist':
-            self.add_extra_info(ie_result, extra_info)
+
              # We process each entry in the playlist
              playlist = ie_result.get('title', None) or ie_result.get('id', None)
              self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -483,6 +486,12 @@ class YoutubeDL(object):
                      'webpage_url': ie_result['webpage_url'],
                      'extractor_key': ie_result['extractor_key'],
                  }
+
+                reason = self._match_entry(entry)
+                if reason is not None:
+                    self.to_screen(u'[download] ' + reason)
+                    continue
+
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
                                                        extra_info=extra)
@@ -638,7 +647,7 @@ class YoutubeDL(object):
  
          # Forced printings
          if self.params.get('forcetitle', False):
-            compat_print(info_dict['title'])
+            compat_print(info_dict['fulltitle'])
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
@@ -716,7 +725,7 @@ class YoutubeDL(object):
                      return
  
          if self.params.get('writeinfojson', False):
-            infofn = filename + u'.info.json'
+            infofn = os.path.splitext(filename)[0] + u'.info.json'
              self.report_writeinfojson(infofn)
              try:
                  json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
@@ -809,7 +818,16 @@ class YoutubeDL(object):
          fn = self.params.get('download_archive')
          if fn is None:
              return False
-        vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+        extractor = info_dict.get('extractor_id')
+        if extractor is None:
+            if 'id' in info_dict:
+                extractor = info_dict.get('ie_key')  # key in a playlist
+        if extractor is None:
+            return False  # Incomplete video information
+        # Lowercase to future-proof against any change in case
+        # and to stay backwards compatible with prior versions
+        extractor = extractor.lower()
+        vid_id = extractor + u' ' + info_dict['id']
          try:
              with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                  for line in archive_file: