Redtube test now works

[youtube-dl] / youtube_dl / FileDownloader.py
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index ba3277577071f1c4ffdf9797da889b8bd15bd353..574863e7c9da066b1fc29741a83a58f2a2baaed4 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -7,6 +7,7 @@ import math
  import io
  import os
  import re
  import io
  import os
  import re
+import shutil
  import socket
  import subprocess
  import sys
  import socket
  import subprocess
  import sys
@@ -17,6 +18,7 @@ if os.name == 'nt':
      import ctypes
  
  from .utils import *
      import ctypes
  
  from .utils import *
+from .InfoExtractors import get_info_extractor
  
  
  class FileDownloader(object):
  
  
  class FileDownloader(object):
@@ -78,6 +80,7 @@ class FileDownloader(object):
      updatetime:        Use the Last-modified header to set output file timestamps.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
      updatetime:        Use the Last-modified header to set output file timestamps.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
+    writethumbnail:    Write the thumbnail image to a file
      writesubtitles:    Write the video subtitles to a file
      onlysubtitles:     Downloads only the subtitles of the video
      allsubtitles:      Downloads all the subtitles of the video
      writesubtitles:    Write the video subtitles to a file
      onlysubtitles:     Downloads only the subtitles of the video
      allsubtitles:      Downloads all the subtitles of the video
@@ -88,6 +91,7 @@ class FileDownloader(object):
      keepvideo:         Keep the video file after post-processing
      min_filesize:      Skip files smaller than this size
      max_filesize:      Skip files larger than this size
      keepvideo:         Keep the video file after post-processing
      min_filesize:      Skip files smaller than this size
      max_filesize:      Skip files larger than this size
+    daterange:         A DateRange object, download only if the upload_date is in the range.
      """
  
      params = None
      """
  
      params = None
@@ -120,7 +124,7 @@ class FileDownloader(object):
              exponent = 0
          else:
              exponent = int(math.log(bytes, 1024.0))
              exponent = 0
          else:
              exponent = int(math.log(bytes, 1024.0))
-        suffix = 'bkMGTPEZY'[exponent]
+        suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
          converted = float(bytes) / float(1024 ** exponent)
          return '%.2f%s' % (converted, suffix)
  
          converted = float(bytes) / float(1024 ** exponent)
          return '%.2f%s' % (converted, suffix)
  
@@ -253,7 +257,7 @@ class FileDownloader(object):
          Print the message to stderr, it will be prefixed with 'WARNING:'
          If stderr is a tty file the 'WARNING:' will be colored
          '''
          Print the message to stderr, it will be prefixed with 'WARNING:'
          If stderr is a tty file the 'WARNING:' will be colored
          '''
-        if sys.stderr.isatty():
+        if sys.stderr.isatty() and os.name != 'nt':
              _msg_header=u'\033[0;33mWARNING:\033[0m'
          else:
              _msg_header=u'WARNING:'
              _msg_header=u'\033[0;33mWARNING:\033[0m'
          else:
              _msg_header=u'WARNING:'
@@ -265,7 +269,7 @@ class FileDownloader(object):
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
          in red if stderr is a tty file.
          '''
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
          in red if stderr is a tty file.
          '''
-        if sys.stderr.isatty():
+        if sys.stderr.isatty() and os.name != 'nt':
              _msg_header = u'\033[0;31mERROR:\033[0m'
          else:
              _msg_header = u'ERROR:'
              _msg_header = u'\033[0;31mERROR:\033[0m'
          else:
              _msg_header = u'ERROR:'
@@ -343,12 +347,13 @@ class FileDownloader(object):
          """Report download progress."""
          if self.params.get('noprogress', False):
              return
          """Report download progress."""
          if self.params.get('noprogress', False):
              return
+        clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
          if self.params.get('progress_with_newline', False):
              self.to_screen(u'[download] %s of %s at %s ETA %s' %
                  (percent_str, data_len_str, speed_str, eta_str))
          else:
          if self.params.get('progress_with_newline', False):
              self.to_screen(u'[download] %s of %s at %s ETA %s' %
                  (percent_str, data_len_str, speed_str, eta_str))
          else:
-            self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
-                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+            self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
+                (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
          self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                  (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
  
          self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                  (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
  
@@ -388,7 +393,13 @@ class FileDownloader(object):
              template_dict = dict(info_dict)
  
              template_dict['epoch'] = int(time.time())
              template_dict = dict(info_dict)
  
              template_dict['epoch'] = int(time.time())
-            template_dict['autonumber'] = u'%05d' % self._num_downloads
+            autonumber_size = self.params.get('autonumber_size')
+            if autonumber_size is None:
+                autonumber_size = 5
+            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
+            template_dict['autonumber'] = autonumber_templ % self._num_downloads
+            if template_dict['playlist_index'] is not None:
+                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
  
              sanitize = lambda k,v: sanitize_filename(
                  u'NA' if v is None else compat_str(v),
  
              sanitize = lambda k,v: sanitize_filename(
                  u'NA' if v is None else compat_str(v),
@@ -399,10 +410,10 @@ class FileDownloader(object):
              filename = self.params['outtmpl'] % template_dict
              return filename
          except KeyError as err:
              filename = self.params['outtmpl'] % template_dict
              return filename
          except KeyError as err:
-            self.trouble(u'ERROR: Erroneous output template')
+            self.report_error(u'Erroneous output template')
              return None
          except ValueError as err:
              return None
          except ValueError as err:
-            self.trouble(u'ERROR: Insufficient system charset ' + repr(preferredencoding()))
+            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
              return None
  
      def _match_entry(self, info_dict):
              return None
  
      def _match_entry(self, info_dict):
@@ -417,21 +428,37 @@ class FileDownloader(object):
          if rejecttitle:
              if re.search(rejecttitle, title, re.IGNORECASE):
                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
          if rejecttitle:
              if re.search(rejecttitle, title, re.IGNORECASE):
                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+        date = info_dict.get('upload_date', None)
+        if date is not None:
+            dateRange = self.params.get('daterange', DateRange())
+            if date not in dateRange:
+                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
          return None
          
          return None
          
-    def extract_info(self, url):
+    def extract_info(self, url, download = True, ie_name = None):
          '''
          Returns a list with a dictionary for each video we find.
          '''
          Returns a list with a dictionary for each video we find.
+        If 'download', also downloads the videos.
           '''
          suitable_found = False
           '''
          suitable_found = False
-        for ie in self._ies:
+        
+        #We copy the original list
+        ies = list(self._ies)
+
+        if ie_name is not None:
+            #We put in the first place the given info extractor
+            first_ie = get_info_extractor(ie_name)()
+            first_ie.set_downloader(self)
+            ies.insert(0, first_ie)
+
+        for ie in ies:
              # Go to next InfoExtractor if not suitable
              if not ie.suitable(url):
                  continue
  
              # Warn if the _WORKING attribute is False
              if not ie.working():
              # Go to next InfoExtractor if not suitable
              if not ie.suitable(url):
                  continue
  
              # Warn if the _WORKING attribute is False
              if not ie.working():
-                self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
+                self.report_warning(u'the program functionality for this site has been marked as broken, '
                                 u'and will probably not work. If you want to go on, use the -i option.')
  
              # Suitable InfoExtractor found
                                 u'and will probably not work. If you want to go on, use the -i option.')
  
              # Suitable InfoExtractor found
@@ -440,65 +467,93 @@ class FileDownloader(object):
              # Extract information from URL and process it
              try:
                  ie_results = ie.extract(url)
              # Extract information from URL and process it
              try:
                  ie_results = ie.extract(url)
-                results = self.process_ie_results(ie_results, ie)
+                if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+                    break
+                results = []
+                for ie_result in ie_results:
+                    if not 'extractor' in ie_result:
+                        #The extractor has not been set somewhere else, so we use the name of this ie
+                        ie_result['extractor'] = ie.IE_NAME
+                    results.append(self.process_ie_result(ie_result, download))
                  return results
              except ExtractorError as de: # An error we somewhat expected
                  return results
              except ExtractorError as de: # An error we somewhat expected
-                self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
+                self.report_error(compat_str(de), de.format_traceback())
                  break
              except Exception as e:
                  if self.params.get('ignoreerrors', False):
                  break
              except Exception as e:
                  if self.params.get('ignoreerrors', False):
-                    self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
+                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                      break
                  else:
                      raise
          if not suitable_found:
                      break
                  else:
                      raise
          if not suitable_found:
-                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
-    def extract_info_iterable(self, urls):
-        '''
-            Return the videos founded for the urls
-        '''
-        results = []
-        for url in urls:
-            results.extend(self.extract_info(url))
-        return results
+                self.report_error(u'no suitable InfoExtractor: %s' % url)
          
          
-    def process_ie_results(self, ie_results, ie):
+    def process_ie_result(self, ie_result, download = True):
          """
          """
-        Take the results of the ie and return a list of videos.
-        For url elements it will seartch the suitable ie and get the videos
+        Take the result of the ie and return the processed result (a video, or a playlist with its 'entries' processed).
+        For url elements it will search the suitable ie and get the videos
          For playlist elements it will process each of the elements of the 'entries' key
          For playlist elements it will process each of the elements of the 'entries' key
+        
+        It will also download the videos if 'download'.
          """
          """
-        results = [] 
-        for result in ie_results or []:
-            result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system
-            if result_type == 'video':
-                if not 'extractor' in result:
-                    #The extractor has already been set somewhere else
-                    result['extractor'] = ie.IE_NAME
-                results.append(result)
-            elif result_type == 'url':
-                #We get the videos pointed by the url
-                results.extend(self.extract_info(result['url']))
-            elif result_type == 'playlist':
-                #We process each entry in the playlist
-                entries_result = self.process_ie_results(result['entries'], ie)
-                result['entries'] = entries_result
-                results.extend([result])
-        return results
+        result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the default old system
+        if result_type == 'video':
+            if 'playlist' not in ie_result:
+                #It isn't part of a playlist
+                ie_result['playlist'] = None
+                ie_result['playlist_index'] = None
+            if download:
+                #Do the download:
+                self.process_info(ie_result)
+            return ie_result
+        elif result_type == 'url':
+            #We get the first video pointed to by the url
+            result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0]
+            return result
+        elif result_type == 'playlist':
+            #We process each entry in the playlist
+            playlist = ie_result.get('title', None) or ie_result.get('id', None)
+            self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
+
+            playlist_results = []
+
+            n_all_entries = len(ie_result['entries'])
+            playliststart = self.params.get('playliststart', 1) - 1
+            playlistend = self.params.get('playlistend', -1)
+
+            if playlistend == -1:
+                entries = ie_result['entries'][playliststart:]
+            else:
+                entries = ie_result['entries'][playliststart:playlistend]
+
+            n_entries = len(entries)
+
+            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
+                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+
+            for i,entry in enumerate(entries,1):
+                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
+                entry_result = self.process_ie_result(entry, False)
+                entry_result['playlist'] = playlist
+                entry_result['playlist_index'] = i + playliststart
+                #We must do the download here to correctly set the 'playlist' key
+                if download:
+                    self.process_info(entry_result)
+                playlist_results.append(entry_result)
+            result = ie_result.copy()
+            result['entries'] = playlist_results
+            return result
  
      def process_info(self, info_dict):
          """Process a single dictionary returned by an InfoExtractor."""
  
  
      def process_info(self, info_dict):
          """Process a single dictionary returned by an InfoExtractor."""
  
-        if info_dict.get('_type','video') == 'playlist':
-            playlist = info_dict.get('title', None) or info_dict.get('id', None)
-            self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
-            n_videos = len(info_dict['entries'])
-            for i,video in enumerate(info_dict['entries'],1):
-                video['playlist'] = playlist
-                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos))
-                self.process_info(video)
-            return
+        #We increment the download count here to match the previous behaviour.
+        self.increment_downloads()
          
          
+        info_dict['fulltitle'] = info_dict['title']
+        if len(info_dict['title']) > 200:
+            info_dict['title'] = info_dict['title'][:197] + u'...'
+
          # Keep for backwards compatibility
          info_dict['stitle'] = info_dict['title']
  
          # Keep for backwards compatibility
          info_dict['stitle'] = info_dict['title']
  
@@ -590,7 +645,7 @@ class FileDownloader(object):
                          with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                  subfile.write(sub)
                      except (OSError, IOError):
                          with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                  subfile.write(sub)
                      except (OSError, IOError):
-                        self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
+                        self.report_error(u'Cannot write subtitles file ' + descfn)
                          return
              if self.params.get('onlysubtitles', False):
                  return 
                          return
              if self.params.get('onlysubtitles', False):
                  return 
@@ -605,6 +660,20 @@ class FileDownloader(object):
                  self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                  return
  
                  self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                  return
  
+        if self.params.get('writethumbnail', False):
+            if 'thumbnail' in info_dict:
+                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
+                if not thumb_format:
+                    thumb_format = 'jpg'
+                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
+                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
+                               (info_dict['extractor'], info_dict['id']))
+                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                with open(thumb_filename, 'wb') as thumbf:
+                    shutil.copyfileobj(uf, thumbf)
+                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+                               (info_dict['extractor'], info_dict['id'], thumb_filename))
+
          if not self.params.get('skip_download', False):
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                  success = True
          if not self.params.get('skip_download', False):
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                  success = True
@@ -633,17 +702,14 @@ class FileDownloader(object):
              raise SameFileError(self.params['outtmpl'])
  
          for url in url_list:
              raise SameFileError(self.params['outtmpl'])
  
          for url in url_list:
-            videos = self.extract_info(url)
-
-            for video in videos or []:
-                try:
-                    self.increment_downloads()
-                    self.process_info(video)
-                except UnavailableVideoError:
-                    self.trouble(u'\nERROR: unable to download video')
-                except MaxDownloadsReached:
-                    self.to_screen(u'[info] Maximum number of downloaded files reached.')
-                    raise
+            try:
+                #It also downloads the videos
+                videos = self.extract_info(url)
+            except UnavailableVideoError:
+                self.report_error(u'unable to download video')
+            except MaxDownloadsReached:
+                self.to_screen(u'[info] Maximum number of downloaded files reached.')
+                raise
  
          return self._download_retcode
  
  
          return self._download_retcode
  
@@ -670,7 +736,7 @@ class FileDownloader(object):
              except (IOError, OSError):
                  self.report_warning(u'Unable to remove downloaded video file')
  
              except (IOError, OSError):
                  self.report_warning(u'Unable to remove downloaded video file')
  
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
@@ -689,6 +755,8 @@ class FileDownloader(object):
              basic_args += ['-W', player_url]
          if page_url is not None:
              basic_args += ['--pageUrl', page_url]
              basic_args += ['-W', player_url]
          if page_url is not None:
              basic_args += ['--pageUrl', page_url]
+        if play_path is not None:
+            basic_args += ['-y', play_path]
          args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
          args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
@@ -743,7 +811,8 @@ class FileDownloader(object):
          if url.startswith('rtmp'):
              return self._download_with_rtmpdump(filename, url,
                                                  info_dict.get('player_url', None),
          if url.startswith('rtmp'):
              return self._download_with_rtmpdump(filename, url,
                                                  info_dict.get('player_url', None),
-                                                info_dict.get('page_url', None))
+                                                info_dict.get('page_url', None),
+                                                info_dict.get('play_path', None))
  
          tmpfilename = self.temp_name(filename)
          stream = None
  
          tmpfilename = self.temp_name(filename)
          stream = None