Proper support for changing User-Agents from IEs
[youtube-dl] / youtube_dl / FileDownloader.py
index 756fc72ec8e823751e891cdc3e7041506a0f6fe8..04ecd1ac5447cd306cc7d45bd67449e6f9bb66a4 100644 (file)
@@ -81,6 +81,7 @@ class FileDownloader(object):
     writesubtitles:    Write the video subtitles to a .srt file
     subtitleslang:     Language of the subtitles to download
     test:              Download only first bytes to test the downloader.
+    keepvideo:         Keep the video file after post-processing
     """
 
     params = None
@@ -216,12 +217,15 @@ class FileDownloader(object):
         Depending on if the downloader has been configured to ignore
         download errors or not, this method may throw an exception or
         not when errors are found, after printing the message.
+
+        tb, if given, is additional traceback information.
         """
         if message is not None:
             self.to_stderr(message)
         if self.params.get('verbose'):
             if tb is None:
-                tb = u''.join(traceback.format_list(traceback.extract_stack()))
+                tb_data = traceback.format_list(traceback.extract_stack())
+                tb = u''.join(tb_data)
             self.to_stderr(tb)
         if not self.params.get('ignoreerrors', False):
             raise DownloadError(message)
@@ -497,7 +501,7 @@ class FileDownloader(object):
                 try:
                     videos = ie.extract(url)
                 except ExtractorError as de: # An error we somewhat expected
-                    self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
+                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                     break
                 except Exception as e:
                     if self.params.get('ignoreerrors', False):
@@ -526,15 +530,29 @@ class FileDownloader(object):
         return self._download_retcode
 
     def post_process(self, filename, ie_info):
-        """Run the postprocessing chain on the given file."""
+        """Run all the postprocessors on the given file."""
         info = dict(ie_info)
         info['filepath'] = filename
+        keep_video = None
         for pp in self._pps:
-            info = pp.run(info)
-            if info is None:
-                break
+            try:
+                keep_video_wish,new_info = pp.run(info)
+                if keep_video_wish is not None:
+                    if keep_video_wish:
+                        keep_video = keep_video_wish
+                    elif keep_video is None:
+                        # No clear decision yet, let IE decide
+                        keep_video = keep_video_wish
+            except PostProcessingError as e:
+                self.to_stderr(u'ERROR: ' + e.msg)
+        if keep_video is False and not self.params.get('keepvideo', False):
+            try:
+                self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
+                os.remove(encodeFilename(filename))
+            except (IOError, OSError):
+                self.to_stderr(u'WARNING: Unable to remove downloaded video file')
 
-    def _download_with_rtmpdump(self, filename, url, player_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
@@ -548,7 +566,11 @@ class FileDownloader(object):
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrupted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
+        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
+        if player_url is not None:
+            basic_args += ['-W', player_url]
+        if page_url is not None:
+            basic_args += ['--pageUrl', page_url]
         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -581,7 +603,6 @@ class FileDownloader(object):
 
     def _do_download(self, filename, info_dict):
         url = info_dict['url']
-        player_url = info_dict.get('player_url', None)
 
         # Check file already present
         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
@@ -590,13 +611,17 @@ class FileDownloader(object):
 
         # Attempt to download using rtmpdump
         if url.startswith('rtmp'):
-            return self._download_with_rtmpdump(filename, url, player_url)
+            return self._download_with_rtmpdump(filename, url,
+                                                info_dict.get('player_url', None),
+                                                info_dict.get('page_url', None))
 
         tmpfilename = self.temp_name(filename)
         stream = None
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
+        if 'user_agent' in info_dict:
+            headers['Youtubedl-user-agent'] = info_dict['user_agent']
         basic_request = compat_urllib_request.Request(url, None, headers)
         request = compat_urllib_request.Request(url, None, headers)