Fix W504 and disable W503 (closes #20863)
[youtube-dl] / youtube_dl / downloader / fragment.py
index 25c8f18ecf2173421ff2d2b16b4b8b40e28944be..f2e5733b6406603f9a52b26b65bd4bc3cc833fec 100644 (file)
@@ -31,6 +31,26 @@ class FragmentFD(FileDownloader):
                         Skip unavailable fragments (DASH and hlsnative only)
     keep_fragments:     Keep downloaded fragments on disk after downloading is
                         finished
+
+    For each incomplete fragment download youtube-dl keeps on disk a special
+    bookkeeping file with download state and metadata (in future such files will
+    be used for any incomplete download handled by youtube-dl). This file is
+    used to properly handle resuming, check download file consistency and detect
+    potential errors. The file has a .ytdl extension and represents a standard
+    JSON file of the following format:
+
+    extractor:
+        Dictionary of extractor related data. TBD.
+
+    downloader:
+        Dictionary of downloader related data. May contain following data:
+            current_fragment:
+                Dictionary with current (being downloaded) fragment data:
+                index:  0-based index of current fragment among all fragments
+            fragment_count:
+                Total count of fragments
+
+    This feature is experimental and file format may change in future.
     """
 
     def report_retry_fragment(self, err, frag_index, count, retries):
@@ -54,17 +74,25 @@ class FragmentFD(FileDownloader):
         return not ctx['live'] and not ctx['tmpfilename'] == '-'
 
     def _read_ytdl_file(self, ctx):
+        assert 'ytdl_corrupt' not in ctx
         stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
-        ctx['fragment_index'] = json.loads(stream.read())['download']['current_fragment_index']
-        stream.close()
+        try:
+            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+        except Exception:
+            ctx['ytdl_corrupt'] = True
+        finally:
+            stream.close()
 
     def _write_ytdl_file(self, ctx):
         frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
-        frag_index_stream.write(json.dumps({
-            'download': {
-                'current_fragment_index': ctx['fragment_index']
+        downloader = {
+            'current_fragment': {
+                'index': ctx['fragment_index'],
             },
-        }))
+        }
+        if ctx.get('fragment_count') is not None:
+            downloader['fragment_count'] = ctx['fragment_count']
+        frag_index_stream.write(json.dumps({'downloader': downloader}))
         frag_index_stream.close()
 
     def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
@@ -84,19 +112,26 @@ class FragmentFD(FileDownloader):
     def _append_fragment(self, ctx, frag_content):
         try:
             ctx['dest_stream'].write(frag_content)
+            ctx['dest_stream'].flush()
         finally:
             if self.__do_ytdl_file(ctx):
                 self._write_ytdl_file(ctx)
             if not self.params.get('keep_fragments', False):
-                os.remove(ctx['fragment_filename_sanitized'])
+                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
             del ctx['fragment_filename_sanitized']
 
     def _prepare_frag_download(self, ctx):
         if 'live' not in ctx:
             ctx['live'] = False
+        if not ctx['live']:
+            total_frags_str = '%d' % ctx['total_frags']
+            ad_frags = ctx.get('ad_frags', 0)
+            if ad_frags:
+                total_frags_str += ' (not including %d ad)' % ad_frags
+        else:
+            total_frags_str = 'unknown (live)'
         self.to_screen(
-            '[%s] Total fragments: %s'
-            % (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
+            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
         self.report_destination(ctx['filename'])
         dl = HttpQuietDownloader(
             self.ydl,
@@ -128,12 +163,21 @@ class FragmentFD(FileDownloader):
         if self.__do_ytdl_file(ctx):
             if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
+                is_corrupt = ctx.get('ytdl_corrupt') is True
+                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+                if is_corrupt or is_inconsistent:
+                    message = (
+                        '.ytdl file is corrupt' if is_corrupt else
+                        'Inconsistent state of incomplete fragment download')
+                    self.report_warning(
+                        '%s. Restarting from the beginning...' % message)
+                    ctx['fragment_index'] = resume_len = 0
+                    if 'ytdl_corrupt' in ctx:
+                        del ctx['ytdl_corrupt']
+                    self._write_ytdl_file(ctx)
             else:
                 self._write_ytdl_file(ctx)
-            if ctx['fragment_index'] > 0:
-                assert resume_len > 0
-            else:
-                assert resume_len == 0
+                assert ctx['fragment_index'] == 0
 
         dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
 
@@ -175,8 +219,8 @@ class FragmentFD(FileDownloader):
             frag_total_bytes = s.get('total_bytes') or 0
             if not ctx['live']:
                 estimated_size = (
-                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
-                    (state['fragment_index'] + 1) * total_frags)
+                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+                    (state['fragment_index'] + 1) * total_frags)
                 state['total_bytes_estimate'] = estimated_size
 
             if s['status'] == 'finished':
@@ -208,12 +252,16 @@ class FragmentFD(FileDownloader):
             if os.path.isfile(ytdl_filename):
                 os.remove(ytdl_filename)
         elapsed = time.time() - ctx['started']
-        self.try_rename(ctx['tmpfilename'], ctx['filename'])
-        fsize = os.path.getsize(encodeFilename(ctx['filename']))
+
+        if ctx['tmpfilename'] == '-':
+            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+        else:
+            self.try_rename(ctx['tmpfilename'], ctx['filename'])
+            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
 
         self._hook_progress({
-            'downloaded_bytes': fsize,
-            'total_bytes': fsize,
+            'downloaded_bytes': downloaded_bytes,
+            'total_bytes': downloaded_bytes,
             'filename': ctx['filename'],
             'status': 'finished',
             'elapsed': elapsed,