X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fdownloader%2Ffragment.py;h=6f6fb4a77a9dc2b3e4e4278bc51b12755bc709ee;hb=2f0eb0a68a0711e7ac2c31b6177273433fb42b94;hp=44a3c10403bedb2380178320d42729f94d63eee5;hpb=75a24854073e590f4efc9f037b57dee348f52b61;p=youtube-dl diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 44a3c1040..6f6fb4a77 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -2,7 +2,7 @@ from __future__ import division, unicode_literals import os import time -import io +import json from .common import FileDownloader from .http import HttpFD @@ -11,7 +11,6 @@ from ..utils import ( encodeFilename, sanitize_open, sanitized_Request, - compat_str, ) @@ -30,6 +29,28 @@ class FragmentFD(FileDownloader): and hlsnative only) skip_unavailable_fragments: Skip unavailable fragments (DASH and hlsnative only) + keep_fragments: Keep downloaded fragments on disk after downloading is + finished + + For each incomplete fragment download youtube-dl keeps on disk a special + bookkeeping file with download state and metadata (in future such files will + be used for any incomplete download handled by youtube-dl). This file is + used to properly handle resuming, check download file consistency and detect + potential errors. The file has a .ytdl extension and represents a standard + JSON file of the following format: + + extractor: + Dictionary of extractor related data. TBD. + + downloader: + Dictionary of downloader related data. May contain following data: + current_fragment: + Dictionary with current (being downloaded) fragment data: + index: 0-based index of current fragment among all fragments + fragment_count: + Total count of fragments + + This feature is experimental and file format may change in future. """ def report_retry_fragment(self, err, frag_index, count, retries): @@ -48,24 +69,50 @@ class FragmentFD(FileDownloader): self._prepare_frag_download(ctx) self._start_frag_download(ctx) + @staticmethod + def __do_ytdl_file(ctx): + return not ctx['live'] and not ctx['tmpfilename'] == '-' + + def _read_ytdl_file(self, ctx): + stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') + ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] + stream.close() + + def _write_ytdl_file(self, ctx): + frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + frag_index_stream.close() + def _download_fragment(self, ctx, frag_url, info_dict, headers=None): - down = io.BytesIO() - success = ctx['dl'].download(down, { + fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) + success = ctx['dl'].download(fragment_filename, { 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), }) if not success: return False, None - frag_content = down.getvalue() + down, frag_sanitized = sanitize_open(fragment_filename, 'rb') + ctx['fragment_filename_sanitized'] = frag_sanitized + frag_content = down.read() down.close() return True, frag_content def _append_fragment(self, ctx, frag_content): - ctx['dest_stream'].write(frag_content) - if not (ctx.get('live') or ctx['tmpfilename'] == '-'): - frag_index_stream, _ = sanitize_open(ctx['tmpfilename'] + '.fragindex', 'w') - frag_index_stream.write(compat_str(ctx['frag_index'])) - frag_index_stream.close() + try: + ctx['dest_stream'].write(frag_content) + finally: + if self.__do_ytdl_file(ctx): + self._write_ytdl_file(ctx) + if not self.params.get('keep_fragments', False): + os.remove(ctx['fragment_filename_sanitized']) + del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): if 'live' not in ctx: @@ -89,22 +136,37 @@ class FragmentFD(FileDownloader): tmpfilename = self.temp_name(ctx['filename']) open_mode = 'wb' resume_len = 0 - frag_index = 0 + # Establish possible resume length if os.path.isfile(encodeFilename(tmpfilename)): open_mode = 'ab' resume_len = os.path.getsize(encodeFilename(tmpfilename)) - if os.path.isfile(encodeFilename(tmpfilename + '.fragindex')): - frag_index_stream, _ = sanitize_open(tmpfilename + '.fragindex', 'r') - frag_index = int(frag_index_stream.read()) - frag_index_stream.close() + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + if self.__do_ytdl_file(ctx): + if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): + self._read_ytdl_file(ctx) + if ctx['fragment_index'] > 0 and resume_len == 0: + self.report_error( + 'Inconsistent state of incomplete fragment download. ' + 'Restarting from the beginning...') + ctx['fragment_index'] = resume_len = 0 + self._write_ytdl_file(ctx) + else: + self._write_ytdl_file(ctx) + assert ctx['fragment_index'] == 0 + dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) ctx.update({ 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, - 'frag_index': frag_index, # Total complete fragments downloaded so far in bytes 'complete_frags_downloaded_bytes': resume_len, }) @@ -116,8 +178,8 @@ class FragmentFD(FileDownloader): state = { 'status': 'downloading', 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], - 'frag_index': ctx['frag_index'], - 'frag_count': total_frags, + 'fragment_index': ctx['fragment_index'], + 'fragment_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], } @@ -140,12 +202,12 @@ class FragmentFD(FileDownloader): if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['frag_index'] + 1) * total_frags) + (state['fragment_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size if s['status'] == 'finished': - state['frag_index'] += 1 - ctx['frag_index'] = state['frag_index'] + state['fragment_index'] += 1 + ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] ctx['prev_frag_downloaded_bytes'] = 0 @@ -167,8 +229,10 @@ class FragmentFD(FileDownloader): def _finish_frag_download(self, ctx): ctx['dest_stream'].close() - if os.path.isfile(encodeFilename(ctx['tmpfilename'] + '.fragindex')): - os.remove(encodeFilename(ctx['tmpfilename'] + '.fragindex')) + if self.__do_ytdl_file(ctx): + ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) + if os.path.isfile(ytdl_filename): + os.remove(ytdl_filename) elapsed = time.time() - ctx['started'] self.try_rename(ctx['tmpfilename'], ctx['filename']) fsize = os.path.getsize(encodeFilename(ctx['filename']))