From a660978c9a14adbfe00152dd62a3e3ff8b8b7540 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Thu, 4 Jul 2019 07:57:43 +0200 Subject: [PATCH] src/backup/datastore.rs: generic index_mark_used_chunks implementation, improve GC stats --- src/backup/chunk_store.rs | 30 ++++++++++++++++-------------- src/backup/datastore.rs | 36 +++++++++++++++++++++++++++++++----- src/backup/dynamic_index.rs | 28 +++++++++------------------- src/backup/fixed_index.rs | 25 ++++--------------------- src/backup/index.rs | 4 ++++ 5 files changed, 64 insertions(+), 59 deletions(-) diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs index a4ddca83..6befdf1a 100644 --- a/src/backup/chunk_store.rs +++ b/src/backup/chunk_store.rs @@ -12,20 +12,24 @@ use super::DataChunk; #[derive(Clone, Serialize)] pub struct GarbageCollectionStatus { pub upid: Option, - pub used_bytes: usize, - pub used_chunks: usize, - pub disk_bytes: usize, + pub index_file_count: usize, + pub index_data_bytes: u64, + pub disk_bytes: u64, pub disk_chunks: usize, + pub removed_bytes: u64, + pub removed_chunks: usize, } impl Default for GarbageCollectionStatus { fn default() -> Self { GarbageCollectionStatus { upid: None, - used_bytes: 0, - used_chunks: 0, + index_file_count: 0, + index_data_bytes: 0, disk_bytes: 0, disk_chunks: 0, + removed_bytes: 0, + removed_chunks: 0, } } } @@ -143,13 +147,9 @@ impl ChunkStore { }) } - pub fn touch_chunk(&self, digest:&[u8]) -> Result<(), Error> { + pub fn touch_chunk(&self, digest: &[u8; 32]) -> Result<(), Error> { - let mut chunk_path = self.chunk_dir.clone(); - let prefix = digest_to_prefix(&digest); - chunk_path.push(&prefix); - let digest_str = proxmox::tools::digest_to_hex(&digest); - chunk_path.push(&digest_str); + let (chunk_path, _digest_str) = self.chunk_path(digest); const UTIME_NOW: i64 = ((1 << 30) - 1); const UTIME_OMIT: i64 = ((1 << 30) - 2); @@ -172,7 +172,7 @@ impl ChunkStore { Ok(()) } - pub fn read_chunk(&self, digest:&[u8; 32]) -> Result { + pub fn read_chunk(&self, digest: &[u8; 32]) -> Result { let (chunk_path, digest_str) = self.chunk_path(digest); let mut file = std::fs::File::open(&chunk_path) @@ -302,9 +302,11 @@ impl ChunkStore { err, ); } - } else { + status.removed_chunks += 1; + status.removed_bytes += stat.st_size as u64; + } else { status.disk_chunks += 1; - status.disk_bytes += stat.st_size as usize; + status.disk_bytes += stat.st_size as u64; } } drop(lock); diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs index eb6f6eac..ed42cc96 100644 --- a/src/backup/datastore.rs +++ b/src/backup/datastore.rs @@ -201,6 +201,28 @@ impl DataStore { Ok(list) } + // mark chunks used by ``index`` as used + fn index_mark_used_chunks( + &self, + index: I, + file_name: &Path, // only used for error reporting + status: &mut GarbageCollectionStatus, + ) -> Result<(), Error> { + + status.index_file_count += 1; + status.index_data_bytes += index.index_bytes(); + + for pos in 0..index.index_count() { + tools::fail_on_shutdown()?; + let digest = index.index_digest(pos).unwrap(); + if let Err(err) = self.chunk_store.touch_chunk(digest) { + bail!("unable to access chunk {}, required by {:?} - {}", + proxmox::tools::digest_to_hex(digest), file_name, err); + } + } + Ok(()) + } + fn mark_used_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> { let image_list = self.list_images()?; @@ -212,10 +234,10 @@ impl DataStore { if let Some(ext) = path.extension() { if ext == "fidx" { let index = self.open_fixed_reader(&path)?; - index.mark_used_chunks(status)?; + self.index_mark_used_chunks(index, &path, status)?; } else if ext == "didx" { let index = self.open_dynamic_reader(&path)?; - index.mark_used_chunks(status)?; + self.index_mark_used_chunks(index, &path, status)?; } } } @@ -245,10 +267,14 @@ impl DataStore { worker.log("Start GC phase2 (sweep unused chunks)"); self.chunk_store.sweep_unused_chunks(oldest_writer, &mut gc_status)?; - worker.log(&format!("Used bytes: {}", gc_status.used_bytes)); - worker.log(&format!("Used chunks: {}", gc_status.used_chunks)); - worker.log(&format!("Disk bytes: {}", gc_status.disk_bytes)); + worker.log(&format!("Removed bytes: {}", gc_status.removed_bytes)); + worker.log(&format!("Removed chunks: {}", gc_status.removed_chunks)); + worker.log(&format!("Original data bytes: {}", gc_status.index_data_bytes)); + let comp_per = (gc_status.disk_bytes*100)/gc_status.index_data_bytes; + worker.log(&format!("Disk bytes: {} ({} %)", gc_status.disk_bytes, comp_per)); worker.log(&format!("Disk chunks: {}", gc_status.disk_chunks)); + let avg_chunk = gc_status.index_data_bytes/(gc_status.disk_chunks as u64); + worker.log(&format!("Average chunk size: {}", avg_chunk)); *self.last_gc_status.lock().unwrap() = gc_status; diff --git a/src/backup/dynamic_index.rs b/src/backup/dynamic_index.rs index 30e4e202..db733459 100644 --- a/src/backup/dynamic_index.rs +++ b/src/backup/dynamic_index.rs @@ -167,25 +167,7 @@ impl DynamicIndexReader { slice.try_into().unwrap() } - pub fn mark_used_chunks(&self, _status: &mut GarbageCollectionStatus) -> Result<(), Error> { - unimplemented!(); - } /* - pub fn mark_used_chunks(&self, _status: &mut GarbageCollectionStatus) -> Result<(), Error> { - - for pos in 0..self.index_entries { - - tools::fail_on_shutdown()?; - - let digest = self.chunk_digest(pos); - if let Err(err) = self.store.touch_chunk(digest) { - bail!("unable to access chunk {}, required by {:?} - {}", - proxmox::tools::digest_to_hex(digest), self.filename, err); - } - } - Ok(()) - } - pub fn dump_pxar(&self, mut writer: Box) -> Result<(), Error> { for pos in 0..self.index_entries { @@ -200,7 +182,7 @@ impl DynamicIndexReader { Ok(()) } - */ + */ fn binary_search( &self, @@ -243,6 +225,14 @@ impl IndexFile for DynamicIndexReader { }) } } + + fn index_bytes(&self) -> u64 { + if self.index_entries == 0 { + 0 + } else { + self.chunk_end((self.index_entries - 1) as usize) + } + } } pub struct BufferedDynamicReader { diff --git a/src/backup/fixed_index.rs b/src/backup/fixed_index.rs index a4d6213e..cd47c748 100644 --- a/src/backup/fixed_index.rs +++ b/src/backup/fixed_index.rs @@ -138,27 +138,6 @@ impl FixedIndexReader { Ok(()) } - pub fn mark_used_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> { - - if self.index == std::ptr::null_mut() { bail!("detected closed index file."); } - - status.used_bytes += self.index_length * self.chunk_size; - status.used_chunks += self.index_length; - - for pos in 0..self.index_length { - - tools::fail_on_shutdown()?; - - let digest = self.index_digest(pos).unwrap(); - if let Err(err) = self.store.touch_chunk(digest) { - bail!("unable to access chunk {}, required by {:?} - {}", - proxmox::tools::digest_to_hex(digest), self.filename, err); - } - } - - Ok(()) - } - pub fn print_info(&self) { println!("Filename: {:?}", self.filename); println!("Size: {}", self.size); @@ -180,6 +159,10 @@ impl IndexFile for FixedIndexReader { Some(unsafe { std::mem::transmute(self.index.add(pos*32)) }) } } + + fn index_bytes(&self) -> u64 { + (self.index_length * self.chunk_size) as u64 + } } pub struct FixedIndexWriter { diff --git a/src/backup/index.rs b/src/backup/index.rs index fd375244..bc205574 100644 --- a/src/backup/index.rs +++ b/src/backup/index.rs @@ -2,9 +2,13 @@ use failure::*; use futures::*; use bytes::{Bytes, BytesMut}; +/// Trait to get digest list from index files +/// +/// To allow easy iteration over all used chunks. pub trait IndexFile: Send { fn index_count(&self) -> usize; fn index_digest(&self, pos: usize) -> Option<&[u8; 32]>; + fn index_bytes(&self) -> u64; } /// Encode digest list from an `IndexFile` into a binary stream