diff --git a/src/backup.rs b/src/backup.rs
index 43830941..394091f6 100644
--- a/src/backup.rs
+++ b/src/backup.rs
@@ -11,6 +11,9 @@
 //! whereas the `FixedIndex*` format is an optimization to store a
 //! list of equal sized chunks.
 
+mod chunk_stat;
+pub use chunk_stat::*;
+
 mod chunker;
 pub use chunker::*;
 
diff --git a/src/backup/chunk_stat.rs b/src/backup/chunk_stat.rs
new file mode 100644
index 00000000..4ce6ef91
--- /dev/null
+++ b/src/backup/chunk_stat.rs
@@ -0,0 +1,32 @@
+pub struct ChunkStat {
+    pub size: u64,
+    pub compressed_size: u64,
+    pub disk_size: u64,
+
+    pub chunk_count: usize,
+    pub duplicate_chunks: usize,
+}
+
+impl ChunkStat {
+
+    pub fn new(size: u64) -> Self {
+        ChunkStat {
+            size,
+            compressed_size: 0,
+            disk_size: 0,
+
+            chunk_count: 0,
+            duplicate_chunks: 0,
+        }
+    }
+}
+
+impl std::fmt::Debug for ChunkStat {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let avg = ((self.size as f64)/(self.chunk_count as f64)) as usize;
+        let compression = (self.compressed_size*100)/(self.size as u64);
+        let rate = (self.disk_size*100)/(self.size as u64);
+        write!(f, "Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
+               self.size, avg, compression, self.disk_size, rate)
+    }
+}
diff --git a/src/backup/dynamic_index.rs b/src/backup/dynamic_index.rs
index 7d4a747d..a0f5f74e 100644
--- a/src/backup/dynamic_index.rs
+++ b/src/backup/dynamic_index.rs
@@ -1,6 +1,7 @@
 use failure::*;
 
 use crate::tools;
+use super::chunk_stat::*;
 use super::chunk_store::*;
 use super::chunker::*;
 
@@ -329,9 +330,8 @@ pub struct DynamicIndexWriter {
     pub uuid: [u8; 16],
     pub ctime: u64,
 
-    compressed_size: u64,
-    disk_size: u64,
-    chunk_count: usize,
+    stat: ChunkStat,
+
     chunk_offset: usize,
     last_chunk: usize,
     chunk_buffer: Vec<u8>,
@@ -390,9 +390,8 @@
            ctime,
            uuid: *uuid.as_bytes(),
 
-           compressed_size: 0,
-           disk_size: 0,
-           chunk_count: 0,
+           stat: ChunkStat::new(0),
+
            chunk_offset: 0,
            last_chunk: 0,
            chunk_buffer: Vec::with_capacity(chunk_size*4),
@@ -411,12 +410,13 @@
 
         self.writer.flush()?;
 
-        let size = self.chunk_offset;
-        let avg = ((size as f64)/(self.chunk_count as f64)) as usize;
-        let compression = (self.compressed_size*100)/(size as u64);
-        let rate = (self.disk_size*100)/(size as u64);
-        println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
-                 size, avg, compression, self.disk_size, rate);
+        self.stat.size = self.chunk_offset as u64;
+
+        // add size of index file
+        self.stat.size += (self.stat.chunk_count*40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
+
+        println!("STAT: {:?}", self.stat);
+
        // fixme:
 
        if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
@@ -426,6 +426,10 @@
         Ok(())
     }
 
+    pub fn stat(&self) -> &ChunkStat {
+        &self.stat
+    }
+
     fn write_chunk_buffer(&mut self) -> Result<(), std::io::Error> {
 
         use std::io::{Error, ErrorKind};
@@ -441,17 +445,18 @@
                 format!("wrong chunk size {} != {}", expected_chunk_size, chunk_size)));
         }
 
-        self.chunk_count += 1;
+        self.stat.chunk_count += 1;
 
         self.last_chunk = self.chunk_offset;
 
         match self.store.insert_chunk(&self.chunk_buffer) {
             Ok((is_duplicate, digest, compressed_size)) => {
-                self.compressed_size += compressed_size;
+                self.stat.compressed_size += compressed_size;
                 if is_duplicate {
+                    self.stat.duplicate_chunks += 1;
                 } else {
-                    self.disk_size += compressed_size;
+                    self.stat.disk_size += compressed_size;
                 }
                 println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
diff --git a/src/backup/fixed_index.rs b/src/backup/fixed_index.rs
index b89257d6..1dc69587 100644
--- a/src/backup/fixed_index.rs
+++ b/src/backup/fixed_index.rs
@@ -1,6 +1,7 @@
 use failure::*;
 
 use crate::tools;
+use super::chunk_stat::*;
 use super::chunk_store::*;
 
 use std::sync::Arc;
@@ -159,10 +160,10 @@ pub struct FixedIndexWriter {
     filename: PathBuf,
     tmp_filename: PathBuf,
     chunk_size: usize,
-    duplicate_chunks: usize,
-    disk_size: u64,
+
+    stat: ChunkStat,
+
     size: usize,
-    compressed_size: u64,
     index: *mut u8,
     pub uuid: [u8; 16],
     pub ctime: u64,
@@ -231,10 +232,8 @@
            filename: full_path,
            tmp_filename: tmp_path,
            chunk_size,
-           duplicate_chunks: 0,
            size,
-           compressed_size: 0,
-           disk_size: 0,
+           stat: ChunkStat::new(size as u64),
            index: data,
            ctime,
            uuid: *uuid.as_bytes(),
@@ -253,11 +252,9 @@
 
         self.index = std::ptr::null_mut();
 
-        let compression = (self.compressed_size*100)/(self.size as u64);
-        let rate = (self.disk_size*100)/(self.size as u64);
+        self.stat.disk_size += index_size as u64;
 
-        println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)",
-                 self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate);
+        println!("STAT: {:?}", self.stat);
 
         Ok(())
     }
@@ -275,6 +272,10 @@
         Ok(())
     }
 
+    pub fn stat(&self) -> &ChunkStat {
+        &self.stat
+    }
+
     // Note: We want to add data out of order, so do not assume an order here.
     pub fn add_chunk(&mut self, pos: usize, chunk: &[u8]) -> Result<(), Error> {
 
@@ -299,15 +300,16 @@
 
         let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
 
-        self.compressed_size += compressed_size;
+        self.stat.chunk_count += 1;
+        self.stat.compressed_size += compressed_size;
 
         println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
                  (compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
 
         if is_duplicate {
-            self.duplicate_chunks += 1;
+            self.stat.duplicate_chunks += 1;
         } else {
-            self.disk_size += compressed_size;
+            self.stat.disk_size += compressed_size;
         }
 
         let index_pos = (pos/self.chunk_size)*32;
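
For reference, the following standalone sketch (not part of the patch) shows the ChunkStat bookkeeping that both index writers now share. The struct and its Debug impl are copied from src/backup/chunk_stat.rs above; the chunk sizes and the simulated insert_chunk() results in main() are invented for illustration:

// Standalone sketch: exercises the ChunkStat bookkeeping that
// DynamicIndexWriter and FixedIndexWriter now delegate to.
// ChunkStat is copied from src/backup/chunk_stat.rs; the per-chunk
// figures below are made up.

pub struct ChunkStat {
    pub size: u64,
    pub compressed_size: u64,
    pub disk_size: u64,

    pub chunk_count: usize,
    pub duplicate_chunks: usize,
}

impl ChunkStat {
    pub fn new(size: u64) -> Self {
        ChunkStat {
            size,
            compressed_size: 0,
            disk_size: 0,
            chunk_count: 0,
            duplicate_chunks: 0,
        }
    }
}

impl std::fmt::Debug for ChunkStat {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let avg = ((self.size as f64)/(self.chunk_count as f64)) as usize;
        let compression = (self.compressed_size*100)/(self.size as u64);
        let rate = (self.disk_size*100)/(self.size as u64);
        write!(f, "Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
               self.size, avg, compression, self.disk_size, rate)
    }
}

fn main() {
    // Simulated results of ChunkStore::insert_chunk():
    // (uncompressed_len, compressed_len, is_duplicate) per chunk.
    let chunks = [
        (4096u64, 1024u64, false),
        (4096,    1100,    false),
        (4096,    1024,    true), // duplicate: counted, but adds no disk usage
        (4096,     980,    false),
    ];

    // Mirrors the dynamic writer, which accumulates size as chunks arrive;
    // the fixed writer instead passes the known total to ChunkStat::new().
    let mut stat = ChunkStat::new(0);

    for &(len, compressed, is_duplicate) in &chunks {
        stat.size += len;
        stat.chunk_count += 1;
        stat.compressed_size += compressed;
        if is_duplicate {
            stat.duplicate_chunks += 1;
        } else {
            stat.disk_size += compressed;
        }
    }

    // Prints:
    // STAT: Size: 16384, average chunk size: 4096, compression rate: 25%, disk_size: 3104 (18%)
    println!("STAT: {:?}", stat);
}

A duplicate chunk still increments chunk_count and compressed_size, but only non-duplicate chunks add to disk_size, so the final "disk_size ({}%)" figure reflects the deduplication savings. The dynamic writer additionally folds the index file itself into stat.size on close(): 40 bytes per entry (consistent with an 8-byte end offset plus a 32-byte digest) plus the header size.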