src/backup/chunk_store.rs: return info about compressed chunk size

So that we can generate better statistics.
This commit is contained in:
Dietmar Maurer 2019-02-25 11:36:05 +01:00
parent 78216a5ab1
commit 798f7fa065
3 changed files with 50 additions and 16 deletions

View File

@ -292,7 +292,7 @@ impl ChunkStore {
Ok(()) Ok(())
} }
pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32]), Error> { pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32], u64), Error> {
// fixme: use Sha512/256 when available // fixme: use Sha512/256 when available
let mut hasher = sha::Sha256::new(); let mut hasher = sha::Sha256::new();
@ -312,7 +312,7 @@ impl ChunkStore {
if let Ok(metadata) = std::fs::metadata(&chunk_path) { if let Ok(metadata) = std::fs::metadata(&chunk_path) {
if metadata.is_file() { if metadata.is_file() {
return Ok((true, digest)); return Ok((true, digest, metadata.len()));
} else { } else {
bail!("Got unexpected file type on store '{}' for chunk {}", self.name, digest_str); bail!("Got unexpected file type on store '{}' for chunk {}", self.name, digest_str);
} }
@ -327,7 +327,7 @@ impl ChunkStore {
let mut encoder = lz4::EncoderBuilder::new().level(1).build(f)?; let mut encoder = lz4::EncoderBuilder::new().level(1).build(f)?;
encoder.write_all(chunk)?; encoder.write_all(chunk)?;
let (_, encode_result) = encoder.finish(); let (f, encode_result) = encoder.finish();
encode_result?; encode_result?;
if let Err(err) = std::fs::rename(&tmp_path, &chunk_path) { if let Err(err) = std::fs::rename(&tmp_path, &chunk_path) {
@ -340,11 +340,15 @@ impl ChunkStore {
); );
} }
// fixme: is there a better way to get the compressed size?
let stat = nix::sys::stat::fstat(f.as_raw_fd())?;
let compressed_size = stat.st_size as u64;
//println!("PATH {:?}", chunk_path); //println!("PATH {:?}", chunk_path);
drop(lock); drop(lock);
Ok((false, digest)) Ok((false, digest, compressed_size))
} }
pub fn relative_path(&self, path: &Path) -> PathBuf { pub fn relative_path(&self, path: &Path) -> PathBuf {
@ -372,10 +376,10 @@ fn test_chunk_store1() {
assert!(chunk_store.is_err()); assert!(chunk_store.is_err());
let chunk_store = ChunkStore::create("test", &path).unwrap(); let chunk_store = ChunkStore::create("test", &path).unwrap();
let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap(); let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
assert!(!exists); assert!(!exists);
let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap(); let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
assert!(exists); assert!(exists);

View File

@ -329,6 +329,8 @@ pub struct DynamicIndexWriter {
pub uuid: [u8; 16], pub uuid: [u8; 16],
pub ctime: u64, pub ctime: u64,
compressed_size: u64,
disk_size: u64,
chunk_count: usize, chunk_count: usize,
chunk_offset: usize, chunk_offset: usize,
last_chunk: usize, last_chunk: usize,
@ -388,6 +390,8 @@ impl DynamicIndexWriter {
ctime, ctime,
uuid: *uuid.as_bytes(), uuid: *uuid.as_bytes(),
compressed_size: 0,
disk_size: 0,
chunk_count: 0, chunk_count: 0,
chunk_offset: 0, chunk_offset: 0,
last_chunk: 0, last_chunk: 0,
@ -407,8 +411,12 @@ impl DynamicIndexWriter {
self.writer.flush()?; self.writer.flush()?;
let avg = ((self.chunk_offset as f64)/(self.chunk_count as f64)) as usize; let size = self.chunk_offset;
println!("Average chunk size {}", avg); let avg = ((size as f64)/(self.chunk_count as f64)) as usize;
let compression = (self.compressed_size*100)/(size as u64);
let rate = (self.disk_size*100)/(size as u64);
println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
size, avg, compression, self.disk_size, rate);
// fixme: // fixme:
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
@ -438,8 +446,16 @@ impl DynamicIndexWriter {
self.last_chunk = self.chunk_offset; self.last_chunk = self.chunk_offset;
match self.store.insert_chunk(&self.chunk_buffer) { match self.store.insert_chunk(&self.chunk_buffer) {
Ok((is_duplicate, digest)) => { Ok((is_duplicate, digest, compressed_size)) => {
println!("ADD CHUNK {:016x} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, tools::digest_to_hex(&digest));
self.compressed_size += compressed_size;
if is_duplicate {
} else {
self.disk_size += compressed_size;
}
println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
(compressed_size*100)/(chunk_size as u64), is_duplicate, tools::digest_to_hex(&digest));
self.writer.write(unsafe { &std::mem::transmute::<u64, [u8;8]>(self.chunk_offset as u64) })?; self.writer.write(unsafe { &std::mem::transmute::<u64, [u8;8]>(self.chunk_offset as u64) })?;
self.writer.write(&digest)?; self.writer.write(&digest)?;
self.chunk_buffer.truncate(0); self.chunk_buffer.truncate(0);

View File

@ -160,7 +160,9 @@ pub struct FixedIndexWriter {
tmp_filename: PathBuf, tmp_filename: PathBuf,
chunk_size: usize, chunk_size: usize,
duplicate_chunks: usize, duplicate_chunks: usize,
disk_size: u64,
size: usize, size: usize,
compressed_size: u64,
index: *mut u8, index: *mut u8,
pub uuid: [u8; 16], pub uuid: [u8; 16],
pub ctime: u64, pub ctime: u64,
@ -231,6 +233,8 @@ impl FixedIndexWriter {
chunk_size, chunk_size,
duplicate_chunks: 0, duplicate_chunks: 0,
size, size,
compressed_size: 0,
disk_size: 0,
index: data, index: data,
ctime, ctime,
uuid: *uuid.as_bytes(), uuid: *uuid.as_bytes(),
@ -249,8 +253,11 @@ impl FixedIndexWriter {
self.index = std::ptr::null_mut(); self.index = std::ptr::null_mut();
println!("Original size: {} Compressed size: {} Deduplicated size: {}", let compression = (self.compressed_size*100)/(self.size as u64);
self.size, self.size, self.size - (self.duplicate_chunks*self.chunk_size)); let rate = (self.disk_size*100)/(self.size as u64);
println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)",
self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate);
Ok(()) Ok(())
} }
@ -290,12 +297,19 @@ impl FixedIndexWriter {
if pos & (self.chunk_size-1) != 0 { bail!("add unaligned chunk (pos = {})", pos); } if pos & (self.chunk_size-1) != 0 { bail!("add unaligned chunk (pos = {})", pos); }
let (is_duplicate, digest) = self.store.insert_chunk(chunk)?; let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
println!("ADD CHUNK {} {} {} {}", pos, chunk.len(), is_duplicate, tools::digest_to_hex(&digest)); self.compressed_size += compressed_size;
println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
(compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
if is_duplicate {
self.duplicate_chunks += 1;
} else {
self.disk_size += compressed_size;
}
if is_duplicate { self.duplicate_chunks += 1; }
let index_pos = (pos/self.chunk_size)*32; let index_pos = (pos/self.chunk_size)*32;
unsafe { unsafe {
let dst = self.index.add(index_pos); let dst = self.index.add(index_pos);