src/backup/chunk_store.rs: return info about compressed chunk size
So that we can generate better statistics.
This commit is contained in:
parent
78216a5ab1
commit
798f7fa065
@ -292,7 +292,7 @@ impl ChunkStore {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32]), Error> {
|
pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32], u64), Error> {
|
||||||
|
|
||||||
// fixme: use Sha512/256 when available
|
// fixme: use Sha512/256 when available
|
||||||
let mut hasher = sha::Sha256::new();
|
let mut hasher = sha::Sha256::new();
|
||||||
@ -312,7 +312,7 @@ impl ChunkStore {
|
|||||||
|
|
||||||
if let Ok(metadata) = std::fs::metadata(&chunk_path) {
|
if let Ok(metadata) = std::fs::metadata(&chunk_path) {
|
||||||
if metadata.is_file() {
|
if metadata.is_file() {
|
||||||
return Ok((true, digest));
|
return Ok((true, digest, metadata.len()));
|
||||||
} else {
|
} else {
|
||||||
bail!("Got unexpected file type on store '{}' for chunk {}", self.name, digest_str);
|
bail!("Got unexpected file type on store '{}' for chunk {}", self.name, digest_str);
|
||||||
}
|
}
|
||||||
@ -327,7 +327,7 @@ impl ChunkStore {
|
|||||||
let mut encoder = lz4::EncoderBuilder::new().level(1).build(f)?;
|
let mut encoder = lz4::EncoderBuilder::new().level(1).build(f)?;
|
||||||
|
|
||||||
encoder.write_all(chunk)?;
|
encoder.write_all(chunk)?;
|
||||||
let (_, encode_result) = encoder.finish();
|
let (f, encode_result) = encoder.finish();
|
||||||
encode_result?;
|
encode_result?;
|
||||||
|
|
||||||
if let Err(err) = std::fs::rename(&tmp_path, &chunk_path) {
|
if let Err(err) = std::fs::rename(&tmp_path, &chunk_path) {
|
||||||
@ -340,11 +340,15 @@ impl ChunkStore {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fixme: is there a better way to get the compressed size?
|
||||||
|
let stat = nix::sys::stat::fstat(f.as_raw_fd())?;
|
||||||
|
let compressed_size = stat.st_size as u64;
|
||||||
|
|
||||||
//println!("PATH {:?}", chunk_path);
|
//println!("PATH {:?}", chunk_path);
|
||||||
|
|
||||||
drop(lock);
|
drop(lock);
|
||||||
|
|
||||||
Ok((false, digest))
|
Ok((false, digest, compressed_size))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn relative_path(&self, path: &Path) -> PathBuf {
|
pub fn relative_path(&self, path: &Path) -> PathBuf {
|
||||||
@ -372,10 +376,10 @@ fn test_chunk_store1() {
|
|||||||
assert!(chunk_store.is_err());
|
assert!(chunk_store.is_err());
|
||||||
|
|
||||||
let chunk_store = ChunkStore::create("test", &path).unwrap();
|
let chunk_store = ChunkStore::create("test", &path).unwrap();
|
||||||
let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
|
let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
|
||||||
assert!(!exists);
|
assert!(!exists);
|
||||||
|
|
||||||
let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
|
let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
|
||||||
assert!(exists);
|
assert!(exists);
|
||||||
|
|
||||||
|
|
||||||
|
@ -329,6 +329,8 @@ pub struct DynamicIndexWriter {
|
|||||||
pub uuid: [u8; 16],
|
pub uuid: [u8; 16],
|
||||||
pub ctime: u64,
|
pub ctime: u64,
|
||||||
|
|
||||||
|
compressed_size: u64,
|
||||||
|
disk_size: u64,
|
||||||
chunk_count: usize,
|
chunk_count: usize,
|
||||||
chunk_offset: usize,
|
chunk_offset: usize,
|
||||||
last_chunk: usize,
|
last_chunk: usize,
|
||||||
@ -388,6 +390,8 @@ impl DynamicIndexWriter {
|
|||||||
ctime,
|
ctime,
|
||||||
uuid: *uuid.as_bytes(),
|
uuid: *uuid.as_bytes(),
|
||||||
|
|
||||||
|
compressed_size: 0,
|
||||||
|
disk_size: 0,
|
||||||
chunk_count: 0,
|
chunk_count: 0,
|
||||||
chunk_offset: 0,
|
chunk_offset: 0,
|
||||||
last_chunk: 0,
|
last_chunk: 0,
|
||||||
@ -407,8 +411,12 @@ impl DynamicIndexWriter {
|
|||||||
|
|
||||||
self.writer.flush()?;
|
self.writer.flush()?;
|
||||||
|
|
||||||
let avg = ((self.chunk_offset as f64)/(self.chunk_count as f64)) as usize;
|
let size = self.chunk_offset;
|
||||||
println!("Average chunk size {}", avg);
|
let avg = ((size as f64)/(self.chunk_count as f64)) as usize;
|
||||||
|
let compression = (self.compressed_size*100)/(size as u64);
|
||||||
|
let rate = (self.disk_size*100)/(size as u64);
|
||||||
|
println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
|
||||||
|
size, avg, compression, self.disk_size, rate);
|
||||||
// fixme:
|
// fixme:
|
||||||
|
|
||||||
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
|
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
|
||||||
@ -438,8 +446,16 @@ impl DynamicIndexWriter {
|
|||||||
self.last_chunk = self.chunk_offset;
|
self.last_chunk = self.chunk_offset;
|
||||||
|
|
||||||
match self.store.insert_chunk(&self.chunk_buffer) {
|
match self.store.insert_chunk(&self.chunk_buffer) {
|
||||||
Ok((is_duplicate, digest)) => {
|
Ok((is_duplicate, digest, compressed_size)) => {
|
||||||
println!("ADD CHUNK {:016x} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, tools::digest_to_hex(&digest));
|
|
||||||
|
self.compressed_size += compressed_size;
|
||||||
|
if is_duplicate {
|
||||||
|
} else {
|
||||||
|
self.disk_size += compressed_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
|
||||||
|
(compressed_size*100)/(chunk_size as u64), is_duplicate, tools::digest_to_hex(&digest));
|
||||||
self.writer.write(unsafe { &std::mem::transmute::<u64, [u8;8]>(self.chunk_offset as u64) })?;
|
self.writer.write(unsafe { &std::mem::transmute::<u64, [u8;8]>(self.chunk_offset as u64) })?;
|
||||||
self.writer.write(&digest)?;
|
self.writer.write(&digest)?;
|
||||||
self.chunk_buffer.truncate(0);
|
self.chunk_buffer.truncate(0);
|
||||||
|
@ -160,7 +160,9 @@ pub struct FixedIndexWriter {
|
|||||||
tmp_filename: PathBuf,
|
tmp_filename: PathBuf,
|
||||||
chunk_size: usize,
|
chunk_size: usize,
|
||||||
duplicate_chunks: usize,
|
duplicate_chunks: usize,
|
||||||
|
disk_size: u64,
|
||||||
size: usize,
|
size: usize,
|
||||||
|
compressed_size: u64,
|
||||||
index: *mut u8,
|
index: *mut u8,
|
||||||
pub uuid: [u8; 16],
|
pub uuid: [u8; 16],
|
||||||
pub ctime: u64,
|
pub ctime: u64,
|
||||||
@ -231,6 +233,8 @@ impl FixedIndexWriter {
|
|||||||
chunk_size,
|
chunk_size,
|
||||||
duplicate_chunks: 0,
|
duplicate_chunks: 0,
|
||||||
size,
|
size,
|
||||||
|
compressed_size: 0,
|
||||||
|
disk_size: 0,
|
||||||
index: data,
|
index: data,
|
||||||
ctime,
|
ctime,
|
||||||
uuid: *uuid.as_bytes(),
|
uuid: *uuid.as_bytes(),
|
||||||
@ -249,8 +253,11 @@ impl FixedIndexWriter {
|
|||||||
|
|
||||||
self.index = std::ptr::null_mut();
|
self.index = std::ptr::null_mut();
|
||||||
|
|
||||||
println!("Original size: {} Compressed size: {} Deduplicated size: {}",
|
let compression = (self.compressed_size*100)/(self.size as u64);
|
||||||
self.size, self.size, self.size - (self.duplicate_chunks*self.chunk_size));
|
let rate = (self.disk_size*100)/(self.size as u64);
|
||||||
|
|
||||||
|
println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)",
|
||||||
|
self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -290,12 +297,19 @@ impl FixedIndexWriter {
|
|||||||
if pos & (self.chunk_size-1) != 0 { bail!("add unaligned chunk (pos = {})", pos); }
|
if pos & (self.chunk_size-1) != 0 { bail!("add unaligned chunk (pos = {})", pos); }
|
||||||
|
|
||||||
|
|
||||||
let (is_duplicate, digest) = self.store.insert_chunk(chunk)?;
|
let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
|
||||||
|
|
||||||
println!("ADD CHUNK {} {} {} {}", pos, chunk.len(), is_duplicate, tools::digest_to_hex(&digest));
|
self.compressed_size += compressed_size;
|
||||||
|
|
||||||
|
println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
|
||||||
|
(compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
|
||||||
|
|
||||||
|
if is_duplicate {
|
||||||
|
self.duplicate_chunks += 1;
|
||||||
|
} else {
|
||||||
|
self.disk_size += compressed_size;
|
||||||
|
}
|
||||||
|
|
||||||
if is_duplicate { self.duplicate_chunks += 1; }
|
|
||||||
|
|
||||||
let index_pos = (pos/self.chunk_size)*32;
|
let index_pos = (pos/self.chunk_size)*32;
|
||||||
unsafe {
|
unsafe {
|
||||||
let dst = self.index.add(index_pos);
|
let dst = self.index.add(index_pos);
|
||||||
|
Loading…
Reference in New Issue
Block a user