src/backup/chunk_stat.rs: new struct to track chunk statistics

This commit is contained in:
Dietmar Maurer 2019-02-25 12:52:10 +01:00
parent 798f7fa065
commit 7e3365554e
4 changed files with 70 additions and 28 deletions

View File

@@ -11,6 +11,9 @@
//! whereas the `FixedIndex*` format is an optimization to store a
//! list of equal sized chunks.
mod chunk_stat;
pub use chunk_stat::*;
mod chunker;
pub use chunker::*;

32
src/backup/chunk_stat.rs Normal file
View File

@@ -0,0 +1,32 @@
/// Accumulated statistics about chunks written to a chunk store.
pub struct ChunkStat {
/// Total logical (uncompressed) size of the data stream, in bytes.
pub size: u64,
/// Sum of the compressed sizes of all chunks seen (including duplicates).
pub compressed_size: u64,
/// Bytes actually written to disk (compressed size of non-duplicate chunks only).
pub disk_size: u64,
/// Number of chunks processed.
pub chunk_count: usize,
/// How many of those chunks were duplicates already present in the store.
pub duplicate_chunks: usize,
}
impl ChunkStat {
pub fn new(size: u64) -> Self {
ChunkStat {
size,
compressed_size: 0,
disk_size: 0,
chunk_count: 0,
duplicate_chunks: 0,
}
}
}
impl std::fmt::Debug for ChunkStat {
    /// Human-readable summary: total size, average chunk size, compression
    /// rate and on-disk footprint.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Guard the divisions: a freshly created ChunkStat (e.g. via
        // ChunkStat::new(0)) has size == 0 and chunk_count == 0, and the
        // original integer divisions would panic with "divide by zero"
        // (the f64 division for `avg` would silently produce a garbage
        // NaN-to-usize cast). Report 0 in that case instead.
        let avg = if self.chunk_count > 0 {
            ((self.size as f64) / (self.chunk_count as f64)) as usize
        } else {
            0
        };
        // `size` is already u64, so no cast is needed for the percentages.
        let (compression, rate) = if self.size > 0 {
            (
                (self.compressed_size * 100) / self.size,
                (self.disk_size * 100) / self.size,
            )
        } else {
            (0, 0)
        };
        write!(f, "Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
               self.size, avg, compression, self.disk_size, rate)
    }
}

View File

@@ -1,6 +1,7 @@
use failure::*;
use crate::tools;
use super::chunk_stat::*;
use super::chunk_store::*;
use super::chunker::*;
@@ -329,9 +330,8 @@ pub struct DynamicIndexWriter {
pub uuid: [u8; 16],
pub ctime: u64,
compressed_size: u64,
disk_size: u64,
chunk_count: usize,
stat: ChunkStat,
chunk_offset: usize,
last_chunk: usize,
chunk_buffer: Vec<u8>,
@@ -390,9 +390,8 @@ impl DynamicIndexWriter {
ctime,
uuid: *uuid.as_bytes(),
compressed_size: 0,
disk_size: 0,
chunk_count: 0,
stat: ChunkStat::new(0),
chunk_offset: 0,
last_chunk: 0,
chunk_buffer: Vec::with_capacity(chunk_size*4),
@@ -411,12 +410,13 @@ impl DynamicIndexWriter {
self.writer.flush()?;
let size = self.chunk_offset;
let avg = ((size as f64)/(self.chunk_count as f64)) as usize;
let compression = (self.compressed_size*100)/(size as u64);
let rate = (self.disk_size*100)/(size as u64);
println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
size, avg, compression, self.disk_size, rate);
self.stat.size = self.chunk_offset as u64;
// add size of index file
self.stat.size += (self.stat.chunk_count*40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
println!("STAT: {:?}", self.stat);
// fixme:
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
@@ -426,6 +426,10 @@ impl DynamicIndexWriter {
Ok(())
}
/// Borrow the chunk statistics accumulated by this writer so far.
pub fn stat(&self) -> &ChunkStat {
&self.stat
}
fn write_chunk_buffer(&mut self) -> Result<(), std::io::Error> {
use std::io::{Error, ErrorKind};
@@ -441,17 +445,18 @@
format!("wrong chunk size {} != {}", expected_chunk_size, chunk_size)));
}
self.chunk_count += 1;
self.stat.chunk_count += 1;
self.last_chunk = self.chunk_offset;
match self.store.insert_chunk(&self.chunk_buffer) {
Ok((is_duplicate, digest, compressed_size)) => {
self.compressed_size += compressed_size;
self.stat.compressed_size += compressed_size;
if is_duplicate {
self.stat.duplicate_chunks += 1;
} else {
self.disk_size += compressed_size;
self.stat.disk_size += compressed_size;
}
println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,

View File

@@ -1,6 +1,7 @@
use failure::*;
use crate::tools;
use super::chunk_stat::*;
use super::chunk_store::*;
use std::sync::Arc;
@@ -159,10 +160,10 @@ pub struct FixedIndexWriter {
filename: PathBuf,
tmp_filename: PathBuf,
chunk_size: usize,
duplicate_chunks: usize,
disk_size: u64,
stat: ChunkStat,
size: usize,
compressed_size: u64,
index: *mut u8,
pub uuid: [u8; 16],
pub ctime: u64,
@@ -231,10 +232,8 @@ impl FixedIndexWriter {
filename: full_path,
tmp_filename: tmp_path,
chunk_size,
duplicate_chunks: 0,
size,
compressed_size: 0,
disk_size: 0,
stat: ChunkStat::new(size as u64),
index: data,
ctime,
uuid: *uuid.as_bytes(),
@@ -253,11 +252,9 @@ impl FixedIndexWriter {
self.index = std::ptr::null_mut();
let compression = (self.compressed_size*100)/(self.size as u64);
let rate = (self.disk_size*100)/(self.size as u64);
self.stat.disk_size += index_size as u64;
println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)",
self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate);
println!("STAT: {:?}", self.stat);
Ok(())
}
@@ -275,6 +272,10 @@ impl FixedIndexWriter {
Ok(())
}
/// Borrow the chunk statistics accumulated by this writer so far.
pub fn stat(&self) -> &ChunkStat {
&self.stat
}
// Note: We want to add data out of order, so do not assume and order here.
pub fn add_chunk(&mut self, pos: usize, chunk: &[u8]) -> Result<(), Error> {
@@ -299,15 +300,16 @@
let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
self.compressed_size += compressed_size;
self.stat.chunk_count += 1;
self.stat.compressed_size += compressed_size;
println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
(compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
if is_duplicate {
self.duplicate_chunks += 1;
self.stat.duplicate_chunks += 1;
} else {
self.disk_size += compressed_size;
self.stat.disk_size += compressed_size;
}
let index_pos = (pos/self.chunk_size)*32;