src/backup/chunk_stat.rs: new struct to track chunk statistics
This commit is contained in:
parent
798f7fa065
commit
7e3365554e
@ -11,6 +11,9 @@
|
|||||||
//! whereas the `FixedIndex*` format is an optimization to store a
|
//! whereas the `FixedIndex*` format is an optimization to store a
|
||||||
//! list of equal sized chunks.
|
//! list of equal sized chunks.
|
||||||
|
|
||||||
|
mod chunk_stat;
|
||||||
|
pub use chunk_stat::*;
|
||||||
|
|
||||||
mod chunker;
|
mod chunker;
|
||||||
pub use chunker::*;
|
pub use chunker::*;
|
||||||
|
|
||||||
|
32
src/backup/chunk_stat.rs
Normal file
32
src/backup/chunk_stat.rs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
/// Accumulated statistics about the chunks handled by an index writer.
///
/// The writers update the public fields directly while chunks are added;
/// the `Debug` implementation renders a one-line human readable summary.
pub struct ChunkStat {
    /// Total size in bytes that the percentages are computed against.
    pub size: u64,
    /// Sum of the compressed sizes of all chunks, duplicates included.
    pub compressed_size: u64,
    /// Number of bytes accounted as actually written to disk
    /// (compressed size of the non-duplicate chunks, plus whatever
    /// overhead the writer adds).
    pub disk_size: u64,

    /// Number of chunks processed so far.
    pub chunk_count: usize,
    /// Number of processed chunks that were reported as duplicates.
    pub duplicate_chunks: usize,
}

impl ChunkStat {

    /// Creates a new statistics record for data of `size` bytes with all
    /// counters set to zero.
    pub fn new(size: u64) -> Self {
        ChunkStat {
            size,
            compressed_size: 0,
            disk_size: 0,

            chunk_count: 0,
            duplicate_chunks: 0,
        }
    }

    /// Returns `part` as an integer percentage of `total`.
    ///
    /// Yields 0 when `total` is zero instead of panicking, and saturates
    /// instead of overflowing when `part * 100` does not fit into a `u64`.
    fn percent(part: u64, total: u64) -> u64 {
        part.saturating_mul(100).checked_div(total).unwrap_or(0)
    }
}

impl std::fmt::Debug for ChunkStat {
    /// Writes a one-line summary: total size, average chunk size,
    /// compression rate and disk usage (absolute and in percent).
    ///
    /// Never panics: an empty record (zero size and/or zero chunks)
    /// reports 0 for the derived values.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // No chunks yet means there is no meaningful average; report 0
        // rather than dividing by zero.
        let avg = self.size.checked_div(self.chunk_count as u64).unwrap_or(0);
        let compression = Self::percent(self.compressed_size, self.size);
        let rate = Self::percent(self.disk_size, self.size);
        write!(f, "Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
               self.size, avg, compression, self.disk_size, rate)
    }
}
|
@ -1,6 +1,7 @@
|
|||||||
use failure::*;
|
use failure::*;
|
||||||
|
|
||||||
use crate::tools;
|
use crate::tools;
|
||||||
|
use super::chunk_stat::*;
|
||||||
use super::chunk_store::*;
|
use super::chunk_store::*;
|
||||||
use super::chunker::*;
|
use super::chunker::*;
|
||||||
|
|
||||||
@ -329,9 +330,8 @@ pub struct DynamicIndexWriter {
|
|||||||
pub uuid: [u8; 16],
|
pub uuid: [u8; 16],
|
||||||
pub ctime: u64,
|
pub ctime: u64,
|
||||||
|
|
||||||
compressed_size: u64,
|
stat: ChunkStat,
|
||||||
disk_size: u64,
|
|
||||||
chunk_count: usize,
|
|
||||||
chunk_offset: usize,
|
chunk_offset: usize,
|
||||||
last_chunk: usize,
|
last_chunk: usize,
|
||||||
chunk_buffer: Vec<u8>,
|
chunk_buffer: Vec<u8>,
|
||||||
@ -390,9 +390,8 @@ impl DynamicIndexWriter {
|
|||||||
ctime,
|
ctime,
|
||||||
uuid: *uuid.as_bytes(),
|
uuid: *uuid.as_bytes(),
|
||||||
|
|
||||||
compressed_size: 0,
|
stat: ChunkStat::new(0),
|
||||||
disk_size: 0,
|
|
||||||
chunk_count: 0,
|
|
||||||
chunk_offset: 0,
|
chunk_offset: 0,
|
||||||
last_chunk: 0,
|
last_chunk: 0,
|
||||||
chunk_buffer: Vec::with_capacity(chunk_size*4),
|
chunk_buffer: Vec::with_capacity(chunk_size*4),
|
||||||
@ -411,12 +410,13 @@ impl DynamicIndexWriter {
|
|||||||
|
|
||||||
self.writer.flush()?;
|
self.writer.flush()?;
|
||||||
|
|
||||||
let size = self.chunk_offset;
|
self.stat.size = self.chunk_offset as u64;
|
||||||
let avg = ((size as f64)/(self.chunk_count as f64)) as usize;
|
|
||||||
let compression = (self.compressed_size*100)/(size as u64);
|
// add size of index file
|
||||||
let rate = (self.disk_size*100)/(size as u64);
|
self.stat.size += (self.stat.chunk_count*40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
|
||||||
println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
|
|
||||||
size, avg, compression, self.disk_size, rate);
|
println!("STAT: {:?}", self.stat);
|
||||||
|
|
||||||
// fixme:
|
// fixme:
|
||||||
|
|
||||||
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
|
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
|
||||||
@ -426,6 +426,10 @@ impl DynamicIndexWriter {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the `ChunkStat` held by this writer.
pub fn stat(&self) -> &ChunkStat {
|
||||||
|
&self.stat
|
||||||
|
}
|
||||||
|
|
||||||
fn write_chunk_buffer(&mut self) -> Result<(), std::io::Error> {
|
fn write_chunk_buffer(&mut self) -> Result<(), std::io::Error> {
|
||||||
|
|
||||||
use std::io::{Error, ErrorKind};
|
use std::io::{Error, ErrorKind};
|
||||||
@ -441,17 +445,18 @@ impl DynamicIndexWriter {
|
|||||||
format!("wrong chunk size {} != {}", expected_chunk_size, chunk_size)));
|
format!("wrong chunk size {} != {}", expected_chunk_size, chunk_size)));
|
||||||
}
|
}
|
||||||
|
|
||||||
self.chunk_count += 1;
|
self.stat.chunk_count += 1;
|
||||||
|
|
||||||
self.last_chunk = self.chunk_offset;
|
self.last_chunk = self.chunk_offset;
|
||||||
|
|
||||||
match self.store.insert_chunk(&self.chunk_buffer) {
|
match self.store.insert_chunk(&self.chunk_buffer) {
|
||||||
Ok((is_duplicate, digest, compressed_size)) => {
|
Ok((is_duplicate, digest, compressed_size)) => {
|
||||||
|
|
||||||
self.compressed_size += compressed_size;
|
self.stat.compressed_size += compressed_size;
|
||||||
if is_duplicate {
|
if is_duplicate {
|
||||||
|
self.stat.duplicate_chunks += 1;
|
||||||
} else {
|
} else {
|
||||||
self.disk_size += compressed_size;
|
self.stat.disk_size += compressed_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
|
println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use failure::*;
|
use failure::*;
|
||||||
|
|
||||||
use crate::tools;
|
use crate::tools;
|
||||||
|
use super::chunk_stat::*;
|
||||||
use super::chunk_store::*;
|
use super::chunk_store::*;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@ -159,10 +160,10 @@ pub struct FixedIndexWriter {
|
|||||||
filename: PathBuf,
|
filename: PathBuf,
|
||||||
tmp_filename: PathBuf,
|
tmp_filename: PathBuf,
|
||||||
chunk_size: usize,
|
chunk_size: usize,
|
||||||
duplicate_chunks: usize,
|
|
||||||
disk_size: u64,
|
stat: ChunkStat,
|
||||||
|
|
||||||
size: usize,
|
size: usize,
|
||||||
compressed_size: u64,
|
|
||||||
index: *mut u8,
|
index: *mut u8,
|
||||||
pub uuid: [u8; 16],
|
pub uuid: [u8; 16],
|
||||||
pub ctime: u64,
|
pub ctime: u64,
|
||||||
@ -231,10 +232,8 @@ impl FixedIndexWriter {
|
|||||||
filename: full_path,
|
filename: full_path,
|
||||||
tmp_filename: tmp_path,
|
tmp_filename: tmp_path,
|
||||||
chunk_size,
|
chunk_size,
|
||||||
duplicate_chunks: 0,
|
|
||||||
size,
|
size,
|
||||||
compressed_size: 0,
|
stat: ChunkStat::new(size as u64),
|
||||||
disk_size: 0,
|
|
||||||
index: data,
|
index: data,
|
||||||
ctime,
|
ctime,
|
||||||
uuid: *uuid.as_bytes(),
|
uuid: *uuid.as_bytes(),
|
||||||
@ -253,11 +252,9 @@ impl FixedIndexWriter {
|
|||||||
|
|
||||||
self.index = std::ptr::null_mut();
|
self.index = std::ptr::null_mut();
|
||||||
|
|
||||||
let compression = (self.compressed_size*100)/(self.size as u64);
|
self.stat.disk_size += index_size as u64;
|
||||||
let rate = (self.disk_size*100)/(self.size as u64);
|
|
||||||
|
|
||||||
println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)",
|
println!("STAT: {:?}", self.stat);
|
||||||
self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -275,6 +272,10 @@ impl FixedIndexWriter {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the `ChunkStat` held by this writer.
pub fn stat(&self) -> &ChunkStat {
|
||||||
|
&self.stat
|
||||||
|
}
|
||||||
|
|
||||||
// Note: We want to add data out of order, so do not assume any order here.
|
// Note: We want to add data out of order, so do not assume any order here.
|
||||||
pub fn add_chunk(&mut self, pos: usize, chunk: &[u8]) -> Result<(), Error> {
|
pub fn add_chunk(&mut self, pos: usize, chunk: &[u8]) -> Result<(), Error> {
|
||||||
|
|
||||||
@ -299,15 +300,16 @@ impl FixedIndexWriter {
|
|||||||
|
|
||||||
let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
|
let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
|
||||||
|
|
||||||
self.compressed_size += compressed_size;
|
self.stat.chunk_count += 1;
|
||||||
|
self.stat.compressed_size += compressed_size;
|
||||||
|
|
||||||
println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
|
println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
|
||||||
(compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
|
(compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
|
||||||
|
|
||||||
if is_duplicate {
|
if is_duplicate {
|
||||||
self.duplicate_chunks += 1;
|
self.stat.duplicate_chunks += 1;
|
||||||
} else {
|
} else {
|
||||||
self.disk_size += compressed_size;
|
self.stat.disk_size += compressed_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
let index_pos = (pos/self.chunk_size)*32;
|
let index_pos = (pos/self.chunk_size)*32;
|
||||||
|
Loading…
Reference in New Issue
Block a user