diff --git a/src/backup/chunker.rs b/src/backup/chunker.rs index eee2dd79..25914532 100644 --- a/src/backup/chunker.rs +++ b/src/backup/chunker.rs @@ -23,6 +23,8 @@ pub struct Chunker { discriminator: u32, + break_test_value: u32, + window: [u8; CA_CHUNKER_WINDOW_SIZE], } @@ -110,6 +112,10 @@ impl Chunker { let avg = chunk_size_avg as f64; let discriminator = (avg / (-1.42888852e-7 * avg + 1.33237515)) as u32; + let break_test_value = (chunk_size_avg*2 -1) as u32; + + println!("DISCRIMINATOR: {} {}", discriminator, avg/1.33); + Self { h: 0, window_size: 0, @@ -118,6 +124,7 @@ impl Chunker { chunk_size_max: chunk_size_avg<<2, chunk_size_avg: chunk_size_avg, discriminator: discriminator, + break_test_value: break_test_value, window: [0u8; CA_CHUNKER_WINDOW_SIZE], } } @@ -177,6 +184,7 @@ impl Chunker { 0 } + // fast implementation avoiding modulo #[inline(always)] fn shall_break(&self) -> bool { @@ -184,6 +192,19 @@ impl Chunker { if self.chunk_size < self.chunk_size_min { return false; } + (self.h & self.break_test_value) <= 2 + + //(self.h & 0x1ffff) <= 2 //THIS IS SLOW!!! + } + + // This is the original implementation from casync + #[inline(always)] + fn shall_break_orig(&self) -> bool { + + if self.chunk_size >= self.chunk_size_max { return true; } + + if self.chunk_size < self.chunk_size_min { return false; } + (self.h % self.discriminator) == (self.discriminator - 1) } diff --git a/src/bin/test_chunk_speed.rs b/src/bin/test_chunk_speed.rs index 99f475e0..b2a7efeb 100644 --- a/src/bin/test_chunk_speed.rs +++ b/src/bin/test_chunk_speed.rs @@ -6,29 +6,34 @@ fn main() { let mut buffer = Vec::new(); - for i in 0..1024*1024 { + for i in 0..20*1024*1024 { for j in 0..4 { let byte = ((i >> (j<<3))&0xff) as u8; //println!("BYTE {}", byte); buffer.push(byte); } } - let mut chunker = Chunker::new(512*1024); + let mut chunker = Chunker::new(64*1024); - let count = 100; + let count = 5; let start = std::time::SystemTime::now(); + + let mut chunk_count = 0; for _i in 0..count { let mut pos = 0; + let mut last = 0; while pos < buffer.len() { let k = chunker.scan(&buffer[pos..]); if k == 0 { //println!("LAST {}", pos); break; } else { + last = pos; pos += k; - //println!("CHUNK {}", pos); + chunk_count += 1; + //println!("CHUNK {} {}", pos, pos-last); } } } @@ -38,6 +43,7 @@ fn main() { (elapsed.subsec_millis() as f64)/1000.0; let mbytecount = ((count*buffer.len()) as f64) / (1024.0*1024.0); + let avg_chunk_size = mbytecount/(chunk_count as f64); let mbytes_per_sec = mbytecount/elapsed; - println!("SPEED = {} MB/s", mbytes_per_sec); + println!("SPEED = {} MB/s, avg chunk size = {} KB", mbytes_per_sec, avg_chunk_size*1024.0); }