improve chunker speed (avoid modulo)
This commit is contained in:
parent
3329ae8c2e
commit
ea4ea34baf
|
@ -23,6 +23,8 @@ pub struct Chunker {
|
||||||
|
|
||||||
discriminator: u32,
|
discriminator: u32,
|
||||||
|
|
||||||
|
break_test_value: u32,
|
||||||
|
|
||||||
window: [u8; CA_CHUNKER_WINDOW_SIZE],
|
window: [u8; CA_CHUNKER_WINDOW_SIZE],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,6 +112,10 @@ impl Chunker {
|
||||||
let avg = chunk_size_avg as f64;
|
let avg = chunk_size_avg as f64;
|
||||||
let discriminator = (avg / (-1.42888852e-7 * avg + 1.33237515)) as u32;
|
let discriminator = (avg / (-1.42888852e-7 * avg + 1.33237515)) as u32;
|
||||||
|
|
||||||
|
let break_test_value = (chunk_size_avg*2 -1) as u32;
|
||||||
|
|
||||||
|
println!("DISCRIMINATOR: {} {}", discriminator, avg/1.33);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
h: 0,
|
h: 0,
|
||||||
window_size: 0,
|
window_size: 0,
|
||||||
|
@ -118,6 +124,7 @@ impl Chunker {
|
||||||
chunk_size_max: chunk_size_avg<<2,
|
chunk_size_max: chunk_size_avg<<2,
|
||||||
chunk_size_avg: chunk_size_avg,
|
chunk_size_avg: chunk_size_avg,
|
||||||
discriminator: discriminator,
|
discriminator: discriminator,
|
||||||
|
break_test_value: break_test_value,
|
||||||
window: [0u8; CA_CHUNKER_WINDOW_SIZE],
|
window: [0u8; CA_CHUNKER_WINDOW_SIZE],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -177,6 +184,7 @@ impl Chunker {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fast break test: masks `self.h` with `break_test_value` instead of taking a modulo.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn shall_break(&self) -> bool {
|
fn shall_break(&self) -> bool {
|
||||||
|
|
||||||
|
@ -184,6 +192,19 @@ impl Chunker {
|
||||||
|
|
||||||
if self.chunk_size < self.chunk_size_min { return false; }
|
if self.chunk_size < self.chunk_size_min { return false; }
|
||||||
|
|
||||||
|
(self.h & self.break_test_value) <= 2
|
||||||
|
|
||||||
|
//(self.h & 0x1ffff) <= 2 //THIS IS SLOW!!!
|
||||||
|
}
|
||||||
|
|
||||||
|
// Original break test from casync: checks `self.h % self.discriminator == self.discriminator - 1`.
|
||||||
|
#[inline(always)]
|
||||||
|
fn shall_break_orig(&self) -> bool {
|
||||||
|
|
||||||
|
if self.chunk_size >= self.chunk_size_max { return true; }
|
||||||
|
|
||||||
|
if self.chunk_size < self.chunk_size_min { return false; }
|
||||||
|
|
||||||
(self.h % self.discriminator) == (self.discriminator - 1)
|
(self.h % self.discriminator) == (self.discriminator - 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,29 +6,34 @@ fn main() {
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
|
||||||
for i in 0..1024*1024 {
|
for i in 0..20*1024*1024 {
|
||||||
for j in 0..4 {
|
for j in 0..4 {
|
||||||
let byte = ((i >> (j<<3))&0xff) as u8;
|
let byte = ((i >> (j<<3))&0xff) as u8;
|
||||||
//println!("BYTE {}", byte);
|
//println!("BYTE {}", byte);
|
||||||
buffer.push(byte);
|
buffer.push(byte);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut chunker = Chunker::new(512*1024);
|
let mut chunker = Chunker::new(64*1024);
|
||||||
|
|
||||||
let count = 100;
|
let count = 5;
|
||||||
|
|
||||||
let start = std::time::SystemTime::now();
|
let start = std::time::SystemTime::now();
|
||||||
|
|
||||||
|
let mut chunk_count = 0;
|
||||||
|
|
||||||
for _i in 0..count {
|
for _i in 0..count {
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
|
let mut last = 0;
|
||||||
while pos < buffer.len() {
|
while pos < buffer.len() {
|
||||||
let k = chunker.scan(&buffer[pos..]);
|
let k = chunker.scan(&buffer[pos..]);
|
||||||
if k == 0 {
|
if k == 0 {
|
||||||
//println!("LAST {}", pos);
|
//println!("LAST {}", pos);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
last = pos;
|
||||||
pos += k;
|
pos += k;
|
||||||
//println!("CHUNK {}", pos);
|
chunk_count += 1;
|
||||||
|
//println!("CHUNK {} {}", pos, pos-last);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,6 +43,7 @@ fn main() {
|
||||||
(elapsed.subsec_millis() as f64)/1000.0;
|
(elapsed.subsec_millis() as f64)/1000.0;
|
||||||
|
|
||||||
let mbytecount = ((count*buffer.len()) as f64) / (1024.0*1024.0);
|
let mbytecount = ((count*buffer.len()) as f64) / (1024.0*1024.0);
|
||||||
|
let avg_chunk_size = mbytecount/(chunk_count as f64);
|
||||||
let mbytes_per_sec = mbytecount/elapsed;
|
let mbytes_per_sec = mbytecount/elapsed;
|
||||||
println!("SPEED = {} MB/s", mbytes_per_sec);
|
println!("SPEED = {} MB/s, avg chunk size = {} KB", mbytes_per_sec, avg_chunk_size*1024.0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue