2019-08-22 09:24:15 +00:00
|
|
|
use std::collections::HashMap;
|
2020-03-23 14:03:18 +00:00
|
|
|
use std::ops::Range;
|
2019-05-21 10:21:22 +00:00
|
|
|
|
2020-03-23 14:03:18 +00:00
|
|
|
/// Describes one chunk referenced by an index: the range it covers and the
/// digest that identifies it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChunkReadInfo {
    /// Half-open `start..end` range covered by the chunk
    /// (presumably byte offsets into the indexed data — confirm with callers).
    pub range: Range<u64>,
    /// 32-byte digest identifying the chunk.
    pub digest: [u8; 32],
}
|
|
|
|
|
|
|
|
impl ChunkReadInfo {
|
|
|
|
#[inline]
|
|
|
|
pub fn size(&self) -> u64 {
|
|
|
|
self.range.end - self.range.start
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-04 05:57:43 +00:00
|
|
|
/// Trait to get the digest list from index files.
///
/// Allows easy iteration over all chunks referenced by an index.
pub trait IndexFile {
    /// Number of positions in the index; valid positions are
    /// `0..index_count()`.
    fn index_count(&self) -> usize;

    /// Digest stored at position `pos`, or `None` if there is none.
    fn index_digest(&self, pos: usize) -> Option<&[u8; 32]>;

    /// NOTE(review): presumably the total size in bytes of the data described
    /// by the index — confirm against the implementors.
    fn index_bytes(&self) -> u64;

    /// Returns the most often used chunks.
    ///
    /// Counts how many times each digest occurs in the index and returns up
    /// to `max` digests with the highest occurrence counts, mapped to those
    /// counts. Digests that occur only once are never returned. Positions for
    /// which [`index_digest`](IndexFile::index_digest) yields `None` are
    /// skipped.
    ///
    /// When several digests share the same count at the cut-off, the smaller
    /// digest (byte-wise) wins, so the result is deterministic.
    fn find_most_used_chunks(&self, max: usize) -> HashMap<[u8; 32], usize> {
        // Pass 1: count occurrences of every digest in the index.
        let mut usage: HashMap<[u8; 32], usize> = HashMap::new();
        for digest in (0..self.index_count()).filter_map(|pos| self.index_digest(pos)) {
            *usage.entry(*digest).or_insert(0) += 1;
        }

        // Only chunks referenced more than once are candidates.
        let mut repeated: Vec<([u8; 32], usize)> = usage
            .into_iter()
            .filter(|&(_, count)| count > 1)
            .collect();

        // Highest count first, so that truncating keeps the *most* used
        // chunks; ties are broken by digest to avoid depending on hash order.
        repeated.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        repeated.truncate(max);

        repeated.into_iter().collect()
    }
}
|