2019-08-22 09:24:15 +00:00
|
|
|
use std::collections::HashMap;
|
2019-08-23 12:20:18 +00:00
|
|
|
use std::pin::Pin;
|
|
|
|
use std::task::{Context, Poll};
|
2019-08-22 09:24:15 +00:00
|
|
|
|
|
|
|
use bytes::{Bytes, BytesMut};
|
2019-05-21 10:21:22 +00:00
|
|
|
use failure::*;
|
|
|
|
use futures::*;
|
|
|
|
|
2019-07-04 05:57:43 +00:00
|
|
|
/// Trait to get digest list from index files
|
|
|
|
///
|
|
|
|
/// To allow easy iteration over all used chunks.
|
2019-08-28 08:33:41 +00:00
|
|
|
pub trait IndexFile {
    /// Number of digest entries; `find_most_used_chunks` visits positions
    /// `0..index_count()`.
    fn index_count(&self) -> usize;

    /// Digest stored at position `pos`.
    ///
    /// Callers in this file expect `Some` for every `pos < index_count()`;
    /// presumably `None` signals an out-of-range position (confirm against
    /// the implementors).
    fn index_digest(&self, pos: usize) -> Option<&[u8; 32]>;

    /// Size in bytes reported by the index.
    // NOTE(review): presumably the total size of the indexed data - confirm
    // against the implementors.
    fn index_bytes(&self) -> u64;

    /// Returns the most often used chunks.
    ///
    /// Counts how often each digest occurs in the index and returns at most
    /// `max` digests together with their usage count. Only digests that occur
    /// more than once are considered. If more than `max` digests qualify, the
    /// ones with the highest counts are kept; equal counts are ordered by
    /// digest so the selection is deterministic.
    ///
    /// # Panics
    ///
    /// Panics if `index_digest` returns `None` for a position below
    /// `index_count()`.
    fn find_most_used_chunks(&self, max: usize) -> HashMap<[u8; 32], usize> {
        let mut usage: HashMap<[u8; 32], usize> = HashMap::new();

        for pos in 0..self.index_count() {
            let digest = self
                .index_digest(pos)
                .expect("index_digest() returned None for a position below index_count()");
            *usage.entry(*digest).or_insert(0) += 1;
        }

        // Chunks used only once are never interesting here.
        let mut repeated: Vec<([u8; 32], usize)> = usage
            .into_iter()
            .filter(|&(_, count)| count > 1)
            .collect();

        // Highest count first, so that truncating drops the least used
        // entries (and not the most used ones).
        repeated.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        repeated.truncate(max);

        repeated.into_iter().collect()
    }
}
|
2019-05-11 14:01:42 +00:00
|
|
|
|
2019-05-21 10:28:44 +00:00
|
|
|
/// Encode digest list from an `IndexFile` into a binary stream
|
2019-05-11 14:01:42 +00:00
|
|
|
///
|
|
|
|
/// The reader simply returns a binary stream of 32 byte digest values.
|
2019-05-21 10:28:44 +00:00
|
|
|
pub struct DigestListEncoder {
|
2019-08-28 08:33:41 +00:00
|
|
|
index: Box<dyn IndexFile + Send + Sync>,
|
2019-05-11 14:01:42 +00:00
|
|
|
pos: usize,
|
|
|
|
count: usize,
|
|
|
|
}
|
|
|
|
|
2019-05-21 10:28:44 +00:00
|
|
|
impl DigestListEncoder {
|
2019-05-11 14:01:42 +00:00
|
|
|
|
2019-08-28 08:33:41 +00:00
|
|
|
pub fn new(index: Box<dyn IndexFile + Send + Sync>) -> Self {
|
2019-05-11 14:01:42 +00:00
|
|
|
let count = index.index_count();
|
|
|
|
Self { index, pos: 0, count }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-21 10:28:44 +00:00
|
|
|
impl std::io::Read for DigestListEncoder {
|
2019-05-11 14:01:42 +00:00
|
|
|
fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
|
2019-08-22 09:30:23 +00:00
|
|
|
if buf.len() < 32 {
|
|
|
|
panic!("read buffer too small");
|
|
|
|
}
|
|
|
|
|
2019-05-11 14:01:42 +00:00
|
|
|
if self.pos < self.count {
|
|
|
|
let mut written = 0;
|
|
|
|
loop {
|
|
|
|
let digest = self.index.index_digest(self.pos).unwrap();
|
2019-08-22 09:30:23 +00:00
|
|
|
buf[written..(written + 32)].copy_from_slice(digest);
|
2019-05-11 14:01:42 +00:00
|
|
|
self.pos += 1;
|
|
|
|
written += 32;
|
2019-08-22 09:30:23 +00:00
|
|
|
if self.pos >= self.count {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (written + 32) >= buf.len() {
|
|
|
|
break;
|
|
|
|
}
|
2019-05-11 14:01:42 +00:00
|
|
|
}
|
|
|
|
return Ok(written);
|
|
|
|
} else {
|
|
|
|
return Ok(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-05-21 10:21:22 +00:00
|
|
|
|
|
|
|
/// Decodes a `Stream<Item = Result<Bytes, E>>` into a `Stream<Item = Result<[u8; 32], Error>>`
|
|
|
|
///
|
|
|
|
/// The input is expected to be a binary stream of concatenated 32 byte digest values.
|
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
pub struct DigestListDecoder<S: Unpin> {
|
2019-05-21 10:21:22 +00:00
|
|
|
input: S,
|
|
|
|
buffer: BytesMut,
|
|
|
|
}
|
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
impl<S: Unpin> DigestListDecoder<S> {
|
2019-05-21 10:21:22 +00:00
|
|
|
pub fn new(input: S) -> Self {
|
|
|
|
Self { input, buffer: BytesMut::new() }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
// The decoder can be moved freely whenever its input can; `poll_next` relies
// on this to obtain `&mut Self` through `Pin::get_mut`.
impl<S> Unpin for DigestListDecoder<S> where S: Unpin {}
|
2019-05-21 10:21:22 +00:00
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
impl<S: Unpin, E> Stream for DigestListDecoder<S>
|
|
|
|
where
|
|
|
|
S: Stream<Item=Result<Bytes, E>>,
|
|
|
|
E: Into<Error>,
|
|
|
|
{
|
|
|
|
type Item = Result<[u8; 32], Error>;
|
2019-05-21 10:21:22 +00:00
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
|
|
|
let this = self.get_mut();
|
2019-05-21 10:21:22 +00:00
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
loop {
|
|
|
|
if this.buffer.len() >= 32 {
|
|
|
|
let left = this.buffer.split_to(32);
|
2019-05-21 10:21:22 +00:00
|
|
|
|
2019-08-22 09:24:15 +00:00
|
|
|
let mut digest = std::mem::MaybeUninit::<[u8; 32]>::uninit();
|
|
|
|
unsafe {
|
|
|
|
(*digest.as_mut_ptr()).copy_from_slice(&left[..]);
|
2019-08-23 12:20:18 +00:00
|
|
|
return Poll::Ready(Some(Ok(digest.assume_init())));
|
2019-08-22 09:24:15 +00:00
|
|
|
}
|
2019-05-21 10:21:22 +00:00
|
|
|
}
|
|
|
|
|
2019-08-23 12:20:18 +00:00
|
|
|
match Pin::new(&mut this.input).poll_next(cx) {
|
|
|
|
Poll::Pending => {
|
|
|
|
return Poll::Pending;
|
2019-05-21 10:21:22 +00:00
|
|
|
}
|
2019-08-23 12:20:18 +00:00
|
|
|
Poll::Ready(Some(Err(err))) => {
|
|
|
|
return Poll::Ready(Some(Err(err.into())));
|
2019-05-21 10:21:22 +00:00
|
|
|
}
|
2019-08-23 12:20:18 +00:00
|
|
|
Poll::Ready(Some(Ok(data))) => {
|
|
|
|
this.buffer.extend_from_slice(&data);
|
2019-05-21 10:21:22 +00:00
|
|
|
// continue
|
|
|
|
}
|
2019-08-23 12:20:18 +00:00
|
|
|
Poll::Ready(None) => {
|
|
|
|
let rest = this.buffer.len();
|
|
|
|
if rest == 0 {
|
|
|
|
return Poll::Ready(None);
|
|
|
|
}
|
|
|
|
return Poll::Ready(Some(Err(format_err!(
|
|
|
|
"got small digest ({} != 32).",
|
|
|
|
rest,
|
|
|
|
))));
|
|
|
|
}
|
2019-05-21 10:21:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|