src/catar/encoder.rs: correctly sort goodby items

This commit is contained in:
Dietmar Maurer 2018-12-29 17:00:48 +01:00
parent 46b6fbd6ae
commit 95bd5dfec7
4 changed files with 86 additions and 17 deletions

View File

@ -0,0 +1,63 @@
/// Helpers to generate a binary search tree stored in an array from a
/// sorted array.
///
/// Specifically, for any given sorted array 'input' permute the
/// array so that the following rule holds:
///
/// For each array item with index i, the item at 2*i+1 is smaller and
/// the item 2*i+2 is larger.
///
/// This structure permits efficient (meaning: O(log(n))) binary
/// searches: start with item i=0 (i.e. the root of the BST), compare
/// the value with the searched item, if smaller proceed at item
/// i*2+1, if larger proceed at item i*2+2, and repeat, until either
/// the item is found, or the indexes grow beyond the array size,
/// which means the entry does not exist.
///
/// Effectively this implements bisection, but instead of jumping
/// around wildly in the array during a single search we only search
/// with strictly monotonically increasing indexes.
///
/// Algorithm is from casync (camakebst.c), simplified and optimized
/// for rust. Permutation function originally by L. Bressel, 2017.
///
/// # Recursive worker
///
/// Handles the sub-range `input[o..o+n]` of the sorted input and places
/// it into the subtree rooted at output index `i`.
///
/// * `copy_func` - called as `copy_func(src, dst)`: `src` is an index into
///   the sorted input, `dst` the index in the tree-ordered output.
/// * `n` - number of items in the sub-range (callers pass `n >= 1`).
/// * `o` - offset of the sub-range inside the input.
/// * `e` - exponent of the level size used for this subtree (`p = 1 << e`).
/// * `i` - output index of this subtree's root.
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
    copy_func: &mut F,
    n: usize,
    o: usize, // Note: virtual offset for input array
    e: usize,
    i: usize,
) {
    let p = 1 << e;
    // `t` is the item count at which the left half of the lowest level is
    // exactly full: (p - 1) items above it plus p/2 items in that half.
    let t = p + (p >> 1) - 1;

    // `m` is the size of the left subtree, and therefore also the position
    // of the root within the sub-range.
    let m = if n > t {
        // |...........p.............t....n........(2p)|
        p - 1
    } else {
        // |...........p.....n.......t.............(2p)|
        p - 1 - (t - n)
    };

    (copy_func)(o + m, i);

    if m > 0 {
        // The left part starts at the same offset as the current range.
        // Passing 0 here would make every left subtree below a right
        // subtree read from the start of the input instead.
        copy_binary_search_tree_inner(copy_func, m, o, e - 1, i * 2 + 1);
    }

    if (m + 1) < n {
        // The right part starts just behind the root.
        copy_binary_search_tree_inner(copy_func, n - m - 1, o + m + 1, e - 1, i * 2 + 2);
    }
}
/// Drives the permutation of a sorted array of `n` items into the
/// array-stored binary search tree layout.
///
/// `copy_func(src, dst)` is invoked once for every output position `dst`
/// in `0..n`; `src` is the index into the sorted input whose item belongs
/// at that position. Nothing is called when `n` is 0.
pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
    n: usize,
    mut copy_func: F,
) {
    if n == 0 {
        return;
    }
    // fast log2(n): index of the highest set bit. Use the real width of
    // `usize` rather than a literal 64, so the exponent is also correct on
    // targets where `usize` is narrower than 64 bits.
    let usize_bits = 8 * std::mem::size_of::<usize>() as u32;
    let e = (usize_bits - 1 - n.leading_zeros()) as usize;
    copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
}

View File

@ -1,6 +1,7 @@
use failure::*;
use super::format_definition::*;
use super::binary_search_tree::*;
use std::io::Write;
use std::os::unix::io::AsRawFd;
@ -218,7 +219,9 @@ impl <W: Write> CaTarEncoder<W> {
println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);
let goodby_start = self.writer_pos as u64;
let goodby_table_size = (goodby_items.len() + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
let item_count = goodby_items.len();
let goodby_table_size = (item_count + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
for item in &mut goodby_items {
item.offset = goodby_start - item.offset;
@ -228,13 +231,6 @@ impl <W: Write> CaTarEncoder<W> {
let goodby_offset = self.writer_pos - dir_start_pos;
// append CaFormatGoodbyeTail as last item
goodby_items.push(CaFormatGoodbyeItem {
offset: goodby_offset as u64,
size: (goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64,
hash: CA_FORMAT_GOODBYE_TAIL_MARKER,
});
self.write_header(CA_FORMAT_GOODBYE, goodby_table_size as u64)?;
if goodby_table_size > FILE_COPY_BUFFER_SIZE {
@ -243,13 +239,22 @@ impl <W: Write> CaTarEncoder<W> {
let buffer = &mut self.file_copy_buffer;
let buffer_ptr = buffer.as_ptr();
for (i, item) in goodby_items.iter().enumerate() {
unsafe {
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()) as *mut u64) = u64::to_le(item.offset);
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+8) as *mut u64) = u64::to_le(item.size);
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+16) as *mut u64) = u64::to_le(item.hash);
}
}
copy_binary_search_tree(item_count, |s, d| {
let item = &goodby_items[s];
let offset = d*std::mem::size_of::<CaFormatGoodbyeItem>();
let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
dest.offset = u64::to_le(item.offset);
dest.size = u64::to_le(item.size);
dest.hash = u64::to_le(item.hash);
});
// append CaFormatGoodbyeTail as last item
let offset = item_count*std::mem::size_of::<CaFormatGoodbyeItem>();
let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
dest.offset = u64::to_le(goodby_offset as u64);
dest.size = u64::to_le((goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64);
dest.hash = u64::to_le(CA_FORMAT_GOODBYE_TAIL_MARKER);
self.flush_copy_buffer(goodby_table_size)?;

View File

@ -32,6 +32,7 @@ pub mod server {
pub mod catar {
pub mod binary_search_tree;
pub mod format_definition;
pub mod encoder;
}

View File

@ -17,14 +17,14 @@ pub fn map_struct<T>(buffer: &[u8]) -> Result<&T, Error> {
if buffer.len() < ::std::mem::size_of::<T>() {
bail!("unable to map struct - buffer too small");
}
return Ok(unsafe { & * (buffer.as_ptr() as *const T) });
Ok(unsafe { & * (buffer.as_ptr() as *const T) })
}
/// Reinterprets the leading bytes of `buffer` as a mutable reference to `T`.
///
/// Returns an error ("unable to map struct - buffer too small") when
/// `buffer` holds fewer than `size_of::<T>()` bytes. Only the length is
/// checked in this function; alignment of the buffer and validity of the
/// bytes for `T` are not.
pub fn map_struct_mut<T>(buffer: &mut [u8]) -> Result<&mut T, Error> {
// Reject buffers that cannot hold a complete `T`.
if buffer.len() < ::std::mem::size_of::<T>() {
bail!("unable to map struct - buffer too small");
}
// NOTE(review): the next two lines look like the removed/added pair of this
// diff hunk (explicit `return` replaced by a tail expression) - confirm that
// only one of them exists in the real file.
// NOTE(review): the mutable pointer is derived from `as_ptr()`; consider
// `as_mut_ptr()` - TODO confirm.
return Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) });
Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) })
}