src/catar/encoder.rs: correctly sort goodby items
This commit is contained in:
parent
46b6fbd6ae
commit
95bd5dfec7
|
@ -0,0 +1,63 @@
|
|||
/// Helpers to generate a binary search tree stored in an array from a
|
||||
/// sorted array.
|
||||
///
|
||||
/// Specifically, for any given sorted array 'input' permute the
|
||||
/// array so that the following rule holds:
|
||||
///
|
||||
/// For each array item with index i, the item at 2*i+1 is smaller and
|
||||
/// the item 2*i+2 is larger.
|
||||
///
|
||||
/// This structure permits efficient (meaning: O(log(n))) binary
|
||||
/// searches: start with item i=0 (i.e. the root of the BST), compare
|
||||
/// the value with the searched item, if smaller proceed at item
|
||||
/// i*2+1, if larger proceed at item i*2+2, and repeat, until either
|
||||
/// the item is found, or the indexes grow beyond the array size,
|
||||
/// which means the entry does not exist.
|
||||
///
|
||||
/// Effectively this implements bisection, but instead of jumping
|
||||
/// around wildly in the array during a single search we only search
|
||||
/// with strictly monotonically increasing indexes.
|
||||
///
|
||||
/// Algorithm is from casync (camakebst.c), simplified and optimized
|
||||
/// for rust. Permutation function originally by L. Bressel, 2017.
|
||||
///
|
||||
///
|
||||
|
||||
/// Recursive worker that emits the copy operations for one subtree.
///
/// Processes the sorted input sub-range `input[o..o+n]` and places the root
/// of the resulting subtree at output index `i`.
///
/// * `copy_func` - called as `copy_func(src, dest)` for every item, where
///   `src` indexes the sorted input and `dest` the tree-ordered output.
/// * `n` - number of items in the sub-range handled by this call.
/// * `o` - offset of the sub-range inside the input array.
/// * `e` - exponent used to size the subtree; the top-level caller passes
///   floor(log2(n)) and every recursion level passes `e - 1`.
/// * `i` - output index of this subtree's root; its children are placed at
///   `2*i+1` and `2*i+2`.
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
    copy_func: &mut F,
    n: usize,
    o: usize, // Note: virtual offset for input array
    e: usize,
    i: usize,
) {
    let p = 1 << e;

    // Threshold at which the left half of the lowest tree level is full.
    let t = p + (p >> 1) - 1;

    // m = number of items that end up in the left subtree, which is also
    // the position of the root relative to the sub-range start.
    let m = if n > t {
        // |...........p.............t....n........(2p)|
        p - 1
    } else {
        // |...........p.....n.......t.............(2p)|
        p - 1 - (t - n)
    };

    (copy_func)(o + m, i);

    if m > 0 {
        // The left subtree covers input[o..o+m]. It must keep the current
        // offset `o`; resetting it to 0 makes every left child inside a
        // right subtree read from the start of the input array instead.
        copy_binary_search_tree_inner(copy_func, m, o, e - 1, i * 2 + 1);
    }

    if (m + 1) < n {
        // The right subtree covers input[o+m+1..o+n].
        copy_binary_search_tree_inner(copy_func, n - m - 1, o + m + 1, e - 1, i * 2 + 2);
    }
}
|
||||
|
||||
pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
|
||||
n: usize,
|
||||
mut copy_func: F,
|
||||
) {
|
||||
if n == 0 { return };
|
||||
let e = (64 - n.leading_zeros() - 1) as usize; // fast log2(n)
|
||||
copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
use failure::*;
|
||||
|
||||
use super::format_definition::*;
|
||||
use super::binary_search_tree::*;
|
||||
|
||||
use std::io::Write;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
|
@ -218,7 +219,9 @@ impl <W: Write> CaTarEncoder<W> {
|
|||
println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);
|
||||
|
||||
let goodby_start = self.writer_pos as u64;
|
||||
let goodby_table_size = (goodby_items.len() + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||
|
||||
let item_count = goodby_items.len();
|
||||
let goodby_table_size = (item_count + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||
|
||||
for item in &mut goodby_items {
|
||||
item.offset = goodby_start - item.offset;
|
||||
|
@ -228,13 +231,6 @@ impl <W: Write> CaTarEncoder<W> {
|
|||
|
||||
let goodby_offset = self.writer_pos - dir_start_pos;
|
||||
|
||||
// append CaFormatGoodbyeTail as last item
|
||||
goodby_items.push(CaFormatGoodbyeItem {
|
||||
offset: goodby_offset as u64,
|
||||
size: (goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64,
|
||||
hash: CA_FORMAT_GOODBYE_TAIL_MARKER,
|
||||
});
|
||||
|
||||
self.write_header(CA_FORMAT_GOODBYE, goodby_table_size as u64)?;
|
||||
|
||||
if goodby_table_size > FILE_COPY_BUFFER_SIZE {
|
||||
|
@ -243,13 +239,22 @@ impl <W: Write> CaTarEncoder<W> {
|
|||
|
||||
let buffer = &mut self.file_copy_buffer;
|
||||
let buffer_ptr = buffer.as_ptr();
|
||||
for (i, item) in goodby_items.iter().enumerate() {
|
||||
unsafe {
|
||||
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()) as *mut u64) = u64::to_le(item.offset);
|
||||
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+8) as *mut u64) = u64::to_le(item.size);
|
||||
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+16) as *mut u64) = u64::to_le(item.hash);
|
||||
}
|
||||
}
|
||||
|
||||
copy_binary_search_tree(item_count, |s, d| {
|
||||
let item = &goodby_items[s];
|
||||
let offset = d*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||
let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
|
||||
dest.offset = u64::to_le(item.offset);
|
||||
dest.size = u64::to_le(item.size);
|
||||
dest.hash = u64::to_le(item.hash);
|
||||
});
|
||||
|
||||
// append CaFormatGoodbyeTail as last item
|
||||
let offset = item_count*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||
let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
|
||||
dest.offset = u64::to_le(goodby_offset as u64);
|
||||
dest.size = u64::to_le((goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64);
|
||||
dest.hash = u64::to_le(CA_FORMAT_GOODBYE_TAIL_MARKER);
|
||||
|
||||
self.flush_copy_buffer(goodby_table_size)?;
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ pub mod server {
|
|||
|
||||
pub mod catar {
|
||||
|
||||
pub mod binary_search_tree;
|
||||
pub mod format_definition;
|
||||
pub mod encoder;
|
||||
}
|
||||
|
|
|
@ -17,14 +17,14 @@ pub fn map_struct<T>(buffer: &[u8]) -> Result<&T, Error> {
|
|||
if buffer.len() < ::std::mem::size_of::<T>() {
|
||||
bail!("unable to map struct - buffer too small");
|
||||
}
|
||||
return Ok(unsafe { & * (buffer.as_ptr() as *const T) });
|
||||
Ok(unsafe { & * (buffer.as_ptr() as *const T) })
|
||||
}
|
||||
|
||||
pub fn map_struct_mut<T>(buffer: &mut [u8]) -> Result<&mut T, Error> {
|
||||
if buffer.len() < ::std::mem::size_of::<T>() {
|
||||
bail!("unable to map struct - buffer too small");
|
||||
}
|
||||
return Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) });
|
||||
Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) })
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue