src/catar/encoder.rs: correctly sort goodby items
This commit is contained in:
parent
46b6fbd6ae
commit
95bd5dfec7
|
@ -0,0 +1,63 @@
|
||||||
|
/// Helpers to generate a binary search tree stored in an array from a
|
||||||
|
/// sorted array.
|
||||||
|
///
|
||||||
|
/// Specifically, for any given sorted array 'input', permute the
|
||||||
|
/// array so that the following rule holds:
|
||||||
|
///
|
||||||
|
/// For each array item with index i, the item at 2*i+1 is smaller and
|
||||||
|
/// the item 2*i+2 is larger.
|
||||||
|
///
|
||||||
|
/// This structure permits efficient (meaning: O(log(n))) binary
|
||||||
|
/// searches: start with item i=0 (i.e. the root of the BST), compare
|
||||||
|
/// the value with the searched item, if smaller proceed at item
|
||||||
|
/// i*2+1, if larger proceed at item i*2+2, and repeat, until either
|
||||||
|
/// the item is found, or the indexes grow beyond the array size,
|
||||||
|
/// which means the entry does not exist.
|
||||||
|
///
|
||||||
|
/// Effectively this implements bisection, but instead of jumping
|
||||||
|
/// around wildly in the array during a single search we only search
|
||||||
|
/// with strictly monotonically increasing indexes.
|
||||||
|
///
|
||||||
|
/// Algorithm is from casync (camakebst.c), simplified and optimized
|
||||||
|
/// for rust. Permutation function originally by L. Bressel, 2017.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
|
||||||
|
/// Recursively emits the copy operations for one subtree of the
/// array-backed binary search tree.
///
/// `copy_func(src, dst)` is called for the root of this subtree, then the
/// function descends into the left and right halves.
///
/// * `copy_func` - callback receiving (index into the sorted input,
///   index into the tree array)
/// * `n` - number of items in this subtree
/// * `o` - virtual offset of this subtree's first item in the sorted input
/// * `e` - exponent used to size the subtree (`p = 1 << e`)
/// * `i` - tree-array index of this subtree's root
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
    copy_func: &mut F,
    n: usize,
    o: usize, // Note: virtual offset for input array
    e: usize,
    i: usize,
) {
    let p = 1usize << e;

    let t = p + (p >> 1) - 1;

    // Position (relative to `o`) of the item that becomes this subtree's root.
    let m = if n > t {
        // |...........p.............t....n........(2p)|
        p - 1
    } else {
        // |...........p.....n.......t.............(2p)|
        p - 1 - (t - n)
    };

    copy_func(o + m, i);

    if m > 0 {
        // The left half starts at the same virtual offset as this subtree.
        // Passing 0 here would lose the offset for every left child that
        // lives inside a right subtree.
        copy_binary_search_tree_inner(copy_func, m, o, e - 1, i * 2 + 1);
    }

    if (m + 1) < n {
        // The right half starts just behind the root item.
        copy_binary_search_tree_inner(copy_func, n - m - 1, o + m + 1, e - 1, i * 2 + 2);
    }
}
|
||||||
|
|
||||||
|
pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
|
||||||
|
n: usize,
|
||||||
|
mut copy_func: F,
|
||||||
|
) {
|
||||||
|
if n == 0 { return };
|
||||||
|
let e = (64 - n.leading_zeros() - 1) as usize; // fast log2(n)
|
||||||
|
copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
|
||||||
|
}
|
|
@ -1,6 +1,7 @@
|
||||||
use failure::*;
|
use failure::*;
|
||||||
|
|
||||||
use super::format_definition::*;
|
use super::format_definition::*;
|
||||||
|
use super::binary_search_tree::*;
|
||||||
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::os::unix::io::AsRawFd;
|
use std::os::unix::io::AsRawFd;
|
||||||
|
@ -218,7 +219,9 @@ impl <W: Write> CaTarEncoder<W> {
|
||||||
println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);
|
println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);
|
||||||
|
|
||||||
let goodby_start = self.writer_pos as u64;
|
let goodby_start = self.writer_pos as u64;
|
||||||
let goodby_table_size = (goodby_items.len() + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
|
|
||||||
|
let item_count = goodby_items.len();
|
||||||
|
let goodby_table_size = (item_count + 1)*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||||
|
|
||||||
for item in &mut goodby_items {
|
for item in &mut goodby_items {
|
||||||
item.offset = goodby_start - item.offset;
|
item.offset = goodby_start - item.offset;
|
||||||
|
@ -228,13 +231,6 @@ impl <W: Write> CaTarEncoder<W> {
|
||||||
|
|
||||||
let goodby_offset = self.writer_pos - dir_start_pos;
|
let goodby_offset = self.writer_pos - dir_start_pos;
|
||||||
|
|
||||||
// append CaFormatGoodbyeTail as last item
|
|
||||||
goodby_items.push(CaFormatGoodbyeItem {
|
|
||||||
offset: goodby_offset as u64,
|
|
||||||
size: (goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64,
|
|
||||||
hash: CA_FORMAT_GOODBYE_TAIL_MARKER,
|
|
||||||
});
|
|
||||||
|
|
||||||
self.write_header(CA_FORMAT_GOODBYE, goodby_table_size as u64)?;
|
self.write_header(CA_FORMAT_GOODBYE, goodby_table_size as u64)?;
|
||||||
|
|
||||||
if goodby_table_size > FILE_COPY_BUFFER_SIZE {
|
if goodby_table_size > FILE_COPY_BUFFER_SIZE {
|
||||||
|
@ -243,13 +239,22 @@ impl <W: Write> CaTarEncoder<W> {
|
||||||
|
|
||||||
let buffer = &mut self.file_copy_buffer;
|
let buffer = &mut self.file_copy_buffer;
|
||||||
let buffer_ptr = buffer.as_ptr();
|
let buffer_ptr = buffer.as_ptr();
|
||||||
for (i, item) in goodby_items.iter().enumerate() {
|
|
||||||
unsafe {
|
copy_binary_search_tree(item_count, |s, d| {
|
||||||
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()) as *mut u64) = u64::to_le(item.offset);
|
let item = &goodby_items[s];
|
||||||
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+8) as *mut u64) = u64::to_le(item.size);
|
let offset = d*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||||
*(buffer_ptr.add(i*std::mem::size_of::<CaFormatGoodbyeItem>()+16) as *mut u64) = u64::to_le(item.hash);
|
let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
|
||||||
}
|
dest.offset = u64::to_le(item.offset);
|
||||||
}
|
dest.size = u64::to_le(item.size);
|
||||||
|
dest.hash = u64::to_le(item.hash);
|
||||||
|
});
|
||||||
|
|
||||||
|
// append CaFormatGoodbyeTail as last item
|
||||||
|
let offset = item_count*std::mem::size_of::<CaFormatGoodbyeItem>();
|
||||||
|
let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
|
||||||
|
dest.offset = u64::to_le(goodby_offset as u64);
|
||||||
|
dest.size = u64::to_le((goodby_table_size + std::mem::size_of::<CaFormatHeader>()) as u64);
|
||||||
|
dest.hash = u64::to_le(CA_FORMAT_GOODBYE_TAIL_MARKER);
|
||||||
|
|
||||||
self.flush_copy_buffer(goodby_table_size)?;
|
self.flush_copy_buffer(goodby_table_size)?;
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,7 @@ pub mod server {
|
||||||
|
|
||||||
pub mod catar {
|
pub mod catar {
|
||||||
|
|
||||||
|
pub mod binary_search_tree;
|
||||||
pub mod format_definition;
|
pub mod format_definition;
|
||||||
pub mod encoder;
|
pub mod encoder;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,14 +17,14 @@ pub fn map_struct<T>(buffer: &[u8]) -> Result<&T, Error> {
|
||||||
if buffer.len() < ::std::mem::size_of::<T>() {
|
if buffer.len() < ::std::mem::size_of::<T>() {
|
||||||
bail!("unable to map struct - buffer too small");
|
bail!("unable to map struct - buffer too small");
|
||||||
}
|
}
|
||||||
return Ok(unsafe { & * (buffer.as_ptr() as *const T) });
|
Ok(unsafe { & * (buffer.as_ptr() as *const T) })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn map_struct_mut<T>(buffer: &mut [u8]) -> Result<&mut T, Error> {
|
pub fn map_struct_mut<T>(buffer: &mut [u8]) -> Result<&mut T, Error> {
|
||||||
if buffer.len() < ::std::mem::size_of::<T>() {
|
if buffer.len() < ::std::mem::size_of::<T>() {
|
||||||
bail!("unable to map struct - buffer too small");
|
bail!("unable to map struct - buffer too small");
|
||||||
}
|
}
|
||||||
return Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) });
|
Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue