diff --git a/src/catar/binary_search_tree.rs b/src/catar/binary_search_tree.rs new file mode 100644 index 00000000..8f1f5dd0 --- /dev/null +++ b/src/catar/binary_search_tree.rs @@ -0,0 +1,63 @@ +/// Helpers to generate a binary search tree stored in an array from a +/// sorted array. +/// +/// Specifically, for any given sorted array 'input' permute the +/// array so that the following rule holds: +/// +/// For each array item with index i, the item at 2*i+1 is smaller and +/// the item at 2*i+2 is larger. +/// +/// This structure permits efficient (meaning: O(log(n))) binary +/// searches: start with item i=0 (i.e. the root of the BST), compare +/// the value with the searched item, if smaller proceed at item +/// i*2+1, if larger proceed at item i*2+2, and repeat, until either +/// the item is found, or the indexes grow beyond the array size, +/// which means the entry does not exist. +/// +/// Effectively this implements bisection, but instead of jumping +/// around wildly in the array during a single search we only search +/// with strictly monotonically increasing indexes. +/// +/// Algorithm is from casync (camakebst.c), simplified and optimized +/// for rust. Permutation function originally by L. Bressel, 2017. 
/// Recursive worker for [`copy_binary_search_tree`].
///
/// Processes the sorted input sub-range `input[o..o + n]` and reports, via
/// `copy_func(src, dst)`, that input element `src` belongs at position `dst`
/// of the array-embedded search tree.
///
/// * `copy_func` - callback invoked once per element as `(src, dst)`.
/// * `n` - number of elements in the current sub-range (must be > 0).
/// * `o` - offset of the current sub-range inside the input array.
/// * `e` - exponent with `2^e <= n < 2^(e+1)` for the top-level call; it is
///   decremented by one per recursion level.
/// * `i` - tree index (destination) of the root of the current sub-range.
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
    copy_func: &mut F,
    n: usize,
    o: usize, // Note: virtual offset for input array
    e: usize,
    i: usize,
) {
    let p = 1usize << e;

    let t = p + (p >> 1) - 1;

    // `m` is the position of the root inside the current sub-range, which is
    // also the number of elements that go into the left subtree.
    let m = if n > t {
        // |...........p.............t....n........(2p)|
        p - 1
    } else {
        // |...........p.....n.......t.............(2p)|
        p - 1 - (t - n)
    };

    copy_func(o + m, i);

    if m > 0 {
        // The left subtree covers input[o..o + m]. The offset has to be
        // carried along: restarting at 0 would re-read the beginning of the
        // input whenever this call is itself handling a right subtree.
        copy_binary_search_tree_inner(copy_func, m, o, e - 1, i * 2 + 1);
    }

    if m + 1 < n {
        // The right subtree covers input[o + m + 1..o + n].
        copy_binary_search_tree_inner(copy_func, n - m - 1, o + m + 1, e - 1, i * 2 + 2);
    }
}

/// Computes the permutation that turns a sorted array of `n` items into a
/// binary search tree stored in an array.
///
/// No data is moved here. `copy_func(src, dst)` is called exactly once per
/// input index `src` in `0..n`, and the caller is expected to copy sorted
/// item `src` to slot `dst` of the output. In the resulting layout the item
/// at `2*i+1` is smaller and the item at `2*i+2` is larger than the item
/// at `i`.
///
/// Nothing is done (and `copy_func` is never called) when `n == 0`.
pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
    n: usize,
    mut copy_func: F,
) {
    if n == 0 {
        return;
    }

    // floor(log2(n)), derived from the real width of `usize` so that the
    // result is also correct on targets where `usize` is not 64 bits wide.
    let usize_bits = (8 * std::mem::size_of::<usize>()) as u32;
    let e = (usize_bits - n.leading_zeros() - 1) as usize;

    copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
}
self.file_copy_buffer; let buffer_ptr = buffer.as_ptr(); - for (i, item) in goodby_items.iter().enumerate() { - unsafe { - *(buffer_ptr.add(i*std::mem::size_of::()) as *mut u64) = u64::to_le(item.offset); - *(buffer_ptr.add(i*std::mem::size_of::()+8) as *mut u64) = u64::to_le(item.size); - *(buffer_ptr.add(i*std::mem::size_of::()+16) as *mut u64) = u64::to_le(item.hash); - } - } + + copy_binary_search_tree(item_count, |s, d| { + let item = &goodby_items[s]; + let offset = d*std::mem::size_of::(); + let dest = crate::tools::map_struct_mut::(&mut buffer[offset..]).unwrap(); + dest.offset = u64::to_le(item.offset); + dest.size = u64::to_le(item.size); + dest.hash = u64::to_le(item.hash); + }); + + // append CaFormatGoodbyeTail as last item + let offset = item_count*std::mem::size_of::(); + let dest = crate::tools::map_struct_mut::(&mut buffer[offset..]).unwrap(); + dest.offset = u64::to_le(goodby_offset as u64); + dest.size = u64::to_le((goodby_table_size + std::mem::size_of::()) as u64); + dest.hash = u64::to_le(CA_FORMAT_GOODBYE_TAIL_MARKER); self.flush_copy_buffer(goodby_table_size)?; diff --git a/src/lib.rs b/src/lib.rs index f7e43b50..be9f4c07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,7 @@ pub mod server { pub mod catar { + pub mod binary_search_tree; pub mod format_definition; pub mod encoder; } diff --git a/src/tools.rs b/src/tools.rs index f752201e..4ab6299b 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -17,14 +17,14 @@ pub fn map_struct(buffer: &[u8]) -> Result<&T, Error> { if buffer.len() < ::std::mem::size_of::() { bail!("unable to map struct - buffer too small"); } - return Ok(unsafe { & * (buffer.as_ptr() as *const T) }); + Ok(unsafe { & * (buffer.as_ptr() as *const T) }) } pub fn map_struct_mut(buffer: &mut [u8]) -> Result<&mut T, Error> { if buffer.len() < ::std::mem::size_of::() { bail!("unable to map struct - buffer too small"); } - return Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) }); + Ok(unsafe { &mut * (buffer.as_ptr() as *mut T) 
}) }