diff --git a/src/pxar/binary_search_tree.rs b/src/pxar/binary_search_tree.rs index ebab86cb..b87db798 100644 --- a/src/pxar/binary_search_tree.rs +++ b/src/pxar/binary_search_tree.rs @@ -27,6 +27,8 @@ //! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short //! intro howto store binary trees using an array. +use std::cmp::Ordering; + #[allow(clippy::many_single_char_names)] fn copy_binary_search_tree_inner( copy_func: &mut F, @@ -91,6 +93,86 @@ pub fn copy_binary_search_tree( copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0); } + +/// This function searches for the index where the comparison by the provided +/// `compare()` function returns `Ordering::Equal`. +/// The order of the comparison matters (noncommutative) and should be search +/// value compared to value at given index as shown in the examples. +/// The parameter `skip_multiples` defines the number of matches to ignore while +/// searching before returning the index in order to lookup duplicate entries in +/// the tree. +/// +/// ``` +/// # use proxmox_backup::pxar::{copy_binary_search_tree, search_binary_tree_by}; +/// let mut vals = vec![0,1,2,2,2,3,4,5,6,6,7,8,8,8]; +/// +/// let clone = vals.clone(); +/// copy_binary_search_tree(vals.len(), |s, d| { +/// vals[d] = clone[s]; +/// }); +/// let should_be = vec![5,2,8,1,3,6,8,0,2,2,4,6,7,8]; +/// assert_eq!(vals, should_be); +/// +/// let find = 8; +/// let skip_multiples = 0; +/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); +/// assert_eq!(idx, Some(2)); +/// +/// let find = 8; +/// let skip_multiples = 1; +/// let idx = search_binary_tree_by(2, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); +/// assert_eq!(idx, Some(6)); +/// +/// let find = 8; +/// let skip_multiples = 1; +/// let idx = search_binary_tree_by(6, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); +/// assert_eq!(idx, Some(13)); +/// +/// let find = 5; +/// let skip_multiples = 1; +/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx])); +/// assert!(idx.is_none()); +/// ``` + +pub fn search_binary_tree_by Ordering>( + start: usize, + size: usize, + skip_multiples: usize, + compare: F +) -> Option { + if start > size { + return None; + } + + let mut skip = skip_multiples; + let cmp = compare(start); + if cmp == Ordering::Equal { + if skip == 0 { + // Found matching hash and want this one + return Some(start); + } + // Found matching hash, but we should skip the first `skip_multiple`, + // so continue search with reduced skip count. + skip -= 1; + } + + if cmp == Ordering::Less || cmp == Ordering::Equal { + let res = search_binary_tree_by(2 * start + 1, size, skip, compare); + if res.is_some() { + return res; + } + } + + if cmp == Ordering::Greater || cmp == Ordering::Equal { + let res = search_binary_tree_by(2 * start + 2, size, skip, compare); + if res.is_some() { + return res; + } + } + + None +} + #[test] fn test_binary_search_tree() { diff --git a/src/pxar/decoder.rs b/src/pxar/decoder.rs index 6e3a5b23..df636501 100644 --- a/src/pxar/decoder.rs +++ b/src/pxar/decoder.rs @@ -11,6 +11,7 @@ use std::os::unix::ffi::OsStrExt; use failure::*; use libc; +use super::binary_search_tree::search_binary_tree_by; use super::format_definition::*; use super::sequential_decoder::SequentialDecoder; use super::match_pattern::MatchPattern; @@ -276,11 +277,18 @@ impl Decoder { let gbt = self.goodbye_table(Some(dir.start), dir.end)?; let hash = compute_goodbye_hash(filename.as_bytes()); - let mut iterator = gbt.iter(); + let mut start_idx = 0; + let mut skip_multiple = 0; loop { // Search for the next goodbye entry with matching hash. - let (start, end) = match iterator.find(|(i, _, _)| i.hash == hash) { - Some((_item, start, end)) => (start, end), + let idx = search_binary_tree_by( + start_idx, + gbt.len(), + skip_multiple, + |idx| hash.cmp(&gbt[idx].0.hash), + ); + let (_item, start, end) = match idx { + Some(idx) => &gbt[idx], None => return Ok(None), }; @@ -303,6 +311,10 @@ impl Decoder { }; return Ok(Some((dir_entry, attr, payload_size))); } + // Hash collision, check the next entry in the goodbye table by starting + // from given index but skipping one more match (so hash at index itself). + start_idx = idx.unwrap(); + skip_multiple = 1; } } diff --git a/src/pxar/fuse.rs b/src/pxar/fuse.rs index f24a4a39..f39b3512 100644 --- a/src/pxar/fuse.rs +++ b/src/pxar/fuse.rs @@ -16,6 +16,7 @@ use lazy_static::lazy_static; use libc; use libc::{c_char, c_int, c_void, size_t}; +use super::binary_search_tree::search_binary_tree_by; use super::decoder::Decoder; use super::format_definition::{PxarAttributes, PxarEntry, PxarGoodbyeItem}; @@ -130,11 +131,18 @@ impl Context { ) -> Result<(u64, PxarEntry, PxarAttributes, u64), i32> { self.update_goodbye_cache()?; if let Some((_, gbt)) = &self.goodbye_cache { - let mut iterator = gbt.iter(); + let mut start_idx = 0; + let mut skip_multiple = 0; loop { // Search for the next goodbye entry with matching hash. - let (_item, start, end) = iterator.find(|(i, _, _)| i.hash == hash) - .ok_or(libc::ENOENT)?; + let idx = search_binary_tree_by( + start_idx, + gbt.len(), + skip_multiple, + |idx| hash.cmp(&gbt[idx].0.hash), + ).ok_or(libc::ENOENT)?; + + let (_item, start, end) = &gbt[idx]; // At this point it is not clear if the item is a directory or not, this // has to be decided based on the entry mode. @@ -150,6 +158,10 @@ impl Context { let child_offset = find_offset(&entry, *start, *end); return Ok((child_offset, entry, attr, payload_size)); } + // Hash collision, check the next entry in the goodbye table by starting + // from given index but skipping one more match (so hash at index itself). + start_idx = idx; + skip_multiple = 1; } }