From 02491b8fc6f0ca512b0114868fa443ac5b611558 Mon Sep 17 00:00:00 2001 From: Christian Ebner Date: Fri, 6 Dec 2019 13:13:18 +0100 Subject: [PATCH] pxar: fuse: use binary search instead of linear search in goodbye table. The goodbye table is laid out as a binary search tree based on the hash, so use this to be more efficient when looking up a hash in the table for directories with a large number of entries. Signed-off-by: Christian Ebner --- src/pxar/binary_search_tree.rs | 82 ++++++++++++++++++++++++++++++++++ src/pxar/decoder.rs | 18 ++++++-- src/pxar/fuse.rs | 18 ++++++-- 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/src/pxar/binary_search_tree.rs b/src/pxar/binary_search_tree.rs index ebab86cb..b87db798 100644 --- a/src/pxar/binary_search_tree.rs +++ b/src/pxar/binary_search_tree.rs @@ -27,6 +27,8 @@ //! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short //! intro howto store binary trees using an array. +use std::cmp::Ordering; + #[allow(clippy::many_single_char_names)] fn copy_binary_search_tree_inner( copy_func: &mut F, @@ -91,6 +93,86 @@ pub fn copy_binary_search_tree( copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0); } + +/// This function searches for the index where the comparison by the provided +/// `compare()` function returns `Ordering::Equal`. +/// The order of the comparison matters (noncommutative) and should be search +/// value compared to value at given index as shown in the examples. +/// The parameter `skip_multiples` defines the number of matches to ignore while +/// searching before returning the index in order to lookup duplicate entries in +/// the tree.
/// Searches an array-backed binary search tree for an entry that compares equal.
///
/// The children of the node stored at index `i` are expected at the indices
/// `2 * i + 1` (left) and `2 * i + 2` (right), which is the layout produced by
/// `copy_binary_search_tree`. Matching entries are visited in pre-order (node,
/// left subtree, right subtree), starting at the node with index `start`.
///
/// * `start` - index of the node the search starts from.
/// * `size` - number of entries in the tree. `compare()` is only ever called
///   with indices smaller than `size`.
/// * `skip_multiples` - number of matching entries to ignore before an index is
///   returned. This allows looking up duplicate entries in the tree.
/// * `compare` - compares the search value to the value at the given index. The
///   order of the comparison matters (noncommutative): it has to be the search
///   value compared to the value at the index, as shown in the examples.
///
/// Returns the index of the first match that was not skipped, or `None` if the
/// searched part of the tree holds no such entry. This includes an empty tree
/// and a `start` index outside of the tree.
///
/// ```
/// # use proxmox_backup::pxar::{copy_binary_search_tree, search_binary_tree_by};
/// let mut vals = vec![0,1,2,2,2,3,4,5,6,6,7,8,8,8];
///
/// let clone = vals.clone();
/// copy_binary_search_tree(vals.len(), |s, d| {
///     vals[d] = clone[s];
/// });
/// let should_be = vec![5,2,8,1,3,6,8,0,2,2,4,6,7,8];
/// assert_eq!(vals, should_be);
///
/// let find = 8;
/// let skip_multiples = 0;
/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert_eq!(idx, Some(2));
///
/// let find = 8;
/// let skip_multiples = 1;
/// let idx = search_binary_tree_by(2, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert_eq!(idx, Some(6));
///
/// let find = 8;
/// let skip_multiples = 1;
/// let idx = search_binary_tree_by(6, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert_eq!(idx, Some(13));
///
/// let find = 5;
/// let skip_multiples = 1;
/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert!(idx.is_none());
///
/// // A value larger than every entry walks off the right edge of the tree.
/// let find = 9;
/// let skip_multiples = 0;
/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert!(idx.is_none());
/// ```
pub fn search_binary_tree_by<F: Copy + Fn(usize) -> Ordering>(
    start: usize,
    size: usize,
    skip_multiples: usize,
    compare: F,
) -> Option<usize> {
    // The remaining skip count is shared by the whole traversal, so that matches
    // skipped in one subtree are accounted for when searching the next one.
    let mut skip = skip_multiples;
    search_binary_tree_inner(start, size, &mut skip, compare)
}

/// Recursive worker for `search_binary_tree_by`; `skip` holds the number of
/// matches that still have to be ignored and is decremented on every skipped match.
fn search_binary_tree_inner<F: Copy + Fn(usize) -> Ordering>(
    start: usize,
    size: usize,
    skip: &mut usize,
    compare: F,
) -> Option<usize> {
    // Valid indices are `0..size`, so `start == size` is already out of range.
    if start >= size {
        return None;
    }

    let cmp = compare(start);
    if cmp == Ordering::Equal {
        if *skip == 0 {
            // Found a match and no more matches have to be skipped.
            return Some(start);
        }
        // Found a match, but it is one of the first `skip_multiples` matches,
        // so continue the search with a reduced skip count.
        *skip -= 1;
    }

    // Duplicates of a matching node may be located in both of its subtrees,
    // therefore both children are searched on `Ordering::Equal`.
    if cmp != Ordering::Greater {
        let found = search_binary_tree_inner(2 * start + 1, size, skip, compare);
        if found.is_some() {
            return found;
        }
    }

    if cmp != Ordering::Less {
        return search_binary_tree_inner(2 * start + 2, size, skip, compare);
    }

    None
}
+ start_idx = idx.unwrap(); + skip_multiple = 1; } } diff --git a/src/pxar/fuse.rs b/src/pxar/fuse.rs index f24a4a39..f39b3512 100644 --- a/src/pxar/fuse.rs +++ b/src/pxar/fuse.rs @@ -16,6 +16,7 @@ use lazy_static::lazy_static; use libc; use libc::{c_char, c_int, c_void, size_t}; +use super::binary_search_tree::search_binary_tree_by; use super::decoder::Decoder; use super::format_definition::{PxarAttributes, PxarEntry, PxarGoodbyeItem}; @@ -130,11 +131,18 @@ impl Context { ) -> Result<(u64, PxarEntry, PxarAttributes, u64), i32> { self.update_goodbye_cache()?; if let Some((_, gbt)) = &self.goodbye_cache { - let mut iterator = gbt.iter(); + let mut start_idx = 0; + let mut skip_multiple = 0; loop { // Search for the next goodbye entry with matching hash. - let (_item, start, end) = iterator.find(|(i, _, _)| i.hash == hash) - .ok_or(libc::ENOENT)?; + let idx = search_binary_tree_by( + start_idx, + gbt.len(), + skip_multiple, + |idx| hash.cmp(&gbt[idx].0.hash), + ).ok_or(libc::ENOENT)?; + + let (_item, start, end) = &gbt[idx]; // At this point it is not clear if the item is a directory or not, this // has to be decided based on the entry mode. @@ -150,6 +158,10 @@ impl Context { let child_offset = find_offset(&entry, *start, *end); return Ok((child_offset, entry, attr, payload_size)); } + // Hash collision, check the next entry in the goodbye table by starting + // from given index but skipping one more match (so hash at index itself). + start_idx = idx; + skip_multiple = 1; } }