pxar: fuse: use binary search instead of linear search in goodbye table.

The goodbye table is layed out as binary search tree based on the hash, so use
this to be more efficient when looking up a hash in the table for directories
with a large number of entries.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
This commit is contained in:
Christian Ebner 2019-12-06 13:13:18 +01:00 committed by Dietmar Maurer
parent 5ab52d9a8a
commit 02491b8fc6
3 changed files with 112 additions and 6 deletions

View File

@ -27,6 +27,8 @@
//! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short
//! intro howto store binary trees using an array.
use std::cmp::Ordering;
#[allow(clippy::many_single_char_names)]
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
copy_func: &mut F,
@ -91,6 +93,86 @@ pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
}
/// This function searches for the index where the comparison by the provided
/// `compare()` function returns `Ordering::Equal`.
/// The order of the comparison matters (noncommutative) and should be search
/// value compared to value at given index as shown in the examples.
/// The parameter `skip_multiples` defines the number of matches to ignore while
/// searching before returning the index in order to lookup duplicate entries in
/// the tree.
///
/// ```
/// # use proxmox_backup::pxar::{copy_binary_search_tree, search_binary_tree_by};
/// let mut vals = vec![0,1,2,2,2,3,4,5,6,6,7,8,8,8];
///
/// let clone = vals.clone();
/// copy_binary_search_tree(vals.len(), |s, d| {
/// vals[d] = clone[s];
/// });
/// let should_be = vec![5,2,8,1,3,6,8,0,2,2,4,6,7,8];
/// assert_eq!(vals, should_be);
///
/// let find = 8;
/// let skip_multiples = 0;
/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert_eq!(idx, Some(2));
///
/// let find = 8;
/// let skip_multiples = 1;
/// let idx = search_binary_tree_by(2, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert_eq!(idx, Some(6));
///
/// let find = 8;
/// let skip_multiples = 1;
/// let idx = search_binary_tree_by(6, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert_eq!(idx, Some(13));
///
/// let find = 5;
/// let skip_multiples = 1;
/// let idx = search_binary_tree_by(0, vals.len(), skip_multiples, |idx| find.cmp(&vals[idx]));
/// assert!(idx.is_none());
/// ```
pub fn search_binary_tree_by<F: Copy + Fn(usize) -> Ordering>(
start: usize,
size: usize,
skip_multiples: usize,
compare: F
) -> Option<usize> {
if start > size {
return None;
}
let mut skip = skip_multiples;
let cmp = compare(start);
if cmp == Ordering::Equal {
if skip == 0 {
// Found matching hash and want this one
return Some(start);
}
// Found matching hash, but we should skip the first `skip_multiple`,
// so continue search with reduced skip count.
skip -= 1;
}
if cmp == Ordering::Less || cmp == Ordering::Equal {
let res = search_binary_tree_by(2 * start + 1, size, skip, compare);
if res.is_some() {
return res;
}
}
if cmp == Ordering::Greater || cmp == Ordering::Equal {
let res = search_binary_tree_by(2 * start + 2, size, skip, compare);
if res.is_some() {
return res;
}
}
None
}
#[test]
fn test_binary_search_tree() {

View File

@ -11,6 +11,7 @@ use std::os::unix::ffi::OsStrExt;
use failure::*;
use libc;
use super::binary_search_tree::search_binary_tree_by;
use super::format_definition::*;
use super::sequential_decoder::SequentialDecoder;
use super::match_pattern::MatchPattern;
@ -276,11 +277,18 @@ impl Decoder {
let gbt = self.goodbye_table(Some(dir.start), dir.end)?;
let hash = compute_goodbye_hash(filename.as_bytes());
let mut iterator = gbt.iter();
let mut start_idx = 0;
let mut skip_multiple = 0;
loop {
// Search for the next goodbye entry with matching hash.
let (start, end) = match iterator.find(|(i, _, _)| i.hash == hash) {
Some((_item, start, end)) => (start, end),
let idx = search_binary_tree_by(
start_idx,
gbt.len(),
skip_multiple,
|idx| hash.cmp(&gbt[idx].0.hash),
);
let (_item, start, end) = match idx {
Some(idx) => &gbt[idx],
None => return Ok(None),
};
@ -303,6 +311,10 @@ impl Decoder {
};
return Ok(Some((dir_entry, attr, payload_size)));
}
// Hash collision, check the next entry in the goodbye table by starting
// from given index but skipping one more match (so hash at index itself).
start_idx = idx.unwrap();
skip_multiple = 1;
}
}

View File

@ -16,6 +16,7 @@ use lazy_static::lazy_static;
use libc;
use libc::{c_char, c_int, c_void, size_t};
use super::binary_search_tree::search_binary_tree_by;
use super::decoder::Decoder;
use super::format_definition::{PxarAttributes, PxarEntry, PxarGoodbyeItem};
@ -130,11 +131,18 @@ impl Context {
) -> Result<(u64, PxarEntry, PxarAttributes, u64), i32> {
self.update_goodbye_cache()?;
if let Some((_, gbt)) = &self.goodbye_cache {
let mut iterator = gbt.iter();
let mut start_idx = 0;
let mut skip_multiple = 0;
loop {
// Search for the next goodbye entry with matching hash.
let (_item, start, end) = iterator.find(|(i, _, _)| i.hash == hash)
.ok_or(libc::ENOENT)?;
let idx = search_binary_tree_by(
start_idx,
gbt.len(),
skip_multiple,
|idx| hash.cmp(&gbt[idx].0.hash),
).ok_or(libc::ENOENT)?;
let (_item, start, end) = &gbt[idx];
// At this point it is not clear if the item is a directory or not, this
// has to be decided based on the entry mode.
@ -150,6 +158,10 @@ impl Context {
let child_offset = find_offset(&entry, *start, *end);
return Ok((child_offset, entry, attr, payload_size));
}
// Hash collision, check the next entry in the goodbye table by starting
// from given index but skipping one more match (so hash at index itself).
start_idx = idx;
skip_multiple = 1;
}
}