From 0e20b336e1789d3fda5c5a0ecd74e9009e7b2a3c Mon Sep 17 00:00:00 2001 From: Christian Ebner Date: Wed, 11 Sep 2019 17:25:03 +0200 Subject: [PATCH] pxar: fuse: avoid possible hash collision in lookup by additional checking against filename The hash of the filename in the goodbye table items allows to quickly compare to a hashed filename. Unfortunately, a matching hash is no garantee for matching filenames as hash collisions are possible. This patch fixes such possible collisions by further checking the filenames once a matching hash has been found. This introduces no significant extra cost (except for the filename comparison) for cases with matching hashes, as the lookup call has to seek and read the file attributes (including the filename) anyway. In cases with hash collision, the next matching item is read and treaded analogously (what means we need at least one extra seek). As collisions should be not that frequent, this should be an acceptable penalty. Signed-off-by: Christian Ebner --- src/pxar/decoder.rs | 15 +++++---- src/pxar/fuse.rs | 78 +++++++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/src/pxar/decoder.rs b/src/pxar/decoder.rs index 64668019..767cc8b3 100644 --- a/src/pxar/decoder.rs +++ b/src/pxar/decoder.rs @@ -289,7 +289,7 @@ impl Result<(), Error>> Decoder { /// directories `PXAR_GOODBYE_TAIL_MARKER`. This is not mandatory and it can /// also directly point to its `PXAR_FILENAME` or `PXAR_ENTRY`, thereby /// avoiding an additional seek. - pub fn attributes(&mut self, offset: u64) -> Result<(PxarEntry, PxarAttributes, u64), Error> { + pub fn attributes(&mut self, offset: u64) -> Result<(OsString, PxarEntry, PxarAttributes, u64), Error> { self.seek(SeekFrom::Start(offset))?; let mut marker: u64 = self.inner.read_item()?; @@ -301,11 +301,14 @@ impl Result<(), Error>> Decoder { marker = self.inner.read_item()?; } - if marker == PXAR_FILENAME { + let filename = if marker == PXAR_FILENAME { let size: u64 = self.inner.read_item()?; - let _filename = self.inner.read_filename(size)?; + let filename = self.inner.read_filename(size)?; marker = self.inner.read_item()?; - } + filename + } else { + OsString::new() + }; if marker == PXAR_FORMAT_HARDLINK { let size: u64 = self.inner.read_item()?; @@ -324,12 +327,12 @@ impl Result<(), Error>> Decoder { _ => 0, }; - Ok((entry, xattr, file_size)) + Ok((filename, entry, xattr, file_size)) } /// Opens the file by validating the given `offset` and returning its attrs, /// xattrs and size. - pub fn open(&mut self, offset: u64) -> Result<(PxarEntry, PxarAttributes, u64), Error> { + pub fn open(&mut self, offset: u64) -> Result<(OsString, PxarEntry, PxarAttributes, u64), Error> { self.attributes(offset) } diff --git a/src/pxar/fuse.rs b/src/pxar/fuse.rs index 57868ac6..2bc2d93a 100644 --- a/src/pxar/fuse.rs +++ b/src/pxar/fuse.rs @@ -363,6 +363,52 @@ struct EntryParam { entry_timeout: f64, } +/// Lookup the goodbye item identified by `filename` and its corresponding `hash` +/// +/// Search the first matching `hash` in the goodbye table, allowing for a fast +/// comparison with the items. +/// As there could be a hash collision, the found items filename is then compared +/// by seek to the corresponding item in the archive and reading its attributes +/// (which the lookup callback needs to do anyway). +/// If the filename does not match, the function is called recursively with the +/// rest of the goodbye table to lookup the next match. +/// The matching items archive offset, entry and payload size are returned. +/// If there is no entry with matching `filename` and `hash` a `libc::ENOENT` is +/// returned. +fn search_in_goodbye( + mut decoder: &mut Decoder, fn(&Path) -> Result<(), Error>>, + goodbye_table: &[(PxarGoodbyeItem, u64, u64)], + filename: &CStr, + hash: u64, +) -> Result<(u64, PxarEntry, u64), i32> { + // Search for the first position with matching hash. + let position = goodbye_table + .iter() + .position(|(e, _, _)| e.hash == hash) + .ok_or(libc::ENOENT)?; + + let (_item, start, end) = &goodbye_table[position]; + // At this point it is not clear if the item is a directory or not, this + // has to be decided based on the entry mode. + // `Decoder`s attributes function accepts both, offsets pointing to + // the start of an item (PXAR_FILENAME) or the GOODBYE_TAIL_MARKER in case + // of directories, so the use of start offset is fine for both cases. + let (entry_name, entry, _, payload_size) = decoder + .attributes(*start) + .map_err(|_| libc::EIO)?; + + // Possible hash collision, need to check if the found entry is indeed + // the filename to lookup. + if entry_name.as_bytes() != filename.to_bytes() { + // Hash collision, continue with the search for the next matching item. + search_in_goodbye(&mut decoder, &goodbye_table[position + 1..goodbye_table.len()], filename, hash) + } else { + // No hash collision, get the correct offset based on the entry mode. + let child_offset = find_offset(&entry, *start, *end); + Ok((child_offset, entry, payload_size)) + } +} + /// Lookup `name` in the directory referenced by `parent` inode. /// /// Inserts also the child and parent file offset in the hashmap to quickly @@ -371,22 +417,14 @@ extern "C" fn lookup(req: Request, parent: u64, name: StrPtr) { let filename = unsafe { CStr::from_ptr(name) }; let hash = super::format_definition::compute_goodbye_hash(filename.to_bytes()); - run_in_context(req, parent, |decoder, ino_offset| { - let goodbye_table = decoder.goodbye_table(None, ino_offset + GOODBYE_ITEM_SIZE).map_err(|_| libc::EIO)?; + run_in_context(req, parent, |mut decoder, ino_offset| { + let goodbye_table = decoder + .goodbye_table(None, ino_offset + GOODBYE_ITEM_SIZE) + .map_err(|_| libc::EIO)?; - let (_item, start, end) = goodbye_table - .iter() - .find(|(e, _, _)| e.hash == hash) - .ok_or(libc::ENOENT)?; - - // At this point it is not clear if the item is a directory or not, this - // has to be decided based on the entry mode. - // `Decoder`s attributes function accepts both, offsets pointing to - // the start of an item (PXAR_FILENAME) or the GOODBYE_TAIL_MARKER in case - // of directories, so the use of start offset is fine for both cases. - let (entry, _, payload_size) = decoder.attributes(*start).map_err(|_| libc::EIO)?; - // Get the correct offset based on the entry mode before calculating the i-node. - let child_offset = find_offset(&entry, *start, *end); + // Get the child item based on its hash and filename + let (child_offset, entry, payload_size) = + search_in_goodbye(&mut decoder, &goodbye_table[..], &filename, hash)?; let child_inode = calculate_inode(child_offset, decoder.root_end_offset()); let e = EntryParam { @@ -433,7 +471,7 @@ fn stat(inode: u64, entry: &PxarEntry, payload_size: u64) -> Result