From 255bb860300051cc81825d3b09bd1e54e5c07b8d Mon Sep 17 00:00:00 2001 From: Christian Ebner Date: Mon, 16 Dec 2019 12:13:44 +0100 Subject: [PATCH] pxar: match_pattern: refactor MatchPattern and introduce MatchPatternSlice. The MatchPattern impl heavily used copies and therefore was inefficient regarding memory management. This patch intoduces MatchPatternSlice as struct to avoid copies and perform the same pattern matching functionality. Signed-off-by: Christian Ebner --- src/pxar/encoder.rs | 26 ++-- src/pxar/match_pattern.rs | 255 ++++++++++++++++++--------------- src/pxar/sequential_decoder.rs | 17 ++- 3 files changed, 169 insertions(+), 129 deletions(-) diff --git a/src/pxar/encoder.rs b/src/pxar/encoder.rs index b40a7a6b..21c78f0a 100644 --- a/src/pxar/encoder.rs +++ b/src/pxar/encoder.rs @@ -24,7 +24,7 @@ use super::catalog::BackupCatalogWriter; use super::flags; use super::format_definition::*; use super::helper::*; -use super::match_pattern::{MatchPattern, MatchType}; +use super::match_pattern::{MatchPattern, MatchPatternSlice, MatchType}; use crate::tools::acl; use crate::tools::fs; use crate::tools::xattr; @@ -135,8 +135,12 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { if skip_lost_and_found { excludes.push(MatchPattern::from_line(b"**/lost+found").unwrap().unwrap()); } + let mut exclude_slices = Vec::new(); + for excl in &excludes { + exclude_slices.push(excl.as_slice()); + } - me.encode_dir(dir, &stat, magic, excludes)?; + me.encode_dir(dir, &stat, magic, exclude_slices)?; Ok(()) } @@ -622,7 +626,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { dir: &mut nix::dir::Dir, dir_stat: &FileStat, magic: i64, - match_pattern: Vec, + match_pattern: Vec, ) -> Result<(), Error> { //println!("encode_dir: {:?} start {}", self.full_path(), self.writer_pos); @@ -688,14 +692,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { // Expand the exclude match pattern inherited from the parent by local entries, if present let mut local_match_pattern = match_pattern.clone(); - let pxar_exclude = match MatchPattern::from_file(rawfd, ".pxarexclude") { - Ok(Some((mut excludes, buffer, stat))) => { - local_match_pattern.append(&mut excludes); - Some((buffer, stat)) + let (pxar_exclude, excludes) = match MatchPattern::from_file(rawfd, ".pxarexclude") { + Ok(Some((excludes, buffer, stat))) => { + (Some((buffer, stat)), excludes) } - Ok(None) => None, + Ok(None) => (None, Vec::new()), Err(err) => bail!("error while reading exclude file - {}", err), }; + for excl in &excludes { + local_match_pattern.push(excl.as_slice()); + } if include_children { // Exclude patterns passed via the CLI are stored as '.pxarexclude-cli' @@ -736,7 +742,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err), }; - match MatchPattern::match_filename_exclude( + match MatchPatternSlice::match_filename_exclude( &filename, is_directory(&stat), &local_match_pattern, @@ -812,7 +818,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { // '.pxarexclude-cli' is used to store the exclude MatchPatterns // passed via the cli in the root directory of the archive. self.write_filename(&filename)?; - let content = MatchPattern::to_bytes(&exclude_list); + let content = MatchPatternSlice::to_bytes(&exclude_list); if let Some(ref mut catalog) = self.catalog { catalog.add_file(&filename, content.len() as u64, 0)?; } diff --git a/src/pxar/match_pattern.rs b/src/pxar/match_pattern.rs index 487b6cad..b4048d87 100644 --- a/src/pxar/match_pattern.rs +++ b/src/pxar/match_pattern.rs @@ -50,22 +50,21 @@ pub enum MatchType { /// /// /// Positive match of any file ending in `.conf` in any subdirectory /// let positive = MatchPattern::from_line(b"**/*.conf")?.unwrap(); -/// let m_positive = positive.matches_filename(&filename, is_dir)?; +/// let m_positive = positive.as_slice().matches_filename(&filename, is_dir)?; /// assert!(m_positive == MatchType::Positive); /// /// /// Negative match of filenames starting with `s` /// let negative = MatchPattern::from_line(b"![s]*")?.unwrap(); -/// let m_negative = negative.matches_filename(&filename, is_dir)?; +/// let m_negative = negative.as_slice().matches_filename(&filename, is_dir)?; /// assert!(m_negative == MatchType::Negative); /// # Ok(()) /// # } /// ``` #[derive(Clone)] pub struct MatchPattern { - pattern: CString, + pattern: Vec, match_positive: bool, match_dir_only: bool, - split_pattern: (CString, CString), } impl MatchPattern { @@ -149,60 +148,21 @@ impl MatchPattern { bail!("invalid path component encountered"); } - // This will fail if the line contains b"\0" - let pattern = CString::new(input)?; - let split_pattern = split_at_slash(&pattern); - Ok(Some(MatchPattern { - pattern, + pattern: input.to_vec(), match_positive, match_dir_only, - split_pattern, })) } - /// Returns the pattern before the first `/` encountered as `MatchPattern`. - /// If no slash is encountered, the `MatchPattern` will be a copy of the - /// original pattern. - /// ``` - /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType}; - /// # fn main() -> Result<(), failure::Error> { - /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap(); - /// let front = pattern.get_front_pattern(); - /// /// ... will be the same as ... - /// let front_pattern = MatchPattern::from_line(b"some")?.unwrap(); - /// # Ok(()) - /// # } - /// ``` - pub fn get_front_pattern(&self) -> MatchPattern { - let pattern = split_at_slash(&self.split_pattern.0); - MatchPattern { - pattern: self.split_pattern.0.clone(), - match_positive: self.match_positive, - match_dir_only: self.match_dir_only, - split_pattern: pattern, - } - } - /// Returns the pattern after the first encountered `/` as `MatchPattern`. - /// If no slash is encountered, the `MatchPattern` will be empty. - /// ``` - /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType}; - /// # fn main() -> Result<(), failure::Error> { - /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap(); - /// let rest = pattern.get_rest_pattern(); - /// /// ... will be the same as ... - /// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap(); - /// # Ok(()) - /// # } - /// ``` - pub fn get_rest_pattern(&self) -> MatchPattern { - let pattern = split_at_slash(&self.split_pattern.1); - MatchPattern { - pattern: self.split_pattern.1.clone(), + /// Create a `MatchPatternSlice` of the `MatchPattern` to give a view of the + /// `MatchPattern` without copying its content. + pub fn as_slice<'a>(&'a self) -> MatchPatternSlice<'a> { + MatchPatternSlice { + pattern: self.pattern.as_slice(), match_positive: self.match_positive, match_dir_only: self.match_dir_only, - split_pattern: pattern, } } @@ -220,17 +180,114 @@ impl MatchPattern { /// Convert a list of MatchPattern to bytes in order to write them to e.g. /// a file. pub fn to_bytes(patterns: &[MatchPattern]) -> Vec { + let mut slices = Vec::new(); + for pattern in patterns { + slices.push(pattern.as_slice()); + } + + MatchPatternSlice::to_bytes(&slices) + } +} + +#[derive(Clone)] +pub struct MatchPatternSlice<'a> { + pattern: &'a [u8], + match_positive: bool, + match_dir_only: bool, +} + +impl<'a> MatchPatternSlice<'a> { + /// Returns the pattern before the first `/` encountered as `MatchPatternSlice`. + /// If no slash is encountered, the `MatchPatternSlice` will be a copy of the + /// original pattern. + /// ``` + /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType}; + /// # fn main() -> Result<(), failure::Error> { + /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap(); + /// let slice = pattern.as_slice(); + /// let front = slice.get_front_pattern(); + /// /// ... will be the same as ... + /// let front_pattern = MatchPattern::from_line(b"some")?.unwrap(); + /// let front_slice = front_pattern.as_slice(); + /// # Ok(()) + /// # } + /// ``` + pub fn get_front_pattern(&'a self) -> MatchPatternSlice<'a> { + let (front, _) = self.split_at_slash(); + MatchPatternSlice { + pattern: front, + match_positive: self.match_positive, + match_dir_only: self.match_dir_only, + } + } + + /// Returns the pattern after the first encountered `/` as `MatchPatternSlice`. + /// If no slash is encountered, the `MatchPatternSlice` will be empty. + /// ``` + /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType}; + /// # fn main() -> Result<(), failure::Error> { + /// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap(); + /// let slice = pattern.as_slice(); + /// let rest = slice.get_rest_pattern(); + /// /// ... will be the same as ... + /// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap(); + /// let rest_slice = rest_pattern.as_slice(); + /// # Ok(()) + /// # } + /// ``` + pub fn get_rest_pattern(&'a self) -> MatchPatternSlice<'a> { + let (_, rest) = self.split_at_slash(); + MatchPatternSlice { + pattern: rest, + match_positive: self.match_positive, + match_dir_only: self.match_dir_only, + } + } + + /// Splits the `MatchPatternSlice` at the first slash encountered and returns the + /// content before (front pattern) and after the slash (rest pattern), + /// omitting the slash itself. + /// Slices starting with `**/` are an exception to this, as the corresponding + /// `MatchPattern` is intended to match multiple directories. + /// These pattern slices therefore return a `*` as front pattern and the original + /// pattern itself as rest pattern. + fn split_at_slash(&'a self) -> (&'a [u8], &'a [u8]) { + let pattern = if self.pattern.starts_with(b"./") { + &self.pattern[2..] + } else { + self.pattern + }; + + let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') { + Some(ind) => { + let (front, rest) = pattern.split_at(ind); + (front, &rest[1..]) + } + None => (pattern, &pattern[0..0]), + }; + // '**' is treated such that it maches any directory + if front == b"**" { + front = b"*"; + rest = pattern; + } + + (front, rest) + } + + /// Convert a list of `MatchPatternSlice`s to bytes in order to write them to e.g. + /// a file. + pub fn to_bytes(patterns: &[MatchPatternSlice]) -> Vec { let mut buffer = Vec::new(); for pattern in patterns { if !pattern.match_positive { buffer.push(b'!'); } - buffer.extend_from_slice( pattern.pattern.as_bytes()); + buffer.extend_from_slice(&pattern.pattern); if pattern.match_dir_only { buffer.push(b'/'); } buffer.push(b'\n'); } buffer } - /// Match the given filename against this `MatchPattern`. + /// Match the given filename against this `MatchPatternSlice`. /// If the filename matches the pattern completely, `MatchType::Positive` or /// `MatchType::Negative` is returned, depending if the match pattern is was /// declared as positive (no `!` prefix) or negative (`!` prefix). @@ -241,8 +298,9 @@ impl MatchPattern { /// No match results in `MatchType::None`. pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> Result { let mut res = MatchType::None; - let (front, _) = &self.split_pattern; + let (front, _) = self.split_at_slash(); + let front = CString::new(front).unwrap(); let fnmatch_res = unsafe { let front_ptr = front.as_ptr() as *const libc::c_char; let filename_ptr = filename.as_ptr() as *const libc::c_char; @@ -259,10 +317,10 @@ impl MatchPattern { }; } - let full = if self.pattern.to_bytes().starts_with(b"**/") { - CString::new(&self.pattern.to_bytes()[3..]).unwrap() + let full = if self.pattern.starts_with(b"**/") { + CString::new(&self.pattern[3..]).unwrap() } else { - CString::new(&self.pattern.to_bytes()[..]).unwrap() + CString::new(&self.pattern[..]).unwrap() }; let fnmatch_res = unsafe { let full_ptr = full.as_ptr() as *const libc::c_char; @@ -291,61 +349,62 @@ impl MatchPattern { Ok(res) } - /// Match the given filename against the set of match patterns. + /// Match the given filename against the set of `MatchPatternSlice`s. /// /// A positive match is intended to includes the full subtree (unless another /// negative match excludes entries later). - /// The `MatchType` together with an updated `MatchPattern` list for passing + /// The `MatchType` together with an updated `MatchPatternSlice` list for passing /// to the matched child is returned. /// ``` /// # use std::ffi::CString; - /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType}; + /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType}; /// # fn main() -> Result<(), failure::Error> { /// let patterns = vec![ /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(), /// MatchPattern::from_line(b"to_match/")?.unwrap() /// ]; + /// let mut slices = Vec::new(); + /// for pattern in &patterns { + /// slices.push(pattern.as_slice()); + /// } /// let filename = CString::new("some")?; /// let is_dir = true; - /// let (match_type, child_pattern) = MatchPattern::match_filename_include( + /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include( /// &filename, /// is_dir, - /// &patterns + /// &slices /// )?; /// assert_eq!(match_type, MatchType::PartialPositive); /// /// child pattern will be the same as ... /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap(); + /// let slice = pattern.as_slice(); /// /// let filename = CString::new("to_match")?; /// let is_dir = true; - /// let (match_type, child_pattern) = MatchPattern::match_filename_include( + /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include( /// &filename, /// is_dir, - /// &patterns + /// &slices /// )?; /// assert_eq!(match_type, MatchType::Positive); /// /// child pattern will be the same as ... /// let pattern = MatchPattern::from_line(b"**/*")?.unwrap(); + /// let slice = pattern.as_slice(); /// # Ok(()) /// # } /// ``` pub fn match_filename_include( filename: &CStr, is_dir: bool, - match_pattern: &[MatchPattern], - ) -> Result<(MatchType, Vec), Error> { + match_pattern: &'a [MatchPatternSlice<'a>], + ) -> Result<(MatchType, Vec>), Error> { let mut child_pattern = Vec::new(); let mut match_state = MatchType::None; for pattern in match_pattern { match pattern.matches_filename(filename, is_dir)? { MatchType::None => continue, - MatchType::Positive => { - match_state = MatchType::Positive; - // Full match so lets include everything below this node - let incl_pattern = MatchPattern::from_line(b"**/*").unwrap().unwrap(); - child_pattern.push(incl_pattern); - } + MatchType::Positive => match_state = MatchType::Positive, MatchType::Negative => match_state = MatchType::Negative, MatchType::PartialPositive => { if match_state != MatchType::Negative && match_state != MatchType::Positive { @@ -365,7 +424,7 @@ impl MatchPattern { Ok((match_state, child_pattern)) } - /// Match the given filename against the set of match patterns. + /// Match the given filename against the set of `MatchPatternSlice`s. /// /// A positive match is intended to exclude the full subtree, independent of /// matches deeper down the tree. @@ -373,29 +432,34 @@ impl MatchPattern { /// to the matched child is returned. /// ``` /// # use std::ffi::CString; - /// # use self::proxmox_backup::pxar::{MatchPattern, MatchType}; + /// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType}; /// # fn main() -> Result<(), failure::Error> { /// let patterns = vec![ /// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(), /// MatchPattern::from_line(b"to_match/")?.unwrap() /// ]; + /// let mut slices = Vec::new(); + /// for pattern in &patterns { + /// slices.push(pattern.as_slice()); + /// } /// let filename = CString::new("some")?; /// let is_dir = true; - /// let (match_type, child_pattern) = MatchPattern::match_filename_exclude( + /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude( /// &filename, /// is_dir, - /// &patterns + /// &slices, /// )?; /// assert_eq!(match_type, MatchType::PartialPositive); /// /// child pattern will be the same as ... /// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap(); + /// let slice = pattern.as_slice(); /// /// let filename = CString::new("to_match")?; /// let is_dir = true; - /// let (match_type, child_pattern) = MatchPattern::match_filename_exclude( + /// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude( /// &filename, /// is_dir, - /// &patterns + /// &slices, /// )?; /// assert_eq!(match_type, MatchType::Positive); /// /// child pattern will be empty @@ -405,8 +469,8 @@ impl MatchPattern { pub fn match_filename_exclude( filename: &CStr, is_dir: bool, - match_pattern: &[MatchPattern], - ) -> Result<(MatchType, Vec), Error> { + match_pattern: &'a [MatchPatternSlice<'a>], + ) -> Result<(MatchType, Vec>), Error> { let mut child_pattern = Vec::new(); let mut match_state = MatchType::None; @@ -427,38 +491,3 @@ impl MatchPattern { Ok((match_state, child_pattern)) } } - -// Splits the `CStr` slice at the first slash encountered and returns the -// content before (front pattern) and after the slash (rest pattern), -// omitting the slash itself. -// Slices starting with `**/` are an exception to this, as the corresponding -// `MatchPattern` is intended to match multiple directories. -// These pattern slices therefore return a `*` as front pattern and the original -// pattern itself as rest pattern. -fn split_at_slash(match_pattern: &CStr) -> (CString, CString) { - let match_pattern = match_pattern.to_bytes(); - - let pattern = if match_pattern.starts_with(b"./") { - &match_pattern[2..] - } else { - match_pattern - }; - - let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') { - Some(ind) => { - let (front, rest) = pattern.split_at(ind); - (front, &rest[1..]) - } - None => (pattern, &pattern[0..0]), - }; - // '**' is treated such that it maches any directory - if front == b"**" { - front = b"*"; - rest = pattern; - } - - // Pattern where valid CStrings before, so it is safe to unwrap the Result - let front_pattern = CString::new(front).unwrap(); - let rest_pattern = CString::new(rest).unwrap(); - (front_pattern, rest_pattern) -} diff --git a/src/pxar/sequential_decoder.rs b/src/pxar/sequential_decoder.rs index 012991a6..e1620e7d 100644 --- a/src/pxar/sequential_decoder.rs +++ b/src/pxar/sequential_decoder.rs @@ -23,7 +23,7 @@ use proxmox::tools::vec; use super::dir_stack::{PxarDir, PxarDirStack}; use super::flags; use super::format_definition::*; -use super::match_pattern::{MatchPattern, MatchType}; +use super::match_pattern::{MatchPattern, MatchPatternSlice, MatchType}; use crate::tools::acl; use crate::tools::fs; @@ -683,7 +683,7 @@ impl SequentialDecoder { entry: PxarEntry, filename: &OsStr, matched: MatchType, - match_pattern: &[MatchPattern], + match_pattern: &[MatchPatternSlice], ) -> Result<(), Error> { let (mut head, attr) = self .read_attributes() @@ -732,6 +732,10 @@ impl SequentialDecoder { /// /// The directory is created if it does not exist. pub fn restore(&mut self, path: &Path, match_pattern: &[MatchPattern]) -> Result<(), Error> { + let mut slices = Vec::new(); + for pattern in match_pattern { + slices.push(pattern.as_slice()); + } std::fs::create_dir_all(path) .map_err(|err| format_err!("error while creating directory {:?} - {}", path, err))?; @@ -744,7 +748,7 @@ impl SequentialDecoder { let fd = dir.as_raw_fd(); let mut dirs = PxarDirStack::new(fd); // An empty match pattern list indicates to restore the full archive. - let matched = if match_pattern.is_empty() { + let matched = if slices.is_empty() { MatchType::Positive } else { MatchType::None @@ -760,7 +764,7 @@ impl SequentialDecoder { while head.htype == PXAR_FILENAME { let name = self.read_filename(head.size)?; - self.restore_dir_entry(path, &mut dirs, &name, matched, match_pattern)?; + self.restore_dir_entry(path, &mut dirs, &name, matched, &slices)?; head = self.read_item()?; } @@ -791,7 +795,7 @@ impl SequentialDecoder { dirs: &mut PxarDirStack, filename: &OsStr, parent_matched: MatchType, - match_pattern: &[MatchPattern], + match_pattern: &[MatchPatternSlice], ) -> Result<(), Error> { let relative_path = dirs.as_path_buf(); let full_path = base_path.join(&relative_path).join(filename); @@ -819,13 +823,14 @@ impl SequentialDecoder { // there are no match pattern. let mut matched = parent_matched; if !match_pattern.is_empty() { - match MatchPattern::match_filename_include( + match MatchPatternSlice::match_filename_include( &CString::new(filename.as_bytes())?, ifmt == libc::S_IFDIR, match_pattern, )? { (MatchType::None, _) => matched = MatchType::None, (MatchType::Negative, _) => matched = MatchType::Negative, + (MatchType::Positive, _) => matched = MatchType::Positive, (match_type, pattern) => { matched = match_type; child_pattern = pattern;