pxar: match_pattern: refactor MatchPattern and introduce MatchPatternSlice.

The MatchPattern impl heavily used copies and therefore was inefficient regarding
memory management.
This patch intoduces MatchPatternSlice as struct to avoid copies and perform the
same pattern matching functionality.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
This commit is contained in:
Christian Ebner 2019-12-16 12:13:44 +01:00 committed by Dietmar Maurer
parent dc08934563
commit 255bb86030
3 changed files with 169 additions and 129 deletions

View File

@ -24,7 +24,7 @@ use super::catalog::BackupCatalogWriter;
use super::flags;
use super::format_definition::*;
use super::helper::*;
use super::match_pattern::{MatchPattern, MatchType};
use super::match_pattern::{MatchPattern, MatchPatternSlice, MatchType};
use crate::tools::acl;
use crate::tools::fs;
use crate::tools::xattr;
@ -135,8 +135,12 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
if skip_lost_and_found {
excludes.push(MatchPattern::from_line(b"**/lost+found").unwrap().unwrap());
}
let mut exclude_slices = Vec::new();
for excl in &excludes {
exclude_slices.push(excl.as_slice());
}
me.encode_dir(dir, &stat, magic, excludes)?;
me.encode_dir(dir, &stat, magic, exclude_slices)?;
Ok(())
}
@ -622,7 +626,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
dir: &mut nix::dir::Dir,
dir_stat: &FileStat,
magic: i64,
match_pattern: Vec<MatchPattern>,
match_pattern: Vec<MatchPatternSlice>,
) -> Result<(), Error> {
//println!("encode_dir: {:?} start {}", self.full_path(), self.writer_pos);
@ -688,14 +692,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
// Expand the exclude match pattern inherited from the parent by local entries, if present
let mut local_match_pattern = match_pattern.clone();
let pxar_exclude = match MatchPattern::from_file(rawfd, ".pxarexclude") {
Ok(Some((mut excludes, buffer, stat))) => {
local_match_pattern.append(&mut excludes);
Some((buffer, stat))
let (pxar_exclude, excludes) = match MatchPattern::from_file(rawfd, ".pxarexclude") {
Ok(Some((excludes, buffer, stat))) => {
(Some((buffer, stat)), excludes)
}
Ok(None) => None,
Ok(None) => (None, Vec::new()),
Err(err) => bail!("error while reading exclude file - {}", err),
};
for excl in &excludes {
local_match_pattern.push(excl.as_slice());
}
if include_children {
// Exclude patterns passed via the CLI are stored as '.pxarexclude-cli'
@ -736,7 +742,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err),
};
match MatchPattern::match_filename_exclude(
match MatchPatternSlice::match_filename_exclude(
&filename,
is_directory(&stat),
&local_match_pattern,
@ -812,7 +818,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
// '.pxarexclude-cli' is used to store the exclude MatchPatterns
// passed via the cli in the root directory of the archive.
self.write_filename(&filename)?;
let content = MatchPattern::to_bytes(&exclude_list);
let content = MatchPatternSlice::to_bytes(&exclude_list);
if let Some(ref mut catalog) = self.catalog {
catalog.add_file(&filename, content.len() as u64, 0)?;
}

View File

@ -50,22 +50,21 @@ pub enum MatchType {
///
/// /// Positive match of any file ending in `.conf` in any subdirectory
/// let positive = MatchPattern::from_line(b"**/*.conf")?.unwrap();
/// let m_positive = positive.matches_filename(&filename, is_dir)?;
/// let m_positive = positive.as_slice().matches_filename(&filename, is_dir)?;
/// assert!(m_positive == MatchType::Positive);
///
/// /// Negative match of filenames starting with `s`
/// let negative = MatchPattern::from_line(b"![s]*")?.unwrap();
/// let m_negative = negative.matches_filename(&filename, is_dir)?;
/// let m_negative = negative.as_slice().matches_filename(&filename, is_dir)?;
/// assert!(m_negative == MatchType::Negative);
/// # Ok(())
/// # }
/// ```
#[derive(Clone)]
pub struct MatchPattern {
pattern: CString,
pattern: Vec<u8>,
match_positive: bool,
match_dir_only: bool,
split_pattern: (CString, CString),
}
impl MatchPattern {
@ -149,60 +148,21 @@ impl MatchPattern {
bail!("invalid path component encountered");
}
// This will fail if the line contains b"\0"
let pattern = CString::new(input)?;
let split_pattern = split_at_slash(&pattern);
Ok(Some(MatchPattern {
pattern,
pattern: input.to_vec(),
match_positive,
match_dir_only,
split_pattern,
}))
}
/// Returns the pattern before the first `/` encountered as `MatchPattern`.
/// If no slash is encountered, the `MatchPattern` will be a copy of the
/// original pattern.
/// ```
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
/// let front = pattern.get_front_pattern();
/// /// ... will be the same as ...
/// let front_pattern = MatchPattern::from_line(b"some")?.unwrap();
/// # Ok(())
/// # }
/// ```
pub fn get_front_pattern(&self) -> MatchPattern {
let pattern = split_at_slash(&self.split_pattern.0);
MatchPattern {
pattern: self.split_pattern.0.clone(),
match_positive: self.match_positive,
match_dir_only: self.match_dir_only,
split_pattern: pattern,
}
}
/// Returns the pattern after the first encountered `/` as `MatchPattern`.
/// If no slash is encountered, the `MatchPattern` will be empty.
/// ```
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
/// let rest = pattern.get_rest_pattern();
/// /// ... will be the same as ...
/// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
/// # Ok(())
/// # }
/// ```
pub fn get_rest_pattern(&self) -> MatchPattern {
let pattern = split_at_slash(&self.split_pattern.1);
MatchPattern {
pattern: self.split_pattern.1.clone(),
/// Create a `MatchPatternSlice` of the `MatchPattern` to give a view of the
/// `MatchPattern` without copying its content.
pub fn as_slice<'a>(&'a self) -> MatchPatternSlice<'a> {
MatchPatternSlice {
pattern: self.pattern.as_slice(),
match_positive: self.match_positive,
match_dir_only: self.match_dir_only,
split_pattern: pattern,
}
}
@ -220,17 +180,114 @@ impl MatchPattern {
/// Convert a list of MatchPattern to bytes in order to write them to e.g.
/// a file.
pub fn to_bytes(patterns: &[MatchPattern]) -> Vec<u8> {
let mut slices = Vec::new();
for pattern in patterns {
slices.push(pattern.as_slice());
}
MatchPatternSlice::to_bytes(&slices)
}
}
#[derive(Clone)]
pub struct MatchPatternSlice<'a> {
pattern: &'a [u8],
match_positive: bool,
match_dir_only: bool,
}
impl<'a> MatchPatternSlice<'a> {
/// Returns the pattern before the first `/` encountered as `MatchPatternSlice`.
/// If no slash is encountered, the `MatchPatternSlice` will be a copy of the
/// original pattern.
/// ```
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
/// let slice = pattern.as_slice();
/// let front = slice.get_front_pattern();
/// /// ... will be the same as ...
/// let front_pattern = MatchPattern::from_line(b"some")?.unwrap();
/// let front_slice = front_pattern.as_slice();
/// # Ok(())
/// # }
/// ```
pub fn get_front_pattern(&'a self) -> MatchPatternSlice<'a> {
let (front, _) = self.split_at_slash();
MatchPatternSlice {
pattern: front,
match_positive: self.match_positive,
match_dir_only: self.match_dir_only,
}
}
/// Returns the pattern after the first encountered `/` as `MatchPatternSlice`.
/// If no slash is encountered, the `MatchPatternSlice` will be empty.
/// ```
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let pattern = MatchPattern::from_line(b"some/match/pattern/")?.unwrap();
/// let slice = pattern.as_slice();
/// let rest = slice.get_rest_pattern();
/// /// ... will be the same as ...
/// let rest_pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
/// let rest_slice = rest_pattern.as_slice();
/// # Ok(())
/// # }
/// ```
pub fn get_rest_pattern(&'a self) -> MatchPatternSlice<'a> {
let (_, rest) = self.split_at_slash();
MatchPatternSlice {
pattern: rest,
match_positive: self.match_positive,
match_dir_only: self.match_dir_only,
}
}
/// Splits the `MatchPatternSlice` at the first slash encountered and returns the
/// content before (front pattern) and after the slash (rest pattern),
/// omitting the slash itself.
/// Slices starting with `**/` are an exception to this, as the corresponding
/// `MatchPattern` is intended to match multiple directories.
/// These pattern slices therefore return a `*` as front pattern and the original
/// pattern itself as rest pattern.
fn split_at_slash(&'a self) -> (&'a [u8], &'a [u8]) {
let pattern = if self.pattern.starts_with(b"./") {
&self.pattern[2..]
} else {
self.pattern
};
let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') {
Some(ind) => {
let (front, rest) = pattern.split_at(ind);
(front, &rest[1..])
}
None => (pattern, &pattern[0..0]),
};
// '**' is treated such that it maches any directory
if front == b"**" {
front = b"*";
rest = pattern;
}
(front, rest)
}
/// Convert a list of `MatchPatternSlice`s to bytes in order to write them to e.g.
/// a file.
pub fn to_bytes(patterns: &[MatchPatternSlice]) -> Vec<u8> {
let mut buffer = Vec::new();
for pattern in patterns {
if !pattern.match_positive { buffer.push(b'!'); }
buffer.extend_from_slice( pattern.pattern.as_bytes());
buffer.extend_from_slice(&pattern.pattern);
if pattern.match_dir_only { buffer.push(b'/'); }
buffer.push(b'\n');
}
buffer
}
/// Match the given filename against this `MatchPattern`.
/// Match the given filename against this `MatchPatternSlice`.
/// If the filename matches the pattern completely, `MatchType::Positive` or
/// `MatchType::Negative` is returned, depending if the match pattern is was
/// declared as positive (no `!` prefix) or negative (`!` prefix).
@ -241,8 +298,9 @@ impl MatchPattern {
/// No match results in `MatchType::None`.
pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> Result<MatchType, Error> {
let mut res = MatchType::None;
let (front, _) = &self.split_pattern;
let (front, _) = self.split_at_slash();
let front = CString::new(front).unwrap();
let fnmatch_res = unsafe {
let front_ptr = front.as_ptr() as *const libc::c_char;
let filename_ptr = filename.as_ptr() as *const libc::c_char;
@ -259,10 +317,10 @@ impl MatchPattern {
};
}
let full = if self.pattern.to_bytes().starts_with(b"**/") {
CString::new(&self.pattern.to_bytes()[3..]).unwrap()
let full = if self.pattern.starts_with(b"**/") {
CString::new(&self.pattern[3..]).unwrap()
} else {
CString::new(&self.pattern.to_bytes()[..]).unwrap()
CString::new(&self.pattern[..]).unwrap()
};
let fnmatch_res = unsafe {
let full_ptr = full.as_ptr() as *const libc::c_char;
@ -291,61 +349,62 @@ impl MatchPattern {
Ok(res)
}
/// Match the given filename against the set of match patterns.
/// Match the given filename against the set of `MatchPatternSlice`s.
///
/// A positive match is intended to includes the full subtree (unless another
/// negative match excludes entries later).
/// The `MatchType` together with an updated `MatchPattern` list for passing
/// The `MatchType` together with an updated `MatchPatternSlice` list for passing
/// to the matched child is returned.
/// ```
/// # use std::ffi::CString;
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let patterns = vec![
/// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
/// MatchPattern::from_line(b"to_match/")?.unwrap()
/// ];
/// let mut slices = Vec::new();
/// for pattern in &patterns {
/// slices.push(pattern.as_slice());
/// }
/// let filename = CString::new("some")?;
/// let is_dir = true;
/// let (match_type, child_pattern) = MatchPattern::match_filename_include(
/// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include(
/// &filename,
/// is_dir,
/// &patterns
/// &slices
/// )?;
/// assert_eq!(match_type, MatchType::PartialPositive);
/// /// child pattern will be the same as ...
/// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
/// let slice = pattern.as_slice();
///
/// let filename = CString::new("to_match")?;
/// let is_dir = true;
/// let (match_type, child_pattern) = MatchPattern::match_filename_include(
/// let (match_type, child_pattern) = MatchPatternSlice::match_filename_include(
/// &filename,
/// is_dir,
/// &patterns
/// &slices
/// )?;
/// assert_eq!(match_type, MatchType::Positive);
/// /// child pattern will be the same as ...
/// let pattern = MatchPattern::from_line(b"**/*")?.unwrap();
/// let slice = pattern.as_slice();
/// # Ok(())
/// # }
/// ```
pub fn match_filename_include(
filename: &CStr,
is_dir: bool,
match_pattern: &[MatchPattern],
) -> Result<(MatchType, Vec<MatchPattern>), Error> {
match_pattern: &'a [MatchPatternSlice<'a>],
) -> Result<(MatchType, Vec<MatchPatternSlice<'a>>), Error> {
let mut child_pattern = Vec::new();
let mut match_state = MatchType::None;
for pattern in match_pattern {
match pattern.matches_filename(filename, is_dir)? {
MatchType::None => continue,
MatchType::Positive => {
match_state = MatchType::Positive;
// Full match so lets include everything below this node
let incl_pattern = MatchPattern::from_line(b"**/*").unwrap().unwrap();
child_pattern.push(incl_pattern);
}
MatchType::Positive => match_state = MatchType::Positive,
MatchType::Negative => match_state = MatchType::Negative,
MatchType::PartialPositive => {
if match_state != MatchType::Negative && match_state != MatchType::Positive {
@ -365,7 +424,7 @@ impl MatchPattern {
Ok((match_state, child_pattern))
}
/// Match the given filename against the set of match patterns.
/// Match the given filename against the set of `MatchPatternSlice`s.
///
/// A positive match is intended to exclude the full subtree, independent of
/// matches deeper down the tree.
@ -373,29 +432,34 @@ impl MatchPattern {
/// to the matched child is returned.
/// ```
/// # use std::ffi::CString;
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchType};
/// # use self::proxmox_backup::pxar::{MatchPattern, MatchPatternSlice, MatchType};
/// # fn main() -> Result<(), failure::Error> {
/// let patterns = vec![
/// MatchPattern::from_line(b"some/match/pattern/")?.unwrap(),
/// MatchPattern::from_line(b"to_match/")?.unwrap()
/// ];
/// let mut slices = Vec::new();
/// for pattern in &patterns {
/// slices.push(pattern.as_slice());
/// }
/// let filename = CString::new("some")?;
/// let is_dir = true;
/// let (match_type, child_pattern) = MatchPattern::match_filename_exclude(
/// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude(
/// &filename,
/// is_dir,
/// &patterns
/// &slices,
/// )?;
/// assert_eq!(match_type, MatchType::PartialPositive);
/// /// child pattern will be the same as ...
/// let pattern = MatchPattern::from_line(b"match/pattern/")?.unwrap();
/// let slice = pattern.as_slice();
///
/// let filename = CString::new("to_match")?;
/// let is_dir = true;
/// let (match_type, child_pattern) = MatchPattern::match_filename_exclude(
/// let (match_type, child_pattern) = MatchPatternSlice::match_filename_exclude(
/// &filename,
/// is_dir,
/// &patterns
/// &slices,
/// )?;
/// assert_eq!(match_type, MatchType::Positive);
/// /// child pattern will be empty
@ -405,8 +469,8 @@ impl MatchPattern {
pub fn match_filename_exclude(
filename: &CStr,
is_dir: bool,
match_pattern: &[MatchPattern],
) -> Result<(MatchType, Vec<MatchPattern>), Error> {
match_pattern: &'a [MatchPatternSlice<'a>],
) -> Result<(MatchType, Vec<MatchPatternSlice<'a>>), Error> {
let mut child_pattern = Vec::new();
let mut match_state = MatchType::None;
@ -427,38 +491,3 @@ impl MatchPattern {
Ok((match_state, child_pattern))
}
}
// Splits the `CStr` slice at the first slash encountered and returns the
// content before (front pattern) and after the slash (rest pattern),
// omitting the slash itself.
// Slices starting with `**/` are an exception to this, as the corresponding
// `MatchPattern` is intended to match multiple directories.
// These pattern slices therefore return a `*` as front pattern and the original
// pattern itself as rest pattern.
fn split_at_slash(match_pattern: &CStr) -> (CString, CString) {
let match_pattern = match_pattern.to_bytes();
let pattern = if match_pattern.starts_with(b"./") {
&match_pattern[2..]
} else {
match_pattern
};
let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') {
Some(ind) => {
let (front, rest) = pattern.split_at(ind);
(front, &rest[1..])
}
None => (pattern, &pattern[0..0]),
};
// '**' is treated such that it maches any directory
if front == b"**" {
front = b"*";
rest = pattern;
}
// Pattern where valid CStrings before, so it is safe to unwrap the Result
let front_pattern = CString::new(front).unwrap();
let rest_pattern = CString::new(rest).unwrap();
(front_pattern, rest_pattern)
}

View File

@ -23,7 +23,7 @@ use proxmox::tools::vec;
use super::dir_stack::{PxarDir, PxarDirStack};
use super::flags;
use super::format_definition::*;
use super::match_pattern::{MatchPattern, MatchType};
use super::match_pattern::{MatchPattern, MatchPatternSlice, MatchType};
use crate::tools::acl;
use crate::tools::fs;
@ -683,7 +683,7 @@ impl<R: Read> SequentialDecoder<R> {
entry: PxarEntry,
filename: &OsStr,
matched: MatchType,
match_pattern: &[MatchPattern],
match_pattern: &[MatchPatternSlice],
) -> Result<(), Error> {
let (mut head, attr) = self
.read_attributes()
@ -732,6 +732,10 @@ impl<R: Read> SequentialDecoder<R> {
///
/// The directory is created if it does not exist.
pub fn restore(&mut self, path: &Path, match_pattern: &[MatchPattern]) -> Result<(), Error> {
let mut slices = Vec::new();
for pattern in match_pattern {
slices.push(pattern.as_slice());
}
std::fs::create_dir_all(path)
.map_err(|err| format_err!("error while creating directory {:?} - {}", path, err))?;
@ -744,7 +748,7 @@ impl<R: Read> SequentialDecoder<R> {
let fd = dir.as_raw_fd();
let mut dirs = PxarDirStack::new(fd);
// An empty match pattern list indicates to restore the full archive.
let matched = if match_pattern.is_empty() {
let matched = if slices.is_empty() {
MatchType::Positive
} else {
MatchType::None
@ -760,7 +764,7 @@ impl<R: Read> SequentialDecoder<R> {
while head.htype == PXAR_FILENAME {
let name = self.read_filename(head.size)?;
self.restore_dir_entry(path, &mut dirs, &name, matched, match_pattern)?;
self.restore_dir_entry(path, &mut dirs, &name, matched, &slices)?;
head = self.read_item()?;
}
@ -791,7 +795,7 @@ impl<R: Read> SequentialDecoder<R> {
dirs: &mut PxarDirStack,
filename: &OsStr,
parent_matched: MatchType,
match_pattern: &[MatchPattern],
match_pattern: &[MatchPatternSlice],
) -> Result<(), Error> {
let relative_path = dirs.as_path_buf();
let full_path = base_path.join(&relative_path).join(filename);
@ -819,13 +823,14 @@ impl<R: Read> SequentialDecoder<R> {
// there are no match pattern.
let mut matched = parent_matched;
if !match_pattern.is_empty() {
match MatchPattern::match_filename_include(
match MatchPatternSlice::match_filename_include(
&CString::new(filename.as_bytes())?,
ifmt == libc::S_IFDIR,
match_pattern,
)? {
(MatchType::None, _) => matched = MatchType::None,
(MatchType::Negative, _) => matched = MatchType::Negative,
(MatchType::Positive, _) => matched = MatchType::Positive,
(match_type, pattern) => {
matched = match_type;
child_pattern = pattern;