pxar: impl .pxarexclude parsing and exclude matching

.pxarexclude files allow excluding or including parts of a subtree by matching
against glob patterns. The globs are matched according to the rules of fnmatch.
In addition, '**' can be used to match multiple directories within the path.

The order of the entries matters, as later ones win over previous ones.
As the .pxarexclude files can be placed at any node of the directory hierarchy,
this implies that matching child entries win over parent entries.
The only exception to this behaviour is when a parent entry has already fully
matched the path, thereby excluding the child entries which would otherwise
match.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
This commit is contained in:
Christian Ebner 2019-06-21 18:15:01 +02:00 committed by Dietmar Maurer
parent 46bd880041
commit cd7dc87903
3 changed files with 353 additions and 21 deletions

View File

@ -62,4 +62,7 @@ pub use sequential_decoder::*;
mod decoder; mod decoder;
pub use decoder::*; pub use decoder::*;
mod exclude_pattern;
pub use exclude_pattern::*;
mod helper; mod helper;

View File

@ -9,6 +9,7 @@ use std::collections::HashMap;
use super::format_definition::*; use super::format_definition::*;
use super::binary_search_tree::*; use super::binary_search_tree::*;
use super::helper::*; use super::helper::*;
use super::exclude_pattern::*;
use crate::tools::fs; use crate::tools::fs;
use crate::tools::acl; use crate::tools::acl;
use crate::tools::xattr; use crate::tools::xattr;
@ -116,7 +117,7 @@ impl <'a, W: Write> Encoder<'a, W> {
if verbose { println!("{:?}", me.full_path()); } if verbose { println!("{:?}", me.full_path()); }
me.encode_dir(dir, &stat, magic)?; me.encode_dir(dir, &stat, magic, Vec::new())?;
Ok(()) Ok(())
} }
@ -560,7 +561,7 @@ impl <'a, W: Write> Encoder<'a, W> {
Ok(()) Ok(())
} }
fn encode_dir(&mut self, dir: &mut nix::dir::Dir, dir_stat: &FileStat, magic: i64) -> Result<(), Error> { fn encode_dir(&mut self, dir: &mut nix::dir::Dir, dir_stat: &FileStat, magic: i64, match_pattern: Vec<PxarExcludePattern>) -> Result<(), Error> {
//println!("encode_dir: {:?} start {}", self.full_path(), self.writer_pos); //println!("encode_dir: {:?} start {}", self.full_path(), self.writer_pos);
@ -622,14 +623,19 @@ impl <'a, W: Write> Encoder<'a, W> {
include_children = (self.root_st_dev == dir_stat.st_dev) || self.all_file_systems; include_children = (self.root_st_dev == dir_stat.st_dev) || self.all_file_systems;
} }
// Expand the exclude match pattern inherited from the parent by local entries, if present
let mut local_match_pattern = match_pattern.clone();
let pxar_exclude = match PxarExcludePattern::from_file(rawfd, ".pxarexclude") {
Ok(Some((mut excludes, buffer, stat))) => {
local_match_pattern.append(&mut excludes);
Some((buffer, stat))
},
Ok(None) => None,
Err(err) => bail!("error while reading exclude file - {}", err),
};
if include_children { if include_children {
for entry in dir.iter() { for entry in dir.iter() {
dir_count += 1;
if dir_count > MAX_DIRECTORY_ENTRIES {
bail!("too many directory items in {:?} (> {})",
self.full_path(), MAX_DIRECTORY_ENTRIES);
}
let entry = entry.map_err(|err| { let entry = entry.map_err(|err| {
format_err!("readir {:?} failed - {}", self.full_path(), err) format_err!("readir {:?} failed - {}", self.full_path(), err)
})?; })?;
@ -640,30 +646,68 @@ impl <'a, W: Write> Encoder<'a, W> {
continue; continue;
} }
name_list.push(filename); let stat = match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
Ok(stat) => stat,
Err(nix::Error::Sys(Errno::ENOENT)) => {
let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes());
self.report_vanished_file(&self.full_path().join(filename_osstr))?;
continue;
},
Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err),
};
match self.match_exclude_pattern(&filename, &stat, &local_match_pattern) {
(MatchType::Exclude, _) => {
let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes());
eprintln!("matched by .pxarexclude entry - skipping: {:?}", self.full_path().join(filename_osstr));
},
(_, pattern_list) => name_list.push((filename, stat, pattern_list)),
}
dir_count += 1;
if dir_count > MAX_DIRECTORY_ENTRIES {
bail!("too many directory items in {:?} (> {})", self.full_path(), MAX_DIRECTORY_ENTRIES);
}
} }
} else { } else {
eprintln!("skip mount point: {:?}", self.full_path()); eprintln!("skip mount point: {:?}", self.full_path());
} }
name_list.sort_unstable_by(|a, b| a.cmp(&b)); name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
let mut goodbye_items = vec![]; let mut goodbye_items = vec![];
for filename in &name_list { for (filename, stat, exclude_list) in name_list {
self.relative_path.push(std::ffi::OsStr::from_bytes(filename.as_bytes())); if filename.as_bytes() == b".pxarexclude" {
if let Some((ref content, ref stat)) = pxar_exclude {
if self.verbose { println!("{:?}", self.full_path()); } let filefd = match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) {
Ok(filefd) => filefd,
let stat = match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
Ok(stat) => stat,
Err(nix::Error::Sys(Errno::ENOENT)) => { Err(nix::Error::Sys(Errno::ENOENT)) => {
self.report_vanished_file(&self.full_path())?; self.report_vanished_file(&self.full_path())?;
continue; continue;
} },
Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err), Err(err) => {
let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes());
bail!("open file {:?} failed - {}", self.full_path().join(filename_osstr), err);
},
}; };
let child_magic = if dir_stat.st_dev != stat.st_dev {
detect_fs_type(filefd)?
} else {
magic
};
self.write_filename(&filename)?;
self.encode_pxar_exclude(filefd, stat, child_magic, content)?;
continue;
}
}
self.relative_path.push(std::ffi::OsStr::from_bytes(filename.as_bytes()));
if self.verbose { println!("{:?}", self.full_path()); }
let start_pos = self.writer_pos; let start_pos = self.writer_pos;
if is_directory(&stat) { if is_directory(&stat) {
@ -684,7 +728,7 @@ impl <'a, W: Write> Encoder<'a, W> {
}; };
self.write_filename(&filename)?; self.write_filename(&filename)?;
self.encode_dir(&mut dir, &stat, child_magic)?; self.encode_dir(&mut dir, &stat, child_magic, exclude_list)?;
} else if is_reg_file(&stat) { } else if is_reg_file(&stat) {
@ -786,6 +830,36 @@ impl <'a, W: Write> Encoder<'a, W> {
Ok(()) Ok(())
} }
/// Matches `filename` against every pattern in `match_pattern`, in list order.
///
/// Returns the resulting match type together with the pattern list to hand
/// down to the entry's children: the rest pattern of every entry that matched
/// only partially.
///
/// Precedence rules:
/// * a full match (`Include`/`Exclude`) always replaces the current result,
///   so later entries win over earlier ones;
/// * a partial match replaces the current result only while no full match has
///   been recorded, but its rest pattern is collected in either case.
fn match_exclude_pattern(&mut self, filename: &CStr, stat: &FileStat, match_pattern: &[PxarExcludePattern]) -> (MatchType, Vec<PxarExcludePattern>) {
    let is_dir = is_directory(&stat);
    let mut match_type = MatchType::None;
    let mut child_pattern = Vec::new();

    for pattern in match_pattern {
        match pattern.matches_filename(filename, is_dir) {
            MatchType::None => {},
            full @ MatchType::Exclude | full @ MatchType::Include => match_type = full,
            partial @ MatchType::PartialExclude | partial @ MatchType::PartialInclude => {
                let fully_matched = match_type == MatchType::Include
                    || match_type == MatchType::Exclude;
                if !fully_matched {
                    // Partial matches are kept, as the children have to be
                    // matched before a final decision can be made.
                    match_type = partial;
                }
                child_pattern.push(pattern.get_rest_pattern());
            },
        }
    }

    (match_type, child_pattern)
}
fn encode_file(&mut self, filefd: RawFd, stat: &FileStat, magic: i64) -> Result<(), Error> { fn encode_file(&mut self, filefd: RawFd, stat: &FileStat, magic: i64) -> Result<(), Error> {
//println!("encode_file: {:?}", self.full_path()); //println!("encode_file: {:?}", self.full_path());
@ -916,6 +990,54 @@ impl <'a, W: Write> Encoder<'a, W> {
Ok(()) Ok(())
} }
/// Encodes the `.pxarexclude` file itself into the archive.
///
/// All metadata is gathered from `filefd`/`stat` before anything is written.
/// The payload is taken from the already read `content` buffer instead of
/// reading the file again. On virtual file systems, or on foreign devices
/// when not all file systems are included, an empty payload is written.
fn encode_pxar_exclude(&mut self, filefd: RawFd, stat: &FileStat, magic: i64, content: &[u8]) -> Result<(), Error> {
    // Gather every piece of metadata first ...
    let mut entry = self.create_entry(&stat)?;
    self.read_chattr(filefd, &mut entry)?;
    self.read_fat_attr(filefd, magic, &mut entry)?;
    let (xattrs, fcaps) = self.read_xattrs(filefd, &stat)?;
    let acl_access = self.read_acl(filefd, &stat, acl::ACL_TYPE_ACCESS)?;
    let projid = self.read_quota_project_id(filefd, magic, &stat)?;

    // ... then emit it in the same order as the original implementation.
    self.write_entry(entry)?;
    for attr in xattrs {
        self.write_xattr(attr)?;
    }
    self.write_fcaps(fcaps)?;
    for acl_user in acl_access.users {
        self.write_acl_user(acl_user)?;
    }
    for acl_group in acl_access.groups {
        self.write_acl_group(acl_group)?;
    }
    if let Some(group_obj) = acl_access.group_obj {
        self.write_acl_group_obj(group_obj)?;
    }
    if let Some(id) = projid {
        self.write_quota_project_id(id)?;
    }

    let include_payload = !is_virtual_file_system(magic)
        && ((stat.st_dev == self.root_st_dev) || self.all_file_systems);

    if include_payload {
        let size = content.len();
        self.write_header(CA_FORMAT_PAYLOAD, size as u64)?;
        self.writer.write_all(content)?;
        self.writer_pos += size;
    } else {
        eprintln!("skip content: {:?}", self.full_path());
        self.write_header(CA_FORMAT_PAYLOAD, 0)?;
    }

    Ok(())
}
// the report_XXX method may raise and error - depending on encoder configuration // the report_XXX method may raise and error - depending on encoder configuration
fn report_vanished_file(&self, path: &Path) -> Result<(), Error> { fn report_vanished_file(&self, path: &Path) -> Result<(), Error> {

207
src/pxar/exclude_pattern.rs Normal file
View File

@ -0,0 +1,207 @@
use std::io::Read;
use std::ffi::{CStr, CString};
use std::fs::File;
use std::os::unix::io::{FromRawFd, RawFd};
use failure::*;
use libc::{c_char, c_int};
use nix::fcntl::OFlag;
use nix::errno::Errno;
use nix::NixPath;
use nix::sys::stat::{FileStat, Mode};
/// Presumably mirrors the C `FNM_NOMATCH` return value of `fnmatch`
/// (TODO: confirm against the target libc headers).
pub const FNM_NOMATCH: c_int = 1;
// Declaration of the external C function `fnmatch`, which performs the glob
// matching. Callers in this file treat a return value of 0 as "matched" and
// pass 0 for `flags`.
extern "C" {
fn fnmatch(pattern: *const c_char, string: *const c_char, flags: c_int) -> c_int;
}
/// Result of matching a single file name against an exclude pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchType {
    /// The pattern did not match the file name.
    None,
    /// The full pattern matched and the pattern is an exclude entry.
    Exclude,
    /// The full pattern matched and the pattern is an include (`!`) entry.
    Include,
    /// Only the first path component of an exclude entry matched.
    PartialExclude,
    /// Only the first path component of an include (`!`) entry matched.
    PartialInclude,
}
/// A single parsed entry of a `.pxarexclude` file.
#[derive(Clone, Debug)]
pub struct PxarExcludePattern {
    /// The pattern with the leading `!`, leading `/` and trailing `/` removed.
    pattern: CString,
    /// `true` for exclude entries, `false` for entries prefixed with `!`.
    match_exclude: bool,
    /// `true` if the entry ended in `/` and therefore only matches directories.
    match_dir_only: bool,
    /// `pattern` split at its first `/` into (first component, rest).
    split_pattern: (CString, CString),
}
impl PxarExcludePattern {
pub fn from_file<P: ?Sized + NixPath>(parent_fd: RawFd, filename: &P) -> Result<Option<(Vec<PxarExcludePattern>, Vec<u8>, FileStat)>, Error> {
let stat = match nix::sys::stat::fstatat(parent_fd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
Ok(stat) => stat,
Err(nix::Error::Sys(Errno::ENOENT)) => return Ok(None),
Err(err) => bail!("stat failed - {}", err),
};
let filefd = nix::fcntl::openat(parent_fd, filename, OFlag::O_NOFOLLOW, Mode::empty())?;
let mut file = unsafe {
File::from_raw_fd(filefd)
};
let mut content_buffer = Vec::new();
let _bytes = file.read_to_end(&mut content_buffer)?;
let mut exclude_pattern = Vec::new();
for line in content_buffer.split(|&c| c == b'\n') {
if line.is_empty() {
continue;
}
if let Some(pattern) = Self::from_line(line)? {
exclude_pattern.push(pattern);
}
}
Ok(Some((exclude_pattern, content_buffer, stat)))
}
pub fn from_line(line: &[u8]) -> Result<Option<PxarExcludePattern>, Error> {
let mut input = line;
if input.starts_with(b"#") {
return Ok(None);
}
let match_exclude = if input.starts_with(b"!") {
// Reduce slice view to exclude "!"
input = &input[1..];
false
} else {
true
};
// Paths ending in / match only directory names (no filenames)
let match_dir_only = if input.ends_with(b"/") {
let len = input.len();
input = &input[..len - 1];
true
} else {
false
};
// Ignore initial slash
if input.starts_with(b"/") {
input = &input[1..];
}
if input.is_empty() || input == b"." ||
input == b".." || input.contains(&b'\0') {
bail!("invalid path component encountered");
}
// This will fail if the line contains b"\0"
let pattern = CString::new(input)?;
let split_pattern = split_at_slash(&pattern);
Ok(Some(PxarExcludePattern {
pattern,
match_exclude,
match_dir_only,
split_pattern,
}))
}
pub fn get_front_pattern(&self) -> PxarExcludePattern {
let pattern = split_at_slash(&self.split_pattern.0);
PxarExcludePattern {
pattern: self.split_pattern.0.clone(),
match_exclude: self.match_exclude,
match_dir_only: self.match_dir_only,
split_pattern: pattern,
}
}
pub fn get_rest_pattern(&self) -> PxarExcludePattern {
let pattern = split_at_slash(&self.split_pattern.1);
PxarExcludePattern {
pattern: self.split_pattern.1.clone(),
match_exclude: self.match_exclude,
match_dir_only: self.match_dir_only,
split_pattern: pattern,
}
}
pub fn dump(&self) {
match (self.match_exclude, self.match_dir_only) {
(true, true) => println!("{:#?}/", self.pattern),
(true, false) => println!("{:#?}", self.pattern),
(false, true) => println!("!{:#?}/", self.pattern),
(false, false) => println!("!{:#?}", self.pattern),
}
}
pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> MatchType {
let mut res = MatchType::None;
let (front, _) = &self.split_pattern;
let fnmatch_res = unsafe {
fnmatch(front.as_ptr() as *const libc::c_char, filename.as_ptr() as *const libc::c_char, 0)
};
// TODO error cases
if fnmatch_res == 0 {
res = if self.match_exclude {
MatchType::PartialExclude
} else {
MatchType::PartialInclude
};
}
let full = if self.pattern.to_bytes().starts_with(b"**/") {
CString::new(&self.pattern.to_bytes()[3..]).unwrap()
} else {
CString::new(&self.pattern.to_bytes()[..]).unwrap()
};
let fnmatch_res = unsafe {
fnmatch(full.as_ptr() as *const libc::c_char, filename.as_ptr() as *const libc::c_char, 0)
};
// TODO error cases
if fnmatch_res == 0 {
res = if self.match_exclude {
MatchType::Exclude
} else {
MatchType::Include
};
}
if !is_dir && self.match_dir_only {
res = MatchType::None;
}
res
}
}
/// Splits a pattern at its first `/` into (first component, remainder).
///
/// A leading `./` is dropped before splitting. If there is no `/`, the
/// remainder is empty. If the first component is `**`, it is replaced by `*`
/// and the remainder is the whole (trimmed) pattern, so that `**` keeps
/// matching on every directory level.
fn split_at_slash(match_pattern: &CStr) -> (CString, CString) {
    let raw = match_pattern.to_bytes();
    let trimmed = if raw.starts_with(b"./") { &raw[2..] } else { raw };

    // `splitn(2, ..)` always yields the part before the first '/', followed
    // by the part after it if (and only if) a '/' is present.
    let mut parts = trimmed.splitn(2, |&byte| byte == b'/');
    let head: &[u8] = parts.next().unwrap_or(&[]);
    let tail: &[u8] = parts.next().unwrap_or(&[]);

    // '**' is treated such that it matches any directory
    let (front, rest) = if head == b"**" {
        (&b"*"[..], trimmed)
    } else {
        (head, tail)
    };

    // The bytes stem from a valid CStr, hence they cannot contain a NUL byte
    // and the conversions cannot fail.
    (CString::new(front).unwrap(), CString::new(rest).unwrap())
}