rename catar into pxar

To avoid confusion with the casync implementation.
This commit is contained in:
Dietmar Maurer
2019-03-14 10:54:09 +01:00
parent 7c4dd94670
commit 8968258b66
16 changed files with 85 additions and 82 deletions

View File

@ -0,0 +1,134 @@
//! Helpers to generate a binary search tree stored in an array from a
//! sorted array.
//!
//! Specifically, for any given sorted array 'input' permute the
//! array so that the following rule holds:
//!
//! For each array item with index i, the item at 2i+1 is smaller and
//! the item at 2i+2 is larger.
//!
//! This structure permits efficient (meaning: O(log(n))) binary
//! searches: start with item i=0 (i.e. the root of the BST), compare
//! the value with the searched item, if smaller proceed at item
//! 2i+1, if larger proceed at item 2i+2, and repeat, until either
//! the item is found, or the indexes grow beyond the array size,
//! which means the entry does not exist.
//!
//! Effectively this implements bisection, but instead of jumping
//! around wildly in the array during a single search we only search
//! with strictly monotonically increasing indexes.
//!
//! Algorithm is from casync (camakebst.c), simplified and optimized
//! for rust. Permutation function originally by L. Bressel, 2017. We
//! pass permutation info to user provided callback, which actually
//! implements the data copy.
//!
//! The Wikipedia article on [Binary
//! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short
//! introduction on how to store binary trees using an array.
/// Recursive worker for `copy_binary_search_tree()`.
///
/// Processes the sorted input range `input[o..o+n]` and reports, via
/// `copy_func(src, dest)`, which input index has to be stored at
/// which index of the output array.
///
/// * `copy_func` - callback receiving `(source_index, destination_index)`
/// * `n` - number of items in the current input range (must be > 0)
/// * `o` - offset of the current input range
/// * `e` - exponent used to split the range (`p = 1 << e`)
/// * `i` - output index receiving the root of the current range
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
    copy_func: &mut F,
    // we work on input array input[o..o+n]
    n: usize,
    o: usize,
    e: usize,
    i: usize,
) {
    let p = 1 << e;

    let t = p + (p >> 1) - 1;

    // m is the index (relative to o) of the item that becomes the
    // root of this range; m items end up in the left sub-tree.
    let m = if n > t {
        // |...........p.............t....n........(2p)|
        p - 1
    } else {
        // |...........p.....n.......t.............(2p)|
        p - 1 - (t - n)
    };

    (copy_func)(o + m, i);

    if m > 0 {
        copy_binary_search_tree_inner(copy_func, m, o, e - 1, i * 2 + 1);
    }

    if (m + 1) < n {
        copy_binary_search_tree_inner(copy_func, n - m - 1, o + m + 1, e - 1, i * 2 + 2);
    }
}

/// This function calls the provided `copy_func()` with the permutation
/// info.
///
/// `copy_func(src, dest)` is called exactly once for each of the `n`
/// items, where `src` is the index inside the sorted input and `dest`
/// the index inside the resulting array. Nothing is called for `n == 0`.
///
/// ```
/// # use proxmox_backup::pxar::binary_search_tree::copy_binary_search_tree;
/// copy_binary_search_tree(5, |src, dest| {
///    println!("Copy {} to {}", src, dest);
/// });
/// ```
///
/// This will produce the following output:
///
/// ```text
/// Copy 3 to 0
/// Copy 1 to 1
/// Copy 0 to 3
/// Copy 2 to 4
/// Copy 4 to 2
/// ```
///
/// So this generates the following permutation: `[3,1,4,0,2]`.
pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
    n: usize,
    mut copy_func: F,
) {
    if n == 0 {
        return;
    }

    // fast floor(log2(n)); derive the bit width from usize itself so
    // the result is also correct on targets where usize is not 64 bit.
    let usize_bits = 8 * std::mem::size_of::<usize>();
    let e = usize_bits - (n.leading_zeros() as usize) - 1;

    copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
}
#[test]
fn test_binary_search_tree() {
    /// Runs the permutation for `len` items, checking that every
    /// output slot is written exactly once, and returns the result.
    fn run_test(len: usize) -> Vec<usize> {
        const MARKER: usize = 0xfffffff;

        let mut output = vec![MARKER; len];

        copy_binary_search_tree(len, |src, dest| {
            assert!(output[dest] == MARKER);
            output[dest] = src;
        });

        if len < 32 {
            println!("GOT:{}:{:?}", len, output);
        }

        assert!(output.iter().all(|&slot| slot != MARKER));

        output
    }

    // expected[len] is the permutation for an input of `len` items
    let expected: Vec<Vec<usize>> = vec![
        vec![],
        vec![0],
        vec![1, 0],
        vec![1, 0, 2],
        vec![2, 1, 3, 0],
        vec![3, 1, 4, 0, 2],
        vec![3, 1, 5, 0, 2, 4],
        vec![3, 1, 5, 0, 2, 4, 6],
        vec![4, 2, 6, 1, 3, 5, 7, 0],
        vec![5, 3, 7, 1, 4, 6, 8, 0, 2],
        vec![6, 3, 8, 1, 5, 7, 9, 0, 2, 4],
        vec![7, 3, 9, 1, 5, 8, 10, 0, 2, 4, 6],
        vec![7, 3, 10, 1, 5, 9, 11, 0, 2, 4, 6, 8],
        vec![7, 3, 11, 1, 5, 9, 12, 0, 2, 4, 6, 8, 10],
        vec![7, 3, 11, 1, 5, 9, 13, 0, 2, 4, 6, 8, 10, 12],
        vec![7, 3, 11, 1, 5, 9, 13, 0, 2, 4, 6, 8, 10, 12, 14],
        vec![8, 4, 12, 2, 6, 10, 14, 1, 3, 5, 7, 9, 11, 13, 15, 0],
        vec![9, 5, 13, 3, 7, 11, 15, 1, 4, 6, 8, 10, 12, 14, 16, 0, 2],
    ];

    for (len, want) in expected.iter().enumerate() {
        assert!(run_test(len) == *want);
    }

    // larger sizes: only the internal consistency checks apply
    for len in 18..1000 {
        run_test(len);
    }
}

484
src/pxar/decoder.rs Normal file
View File

@ -0,0 +1,484 @@
//! *pxar* format decoder.
//!
//! This module contains the code to decode *pxar* archive files.
use failure::*;
use endian_trait::Endian;
use super::format_definition::*;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::os::unix::io::AsRawFd;
use std::os::unix::io::RawFd;
use std::os::unix::io::FromRawFd;
use std::os::unix::ffi::{OsStringExt};
use std::ffi::{OsStr, OsString};
use nix::fcntl::OFlag;
use nix::sys::stat::Mode;
use nix::errno::Errno;
use nix::NixPath;
// This one needs Read, but works without Seek
/// Sequential decoder restoring a *pxar* archive from a `Read` stream.
pub struct PxarDecoder<'a, R: Read> {
// borrowed input stream the archive is read from
reader: &'a mut R,
// scratch buffer used to read and discard data that is skipped
// (the goodbye table at the end of each directory)
skip_buffer: Vec<u8>,
}
// Size in bytes of a `CaFormatHeader`; the `size` field of a header is
// compared against and reduced by this value to get the payload length.
const HEADER_SIZE: u64 = std::mem::size_of::<CaFormatHeader>() as u64;
impl <'a, R: Read> PxarDecoder<'a, R> {
/// Creates a decoder reading from `reader`.
///
/// Allocates a 64 KiB zeroed scratch buffer used when skipping data.
pub fn new(reader: &'a mut R) -> Self {
    Self {
        reader,
        skip_buffer: vec![0u8; 64 * 1024],
    }
}
fn read_item<T: Endian>(&mut self) -> Result<T, Error> {
let mut result: T = unsafe { std::mem::uninitialized() };
let buffer = unsafe { std::slice::from_raw_parts_mut(
&mut result as *mut T as *mut u8,
std::mem::size_of::<T>()
)};
self.reader.read_exact(buffer)?;
Ok(result.from_le())
}
fn read_symlink(&mut self, size: u64) -> Result<PathBuf, Error> {
if size < (HEADER_SIZE + 2) {
bail!("dectected short symlink target.");
}
let target_len = size - HEADER_SIZE;
if target_len > (libc::PATH_MAX as u64) {
bail!("symlink target too long ({}).", target_len);
}
let mut buffer = vec![0u8; target_len as usize];
self.reader.read_exact(&mut buffer)?;
let last_byte = buffer.pop().unwrap();
if last_byte != 0u8 {
bail!("symlink target not nul terminated.");
}
Ok(PathBuf::from(std::ffi::OsString::from_vec(buffer)))
}
/// Reads the payload of a filename item.
///
/// `size` is the item size taken from the item header (including the
/// header itself). The payload must be a nul terminated name; the
/// terminator is stripped from the result.
///
/// Returns an error if the name is too short or too long, is not nul
/// terminated, is `.` or `..`, contains a `/`, or is empty.
fn read_filename(&mut self, size: u64) -> Result<OsString, Error> {
    // need at least one name byte plus the nul terminator
    if size < (HEADER_SIZE + 2) {
        bail!("dectected short filename");
    }

    let name_len = size - HEADER_SIZE;
    if name_len > ((libc::FILENAME_MAX as u64) + 1) {
        bail!("filename too long ({}).", name_len);
    }

    let mut buffer = vec![0u8; name_len as usize];
    self.reader.read_exact(&mut buffer)?;

    // name_len >= 2, so there is always a last byte
    let last_byte = buffer.pop().unwrap();
    if last_byte != 0u8 {
        bail!("filename entry not nul terminated.");
    }

    // never accept entries which could escape the target directory
    if &buffer[..] == b"." || &buffer[..] == b".." {
        bail!("found invalid filename '.' or '..'.");
    }

    if buffer.contains(&b'/') {
        bail!("found invalid filename with slashes.");
    }

    let name = OsString::from_vec(buffer);
    if name.is_empty() {
        bail!("found empty filename.");
    }

    Ok(name)
}
/// Reads the header following an entry and returns it.
///
/// fixme: attribute items are not handled yet, so the first header
/// read is returned to the caller unconditionally.
fn restore_attributes(&mut self, _entry: &CaFormatEntry) -> Result<CaFormatHeader, Error> {
    let head: CaFormatHeader = self.read_item()?;
    Ok(head)
}
/// Applies the permission bits (lower 12 bits of `entry.mode`) to the
/// open file descriptor `fd`.
fn restore_mode(&mut self, entry: &CaFormatEntry, fd: RawFd) -> Result<(), Error> {
    let permission_bits = (entry.mode as u32) & 0o7777;

    nix::sys::stat::fchmod(fd, Mode::from_bits_truncate(permission_bits))?;

    Ok(())
}
fn restore_mode_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
let mode = Mode::from_bits_truncate((entry.mode as u32) & 0o7777);
// NOTE: we want :FchmodatFlags::NoFollowSymlink, but fchmodat does not support that
// on linux (see man fchmodat). Fortunately, we can simply avoid calling this on symlinks.
nix::sys::stat::fchmodat(Some(dirfd), filename, mode, nix::sys::stat::FchmodatFlags::FollowSymlink)?;
Ok(())
}
/// Sets owner and group of the open file descriptor `fd` to the
/// `uid`/`gid` stored in `entry` (truncated to 32 bit).
fn restore_ugid(&mut self, entry: &CaFormatEntry, fd: RawFd) -> Result<(), Error> {
    let (uid, gid) = (entry.uid as u32, entry.gid as u32);

    let rc = unsafe { libc::fchown(fd, uid, gid) };
    Errno::result(rc)?;

    Ok(())
}
fn restore_ugid_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
let uid = entry.uid as u32;
let gid = entry.gid as u32;
let res = filename.with_nix_path(|cstr| unsafe {
libc::fchownat(dirfd, cstr.as_ptr(), uid, gid, libc::AT_SYMLINK_NOFOLLOW)
})?;
Errno::result(res)?;
Ok(())
}
/// Sets the modification time of the open file descriptor `fd` to
/// `entry.mtime` (nanoseconds); the access time is left untouched.
fn restore_mtime(&mut self, entry: &CaFormatEntry, fd: RawFd) -> Result<(), Error> {
    let timestamps = nsec_to_update_timespec(entry.mtime);

    // futimens() expects a pointer to an array of two timespec values
    let rc = unsafe { libc::futimens(fd, &timestamps[0]) };
    Errno::result(rc)?;

    Ok(())
}
fn restore_mtime_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
let times = nsec_to_update_timespec(entry.mtime);
let res = filename.with_nix_path(|cstr| unsafe {
libc::utimensat(dirfd, cstr.as_ptr(), &times[0], libc::AT_SYMLINK_NOFOLLOW)
})?;
Errno::result(res)?;
Ok(())
}
fn restore_device_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr, device: &CaFormatDevice) -> Result<(), Error> {
let rdev = nix::sys::stat::makedev(device.major, device.minor);
let mode = ((entry.mode as u32) & libc::S_IFMT) | 0o0600;
let res = filename.with_nix_path(|cstr| unsafe {
libc::mknodat(dirfd, cstr.as_ptr(), mode, rdev)
})?;
Errno::result(res)?;
Ok(())
}
/// Creates the socket node `filename` inside the directory `dirfd`,
/// using mode `0600`.
fn restore_socket_at(&mut self, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let node_mode = libc::S_IFSOCK | 0o0600;

    let rc = filename.with_nix_path(|name| unsafe {
        libc::mknodat(dirfd, name.as_ptr(), node_mode, 0)
    })?;
    Errno::result(rc)?;

    Ok(())
}
/// Creates the FIFO `filename` inside the directory `dirfd`, passing
/// `S_IFIFO | 0600` as mode.
fn restore_fifo_at(&mut self, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let fifo_mode = libc::S_IFIFO | 0o0600;

    let rc = filename.with_nix_path(|name| unsafe {
        libc::mkfifoat(dirfd, name.as_ptr(), fifo_mode)
    })?;
    Errno::result(rc)?;

    Ok(())
}
pub fn restore<F>(
&mut self,
path: &Path, // used for error reporting
callback: &F,
) -> Result<(), Error>
where F: Fn(&Path) -> Result<(), Error>
{
let _ = std::fs::create_dir(path);
let dir = match nix::dir::Dir::open(path, nix::fcntl::OFlag::O_DIRECTORY, nix::sys::stat::Mode::empty()) {
Ok(dir) => dir,
Err(err) => bail!("unable to open target directory {:?} - {}", path, err),
};
self.restore_sequential(&mut path.to_owned(), &OsString::new(), &dir, callback)
}
// Restores one archive entry (and, for directories, recursively all
// contained entries) by reading the stream strictly sequentially.
//
// * `path` - full path of the entry, used for error reporting
// * `filename` - name of the entry relative to `parent`; empty for
//   the archive root, which is restored into `parent` itself
// * `parent` - open directory the entry gets created in
// * `callback` - only handed on to recursive calls here
//
// Entries with a file type not handled below are silently ignored.
fn restore_sequential<F>(
&mut self,
path: &mut PathBuf, // used for error reporting
filename: &OsStr, // repeats path last component
parent: &nix::dir::Dir,
callback: &F,
) -> Result<(), Error>
where F: Fn(&Path) -> Result<(), Error>
{
let parent_fd = parent.as_raw_fd();
// read ENTRY first
let head: CaFormatHeader = self.read_item()?;
check_ca_header::<CaFormatEntry>(&head, CA_FORMAT_ENTRY)?;
let entry: CaFormatEntry = self.read_item()?;
let mode = entry.mode as u32; //fixme: upper 32bits?
let ifmt = mode & libc::S_IFMT;
// --- directory ---
if ifmt == libc::S_IFDIR {
let dir;
if filename.is_empty() {
// archive root: use the parent directory itself
dir = nix::dir::Dir::openat(parent_fd, ".", OFlag::O_DIRECTORY, Mode::empty())?;
} else {
// create_new is true, so an already existing directory is an error
dir = match dir_mkdirat(parent_fd, filename, true) {
Ok(dir) => dir,
Err(err) => bail!("unable to open directory {:?} - {}", path, err),
};
}
let mut head = self.restore_attributes(&entry)?;
// each child starts with a FILENAME item, followed by its ENTRY
while head.htype == CA_FORMAT_FILENAME {
let name = self.read_filename(head.size)?;
path.push(&name);
println!("NAME: {:?}", path);
self.restore_sequential(path, &name, &dir, callback)?;
path.pop();
head = self.read_item()?;
}
// the directory content must be terminated by a GOODBYE item
if head.htype != CA_FORMAT_GOODBYE {
bail!("got unknown header type inside directory entry {:016x}", head.htype);
}
println!("Skip Goodbye");
if head.size < HEADER_SIZE { bail!("detected short goodbye table"); }
// self.reader.seek(SeekFrom::Current((head.size - HEADER_SIZE) as i64))?;
// the reader cannot seek, so read and discard the goodbye table
// in chunks of at most skip_buffer.len() bytes
let mut done = 0;
let skip = (head.size - HEADER_SIZE) as usize;
while done < skip {
let todo = skip - done;
let n = if todo > self.skip_buffer.len() { self.skip_buffer.len() } else { todo };
let data = &mut self.skip_buffer[..n];
self.reader.read_exact(data)?;
done += n;
}
// restore the directory metadata after all children were created
self.restore_mode(&entry, dir.as_raw_fd())?;
self.restore_mtime(&entry, dir.as_raw_fd())?;
self.restore_ugid(&entry, dir.as_raw_fd())?;
return Ok(());
}
// everything except the archive root directory needs a name
if filename.is_empty() {
bail!("got empty file name at {:?}", path)
}
// --- symlink ---
if ifmt == libc::S_IFLNK {
// fixme: create symlink
//fixme: restore permission, acls, xattr, ...
let head: CaFormatHeader = self.read_item()?;
match head.htype {
CA_FORMAT_SYMLINK => {
let target = self.read_symlink(head.size)?;
println!("TARGET: {:?}", target);
if let Err(err) = symlinkat(&target, parent_fd, filename) {
bail!("create symlink {:?} failed - {}", path, err);
}
}
_ => {
bail!("got unknown header type inside symlink entry {:016x}", head.htype);
}
}
// self.restore_mode_at(&entry, parent_fd, filename)?; //not supported on symlinks
self.restore_ugid_at(&entry, parent_fd, filename)?;
self.restore_mtime_at(&entry, parent_fd, filename)?;
return Ok(());
}
// --- socket (no further items are read) ---
if ifmt == libc::S_IFSOCK {
self.restore_socket_at(parent_fd, filename)?;
self.restore_mode_at(&entry, parent_fd, filename)?;
self.restore_ugid_at(&entry, parent_fd, filename)?;
self.restore_mtime_at(&entry, parent_fd, filename)?;
return Ok(());
}
// --- FIFO (no further items are read) ---
if ifmt == libc::S_IFIFO {
self.restore_fifo_at(parent_fd, filename)?;
self.restore_mode_at(&entry, parent_fd, filename)?;
self.restore_ugid_at(&entry, parent_fd, filename)?;
self.restore_mtime_at(&entry, parent_fd, filename)?;
return Ok(());
}
// --- block/character device: must be followed by a DEVICE item ---
if (ifmt == libc::S_IFBLK) || (ifmt == libc::S_IFCHR) {
let head: CaFormatHeader = self.read_item()?;
match head.htype {
CA_FORMAT_DEVICE => {
let device: CaFormatDevice = self.read_item()?;
self.restore_device_at(&entry, parent_fd, filename, &device)?;
}
_ => {
bail!("got unknown header type inside device entry {:016x}", head.htype);
}
}
self.restore_mode_at(&entry, parent_fd, filename)?;
self.restore_ugid_at(&entry, parent_fd, filename)?;
self.restore_mtime_at(&entry, parent_fd, filename)?;
return Ok(());
}
// --- regular file: must be followed by a PAYLOAD item ---
if ifmt == libc::S_IFREG {
let mut read_buffer: [u8; 64*1024] = unsafe { std::mem::uninitialized() };
// O_EXCL: refuse to overwrite an already existing file
let flags = OFlag::O_CREAT|OFlag::O_WRONLY|OFlag::O_EXCL;
let open_mode = Mode::from_bits_truncate(0o0600 | mode);
let mut file = match file_openat(parent_fd, filename, flags, open_mode) {
Ok(file) => file,
Err(err) => bail!("open file {:?} failed - {}", path, err),
};
let head = self.restore_attributes(&entry)?;
if head.htype != CA_FORMAT_PAYLOAD {
bail!("got unknown header type for file entry {:016x}", head.htype);
}
if head.size < HEADER_SIZE {
bail!("detected short payload");
}
let need = (head.size - HEADER_SIZE) as usize;
//self.reader.seek(SeekFrom::Current(need as i64))?;
// copy the payload to the new file in chunks of at most 64 KiB
let mut done = 0;
while done < need {
let todo = need - done;
let n = if todo > read_buffer.len() { read_buffer.len() } else { todo };
let data = &mut read_buffer[..n];
self.reader.read_exact(data)?;
file.write_all(data)?;
done += n;
}
self.restore_mode(&entry, file.as_raw_fd())?;
self.restore_mtime(&entry, file.as_raw_fd())?;
self.restore_ugid(&entry, file.as_raw_fd())?;
return Ok(());
}
// any other file type: nothing is restored
Ok(())
}
}
fn file_openat(parent: RawFd, filename: &OsStr, flags: OFlag, mode: Mode) -> Result<std::fs::File, Error> {
let fd = filename.with_nix_path(|cstr| {
nix::fcntl::openat(parent, cstr.as_ref(), flags, mode)
})??;
let file = unsafe { std::fs::File::from_raw_fd(fd) };
Ok(file)
}
fn dir_mkdirat(parent: RawFd, filename: &OsStr, create_new: bool) -> Result<nix::dir::Dir, nix::Error> {
// call mkdirat first
let res = filename.with_nix_path(|cstr| unsafe {
libc::mkdirat(parent, cstr.as_ptr(), libc::S_IRWXU)
})?;
match Errno::result(res) {
Ok(_) => {},
Err(err) => {
if err == nix::Error::Sys(nix::errno::Errno::EEXIST) {
if create_new { return Err(err); }
} else {
return Err(err);
}
}
}
let dir = nix::dir::Dir::openat(parent, filename, OFlag::O_DIRECTORY, Mode::empty())?;
Ok(dir)
}
/// Creates the symlink `linkname` (relative to the directory
/// `parent`) pointing to `target`.
fn symlinkat(target: &Path, parent: RawFd, linkname: &OsStr) -> Result<(), Error> {
    // both names need to be converted to C strings; each conversion
    // may fail, hence the double `?`
    let rc = target.with_nix_path(|target| {
        linkname.with_nix_path(|linkname| unsafe {
            libc::symlinkat(target.as_ptr(), parent, linkname.as_ptr())
        })
    })??;

    Errno::result(rc)?;

    Ok(())
}
fn nsec_to_update_timespec(mtime_nsec: u64) -> [libc::timespec; 2] {
// restore mtime
const UTIME_OMIT: i64 = ((1 << 30) - 2);
const NANOS_PER_SEC: i64 = 1_000_000_000;
let sec = (mtime_nsec as i64) / NANOS_PER_SEC;
let nsec = (mtime_nsec as i64) % NANOS_PER_SEC;
let times: [libc::timespec; 2] = [
libc::timespec { tv_sec: 0, tv_nsec: UTIME_OMIT },
libc::timespec { tv_sec: sec, tv_nsec: nsec },
];
times
}

609
src/pxar/encoder.rs Normal file
View File

@ -0,0 +1,609 @@
//! *pxar* format encoder.
//!
//! This module contains the code to generate *pxar* archive files.
use failure::*;
use endian_trait::Endian;
use super::format_definition::*;
use super::binary_search_tree::*;
use std::io::Write;
use std::os::unix::io::AsRawFd;
use std::os::unix::ffi::OsStrExt;
use std::os::unix::io::RawFd;
use std::path::{Path, PathBuf};
use std::ffi::CStr;
use nix::NixPath;
use nix::fcntl::OFlag;
use nix::sys::stat::Mode;
use nix::errno::Errno;
use nix::sys::stat::FileStat;
/// The format requires building sorted directory lookup tables in
/// memory, so we restrict the number of allowed entries per directory
/// to limit the maximum memory usage. Encoding fails for directories
/// with more entries.
pub const MAX_DIRECTORY_ENTRIES: usize = 256*1024;
/// Encoder writing a directory tree as *pxar* archive to a `Write`
/// stream.
pub struct PxarEncoder<'a, W: Write> {
current_path: PathBuf, // used for error reporting
// borrowed output stream the archive is written to
writer: &'a mut W,
// number of bytes written to `writer` so far
writer_pos: usize,
// initialized to 0 by encode(); not used elsewhere in the code shown here
_size: usize,
// scratch buffer for file payload data and for building goodbye tables
file_copy_buffer: Vec<u8>,
// if set, also include content from other devices than the root directory
all_file_systems: bool,
// st_dev of the root directory, used to detect other file systems
root_st_dev: u64,
// print the path of each encoded entry
verbose: bool,
}
impl <'a, W: Write> PxarEncoder<'a, W> {
/// Encodes the directory `dir` (including its content) as *pxar*
/// archive and writes it to `writer`.
///
/// * `path` - path of `dir`, used for error reporting and verbose output
/// * `dir` - the already opened root directory
/// * `writer` - output stream
/// * `all_file_systems` - also include content located on other
///   devices than the root directory
/// * `verbose` - print the path of each encoded entry
///
/// Fails if `dir` is not a directory or is located on a virtual file
/// system.
pub fn encode(
    path: PathBuf,
    dir: &mut nix::dir::Dir,
    writer: &'a mut W,
    all_file_systems: bool,
    verbose: bool,
) -> Result<(), Error> {
    const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024;

    // zero-initialize the buffer instead of exposing uninitialized
    // memory through Vec::set_len()
    let file_copy_buffer = vec![0u8; FILE_COPY_BUFFER_SIZE];

    // todo: use scandirat??

    let dir_fd = dir.as_raw_fd();
    let stat = match nix::sys::stat::fstat(dir_fd) {
        Ok(stat) => stat,
        Err(err) => bail!("fstat {:?} failed - {}", path, err),
    };

    if (stat.st_mode & libc::S_IFMT) != libc::S_IFDIR {
        bail!("got unexpected file type {:?} (not a directory)", path);
    }

    let magic = detect_fs_type(dir_fd)?;

    if is_virtual_file_system(magic) {
        bail!("backup virtual file systems is disabled!");
    }

    let mut me = Self {
        current_path: path,
        writer,
        writer_pos: 0,
        _size: 0,
        file_copy_buffer,
        all_file_systems,
        root_st_dev: stat.st_dev,
        verbose,
    };

    if verbose {
        println!("{:?}", me.current_path);
    }

    me.encode_dir(dir, &stat, magic)?;

    Ok(())
}
/// Writes all of `buf` to the output stream and advances the
/// current write position accordingly.
fn write(&mut self, buf: &[u8]) -> Result<(), Error> {
    let written = buf.len();
    self.writer.write_all(buf)?;
    self.writer_pos += written;
    Ok(())
}
/// Converts `item` to little endian byte order and writes its raw
/// in-memory representation to the output stream.
fn write_item<T: Endian>(&mut self, item: T) -> Result<(), Error> {
    let le_item = item.to_le();

    // view the converted item as a plain byte slice
    let item_ptr = &le_item as *const T as *const u8;
    let raw = unsafe { std::slice::from_raw_parts(item_ptr, std::mem::size_of::<T>()) };

    self.write(raw)
}
/// Writes the first `size` bytes of the internal copy buffer to the
/// output stream and advances the current write position.
fn flush_copy_buffer(&mut self, size: usize) -> Result<(), Error> {
    let pending = &self.file_copy_buffer[..size];
    self.writer.write_all(pending)?;
    self.writer_pos += size;
    Ok(())
}
/// Writes an item header of type `htype`.
///
/// `size` is the payload size; the stored size additionally includes
/// the size of the header itself.
fn write_header(&mut self, htype: u64, size: u64) -> Result<(), Error> {
    let header_len = std::mem::size_of::<CaFormatHeader>() as u64;
    let header = CaFormatHeader { size: size + header_len, htype };
    self.write_item(header)
}
/// Writes a FILENAME item containing `name` including its nul
/// terminator.
fn write_filename(&mut self, name: &CStr) -> Result<(), Error> {
    let name_bytes = name.to_bytes_with_nul();
    self.write_header(CA_FORMAT_FILENAME, name_bytes.len() as u64)?;
    self.write(name_bytes)
}
/// Builds the archive entry for a file from its `stat` data.
///
/// Symlinks always get mode `S_IFLNK | 0777`; for all other files
/// the file type and the lower 12 mode bits are kept. The mtime is
/// stored in nanoseconds; a negative mtime is rejected.
fn create_entry(&self, stat: &FileStat) -> Result<CaFormatEntry, Error> {
    let is_symlink = (stat.st_mode & libc::S_IFMT) == libc::S_IFLNK;
    let mode = if is_symlink {
        (libc::S_IFLNK | 0o777) as u64
    } else {
        (stat.st_mode & (libc::S_IFMT | 0o7777)) as u64
    };

    let mtime = stat.st_mtime * 1_000_000_000 + stat.st_mtime_nsec;
    if mtime < 0 {
        bail!("got strange mtime ({}) from fstat for {:?}.", mtime, self.current_path);
    }

    Ok(CaFormatEntry {
        feature_flags: CA_FORMAT_DEFAULT, // fixme: ??
        mode,
        flags: 0,
        uid: stat.st_uid as u64,
        gid: stat.st_gid as u64,
        mtime: mtime as u64,
    })
}
fn read_chattr(&self, fd: RawFd, entry: &mut CaFormatEntry) -> Result<(), Error> {
let mut attr: usize = 0;
let res = unsafe { read_attr_fd(fd, &mut attr)};
if let Err(err) = res {
if let nix::Error::Sys(errno) = err {
if errno_is_unsupported(errno) { return Ok(()) };
}
bail!("read_attr_fd failed for {:?} - {}", self.current_path, err);
}
let flags = ca_feature_flags_from_chattr(attr as u32);
entry.flags = entry.flags | flags;
Ok(())
}
fn read_fat_attr(&self, fd: RawFd, magic: i64, entry: &mut CaFormatEntry) -> Result<(), Error> {
if magic != MSDOS_SUPER_MAGIC && magic != FUSE_SUPER_MAGIC { return Ok(()); }
let mut attr: u32 = 0;
let res = unsafe { read_fat_attr_fd(fd, &mut attr)};
if let Err(err) = res {
if let nix::Error::Sys(errno) = err {
if errno_is_unsupported(errno) { return Ok(()) };
}
bail!("read_fat_attr_fd failed for {:?} - {}", self.current_path, err);
}
let flags = ca_feature_flags_from_fat_attr(attr);
entry.flags = entry.flags | flags;
Ok(())
}
/// Writes an ENTRY item: the item header followed by `entry`.
fn write_entry(&mut self, entry: CaFormatEntry) -> Result<(), Error> {
    let payload_len = std::mem::size_of::<CaFormatEntry>() as u64;
    self.write_header(CA_FORMAT_ENTRY, payload_len)?;
    self.write_item(entry)
}
/// Writes the GOODBYE item terminating a directory.
///
/// The items are sorted by hash and stored as a binary search tree
/// (see `copy_binary_search_tree()`), followed by a tail item.
///
/// * `goodbye_offset` - stored as `offset` of the tail item
/// * `goodbye_items` - lookup items of the directory; gets sorted in place
fn write_goodbye_table(&mut self, goodbye_offset: usize, goodbye_items: &mut [CaFormatGoodbyeItem]) -> Result<(), Error> {
    goodbye_items.sort_unstable_by(|a, b| a.hash.cmp(&b.hash));

    let item_count = goodbye_items.len();

    // one additional item for the tail marker
    let goodbye_table_size = (item_count + 1) * std::mem::size_of::<CaFormatGoodbyeItem>();

    self.write_header(CA_FORMAT_GOODBYE, goodbye_table_size as u64)?;

    // The table is assembled inside the copy buffer. Grow it with
    // zeroed bytes if required, instead of exposing uninitialized
    // memory through Vec::set_len().
    if self.file_copy_buffer.len() < goodbye_table_size {
        self.file_copy_buffer.resize(goodbye_table_size, 0u8);
    }

    let buffer = &mut self.file_copy_buffer;

    copy_binary_search_tree(item_count, |s, d| {
        let item = &goodbye_items[s];
        let offset = d * std::mem::size_of::<CaFormatGoodbyeItem>();
        let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
        dest.offset = u64::to_le(item.offset);
        dest.size = u64::to_le(item.size);
        dest.hash = u64::to_le(item.hash);
    });

    // append CaFormatGoodbyeTail as last item
    let offset = item_count * std::mem::size_of::<CaFormatGoodbyeItem>();
    let dest = crate::tools::map_struct_mut::<CaFormatGoodbyeItem>(&mut buffer[offset..]).unwrap();
    dest.offset = u64::to_le(goodbye_offset as u64);
    dest.size = u64::to_le((goodbye_table_size + std::mem::size_of::<CaFormatHeader>()) as u64);
    dest.hash = u64::to_le(CA_FORMAT_GOODBYE_TAIL_MARKER);

    self.flush_copy_buffer(goodbye_table_size)?;

    Ok(())
}
/// Encodes the directory `dir`: its ENTRY item, all contained files
/// (sorted by name, directories recursively) and the terminating
/// goodbye table.
///
/// * `dir` - the opened directory to encode
/// * `dir_stat` - stat data of `dir`
/// * `magic` - file system type of `dir` (see `detect_fs_type()`)
///
/// The content is skipped if `dir` is on a virtual file system, or
/// on another device than the root directory while
/// `all_file_systems` is not set. Files which vanish while encoding
/// are reported and skipped.
fn encode_dir(&mut self, dir: &mut nix::dir::Dir, dir_stat: &FileStat, magic: i64) -> Result<(), Error> {
    //println!("encode_dir: {:?} start {}", self.current_path, self.writer_pos);

    let mut name_list = vec![];

    let rawfd = dir.as_raw_fd();

    let dir_start_pos = self.writer_pos;

    let mut dir_entry = self.create_entry(&dir_stat)?;

    self.read_chattr(rawfd, &mut dir_entry)?;
    self.read_fat_attr(rawfd, magic, &mut dir_entry)?;

    self.write_entry(dir_entry)?;

    let mut dir_count = 0;

    let include_children;
    if is_virtual_file_system(magic) {
        include_children = false;
    } else {
        include_children = (self.root_st_dev == dir_stat.st_dev) || self.all_file_systems;
    }

    if include_children {
        for entry in dir.iter() {
            dir_count += 1;
            if dir_count > MAX_DIRECTORY_ENTRIES {
                bail!("too many directory items in {:?} (> {})",
                      self.current_path, MAX_DIRECTORY_ENTRIES);
            }

            let entry = match entry {
                Ok(entry) => entry,
                Err(err) => bail!("readir {:?} failed - {}", self.current_path, err),
            };
            let filename = entry.file_name().to_owned();

            // skip "." and ".."
            let name = filename.to_bytes_with_nul();
            let name_len = name.len();
            if name_len == 2 && name[0] == b'.' && name[1] == 0u8 { continue; }
            if name_len == 3 && name[0] == b'.' && name[1] == b'.' && name[2] == 0u8 { continue; }

            name_list.push(filename);
        }
    } else {
        eprintln!("skip mount point: {:?}", self.current_path);
    }

    name_list.sort_unstable_by(|a, b| a.cmp(&b));

    let mut goodbye_items = vec![];

    for filename in &name_list {
        // Note: every path leaving this loop iteration early (via
        // `continue`) has to pop the file name again, else all
        // following paths would be wrong.
        self.current_path.push(std::ffi::OsStr::from_bytes(filename.as_bytes()));

        if self.verbose { println!("{:?}", self.current_path); }

        let stat = match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
            Ok(stat) => stat,
            Err(nix::Error::Sys(Errno::ENOENT)) => {
                self.report_vanished_file(&self.current_path)?;
                self.current_path.pop();
                continue;
            }
            Err(err) => bail!("fstat {:?} failed - {}", self.current_path, err),
        };

        let start_pos = self.writer_pos;

        let ifmt = stat.st_mode & libc::S_IFMT;

        if ifmt == libc::S_IFDIR {
            let mut dir = match nix::dir::Dir::openat(rawfd, filename.as_ref(), OFlag::O_DIRECTORY|OFlag::O_NOFOLLOW, Mode::empty()) {
                Ok(dir) => dir,
                Err(nix::Error::Sys(Errno::ENOENT)) => {
                    self.report_vanished_file(&self.current_path)?;
                    self.current_path.pop();
                    continue; // fixme!!
                },
                Err(err) => bail!("open dir {:?} failed - {}", self.current_path, err),
            };

            // only query the file system type when crossing a device boundary
            let child_magic = if dir_stat.st_dev != stat.st_dev {
                detect_fs_type(dir.as_raw_fd())?
            } else {
                magic
            };

            self.write_filename(&filename)?;
            self.encode_dir(&mut dir, &stat, child_magic)?;

        } else if ifmt == libc::S_IFREG {
            let filefd = match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) {
                Ok(filefd) => filefd,
                Err(nix::Error::Sys(Errno::ENOENT)) => {
                    self.report_vanished_file(&self.current_path)?;
                    self.current_path.pop();
                    continue;
                },
                Err(err) => bail!("open file {:?} failed - {}", self.current_path, err),
            };

            // only query the file system type when crossing a device boundary
            let child_magic = if dir_stat.st_dev != stat.st_dev {
                detect_fs_type(filefd)?
            } else {
                magic
            };

            self.write_filename(&filename)?;
            let res = self.encode_file(filefd, &stat, child_magic);
            let _ = nix::unistd::close(filefd); // ignore close errors
            res?;

        } else if ifmt == libc::S_IFLNK {
            let mut buffer = [0u8; libc::PATH_MAX as usize];

            // keep one byte free for the terminating nul byte
            let res = filename.with_nix_path(|cstr| {
                unsafe { libc::readlinkat(rawfd, cstr.as_ptr(), buffer.as_mut_ptr() as *mut libc::c_char, buffer.len()-1) }
            })?;

            match Errno::result(res) {
                Ok(len) => {
                    buffer[len as usize] = 0u8; // add Nul byte
                    self.write_filename(&filename)?;
                    self.encode_symlink(&buffer[..((len+1) as usize)], &stat)?
                }
                Err(nix::Error::Sys(Errno::ENOENT)) => {
                    self.report_vanished_file(&self.current_path)?;
                    self.current_path.pop();
                    continue;
                }
                Err(err) => bail!("readlink {:?} failed - {}", self.current_path, err),
            }
        } else if (ifmt == libc::S_IFBLK) || (ifmt == libc::S_IFCHR) {
            self.write_filename(&filename)?;
            self.encode_device(&stat)?;
        } else if (ifmt == libc::S_IFIFO) || (ifmt == libc::S_IFSOCK) {
            self.write_filename(&filename)?;
            self.encode_special(&stat)?;
        } else {
            bail!("unsupported file type (mode {:o} {:?})", stat.st_mode, self.current_path);
        }

        let end_pos = self.writer_pos;

        goodbye_items.push(CaFormatGoodbyeItem {
            offset: start_pos as u64,
            size: (end_pos - start_pos) as u64,
            hash: compute_goodbye_hash(filename.to_bytes()),
        });

        self.current_path.pop();
    }

    //println!("encode_dir: {:?} end {}", self.current_path, self.writer_pos);

    // fixup goodbye item offsets: store them relative to the start of
    // the goodbye table instead of as absolute stream positions
    let goodbye_start = self.writer_pos as u64;
    for item in &mut goodbye_items {
        item.offset = goodbye_start - item.offset;
    }

    let goodbye_offset = self.writer_pos - dir_start_pos;

    self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?;

    //println!("encode_dir: {:?} end1 {}", self.current_path, self.writer_pos);
    Ok(())
}
fn encode_file(&mut self, filefd: RawFd, stat: &FileStat, magic: i64) -> Result<(), Error> {
//println!("encode_file: {:?}", self.current_path);
let mut entry = self.create_entry(&stat)?;
self.read_chattr(filefd, &mut entry)?;
self.read_fat_attr(filefd, magic, &mut entry)?;
self.write_entry(entry)?;
let include_payload;
if is_virtual_file_system(magic) {
include_payload = false;
} else {
include_payload = (stat.st_dev == self.root_st_dev) || self.all_file_systems;
}
if !include_payload {
eprintln!("skip content: {:?}", self.current_path);
self.write_header(CA_FORMAT_PAYLOAD, 0)?;
return Ok(());
}
let size = stat.st_size as u64;
self.write_header(CA_FORMAT_PAYLOAD, size)?;
let mut pos: u64 = 0;
loop {
let n = match nix::unistd::read(filefd, &mut self.file_copy_buffer) {
Ok(n) => n,
Err(nix::Error::Sys(Errno::EINTR)) => continue /* try again */,
Err(err) => bail!("read {:?} failed - {}", self.current_path, err),
};
if n == 0 { // EOF
if pos != size {
// Note:: casync format cannot handle that
bail!("detected shrinked file {:?} ({} < {})", self.current_path, pos, size);
}
break;
}
let mut next = pos + (n as u64);
if next > size { next = size; }
let count = (next - pos) as usize;
self.flush_copy_buffer(count)?;
pos = next;
if pos >= size { break; }
}
Ok(())
}
/// Encodes a block or character device: its ENTRY item followed by a
/// DEVICE item holding the major/minor numbers from `stat.st_rdev`.
fn encode_device(&mut self, stat: &FileStat) -> Result<(), Error> {
    let entry = self.create_entry(stat)?;
    self.write_entry(entry)?;

    let device = CaFormatDevice {
        major: unsafe { libc::major(stat.st_rdev) } as u64,
        minor: unsafe { libc::minor(stat.st_rdev) } as u64,
    };

    //println!("encode_device: {:?} {} {} {}", self.current_path, stat.st_rdev, major, minor);

    let payload_len = std::mem::size_of::<CaFormatDevice>() as u64;
    self.write_header(CA_FORMAT_DEVICE, payload_len)?;
    self.write_item(device)
}
// FIFO or Socket
/// Encodes a FIFO or socket, which consists of the ENTRY item only.
fn encode_special(&mut self, stat: &FileStat) -> Result<(), Error> {
    let entry = self.create_entry(stat)?;
    self.write_entry(entry)
}
/// Encodes a symlink: its ENTRY item followed by a SYMLINK item
/// holding `target`, which the caller passes including the
/// terminating nul byte.
fn encode_symlink(&mut self, target: &[u8], stat: &FileStat) -> Result<(), Error> {
    //println!("encode_symlink: {:?} -> {:?}", self.current_path, target);

    let entry = self.create_entry(stat)?;
    self.write_entry(entry)?;

    self.write_header(CA_FORMAT_SYMLINK, target.len() as u64)?;
    self.write(target)
}
// the report_XXX methods may raise an error - depending on encoder configuration
/// Reports a file which vanished while the archive was created.
///
/// Currently this only prints a warning to stderr and never fails.
fn report_vanished_file(&self, path: &Path) -> Result<(), Error> {
    eprintln!("WARNING: detected vanished file {:?}", path);

    Ok(())
}
}
/// Returns true if `errno` is one of the error codes which are
/// treated as "attributes not supported here" when reading file
/// attributes.
fn errno_is_unsupported(errno: Errno) -> bool {
    const UNSUPPORTED: [Errno; 5] = [
        Errno::ENOTTY,
        Errno::ENOSYS,
        Errno::EBADF,
        Errno::EOPNOTSUPP,
        Errno::EINVAL,
    ];

    UNSUPPORTED.contains(&errno)
}
fn detect_fs_type(fd: RawFd) -> Result<i64, Error> {
let mut fs_stat: libc::statfs = unsafe { std::mem::uninitialized() };
let res = unsafe { libc::fstatfs(fd, &mut fs_stat) };
Errno::result(res)?;
Ok(fs_stat.f_type)
}
use nix::{convert_ioctl_res, request_code_read, ioc};
// The following macros generate the unsafe ioctl wrappers
// `read_attr_fd(fd, &mut usize)` and `read_fat_attr_fd(fd, &mut u32)`
// used by the encoder.

// /usr/include/linux/fs.h: #define FS_IOC_GETFLAGS                 _IOR('f', 1, long)
/// read Linux file system attributes (see man chattr)
nix::ioctl_read!(read_attr_fd, b'f', 1, usize);
// /usr/include/linux/msdos_fs.h: #define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32)
// read FAT file system attributes
nix::ioctl_read!(read_fat_attr_fd, b'r', 0x10, u32);
// File system magic numbers, to be compared with the `f_type` value
// returned by `detect_fs_type()`.
// from /usr/include/linux/magic.h
// and from casync util.h
pub const BINFMTFS_MAGIC: i64 = 0x42494e4d;
pub const CGROUP2_SUPER_MAGIC: i64 = 0x63677270;
pub const CGROUP_SUPER_MAGIC: i64 = 0x0027e0eb;
pub const CONFIGFS_MAGIC: i64 = 0x62656570;
pub const DEBUGFS_MAGIC: i64 = 0x64626720;
pub const DEVPTS_SUPER_MAGIC: i64 = 0x00001cd1;
pub const EFIVARFS_MAGIC: i64 = 0xde5e81e4;
pub const FUSE_CTL_SUPER_MAGIC: i64 = 0x65735543;
pub const HUGETLBFS_MAGIC: i64 = 0x958458f6;
pub const MQUEUE_MAGIC: i64 = 0x19800202;
pub const NFSD_MAGIC: i64 = 0x6e667364;
pub const PROC_SUPER_MAGIC: i64 = 0x00009fa0;
pub const PSTOREFS_MAGIC: i64 = 0x6165676C;
pub const RPCAUTH_GSSMAGIC: i64 = 0x67596969;
pub const SECURITYFS_MAGIC: i64 = 0x73636673;
pub const SELINUX_MAGIC: i64 = 0xf97cff8c;
pub const SMACK_MAGIC: i64 = 0x43415d53;
// checked by is_temporary_file_system()
pub const RAMFS_MAGIC: i64 = 0x858458f6;
pub const TMPFS_MAGIC: i64 = 0x01021994;
pub const SYSFS_MAGIC: i64 = 0x62656572;
// file systems for which the encoder tries to read FAT attributes
pub const MSDOS_SUPER_MAGIC: i64 = 0x00004d44;
pub const FUSE_SUPER_MAGIC: i64 = 0x65735546;
#[inline(always)]
pub fn is_temporary_file_system(magic: i64) -> bool {
magic == RAMFS_MAGIC || magic == TMPFS_MAGIC
}
pub fn is_virtual_file_system(magic: i64) -> bool {
match magic {
BINFMTFS_MAGIC |
CGROUP2_SUPER_MAGIC |
CGROUP_SUPER_MAGIC |
CONFIGFS_MAGIC |
DEBUGFS_MAGIC |
DEVPTS_SUPER_MAGIC |
EFIVARFS_MAGIC |
FUSE_CTL_SUPER_MAGIC |
HUGETLBFS_MAGIC |
MQUEUE_MAGIC |
NFSD_MAGIC |
PROC_SUPER_MAGIC |
PSTOREFS_MAGIC |
RPCAUTH_GSSMAGIC |
SECURITYFS_MAGIC |
SELINUX_MAGIC |
SMACK_MAGIC |
SYSFS_MAGIC => true,
_ => false
}
}

View File

@ -0,0 +1,276 @@
//! *pxar* binary format definition
//!
//! Please note that all values are stored in little endian ordering.
//!
//! The Archive contains a list of items. Each item starts with a
//! `CaFormatHeader`, followed by the item data.
use failure::*;
use endian_trait::Endian;
use siphasher::sip::SipHasher24;
// Item type identifiers, stored in `CaFormatHeader::htype`.
/// Header type of an entry item (payload is a `CaFormatEntry`).
pub const CA_FORMAT_ENTRY: u64 = 0x1396fabcea5bbb51;
/// Header type of a file name item.
pub const CA_FORMAT_FILENAME: u64 = 0x6dbb6ebcb3161f0b;
/// Header type of a symlink target item.
pub const CA_FORMAT_SYMLINK: u64 = 0x664a6fb6830e0d6c;
/// Header type of a device item (payload is a `CaFormatDevice`).
pub const CA_FORMAT_DEVICE: u64 = 0xac3dace369dfe643;
/// Header type of a file payload item.
pub const CA_FORMAT_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9;
/// Header type of a goodbye table.
pub const CA_FORMAT_GOODBYE: u64 = 0xdfd35c5e8327c403;
/* The end marker used in the GOODBYE object */
pub const CA_FORMAT_GOODBYE_TAIL_MARKER: u64 = 0x57446fa533702943;
// Feature flags
/// restrict UIDs to 16 bit
pub const CA_FORMAT_WITH_16BIT_UIDS: u64 = 0x1;
/// assume UIDs are 32 bit
pub const CA_FORMAT_WITH_32BIT_UIDS: u64 = 0x2;
/// include user and group name
pub const CA_FORMAT_WITH_USER_NAMES: u64 = 0x4;
// NOTE(review): the three `*_TIME` flags below presumably select
// second/microsecond/nanosecond time granularity (compare the 2s flag
// further down) - confirm against casync.
pub const CA_FORMAT_WITH_SEC_TIME: u64 = 0x8;
pub const CA_FORMAT_WITH_USEC_TIME: u64 = 0x10;
pub const CA_FORMAT_WITH_NSEC_TIME: u64 = 0x20;
/// FAT-style 2s time granularity
pub const CA_FORMAT_WITH_2SEC_TIME: u64 = 0x40;
pub const CA_FORMAT_WITH_READ_ONLY: u64 = 0x80;
pub const CA_FORMAT_WITH_PERMISSIONS: u64 = 0x100;
/// include symbolic links
pub const CA_FORMAT_WITH_SYMLINKS: u64 = 0x200;
/// include device nodes
pub const CA_FORMAT_WITH_DEVICE_NODES: u64 = 0x400;
/// include FIFOs
pub const CA_FORMAT_WITH_FIFOS: u64 = 0x800;
/// include Sockets
pub const CA_FORMAT_WITH_SOCKETS: u64 = 0x1000;
/// DOS file flag `HIDDEN`
pub const CA_FORMAT_WITH_FLAG_HIDDEN: u64 = 0x2000;
/// DOS file flag `SYSTEM`
pub const CA_FORMAT_WITH_FLAG_SYSTEM: u64 = 0x4000;
/// DOS file flag `ARCHIVE`
pub const CA_FORMAT_WITH_FLAG_ARCHIVE: u64 = 0x8000;
// chattr() flags
/// Linux file attribute `APPEND`
pub const CA_FORMAT_WITH_FLAG_APPEND: u64 = 0x10000;
/// Linux file attribute `NOATIME`
pub const CA_FORMAT_WITH_FLAG_NOATIME: u64 = 0x20000;
/// Linux file attribute `COMPR`
pub const CA_FORMAT_WITH_FLAG_COMPR: u64 = 0x40000;
/// Linux file attribute `NOCOW`
pub const CA_FORMAT_WITH_FLAG_NOCOW: u64 = 0x80000;
/// Linux file attribute `NODUMP`
pub const CA_FORMAT_WITH_FLAG_NODUMP: u64 = 0x100000;
/// Linux file attribute `DIRSYNC`
pub const CA_FORMAT_WITH_FLAG_DIRSYNC: u64 = 0x200000;
/// Linux file attribute `IMMUTABLE`
pub const CA_FORMAT_WITH_FLAG_IMMUTABLE: u64 = 0x400000;
/// Linux file attribute `SYNC`
pub const CA_FORMAT_WITH_FLAG_SYNC: u64 = 0x800000;
/// Linux file attribute `NOCOMP`
pub const CA_FORMAT_WITH_FLAG_NOCOMP: u64 = 0x1000000;
/// Linux file attribute `PROJINHERIT`
pub const CA_FORMAT_WITH_FLAG_PROJINHERIT: u64 = 0x2000000;
/// Include BTRFS subvolume flag
pub const CA_FORMAT_WITH_SUBVOLUME: u64 = 0x4000000;
/// Include BTRFS read-only subvolume flag
pub const CA_FORMAT_WITH_SUBVOLUME_RO: u64 = 0x8000000;
/// Include Extended Attribute metadata
pub const CA_FORMAT_WITH_XATTRS: u64 = 0x10000000;
/// Include Access Control List metadata
pub const CA_FORMAT_WITH_ACL: u64 = 0x20000000;
/// Include SELinux security context
pub const CA_FORMAT_WITH_SELINUX: u64 = 0x40000000;
/// Include "security.capability" xattr
pub const CA_FORMAT_WITH_FCAPS: u64 = 0x80000000;
/// XFS/ext4 project quota ID
pub const CA_FORMAT_WITH_QUOTA_PROJID: u64 = 0x100000000;
/// Support ".caexclude" files
pub const CA_FORMAT_EXCLUDE_FILE: u64 = 0x1000000000000000;
/// the purpose of this flag is still unclear
pub const CA_FORMAT_SHA512_256: u64 = 0x2000000000000000;
/// Exclude submounts
pub const CA_FORMAT_EXCLUDE_SUBMOUNTS: u64 = 0x4000000000000000;
/// Exclude entries with chattr flag NODUMP
pub const CA_FORMAT_EXCLUDE_NODUMP: u64 = 0x8000000000000000;
/// Default feature flag set: the bitwise OR of the flags listed below.
///
/// `CA_FORMAT_WITH_FLAG_NODUMP` is commented out here, while
/// `CA_FORMAT_EXCLUDE_NODUMP` is part of the set. The 16 bit UID flag, the
/// sec/usec/2sec time flags, `CA_FORMAT_WITH_READ_ONLY`,
/// `CA_FORMAT_WITH_PERMISSIONS` and `CA_FORMAT_EXCLUDE_SUBMOUNTS` are not
/// included.
pub const CA_FORMAT_DEFAULT: u64 =
CA_FORMAT_WITH_32BIT_UIDS |
CA_FORMAT_WITH_USER_NAMES |
CA_FORMAT_WITH_NSEC_TIME|
CA_FORMAT_WITH_SYMLINKS|
CA_FORMAT_WITH_DEVICE_NODES|
CA_FORMAT_WITH_FIFOS|
CA_FORMAT_WITH_SOCKETS|
CA_FORMAT_WITH_FLAG_HIDDEN|
CA_FORMAT_WITH_FLAG_SYSTEM|
CA_FORMAT_WITH_FLAG_ARCHIVE|
CA_FORMAT_WITH_FLAG_APPEND|
CA_FORMAT_WITH_FLAG_NOATIME|
CA_FORMAT_WITH_FLAG_COMPR|
CA_FORMAT_WITH_FLAG_NOCOW|
//CA_FORMAT_WITH_FLAG_NODUMP|
CA_FORMAT_WITH_FLAG_DIRSYNC|
CA_FORMAT_WITH_FLAG_IMMUTABLE|
CA_FORMAT_WITH_FLAG_SYNC|
CA_FORMAT_WITH_FLAG_NOCOMP|
CA_FORMAT_WITH_FLAG_PROJINHERIT|
CA_FORMAT_WITH_SUBVOLUME|
CA_FORMAT_WITH_SUBVOLUME_RO|
CA_FORMAT_WITH_XATTRS|
CA_FORMAT_WITH_ACL|
CA_FORMAT_WITH_SELINUX|
CA_FORMAT_WITH_FCAPS|
CA_FORMAT_WITH_QUOTA_PROJID |
CA_FORMAT_EXCLUDE_NODUMP|
CA_FORMAT_EXCLUDE_FILE|
CA_FORMAT_SHA512_256;
/// Header that starts every archive item.
///
/// Both fields are `u64`; the module documentation states that all values
/// are stored in little endian ordering.
#[derive(Endian)]
#[repr(C)]
pub struct CaFormatHeader {
/// The size of the item, including the size of `CaFormatHeader`.
pub size: u64,
/// The item type (see `CA_FORMAT_` constants).
pub htype: u64,
}
/// Payload of a `CA_FORMAT_ENTRY` item: the basic metadata of one archive
/// entry.
#[derive(Endian)]
#[repr(C)]
pub struct CaFormatEntry {
/// Feature flags (see the `CA_FORMAT_WITH_*` constants).
pub feature_flags: u64,
/// File mode. The decoder masks the low 32 bits with `S_IFMT` for the
/// file type and with `0o7777` for the permission bits.
pub mode: u64,
// NOTE(review): presumably file attribute flags; not interpreted by the
// code visible here - confirm.
pub flags: u64,
/// Owner user id (truncated to `u32` by the decoder when restoring).
pub uid: u64,
/// Owner group id (truncated to `u32` by the decoder when restoring).
pub gid: u64,
/// Modification time; the decoder interprets it as a nanosecond count.
pub mtime: u64,
}
/// Payload of a `CA_FORMAT_DEVICE` item: the device numbers of a block or
/// character device node.
#[derive(Endian)]
#[repr(C)]
pub struct CaFormatDevice {
/// Major device number.
pub major: u64,
/// Minor device number.
pub minor: u64,
}
/// One record of a GOODBYE table (the table that ends a directory).
#[derive(Endian)]
#[repr(C)]
pub struct CaFormatGoodbyeItem {
/// The offset from the start of the GOODBYE object to the start
/// of the matching directory item (point to a FILENAME). The last
/// GOODBYE item points to the start of the matching ENTRY
/// object.
pub offset: u64,
/// The overall size of the directory item. The last GOODBYE item
/// repeats the size of the GOODBYE item.
pub size: u64,
/// SipHash24 of the directory item name. The last GOODBYE item
/// uses the special hash value `CA_FORMAT_GOODBYE_TAIL_MARKER`.
pub hash: u64,
}
/// Convert a raw name buffer from the archive into an `OsString`.
///
/// If the last byte of `buffer` is a nul byte, that single byte is dropped;
/// all other bytes (including any further nul bytes) are kept unchanged.
pub fn read_os_string(buffer: &[u8]) -> std::ffi::OsString {
    use std::os::unix::ffi::OsStrExt;

    // Strip exactly one trailing nul terminator, if there is one.
    let bytes = match buffer.split_last() {
        Some((&0, head)) => head,
        _ => buffer,
    };

    std::ffi::OsStr::from_bytes(bytes).to_os_string()
}
/// Compute the SipHash-2-4 value of `name` as used in goodbye tables.
///
/// The hasher is seeded with two fixed keys, so equal names always produce
/// equal hashes.
pub fn compute_goodbye_hash(name: &[u8]) -> u64 {
    use std::hash::Hasher;

    // Fixed hash keys.
    const KEY0: u64 = 0x8574442b0f1d84b3;
    const KEY1: u64 = 0x2736ed30d1c22ec1;

    let mut sip = SipHasher24::new_with_keys(KEY0, KEY1);
    sip.write(name);
    sip.finish()
}
/// Verify that `head` announces an item of type `htype` whose payload is
/// exactly one `T`.
///
/// # Errors
///
/// Fails if `head.htype` differs from `htype`, or if `head.size` is not
/// equal to `size_of::<T>() + size_of::<CaFormatHeader>()`.
pub fn check_ca_header<T>(head: &CaFormatHeader, htype: u64) -> Result<(), Error> {
    if head.htype != htype {
        bail!("got wrong header type ({:016x} != {:016x})", head.htype, htype);
    }

    // The item size always includes the header itself.
    let expected_size = (std::mem::size_of::<T>() + std::mem::size_of::<CaFormatHeader>()) as u64;
    if head.size != expected_size {
        bail!("got wrong header size for type {:016x}", htype);
    }

    Ok(())
}
// from /usr/include/linux/fs.h
const FS_APPEND_FL: u32 = 0x00000020;
const FS_NOATIME_FL: u32 = 0x00000080;
const FS_COMPR_FL: u32 = 0x00000004;
const FS_NOCOW_FL: u32 = 0x00800000;
const FS_NODUMP_FL: u32 = 0x00000040;
const FS_DIRSYNC_FL: u32 = 0x00010000;
const FS_IMMUTABLE_FL: u32 = 0x00000010;
const FS_SYNC_FL: u32 = 0x00000008;
const FS_NOCOMP_FL: u32 = 0x00000400;
const FS_PROJINHERIT_FL: u32 = 0x20000000;
// Pairs of (archive feature flag, `FS_*_FL` attribute bit), used by
// `ca_feature_flags_from_chattr` to translate attribute bits into feature
// flags.
static CHATTR_MAP: [(u64, u32); 10] = [
( CA_FORMAT_WITH_FLAG_APPEND, FS_APPEND_FL ),
( CA_FORMAT_WITH_FLAG_NOATIME, FS_NOATIME_FL ),
( CA_FORMAT_WITH_FLAG_COMPR, FS_COMPR_FL ),
( CA_FORMAT_WITH_FLAG_NOCOW, FS_NOCOW_FL ),
( CA_FORMAT_WITH_FLAG_NODUMP, FS_NODUMP_FL ),
( CA_FORMAT_WITH_FLAG_DIRSYNC, FS_DIRSYNC_FL ),
( CA_FORMAT_WITH_FLAG_IMMUTABLE, FS_IMMUTABLE_FL ),
( CA_FORMAT_WITH_FLAG_SYNC, FS_SYNC_FL ),
( CA_FORMAT_WITH_FLAG_NOCOMP, FS_NOCOMP_FL ),
( CA_FORMAT_WITH_FLAG_PROJINHERIT, FS_PROJINHERIT_FL ),
];
/// Translate an attribute bit mask (`FS_*_FL` bits) into the corresponding
/// archive feature flags.
///
/// Every `CHATTR_MAP` pair whose attribute bit is set in `attr` contributes
/// its feature flag to the result; bits without a mapping are ignored.
pub fn ca_feature_flags_from_chattr(attr: u32) -> u64 {
    CHATTR_MAP
        .iter()
        .filter(|(_, fs_flag)| (attr & fs_flag) != 0)
        .fold(0u64, |flags, (ca_flag, _)| flags | ca_flag)
}
// from /usr/include/linux/msdos_fs.h
const ATTR_HIDDEN: u32 = 2;
const ATTR_SYS: u32 = 4;
const ATTR_ARCH: u32 = 32;
// Pairs of (archive feature flag, FAT attribute bit), used by
// `ca_feature_flags_from_fat_attr` to translate attribute bits into feature
// flags.
static FAT_ATTR_MAP: [(u64, u32); 3] = [
( CA_FORMAT_WITH_FLAG_HIDDEN, ATTR_HIDDEN ),
( CA_FORMAT_WITH_FLAG_SYSTEM, ATTR_SYS ),
( CA_FORMAT_WITH_FLAG_ARCHIVE, ATTR_ARCH ),
];
/// Translate a FAT attribute bit mask into the corresponding archive
/// feature flags.
///
/// Every `FAT_ATTR_MAP` pair whose attribute bit is set in `attr`
/// contributes its feature flag to the result; other bits are ignored.
pub fn ca_feature_flags_from_fat_attr(attr: u32) -> u64 {
    let mut result = 0u64;
    let matching = FAT_ATTR_MAP
        .iter()
        .filter(|(_, fat_bit)| (attr & fat_bit) != 0);
    for (feature, _) in matching {
        result |= feature;
    }
    result
}

656
src/pxar/inspector.rs Normal file
View File

@ -0,0 +1,656 @@
//! *pxar* format decoder.
//!
//! This module contains the code to decode *pxar* archive files.
use failure::*;
use endian_trait::Endian;
use super::format_definition::*;
use crate::tools;
use std::io::{Read, Write, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use std::os::unix::io::AsRawFd;
use std::os::unix::io::RawFd;
use std::os::unix::io::FromRawFd;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::ffi::{OsStr, OsString};
use nix::fcntl::OFlag;
use nix::sys::stat::Mode;
use nix::errno::Errno;
use nix::NixPath;
/// A directory entry located inside the archive.
///
/// Values are produced by `PxarDecoder::root` and `PxarDecoder::list_dir`.
pub struct CaDirectoryEntry {
// Archive offset where the item's ENTRY header starts (0 for the root).
start: u64,
// Archive offset where the item ends (the archive size for the root).
end: u64,
/// File name of the entry (empty for the root entry).
pub filename: OsString,
/// Metadata read from the item's ENTRY record (all zero for the root
/// entry returned by `PxarDecoder::root`).
pub entry: CaFormatEntry,
}
/// Decoder for *pxar* archives, working on a borrowed seekable reader.
// This one needs Read+Seek (we may want one without Seek?)
pub struct PxarDecoder<'a, R: Read + Seek> {
// Source of the archive data.
reader: &'a mut R,
// Start offset of the root object; `new` sets this to 0.
root_start: u64,
// End offset of the root object; `new` sets this to the position
// returned by seeking to the end of `reader`.
root_end: u64,
}
// Size in bytes of a `CaFormatHeader`, as `u64` so it can be compared with
// the item sizes stored in the archive.
const HEADER_SIZE: u64 = std::mem::size_of::<CaFormatHeader>() as u64;
impl <'a, R: Read + Seek> PxarDecoder<'a, R> {
/// Create a decoder for the archive provided by `reader`.
///
/// The archive size is determined by seeking to the end of `reader`, so the
/// reader is left positioned at its end.
///
/// # Errors
///
/// Fails if the seek fails.
pub fn new(reader: &'a mut R) -> Result<Self, Error> {
    let archive_size = reader.seek(SeekFrom::End(0))?;

    let decoder = Self {
        reader,
        root_start: 0,
        root_end: archive_size,
    };

    Ok(decoder)
}
/// Return the entry describing the archive root.
///
/// The returned entry spans the whole archive, has an empty file name and
/// an all-zero `CaFormatEntry` (nothing is read from the archive here).
pub fn root(&self) -> CaDirectoryEntry {
    let blank_entry = CaFormatEntry {
        feature_flags: 0,
        mode: 0,
        flags: 0,
        uid: 0,
        gid: 0,
        mtime: 0,
    };

    CaDirectoryEntry {
        start: self.root_start,
        end: self.root_end,
        filename: OsString::new(), // Empty
        entry: blank_entry,
    }
}
fn read_item<T: Endian>(&mut self) -> Result<T, Error> {
let mut result: T = unsafe { std::mem::uninitialized() };
let buffer = unsafe { std::slice::from_raw_parts_mut(
&mut result as *mut T as *mut u8,
std::mem::size_of::<T>()
)};
self.reader.read_exact(buffer)?;
Ok(result.from_le())
}
/// Read the target of a SYMLINK item whose header has already been read.
///
/// `size` is the item size from the header (header included). The target
/// is stored nul terminated; the terminator is not part of the result.
///
/// # Errors
///
/// Fails if the target is empty, longer than `PATH_MAX`, not nul
/// terminated, or if the reader runs out of data.
fn read_symlink(&mut self, size: u64) -> Result<PathBuf, Error> {
    // At least one character plus the nul terminator is required.
    if size < (HEADER_SIZE + 2) {
        bail!("detected short symlink target.");
    }

    let target_len = size - HEADER_SIZE;
    if target_len > (libc::PATH_MAX as u64) {
        bail!("symlink target too long ({}).", target_len);
    }

    let mut buffer = vec![0u8; target_len as usize];
    self.reader.read_exact(&mut buffer)?;

    // Strip and verify the terminator.
    if buffer.pop() != Some(0u8) {
        bail!("symlink target not nul terminated.");
    }

    Ok(PathBuf::from(std::ffi::OsString::from_vec(buffer)))
}
/// Read the name of a FILENAME item whose header has already been read.
///
/// `size` is the item size from the header (header included). The name is
/// stored nul terminated; the terminator is not part of the result.
///
/// # Errors
///
/// Fails if the name is empty, longer than `FILENAME_MAX + 1` bytes, not
/// nul terminated, contains a `/`, or if the reader runs out of data.
fn read_filename(&mut self, size: u64) -> Result<OsString, Error> {
    // At least one character plus the nul terminator is required.
    if size < (HEADER_SIZE + 2) {
        bail!("detected short filename");
    }

    let name_len = size - HEADER_SIZE;
    if name_len > ((libc::FILENAME_MAX as u64) + 1) {
        bail!("filename too long ({}).", name_len);
    }

    let mut buffer = vec![0u8; name_len as usize];
    self.reader.read_exact(&mut buffer)?;

    // Strip and verify the terminator.
    if buffer.pop() != Some(0u8) {
        bail!("filename entry not nul terminated.");
    }

    // A single path component must never contain a separator.
    if buffer.contains(&b'/') {
        bail!("found invalid filename with slashes.");
    }

    Ok(std::ffi::OsString::from_vec(buffer))
}
/// Restore the archive item starting at `dir.start` into the directory
/// `restoretest` below the current working directory.
///
/// `callback` is handed on to `restore_sequential`.
///
/// # Errors
///
/// Fails if seeking fails, the base directory `.` cannot be opened, or
/// `restore_sequential` fails.
pub fn restore<F: Fn(&Path) -> Result<(), Error>>(
    &mut self,
    dir: &CaDirectoryEntry,
    callback: F,
) -> Result<(), Error> {
    // Position the reader on the first item to restore.
    self.reader.seek(SeekFrom::Start(dir.start))?;

    let mut target = PathBuf::from(".");

    let base_dir = match nix::dir::Dir::open(
        &target,
        nix::fcntl::OFlag::O_DIRECTORY,
        nix::sys::stat::Mode::empty(),
    ) {
        Err(err) => bail!("unable to open base directory - {}", err),
        Ok(handle) => handle,
    };

    let restore_name = OsString::from("restoretest");
    target.push(&restore_name);

    self.restore_sequential(&mut target, &restore_name, &base_dir, &callback)
}
/// Read the header that follows an ENTRY record and return it.
///
/// fixme: attribute items are not implemented yet, so the very first
/// header is returned unconditionally and `_entry` is unused.
fn restore_attributes(&mut self, _entry: &CaFormatEntry) -> Result<CaFormatHeader, Error> {
    let next_head: CaFormatHeader = self.read_item()?;
    Ok(next_head)
}
/// Apply the permission bits (`entry.mode & 0o7777`) to the open file `fd`.
fn restore_mode(&mut self, entry: &CaFormatEntry, fd: RawFd) -> Result<(), Error> {
    let permission_bits = (entry.mode as u32) & 0o7777;
    nix::sys::stat::fchmod(fd, Mode::from_bits_truncate(permission_bits))?;

    Ok(())
}
/// Apply the permission bits (`entry.mode & 0o7777`) to `filename` inside
/// the directory `dirfd`.
fn restore_mode_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let permission_bits = (entry.mode as u32) & 0o7777;

    // NOTE: we want :FchmodatFlags::NoFollowSymlink, but fchmodat does not support that
    // on linux (see man fchmodat). Fortunately, we can simply avoid calling this on symlinks.
    let follow = nix::sys::stat::FchmodatFlags::FollowSymlink;
    nix::sys::stat::fchmodat(
        Some(dirfd),
        filename,
        Mode::from_bits_truncate(permission_bits),
        follow,
    )?;

    Ok(())
}
/// Set owner and group of the open file `fd` to `entry.uid` / `entry.gid`
/// (both truncated to 32 bit).
fn restore_ugid(&mut self, entry: &CaFormatEntry, fd: RawFd) -> Result<(), Error> {
    let (owner, group) = (entry.uid as u32, entry.gid as u32);

    let rc = unsafe { libc::fchown(fd, owner, group) };
    Errno::result(rc)?;

    Ok(())
}
/// Set owner and group of `filename` inside the directory `dirfd` to
/// `entry.uid` / `entry.gid` (both truncated to 32 bit), without following
/// a symlink at `filename`.
fn restore_ugid_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let (owner, group) = (entry.uid as u32, entry.gid as u32);

    let rc = filename.with_nix_path(|name| unsafe {
        libc::fchownat(dirfd, name.as_ptr(), owner, group, libc::AT_SYMLINK_NOFOLLOW)
    })?;
    Errno::result(rc)?;

    Ok(())
}
/// Set the modification time of the open file `fd` from `entry.mtime`.
///
/// The timestamps come from `nsec_to_update_timespec`.
fn restore_mtime(&mut self, entry: &CaFormatEntry, fd: RawFd) -> Result<(), Error> {
    let timestamps = nsec_to_update_timespec(entry.mtime);

    // futimens() expects a pointer to an array of two timespec values.
    let rc = unsafe { libc::futimens(fd, timestamps.as_ptr()) };
    Errno::result(rc)?;

    Ok(())
}
/// Set the modification time of `filename` inside the directory `dirfd`
/// from `entry.mtime`, without following a symlink at `filename`.
///
/// The timestamps come from `nsec_to_update_timespec`.
fn restore_mtime_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let timestamps = nsec_to_update_timespec(entry.mtime);

    // utimensat() expects a pointer to an array of two timespec values.
    let rc = filename.with_nix_path(|name| unsafe {
        libc::utimensat(dirfd, name.as_ptr(), timestamps.as_ptr(), libc::AT_SYMLINK_NOFOLLOW)
    })?;
    Errno::result(rc)?;

    Ok(())
}
/// Create the device node `filename` inside the directory `dirfd`.
///
/// The node type is taken from the `S_IFMT` bits of `entry.mode`, the
/// device number from `device`; the node is created with mode `0600`.
fn restore_device_at(&mut self, entry: &CaFormatEntry, dirfd: RawFd, filename: &OsStr, device: &CaFormatDevice) -> Result<(), Error> {
    let node_type = (entry.mode as u32) & libc::S_IFMT;
    let node_mode = node_type | 0o0600;
    let device_number = nix::sys::stat::makedev(device.major, device.minor);

    let rc = filename.with_nix_path(|name| unsafe {
        libc::mknodat(dirfd, name.as_ptr(), node_mode, device_number)
    })?;
    Errno::result(rc)?;

    Ok(())
}
/// Create the socket node `filename` (mode `0600`) inside the directory
/// `dirfd`.
fn restore_socket_at(&mut self, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let node_mode = libc::S_IFSOCK | 0o0600;

    let rc = filename.with_nix_path(|name| unsafe {
        libc::mknodat(dirfd, name.as_ptr(), node_mode, 0)
    })?;
    Errno::result(rc)?;

    Ok(())
}
/// Create the FIFO `filename` inside the directory `dirfd`, passing
/// `S_IFIFO | 0600` as mode to `mkfifoat`.
fn restore_fifo_at(&mut self, dirfd: RawFd, filename: &OsStr) -> Result<(), Error> {
    let fifo_mode = libc::S_IFIFO | 0o0600;

    let rc = filename.with_nix_path(|name| unsafe {
        libc::mkfifoat(dirfd, name.as_ptr(), fifo_mode)
    })?;
    Errno::result(rc)?;

    Ok(())
}
/// Restore the archive item at the current reader position as `filename`
/// inside the directory `parent`, reading the archive strictly
/// sequentially.
///
/// The item must start with an ENTRY record. Depending on the file type in
/// `entry.mode` this creates a directory (and recursively restores its
/// content up to the GOODBYE table), a symlink, a socket, a FIFO, a device
/// node or a regular file.
///
/// Ownership is always restored *before* the permission bits, because
/// changing the owner of a file can clear its set-user-ID and set-group-ID
/// bits. Regular files are created with mode `0600` and only get their final
/// mode after the content is written and the owner is set.
///
/// * `path` - full path of the item, used for error reporting.
/// * `filename` - name to create below `parent` (last component of `path`).
/// * `parent` - open directory in which the item is created.
/// * `callback` - currently only handed down to recursive calls.
///
/// # Errors
///
/// Fails on malformed archive data, on read errors and when creating the
/// item or applying its metadata fails.
///
/// NOTE(review): an entry with an unknown file type is silently ignored
/// (`Ok(())`) without consuming any further data.
pub fn restore_sequential<F: Fn(&Path) -> Result<(), Error>>(
    &mut self,
    path: &mut PathBuf, // used for error reporting
    filename: &OsStr,   // repeats path last component
    parent: &nix::dir::Dir,
    callback: &F,
) -> Result<(), Error> {
    let parent_fd = parent.as_raw_fd();

    // read ENTRY first
    let head: CaFormatHeader = self.read_item()?;
    check_ca_header::<CaFormatEntry>(&head, CA_FORMAT_ENTRY)?;
    let entry: CaFormatEntry = self.read_item()?;

    let mode = entry.mode as u32; //fixme: upper 32bits?

    let ifmt = mode & libc::S_IFMT;

    if ifmt == libc::S_IFDIR {
        let dir = match dir_mkdirat(parent_fd, filename) {
            Ok(dir) => dir,
            Err(err) => bail!("unable to open directory {:?} - {}", path, err),
        };

        let mut head = self.restore_attributes(&entry)?;

        // Each FILENAME item is followed by the item it names.
        while head.htype == CA_FORMAT_FILENAME {
            let name = self.read_filename(head.size)?;
            path.push(&name);
            println!("NAME: {:?}", path);

            self.restore_sequential(path, &name, &dir, callback)?;
            path.pop();

            head = self.read_item()?;
        }

        if head.htype != CA_FORMAT_GOODBYE {
            bail!("got unknown header type inside directory entry {:016x}", head.htype);
        }

        println!("Skip Goodbye");
        if head.size < HEADER_SIZE { bail!("detected short goodbye table"); }

        // Skip the goodbye table by reading it (no seek, to stay
        // sequential). The counter stays `u64`, so large sizes are not
        // truncated on 32 bit targets.
        let mut skip_buffer = vec![0u8; 64*1024];
        let mut remaining = head.size - HEADER_SIZE;
        while remaining > 0 {
            let n = std::cmp::min(remaining, skip_buffer.len() as u64) as usize;
            self.reader.read_exact(&mut skip_buffer[..n])?;
            remaining -= n as u64;
        }

        // Owner first: chown may clear setuid/setgid bits.
        self.restore_ugid(&entry, dir.as_raw_fd())?;
        self.restore_mode(&entry, dir.as_raw_fd())?;
        self.restore_mtime(&entry, dir.as_raw_fd())?;

        return Ok(());
    }

    if ifmt == libc::S_IFLNK {
        //fixme: restore permission, acls, xattr, ...

        let head: CaFormatHeader = self.read_item()?;
        match head.htype {
            CA_FORMAT_SYMLINK => {
                let target = self.read_symlink(head.size)?;
                println!("TARGET: {:?}", target);
                if let Err(err) = symlinkat(&target, parent_fd, filename) {
                    bail!("create symlink {:?} failed - {}", path, err);
                }
            }
            _ => {
                bail!("got unknown header type inside symlink entry {:016x}", head.htype);
            }
        }

        // self.restore_mode_at(&entry, parent_fd, filename)?; //not supported on symlinks
        self.restore_ugid_at(&entry, parent_fd, filename)?;
        self.restore_mtime_at(&entry, parent_fd, filename)?;

        return Ok(());
    }

    if ifmt == libc::S_IFSOCK {
        self.restore_socket_at(parent_fd, filename)?;

        // Owner first: chown may clear setuid/setgid bits.
        self.restore_ugid_at(&entry, parent_fd, filename)?;
        self.restore_mode_at(&entry, parent_fd, filename)?;
        self.restore_mtime_at(&entry, parent_fd, filename)?;

        return Ok(());
    }

    if ifmt == libc::S_IFIFO {
        self.restore_fifo_at(parent_fd, filename)?;

        // Owner first: chown may clear setuid/setgid bits.
        self.restore_ugid_at(&entry, parent_fd, filename)?;
        self.restore_mode_at(&entry, parent_fd, filename)?;
        self.restore_mtime_at(&entry, parent_fd, filename)?;

        return Ok(());
    }

    if (ifmt == libc::S_IFBLK) || (ifmt == libc::S_IFCHR) {
        let head: CaFormatHeader = self.read_item()?;
        match head.htype {
            CA_FORMAT_DEVICE => {
                let device: CaFormatDevice = self.read_item()?;
                self.restore_device_at(&entry, parent_fd, filename, &device)?;
            }
            _ => {
                bail!("got unknown header type inside device entry {:016x}", head.htype);
            }
        }

        // Owner first: chown may clear setuid/setgid bits.
        self.restore_ugid_at(&entry, parent_fd, filename)?;
        self.restore_mode_at(&entry, parent_fd, filename)?;
        self.restore_mtime_at(&entry, parent_fd, filename)?;

        return Ok(());
    }

    if ifmt == libc::S_IFREG {
        let flags = OFlag::O_CREAT|OFlag::O_WRONLY|OFlag::O_EXCL;
        // Create the file private to the restoring user. The final mode
        // (which may include setuid/setgid bits) is applied below, after the
        // content is complete and the owner has been set.
        let open_mode = Mode::from_bits_truncate(0o0600);

        let mut file = match file_openat(parent_fd, filename, flags, open_mode) {
            Ok(file) => file,
            Err(err) => bail!("open file {:?} failed - {}", path, err),
        };

        let head = self.restore_attributes(&entry)?;

        if head.htype != CA_FORMAT_PAYLOAD {
            bail!("got unknown header type for file entry {:016x}", head.htype);
        }

        if head.size < HEADER_SIZE {
            bail!("detected short payload");
        }

        // Copy the payload in chunks. The buffer lives on the heap: a
        // 64 KiB stack array would be part of every frame of this recursive
        // function.
        let mut read_buffer = vec![0u8; 64*1024];
        let mut remaining = head.size - HEADER_SIZE;
        while remaining > 0 {
            let n = std::cmp::min(remaining, read_buffer.len() as u64) as usize;
            let data = &mut read_buffer[..n];
            self.reader.read_exact(data)?;
            file.write_all(data)?;
            remaining -= n as u64;
        }

        // Owner first: chown may clear setuid/setgid bits.
        self.restore_ugid(&entry, file.as_raw_fd())?;
        self.restore_mode(&entry, file.as_raw_fd())?;
        self.restore_mtime(&entry, file.as_raw_fd())?;

        return Ok(());
    }

    Ok(())
}
/// Read the FILENAME item at offset `start` and the ENTRY record that
/// follows it.
///
/// `start..end` is the range of the whole directory item. The returned
/// entry starts right behind the FILENAME item and ends at `end`.
///
/// # Errors
///
/// Fails if the item at `start` is not a FILENAME item, if the name is
/// invalid, if the next item is not a well-formed ENTRY, or on read/seek
/// errors.
fn read_directory_entry(&mut self, start: u64, end: u64) -> Result<CaDirectoryEntry, Error> {
    self.reader.seek(SeekFrom::Start(start))?;

    let mut buffer = [0u8; HEADER_SIZE as usize];
    self.reader.read_exact(&mut buffer)?;
    // `map_struct` exposes the raw bytes, so convert each field explicitly.
    let head = tools::map_struct::<CaFormatHeader>(&buffer)?;

    if u64::from_le(head.htype) != CA_FORMAT_FILENAME {
        bail!("wrong filename header type for object [{}..{}]", start, end);
    }

    // Size of the whole FILENAME item (header included).
    let name_len = u64::from_le(head.size);
    let entry_start = start + name_len;

    let filename = self.read_filename(name_len)?;

    let head: CaFormatHeader = self.read_item()?;
    check_ca_header::<CaFormatEntry>(&head, CA_FORMAT_ENTRY)?;

    // `read_item` already converted all fields to host byte order. Applying
    // `from_le` a second time would swap them back on big endian hosts.
    let entry: CaFormatEntry = self.read_item()?;

    Ok(CaDirectoryEntry {
        start: entry_start,
        end: end,
        filename: filename,
        entry: entry,
    })
}
/// List the entries of the directory `dir` using its goodbye table.
///
/// The goodbye table is located via the tail marker stored in the last
/// `CaFormatGoodbyeItem` before `dir.end`. Every other table item yields the
/// range of one directory item, which is then read with
/// `read_directory_entry`. Entries are returned in goodbye table order.
///
/// # Errors
///
/// Fails if the goodbye table is missing, malformed or refers to data
/// outside of the directory object, and on read/seek errors. All sizes and
/// offsets come from the archive and are range checked before use.
pub fn list_dir(&mut self, dir: &CaDirectoryEntry) -> Result<Vec<CaDirectoryEntry>, Error> {
    const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<CaFormatGoodbyeItem>() as u64;

    let start = dir.start;
    let end = dir.end;

    //println!("list_dir1: {} {}", start, end);

    // `end < start` must not underflow the subtraction.
    if end < start || (end - start) < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
        bail!("detected short object [{}..{}]", start, end);
    }

    // The tail marker is the very last item of the object.
    self.reader.seek(SeekFrom::Start(end - GOODBYE_ITEM_SIZE))?;

    let mut buffer = [0u8; GOODBYE_ITEM_SIZE as usize];
    self.reader.read_exact(&mut buffer)?;

    let item = tools::map_struct::<CaFormatGoodbyeItem>(&buffer)?;

    if u64::from_le(item.hash) != CA_FORMAT_GOODBYE_TAIL_MARKER {
        bail!("missing goodbye tail marker for object [{}..{}]", start, end);
    }

    let goodbye_table_size = u64::from_le(item.size);
    if goodbye_table_size < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
        bail!("short goodbye table size for object [{}..{}]", start, end);
    }
    // The table has to fit into the object, otherwise the subtractions
    // below would underflow.
    if goodbye_table_size > (end - start) {
        bail!("goodbye table size out of range for object [{}..{}]", start, end);
    }

    let goodbye_inner_size = goodbye_table_size - HEADER_SIZE - GOODBYE_ITEM_SIZE;
    if (goodbye_inner_size % GOODBYE_ITEM_SIZE) != 0 {
        bail!("wrong goodbye inner table size for entry [{}..{}]", start, end);
    }

    let goodbye_start = end - goodbye_table_size;

    if u64::from_le(item.offset) != (goodbye_start - start) {
        println!("DEBUG: {} {}", u64::from_le(item.offset), goodbye_start - start);
        bail!("wrong offset in goodbye tail marker for entry [{}..{}]", start, end);
    }

    self.reader.seek(SeekFrom::Start(goodbye_start))?;
    let mut buffer = [0u8; HEADER_SIZE as usize];
    self.reader.read_exact(&mut buffer)?;
    let head = tools::map_struct::<CaFormatHeader>(&buffer)?;

    if u64::from_le(head.htype) != CA_FORMAT_GOODBYE {
        bail!("wrong goodbye table header type for entry [{}..{}]", start, end);
    }

    if u64::from_le(head.size) != goodbye_table_size {
        bail!("wrong goodbye table size for entry [{}..{}]", start, end);
    }

    let mut buffer = [0u8; GOODBYE_ITEM_SIZE as usize];

    // First collect all ranges: reading the entries moves the reader.
    let mut range_list = Vec::new();

    for i in 0..goodbye_inner_size/GOODBYE_ITEM_SIZE {
        self.reader.read_exact(&mut buffer)?;
        let item = tools::map_struct::<CaFormatGoodbyeItem>(&buffer)?;
        let item_offset = u64::from_le(item.offset);
        if item_offset > (goodbye_start - start) {
            bail!("goodbye entry {} offset out of range [{}..{}] {} {} {}",
                  i, start, end, item_offset, goodbye_start, start);
        }
        let item_start = goodbye_start - item_offset;
        let item_end = match item_start.checked_add(u64::from_le(item.size)) {
            Some(item_end) if item_end <= goodbye_start => item_end,
            _ => bail!("goodbye entry {} end out of range [{}..{}]",
                       i, start, end),
        };

        range_list.push((item_start, item_end));
    }

    let mut result = Vec::with_capacity(range_list.len());

    for (item_start, item_end) in range_list {
        let entry = self.read_directory_entry(item_start, item_end)?;
        //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
        result.push(entry);
    }

    Ok(result)
}
/// Recursively write the paths of all entries below `dir` to `output`, one
/// per line.
///
/// Entries of each directory are sorted by file name. `prefix` is used as
/// scratch space to build the full path of each entry.
///
/// # Errors
///
/// Fails if a directory cannot be listed, if writing to `output` fails, or
/// if an entry has an unknown file type. In the last case the path has
/// already been written.
pub fn print_filenames<W: std::io::Write>(
    &mut self,
    output: &mut W,
    prefix: &mut PathBuf,
    dir: &CaDirectoryEntry,
) -> Result<(), Error> {
    let mut list = self.list_dir(dir)?;

    list.sort_unstable_by(|a, b| a.filename.cmp(&b.filename));

    for item in &list {
        prefix.push(&item.filename);

        let mode = item.entry.mode as u32;
        let ifmt = mode & libc::S_IFMT;

        // `write_all` instead of `write`: a short write must not silently
        // truncate the output.
        output.write_all(prefix.as_os_str().as_bytes())?;
        output.write_all(b"\n")?;

        match ifmt {
            libc::S_IFDIR => self.print_filenames(output, prefix, item)?,
            // Leaf entries - the same set of types restore_sequential()
            // is able to restore.
            libc::S_IFREG
            | libc::S_IFLNK
            | libc::S_IFBLK
            | libc::S_IFCHR
            | libc::S_IFIFO
            | libc::S_IFSOCK => {}
            _ => bail!("unknown item mode/type for {:?}", prefix),
        }

        prefix.pop();
    }

    Ok(())
}
}
/// Open `filename` relative to the directory `parent` with the given
/// `flags` and `mode`, and wrap the resulting descriptor in a
/// `std::fs::File`.
fn file_openat(parent: RawFd, filename: &OsStr, flags: OFlag, mode: Mode) -> Result<std::fs::File, Error> {
    let raw_fd = filename.with_nix_path(|name| {
        nix::fcntl::openat(parent, name.as_ref(), flags, mode)
    })??;

    // The descriptor was just returned by openat(); the `File` takes it over.
    Ok(unsafe { std::fs::File::from_raw_fd(raw_fd) })
}
/// Create the directory `filename` (mode `S_IRWXU`) inside `parent` and
/// open it.
///
/// # Errors
///
/// Fails if `mkdirat` fails or the new directory cannot be opened.
fn dir_mkdirat(parent: RawFd, filename: &OsStr) -> Result<nix::dir::Dir, Error> {
    // Step 1: create the directory.
    let rc = filename.with_nix_path(|name| unsafe {
        libc::mkdirat(parent, name.as_ptr(), libc::S_IRWXU)
    })?;
    Errno::result(rc)?;

    // Step 2: open it.
    let handle = nix::dir::Dir::openat(parent, filename, OFlag::O_DIRECTORY, Mode::empty())?;

    Ok(handle)
}
/// Create the symbolic link `linkname` inside the directory `parent`,
/// pointing to `target`.
///
/// Both paths are converted with `with_nix_path`; a failure of either
/// conversion or of `libc::symlinkat` itself is returned as error.
fn symlinkat(target: &Path, parent: RawFd, linkname: &OsStr) -> Result<(), Error> {
target.with_nix_path(|target| {
linkname.with_nix_path(|linkname| {
let res = unsafe { libc::symlinkat(target.as_ptr(), parent, linkname.as_ptr()) };
Errno::result(res)?;
Ok(())
// First `?`: error of the inner `with_nix_path` conversion (linkname).
})?
// Second `?`: error of the outer `with_nix_path` conversion (target); the
// remaining value is the `Result` produced by the inner closure.
})?
}
fn nsec_to_update_timespec(mtime_nsec: u64) -> [libc::timespec; 2] {
// restore mtime
const UTIME_OMIT: i64 = ((1 << 30) - 2);
const NANOS_PER_SEC: i64 = 1_000_000_000;
let sec = (mtime_nsec as i64) / NANOS_PER_SEC;
let nsec = (mtime_nsec as i64) % NANOS_PER_SEC;
let times: [libc::timespec; 2] = [
libc::timespec { tv_sec: 0, tv_nsec: UTIME_OMIT },
libc::timespec { tv_sec: sec, tv_nsec: nsec },
];
times
}