improve catar docs

This commit is contained in:
Dietmar Maurer 2018-12-30 13:47:27 +01:00
parent 48147efd12
commit 4fa71e0573
5 changed files with 113 additions and 40 deletions

43
src/catar.rs Normal file
View File

@ -0,0 +1,43 @@
//! *catar* Implementation
//!
//! This is an implementation of the *catar* format used by the
//! [casync](https://github.com/systemd/casync) toolkit. It is a file
//! archive format defined by 'Lennart Poettering', specifically designed
//! for efficient deduplication.
//! Every archive contains items in the following order:
//! * ENTRY -- containing general stat() data and related bits
//! * USER -- user name as text, if enabled
//! * GROUP -- group name as text, if enabled
//! * XATTR -- one extended attribute
//! * ... -- more of these when there are multiple defined
//! * ACL_USER -- one USER ACL entry
//! * ... -- more of these when there are multiple defined
//! * ACL_GROUP -- one GROUP ACL entry
//! * ... -- more of these when there are multiple defined
//! * ACL_GROUP_OBJ -- The ACL_GROUP_OBJ
//! * ACL_DEFAULT -- The various default ACL fields if there's one defined
//! * ACL_DEFAULT_USER -- one USER ACL entry
//! * ... -- more of these when multiple are defined
//! * ACL_DEFAULT_GROUP -- one GROUP ACL entry
//! * ... -- more of these when multiple are defined
//! * FCAPS -- file capability in Linux disk format
//! * QUOTA_PROJECT_ID -- the ext4/xfs quota project ID
//! * PAYLOAD -- file contents, if it is one
//! * SYMLINK -- symlink target, if it is one
//! * DEVICE -- device major/minor, if it is a block/char device
//!
//! If we are serializing a directory, then this is followed by:
//!
//! * FILENAME -- name of the first directory entry (strictly ordered!)
//! * <archive> -- serialization of the first directory entry's metadata and contents,
//! following the exact same archive format
//! * FILENAME -- name of the second directory entry (strictly ordered!)
//! * <archive> -- serialization of the second directory entry
//! * ...
//! * GOODBYE -- lookup table at the end of a list of directory entries
pub mod binary_search_tree;
pub mod format_definition;
pub mod encoder;

View File

@ -1,36 +1,31 @@
/// Helpers to generate a binary search tree stored in an array from a
/// sorted array.
///
/// Specifically, for any given sorted * array 'input' permute the
/// array so that the following rule holds:
///
/// For each array item with index i, the item at 2*i+1 is smaller and
/// the item 2*i+2 is larger.
///
/// This structure permits efficient (meaning: O(log(n)) binary
/// searches: start with item i=0 (i.e. the root of the BST), compare
/// the value with the searched item, if smaller proceed at item
/// i*2+1, if larger proceed at item i*2+2, and repeat, until either
/// the item is found, or the indexes grow beyond the array size,
/// which means the entry does not exist.
///
/// Effectively this implements bisection, but instead of jumping
/// around wildly in the array during a single search we only search
/// with strictly monotonically increasing indexes.
///
/// Algorithm is from casync (camakebst.c), simplified and optimized
/// for rust. Permutation function originally by L. Bressel, 2017. We
/// pass permutation info to user provided callback, which actually
/// implements the data copy.
///
// NOTES:
//
// https://en.wikipedia.org/wiki/Binary_heap
// https://en.wikipedia.org/wiki/Heapsort
//
// ==> Maype it is possible to build a sorted array from unsorted
// array inplace, using heapsort?
//! Helpers to generate a binary search tree stored in an array from a
//! sorted array.
//!
//! Specifically, for any given sorted array 'input' permute the
//! array so that the following rule holds:
//!
//! For each array item with index i, the item at 2i+1 is smaller and
//! the item at 2i+2 is larger.
//!
//! This structure permits efficient (meaning: O(log(n))) binary
//! searches: start with item i=0 (i.e. the root of the BST), compare
//! the value with the searched item, if smaller proceed at item
//! 2i+1, if larger proceed at item 2i+2, and repeat, until either
//! the item is found, or the indexes grow beyond the array size,
//! which means the entry does not exist.
//!
//! Effectively this implements bisection, but instead of jumping
//! around wildly in the array during a single search we only search
//! with strictly monotonically increasing indexes.
//!
//! Algorithm is from casync (camakebst.c), simplified and optimized
//! for rust. Permutation function originally by L. Bressel, 2017. We
//! pass permutation info to user provided callback, which actually
//! implements the data copy.
//!
//! The Wikipedia article on [Binary
//! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short
//! introduction on how to store binary trees using an array.
fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
copy_func: &mut F,
@ -63,6 +58,26 @@ fn copy_binary_search_tree_inner<F: FnMut(usize, usize)>(
}
}
/// This function calls the provided `copy_func()` with the permutation
/// info.
///
/// ```
/// # use proxmox_backup::catar::binary_search_tree::copy_binary_search_tree;
/// copy_binary_search_tree(5, |src, dest| {
/// println!("Copy {} to {}", src, dest);
/// });
/// ```
///
/// This will produce the following output:
///
/// Copy 3 to 0
/// Copy 1 to 1
/// Copy 0 to 3
/// Copy 2 to 4
/// Copy 4 to 2
///
/// So this generates the following permutation: `[3,1,4,0,2]`.
pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
n: usize,
mut copy_func: F,
@ -72,7 +87,6 @@ pub fn copy_binary_search_tree<F: FnMut(usize, usize)>(
copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
}
#[test]
fn test_binary_search_tree() {

View File

@ -1,3 +1,7 @@
//! *catar* format encoder.
//!
//! This module contains the code to generate *catar* archive files.
use failure::*;
use super::format_definition::*;

View File

@ -1,3 +1,10 @@
//! *catar* binary format definition
//!
//! Please note that all values are stored in little endian ordering.
//!
//! The Archive contains a list of items. Each item starts with a
//! `CaFormatHeader`, followed by the item data.
use failure::*;
pub const CA_FORMAT_ENTRY: u64 = 0x1396fabcea5bbb51;
@ -13,7 +20,9 @@ pub const CA_FORMAT_FEATURE_FLAGS_MAX: u64 = 0xb000_0001_ffef_fe26; // fixme: ?
/// Header that starts every item in the archive; the item data
/// follows directly after it.
///
/// Values are stored in little endian ordering.
#[repr(C)]
pub struct CaFormatHeader {
/// The size of the item, including the size of `CaFormatHeader`.
pub size: u64,
/// The item type (one of the `CA_FORMAT_` constants, for example
/// `CA_FORMAT_ENTRY`).
pub htype: u64,
}
@ -29,8 +38,16 @@ pub struct CaFormatEntry {
/// One entry of the GOODBYE lookup table that is written at the end
/// of a list of directory entries.
///
/// The last GOODBYE item is special; see the individual fields for
/// the values it carries.
#[repr(C)]
pub struct CaFormatGoodbyeItem {
/// The offset from the start of the GOODBYE object to the start
/// of the matching directory item (which points to a FILENAME).
/// For the last GOODBYE item, this instead points to the start of
/// the matching ENTRY object.
pub offset: u64,
/// The overall size of the directory item. The last GOODBYE item
/// repeats the size of the GOODBYE item.
pub size: u64,
/// SipHash24 of the directory item name. The last GOODBYE item
/// uses the special hash value `CA_FORMAT_GOODBYE_TAIL_MARKER`.
pub hash: u64,
}

View File

@ -30,12 +30,7 @@ pub mod server {
}
pub mod catar {
pub mod binary_search_tree;
pub mod format_definition;
pub mod encoder;
}
pub mod catar;
pub mod section_config;