diff --git a/src/catar.rs b/src/catar.rs new file mode 100644 index 00000000..22a87a24 --- /dev/null +++ b/src/catar.rs @@ -0,0 +1,43 @@ +//! *catar* Implementation +//! +//! This is an implementation of the *catar* format used by the +//! [casync](https://github.com/systemd/casync) toolkit. It is a file +//! archive format defined by 'Lennart Poettering', specifically designed +//! for efficient deduplication. + +//! Every archive contains items in the following order: +//! * ENTRY -- containing general stat() data and related bits +//! * USER -- user name as text, if enabled +//! * GROUP -- group name as text, if enabled +//! * XATTR -- one extended attribute +//! * ... -- more of these when there are multiple defined +//! * ACL_USER -- one USER ACL entry +//! * ... -- more of these when there are multiple defined +//! * ACL_GROUP -- one GROUP ACL entry +//! * ... -- more of these when there are multiple defined +//! * ACL_GROUP_OBJ -- The ACL_GROUP_OBJ +//! * ACL_DEFAULT -- The various default ACL fields if there's one defined +//! * ACL_DEFAULT_USER -- one USER ACL entry +//! * ... -- more of these when multiple are defined +//! * ACL_DEFAULT_GROUP -- one GROUP ACL entry +//! * ... -- more of these when multiple are defined +//! * FCAPS -- file capability in Linux disk format +//! * QUOTA_PROJECT_ID -- the ext4/xfs quota project ID +//! * PAYLOAD -- file contents, if it is one +//! * SYMLINK -- symlink target, if it is one +//! * DEVICE -- device major/minor, if it is a block/char device +//! +//! If we are serializing a directory, then this is followed by: +//! +//! * FILENAME -- name of the first directory entry (strictly ordered!) +//! * -- serialization of the first directory entry's metadata and contents, +//! following the exact same archive format +//! * FILENAME -- name of the second directory entry (strictly ordered!) +//! * -- serialization of the second directory entry +//! * ... +//! 
* GOODBYE -- lookup table at the end of a list of directory entries + +pub mod binary_search_tree; +pub mod format_definition; +pub mod encoder; + diff --git a/src/catar/binary_search_tree.rs b/src/catar/binary_search_tree.rs index ba996a94..e1293e96 100644 --- a/src/catar/binary_search_tree.rs +++ b/src/catar/binary_search_tree.rs @@ -1,36 +1,31 @@ -/// Helpers to generate a binary search tree stored in an array from a -/// sorted array. -/// -/// Specifically, for any given sorted * array 'input' permute the -/// array so that the following rule holds: -/// -/// For each array item with index i, the item at 2*i+1 is smaller and -/// the item 2*i+2 is larger. -/// -/// This structure permits efficient (meaning: O(log(n)) binary -/// searches: start with item i=0 (i.e. the root of the BST), compare -/// the value with the searched item, if smaller proceed at item -/// i*2+1, if larger proceed at item i*2+2, and repeat, until either -/// the item is found, or the indexes grow beyond the array size, -/// which means the entry does not exist. -/// -/// Effectively this implements bisection, but instead of jumping -/// around wildly in the array during a single search we only search -/// with strictly monotonically increasing indexes. -/// -/// Algorithm is from casync (camakebst.c), simplified and optimized -/// for rust. Permutation function originally by L. Bressel, 2017. We -/// pass permutation info to user provided callback, which actually -/// implements the data copy. -/// - -// NOTES: -// -// https://en.wikipedia.org/wiki/Binary_heap -// https://en.wikipedia.org/wiki/Heapsort -// -// ==> Maype it is possible to build a sorted array from unsorted -// array inplace, using heapsort? +//! Helpers to generate a binary search tree stored in an array from a +//! sorted array. +//! +//! Specifically, for any given sorted array 'input' permute the +//! array so that the following rule holds: +//! +//! 
For each array item with index i, the item at 2i+1 is smaller and +//! the item 2i+2 is larger. +//! +//! This structure permits efficient (meaning: O(log(n))) binary +//! searches: start with item i=0 (i.e. the root of the BST), compare +//! the value with the searched item, if smaller proceed at item +//! 2i+1, if larger proceed at item 2i+2, and repeat, until either +//! the item is found, or the indexes grow beyond the array size, +//! which means the entry does not exist. +//! +//! Effectively this implements bisection, but instead of jumping +//! around wildly in the array during a single search we only search +//! with strictly monotonically increasing indexes. +//! +//! Algorithm is from casync (camakebst.c), simplified and optimized +//! for rust. Permutation function originally by L. Bressel, 2017. We +//! pass permutation info to user provided callback, which actually +//! implements the data copy. +//! +//! The Wikipedia article for [Binary +//! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short +//! introduction on how to store binary trees using an array. fn copy_binary_search_tree_inner( copy_func: &mut F, @@ -63,6 +58,26 @@ fn copy_binary_search_tree_inner( } } +/// This function calls the provided `copy_func()` with the permutation +/// info. +/// +/// ``` +/// # use proxmox_backup::catar::binary_search_tree::copy_binary_search_tree; +/// copy_binary_search_tree(5, |src, dest| { +/// println!("Copy {} to {}", src, dest); +/// }); +/// ``` +/// +/// This will produce the following output: +/// +/// Copy 3 to 0 +/// Copy 1 to 1 +/// Copy 0 to 3 +/// Copy 2 to 4 +/// Copy 4 to 2 +/// +/// So this generates the following permutation: `[3,1,4,0,2]`. 
+ pub fn copy_binary_search_tree( n: usize, mut copy_func: F, @@ -72,7 +87,6 @@ pub fn copy_binary_search_tree( copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0); } - #[test] fn test_binary_search_tree() { diff --git a/src/catar/encoder.rs b/src/catar/encoder.rs index ed2ca6bb..5ac91277 100644 --- a/src/catar/encoder.rs +++ b/src/catar/encoder.rs @@ -1,3 +1,7 @@ +//! *catar* format encoder. +//! +//! This module contains the code to generate *catar* archive files. + use failure::*; use super::format_definition::*; diff --git a/src/catar/format_definition.rs b/src/catar/format_definition.rs index b7d610d8..755b3fee 100644 --- a/src/catar/format_definition.rs +++ b/src/catar/format_definition.rs @@ -1,3 +1,10 @@ +//! *catar* binary format definition +//! +//! Please note that all values are stored in little endian ordering. +//! +//! The Archive contains a list of items. Each item starts with a +//! `CaFormatHeader`, followed by the item data. + use failure::*; pub const CA_FORMAT_ENTRY: u64 = 0x1396fabcea5bbb51; @@ -13,7 +20,9 @@ pub const CA_FORMAT_FEATURE_FLAGS_MAX: u64 = 0xb000_0001_ffef_fe26; // fixme: ? #[repr(C)] pub struct CaFormatHeader { + /// The size of the item, including the size of `CaFormatHeader`. pub size: u64, + /// The item type (see `CA_FORMAT_` constants). pub htype: u64, } @@ -29,8 +38,16 @@ pub struct CaFormatEntry { #[repr(C)] pub struct CaFormatGoodbyeItem { + /// The offset from the start of the GOODBYE object to the start + /// of the matching directory item (points to a FILENAME). The last + /// GOODBYE item points to the start of the matching ENTRY + /// object. repeats the `size` pub offset: u64, + /// The overall size of the directory item. The last GOODBYE item + /// repeats the size of the GOODBYE item. pub size: u64, + /// SipHash24 of the directory item name. The last GOODBYE item + /// uses the special hash value `CA_FORMAT_GOODBYE_TAIL_MARKER`. 
pub hash: u64, } diff --git a/src/lib.rs b/src/lib.rs index be9f4c07..d7e3b0b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,12 +30,7 @@ pub mod server { } -pub mod catar { - - pub mod binary_search_tree; - pub mod format_definition; - pub mod encoder; -} +pub mod catar; pub mod section_config;