improve catar docs

2018-12-30 13:47:27 +01:00 · 2018-12-30 13:47:27 +01:00 · 4fa71e0573
commit 4fa71e0573
parent 48147efd12
5 changed files with 113 additions and 40 deletions
--- a/src/catar.rs
+++ b/src/catar.rs
@ -0,0 +1,43 @@
+//! *catar* Implementation
+//!
+//! This is an implementation of the *catar* format used by the
+//! [casync](https://github.com/systemd/casync) toolkit. It is a file
+//! archive format defined by 'Lennart Poettering', specially designed
+//! for efficient deduplication.
+//!
+//! Every archive contains items in the following order:
+//!  * ENTRY             -- containing general stat() data and related bits
+//!   * USER              -- user name as text, if enabled
+//!   * GROUP             -- group name as text, if enabled
+//!   * XATTR             -- one extended attribute
+//!   * ...               -- more of these when there are multiple defined
+//!   * ACL_USER          -- one USER ACL entry
+//!   * ...               -- more of these when there are multiple defined
+//!   * ACL_GROUP         -- one GROUP ACL entry
+//!   * ...               -- more of these when there are multiple defined
+//!   * ACL_GROUP_OBJ     -- The ACL_GROUP_OBJ
+//!   * ACL_DEFAULT       -- The various default ACL fields if there's one defined
+//!   * ACL_DEFAULT_USER  -- one USER ACL entry
+//!   * ...               -- more of these when multiple are defined
+//!   * ACL_DEFAULT_GROUP -- one GROUP ACL entry
+//!   * ...               -- more of these when multiple are defined
+//!   * FCAPS             -- file capability in Linux disk format
+//!   * QUOTA_PROJECT_ID  -- the ext4/xfs quota project ID
+//!   * PAYLOAD           -- file contents, if it is one
+//!   * SYMLINK           -- symlink target, if it is one
+//!   * DEVICE            -- device major/minor, if it is a block/char device
+//!
+//!   If we are serializing a directory, then this is followed by:
+//!
+//!   * FILENAME          -- name of the first directory entry (strictly ordered!)
+//!   * <archive>         -- serialization of the first directory entry's metadata and contents,
+//!  following the exact same archive format
+//!   * FILENAME          -- name of the second directory entry (strictly ordered!)
+//!   * <archive>         -- serialization of the second directory entry
+//!   * ...
+//!   * GOODBYE           -- lookup table at the end of a list of directory entries
+
+pub mod binary_search_tree;
+pub mod format_definition;
+pub mod encoder;
+
--- a/src/catar/binary_search_tree.rs
+++ b/src/catar/binary_search_tree.rs
@ -1,36 +1,31 @@
-/// Helpers to generate a binary search tree stored in an array from a
-/// sorted array.
-///
-/// Specifically, for any given sorted * array 'input' permute the
-/// array so that the following rule holds:
-///
-/// For each array item with index i, the item at 2*i+1 is smaller and
-/// the item 2*i+2 is larger.
-///
-/// This structure permits efficient (meaning: O(log(n)) binary
-/// searches: start with item i=0 (i.e. the root of the BST), compare
-/// the value with the searched item, if smaller proceed at item
-/// i*2+1, if larger proceed at item i*2+2, and repeat, until either
-/// the item is found, or the indexes grow beyond the array size,
-/// which means the entry does not exist.
-///
-/// Effectively this implements bisection, but instead of jumping
-/// around wildly in the array during a single search we only search
-/// with strictly monotonically increasing indexes.
-///
-/// Algorithm is from casync (camakebst.c), simplified and optimized
-/// for rust. Permutation function originally by L. Bressel, 2017. We
-/// pass permutation info to user provided callback, which actually
-/// implements the data copy.
-///
-
-// NOTES:
-//
-// https://en.wikipedia.org/wiki/Binary_heap
-// https://en.wikipedia.org/wiki/Heapsort
-//
-// ==> Maype it is possible to build a sorted array from unsorted
-// array inplace, using heapsort?
+//! Helpers to generate a binary search tree stored in an array from a
+//! sorted array.
+//!
+//! Specifically, for any given sorted array 'input' permute the
+//! array so that the following rule holds:
+//!
+//! For each array item with index i, the item at 2i+1 is smaller and
+//! the item at 2i+2 is larger.
+//!
+//! This structure permits efficient (meaning: O(log(n))) binary
+//! searches: start with item i=0 (i.e. the root of the BST), compare
+//! the value with the searched item, if smaller proceed at item
+//! 2i+1, if larger proceed at item 2i+2, and repeat, until either
+//! the item is found, or the indexes grow beyond the array size,
+//! which means the entry does not exist.
+//!
+//! Effectively this implements bisection, but instead of jumping
+//! around wildly in the array during a single search we only search
+//! with strictly monotonically increasing indexes.
+//!
+//! Algorithm is from casync (camakebst.c), simplified and optimized
+//! for rust. Permutation function originally by L. Bressel, 2017. We
+//! pass permutation info to user provided callback, which actually
+//! implements the data copy.
+//!
+//! The Wikipedia article on [Binary
+//! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short
+//! introduction on how to store binary trees in an array.

 fn copy_binary_search_tree_inner<F:  FnMut(usize, usize)>(
    copy_func: &mut F,
@ -63,6 +58,26 @@ fn copy_binary_search_tree_inner<F:  FnMut(usize, usize)>(
    }
 }

+/// This function calls the provided `copy_func()` with the permutation
+/// info.
+///
+/// ```
+/// # use proxmox_backup::catar::binary_search_tree::copy_binary_search_tree;
+/// copy_binary_search_tree(5, |src, dest| {
+///    println!("Copy {} to {}", src, dest);
+/// });
+/// ```
+///
+/// This will produce the following output:
+///
+///     Copy 3 to 0
+///     Copy 1 to 1
+///     Copy 0 to 3
+///     Copy 2 to 4
+///     Copy 4 to 2
+///
+/// So this generates the following permutation: `[3,1,4,0,2]`.
+
 pub fn copy_binary_search_tree<F:  FnMut(usize, usize)>(
    n: usize,
    mut copy_func: F,
@ -72,7 +87,6 @@ pub fn copy_binary_search_tree<F:  FnMut(usize, usize)>(
    copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
 }

-
 #[test]
 fn test_binary_search_tree() {

--- a/src/catar/encoder.rs
+++ b/src/catar/encoder.rs
@ -1,3 +1,7 @@
+//! *catar* format encoder.
+//!
+//! This module contains the code to generate *catar* archive files.
+
 use failure::*;

 use super::format_definition::*;
--- a/src/catar/format_definition.rs
+++ b/src/catar/format_definition.rs
@ -1,3 +1,10 @@
+//! *catar* binary format definition
+//!
+//! Please note that all values are stored in little endian byte order.
+//!
+//! The Archive contains a list of items. Each item starts with a
+//! `CaFormatHeader`, followed by the item data.
+
 use failure::*;

 pub const CA_FORMAT_ENTRY: u64 = 0x1396fabcea5bbb51;
@ -13,7 +20,9 @@ pub const CA_FORMAT_FEATURE_FLAGS_MAX: u64 = 0xb000_0001_ffef_fe26; // fixme: ?

 #[repr(C)]
 pub struct CaFormatHeader {
+    /// The size of the item, including the size of `CaFormatHeader`.
    pub size: u64,
+    /// The item type (see `CA_FORMAT_` constants).
    pub htype: u64,
 }

@ -29,8 +38,16 @@ pub struct CaFormatEntry {

 #[repr(C)]
 pub struct CaFormatGoodbyeItem {
+    /// The offset from the start of the GOODBYE object to the start
+    /// of the matching directory item (pointing to a FILENAME). The
+    /// last GOODBYE item instead points to the start of the matching
+    /// ENTRY object.
    pub offset: u64,
+    /// The overall size of the directory item. The last GOODBYE item
+    /// repeats the size of the GOODBYE item.
    pub size: u64,
+    /// SipHash24 of the directory item name. The last GOODBYE item
+    /// uses the special hash value `CA_FORMAT_GOODBYE_TAIL_MARKER`.
    pub hash: u64,
 }

--- a/src/lib.rs
+++ b/src/lib.rs
@ -30,12 +30,7 @@ pub mod server {

 }

-pub mod catar {
-
-    pub mod binary_search_tree;
-    pub mod format_definition;
-    pub mod encoder;
-}
+pub mod catar;

 pub mod section_config;