improve catar docs
This commit is contained in:
		
							
								
								
									
										43
									
								
								src/catar.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								src/catar.rs
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,43 @@
 | 
			
		||||
//! *catar* Implementation
 | 
			
		||||
//!
 | 
			
		||||
//! This is an implementation of the *catar* format used by the
 | 
			
		||||
//! [casync](https://github.com/systemd/casync) toolkit. It is a file
 | 
			
		||||
//! archive format defined by 'Lennart Poettering', specifically designed
 | 
			
		||||
//! for efficient deduplication.
 | 
			
		||||
 | 
			
		||||
//! Every archive contains items in the following order:
 | 
			
		||||
//!  * ENTRY             -- containing general stat() data and related bits
 | 
			
		||||
//!   * USER              -- user name as text, if enabled
 | 
			
		||||
//!   * GROUP             -- group name as text, if enabled
 | 
			
		||||
//!   * XATTR             -- one extended attribute
 | 
			
		||||
//!   * ...               -- more of these when there are multiple defined
 | 
			
		||||
//!   * ACL_USER          -- one USER ACL entry
 | 
			
		||||
//!   * ...               -- more of these when there are multiple defined
 | 
			
		||||
//!   * ACL_GROUP         -- one GROUP ACL entry
 | 
			
		||||
//!   * ...               -- more of these when there are multiple defined
 | 
			
		||||
//!   * ACL_GROUP_OBJ     -- The ACL_GROUP_OBJ
 | 
			
		||||
//!   * ACL_DEFAULT       -- The various default ACL fields if there's one defined
 | 
			
		||||
//!   * ACL_DEFAULT_USER  -- one USER ACL entry
 | 
			
		||||
//!   * ...               -- more of these when multiple are defined
 | 
			
		||||
//!   * ACL_DEFAULT_GROUP -- one GROUP ACL entry
 | 
			
		||||
//!   * ...               -- more of these when multiple are defined
 | 
			
		||||
//!   * FCAPS             -- file capability in Linux disk format
 | 
			
		||||
//!   * QUOTA_PROJECT_ID  -- the ext4/xfs quota project ID
 | 
			
		||||
//!   * PAYLOAD           -- file contents, if it is one
 | 
			
		||||
//!   * SYMLINK           -- symlink target, if it is one
 | 
			
		||||
//!   * DEVICE            -- device major/minor, if it is a block/char device
 | 
			
		||||
//!
 | 
			
		||||
//!   If we are serializing a directory, then this is followed by:
 | 
			
		||||
//!
 | 
			
		||||
//!   * FILENAME          -- name of the first directory entry (strictly ordered!)
 | 
			
		||||
//!   * <archive>         -- serialization of the first directory entry's metadata and contents,
 | 
			
		||||
//!  following the exact same archive format
 | 
			
		||||
//!   * FILENAME          -- name of the second directory entry (strictly ordered!)
 | 
			
		||||
//!   * <archive>         -- serialization of the second directory entry
 | 
			
		||||
//!   * ...
 | 
			
		||||
//!   * GOODBYE           -- lookup table at the end of a list of directory entries
 | 
			
		||||
 | 
			
		||||
pub mod binary_search_tree;
 | 
			
		||||
pub mod format_definition;
 | 
			
		||||
pub mod encoder;
 | 
			
		||||
 | 
			
		||||
@ -1,36 +1,31 @@
 | 
			
		||||
/// Helpers to generate a binary search tree stored in an array from a
 | 
			
		||||
/// sorted array.
 | 
			
		||||
///
 | 
			
		||||
/// Specifically, for any given sorted * array 'input' permute the
 | 
			
		||||
/// array so that the following rule holds:
 | 
			
		||||
///
 | 
			
		||||
/// For each array item with index i, the item at 2*i+1 is smaller and
 | 
			
		||||
/// the item 2*i+2 is larger.
 | 
			
		||||
///
 | 
			
		||||
/// This structure permits efficient (meaning: O(log(n)) binary
 | 
			
		||||
/// searches: start with item i=0 (i.e. the root of the BST), compare
 | 
			
		||||
/// the value with the searched item, if smaller proceed at item
 | 
			
		||||
/// i*2+1, if larger proceed at item i*2+2, and repeat, until either
 | 
			
		||||
/// the item is found, or the indexes grow beyond the array size,
 | 
			
		||||
/// which means the entry does not exist.
 | 
			
		||||
///
 | 
			
		||||
/// Effectively this implements bisection, but instead of jumping
 | 
			
		||||
/// around wildly in the array during a single search we only search
 | 
			
		||||
/// with strictly monotonically increasing indexes.
 | 
			
		||||
///
 | 
			
		||||
/// Algorithm is from casync (camakebst.c), simplified and optimized
 | 
			
		||||
/// for rust. Permutation function originally by L. Bressel, 2017. We
 | 
			
		||||
/// pass permutation info to user provided callback, which actually
 | 
			
		||||
/// implements the data copy.
 | 
			
		||||
///
 | 
			
		||||
 | 
			
		||||
// NOTES:
 | 
			
		||||
//
 | 
			
		||||
// https://en.wikipedia.org/wiki/Binary_heap
 | 
			
		||||
// https://en.wikipedia.org/wiki/Heapsort
 | 
			
		||||
//
 | 
			
		||||
// ==> Maype it is possible to build a sorted array from unsorted
 | 
			
		||||
// array inplace, using heapsort?
 | 
			
		||||
//! Helpers to generate a binary search tree stored in an array from a
 | 
			
		||||
//! sorted array.
 | 
			
		||||
//!
 | 
			
		||||
//! Specifically, for any given sorted array 'input' permute the
 | 
			
		||||
//! array so that the following rule holds:
 | 
			
		||||
//!
 | 
			
		||||
//! For each array item with index i, the item at 2i+1 is smaller and
 | 
			
		||||
//! the item 2i+2 is larger.
 | 
			
		||||
//!
 | 
			
		||||
//! This structure permits efficient (meaning: O(log(n))) binary
 | 
			
		||||
//! searches: start with item i=0 (i.e. the root of the BST), compare
 | 
			
		||||
//! the value with the searched item, if smaller proceed at item
 | 
			
		||||
//! 2i+1, if larger proceed at item 2i+2, and repeat, until either
 | 
			
		||||
//! the item is found, or the indexes grow beyond the array size,
 | 
			
		||||
//! which means the entry does not exist.
 | 
			
		||||
//!
 | 
			
		||||
//! Effectively this implements bisection, but instead of jumping
 | 
			
		||||
//! around wildly in the array during a single search we only search
 | 
			
		||||
//! with strictly monotonically increasing indexes.
 | 
			
		||||
//!
 | 
			
		||||
//! Algorithm is from casync (camakebst.c), simplified and optimized
 | 
			
		||||
//! for rust. Permutation function originally by L. Bressel, 2017. We
 | 
			
		||||
//! pass permutation info to user provided callback, which actually
 | 
			
		||||
//! implements the data copy.
 | 
			
		||||
//!
 | 
			
		||||
//! The Wikipedia article for [Binary
 | 
			
		||||
//! Heap](https://en.wikipedia.org/wiki/Binary_heap) gives a short
 | 
			
		||||
//! intro on how to store binary trees using an array.
 | 
			
		||||
 | 
			
		||||
fn copy_binary_search_tree_inner<F:  FnMut(usize, usize)>(
 | 
			
		||||
    copy_func: &mut F,
 | 
			
		||||
@ -63,6 +58,26 @@ fn copy_binary_search_tree_inner<F:  FnMut(usize, usize)>(
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// This function calls the provided `copy_func()` with the permutation
 | 
			
		||||
/// info.
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use proxmox_backup::catar::binary_search_tree::copy_binary_search_tree;
 | 
			
		||||
/// copy_binary_search_tree(5, |src, dest| {
 | 
			
		||||
///    println!("Copy {} to {}", src, dest);
 | 
			
		||||
/// });
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// This will produce the following output:
 | 
			
		||||
///
 | 
			
		||||
///     Copy 3 to 0
 | 
			
		||||
///     Copy 1 to 1
 | 
			
		||||
///     Copy 0 to 3
 | 
			
		||||
///     Copy 2 to 4
 | 
			
		||||
///     Copy 4 to 2
 | 
			
		||||
///
 | 
			
		||||
/// So this generates the following permutation: `[3,1,4,0,2]`.
 | 
			
		||||
 | 
			
		||||
pub fn copy_binary_search_tree<F:  FnMut(usize, usize)>(
 | 
			
		||||
    n: usize,
 | 
			
		||||
    mut copy_func: F,
 | 
			
		||||
@ -72,7 +87,6 @@ pub fn copy_binary_search_tree<F:  FnMut(usize, usize)>(
 | 
			
		||||
    copy_binary_search_tree_inner(&mut copy_func, n, 0, e, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#[test]
 | 
			
		||||
fn test_binary_search_tree() {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,7 @@
 | 
			
		||||
//! *catar* format encoder.
 | 
			
		||||
//!
 | 
			
		||||
//! This module contains the code to generate *catar* archive files.
 | 
			
		||||
 | 
			
		||||
use failure::*;
 | 
			
		||||
 | 
			
		||||
use super::format_definition::*;
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,10 @@
 | 
			
		||||
//! *catar* binary format definition
 | 
			
		||||
//!
 | 
			
		||||
//! Please note that all values are stored in little-endian byte order.
 | 
			
		||||
//!
 | 
			
		||||
//! The Archive contains a list of items. Each item starts with a
 | 
			
		||||
//! `CaFormatHeader`, followed by the item data.
 | 
			
		||||
 | 
			
		||||
use failure::*;
 | 
			
		||||
 | 
			
		||||
pub const CA_FORMAT_ENTRY: u64 = 0x1396fabcea5bbb51;
 | 
			
		||||
@ -13,7 +20,9 @@ pub const CA_FORMAT_FEATURE_FLAGS_MAX: u64 = 0xb000_0001_ffef_fe26; // fixme: ?
 | 
			
		||||
 | 
			
		||||
#[repr(C)]
 | 
			
		||||
pub struct CaFormatHeader {
 | 
			
		||||
    /// The size of the item, including the size of `CaFormatHeader`.
 | 
			
		||||
    pub size: u64,
 | 
			
		||||
    /// The item type (see `CA_FORMAT_` constants).
 | 
			
		||||
    pub htype: u64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -29,8 +38,16 @@ pub struct CaFormatEntry {
 | 
			
		||||
 | 
			
		||||
#[repr(C)]
 | 
			
		||||
pub struct CaFormatGoodbyeItem {
 | 
			
		||||
    /// The offset from the start of the GOODBYE object to the start
 | 
			
		||||
    /// of the matching directory item (pointing to a FILENAME). The last
 | 
			
		||||
    /// GOODBYE item points to the start of the matching ENTRY
 | 
			
		||||
    /// object.
 | 
			
		||||
    pub offset: u64,
 | 
			
		||||
    /// The overall size of the directory item. The last GOODBYE item
 | 
			
		||||
    /// repeats the size of the GOODBYE item.
 | 
			
		||||
    pub size: u64,
 | 
			
		||||
    /// SipHash24 of the directory item name. The last GOODBYE item
 | 
			
		||||
    /// uses the special hash value `CA_FORMAT_GOODBYE_TAIL_MARKER`.
 | 
			
		||||
    pub hash: u64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -30,12 +30,7 @@ pub mod server {
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub mod catar {
 | 
			
		||||
 | 
			
		||||
    pub mod binary_search_tree;
 | 
			
		||||
    pub mod format_definition;
 | 
			
		||||
    pub mod encoder;
 | 
			
		||||
}
 | 
			
		||||
pub mod catar;
 | 
			
		||||
 | 
			
		||||
pub mod section_config;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user