remove DataChunk file format - use DataBlob instead

Dietmar Maurer 2019-10-06 10:31:06 +02:00
parent c26d4b4573
commit 4ee8f53d07
12 changed files with 184 additions and 453 deletions
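In short: chunks no longer carry their digest inside a dedicated DataChunk type; they are stored as plain DataBlobs, and the SHA-256 digest travels alongside as an explicit parameter. A minimal sketch of the resulting calling convention (names taken from the diffs below; error type from the failure crate as used elsewhere in the codebase; a sketch, not code from this commit):

use failure::Error;

// Hypothetical helper illustrating the new two-value contract:
// build() now returns the blob *and* its digest, and the digest is
// passed explicitly wherever a chunk is stored or verified.
// Assumes ChunkStore and DataChunkBuilder (src/backup) are in scope.
fn store_chunk(store: &ChunkStore, data: &[u8]) -> Result<[u8; 32], Error> {
    let (blob, digest) = DataChunkBuilder::new(data).compress(true).build()?;
    let (_is_duplicate, _compressed_size) = store.insert_chunk(&blob, &digest)?;
    Ok(digest)
}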

View File

@@ -57,14 +57,14 @@ impl Future for UploadChunk {
         }
         let (is_duplicate, compressed_size) = match proxmox::tools::try_block! {
-            let mut chunk = DataChunk::from_raw(raw_data, this.digest)?;
-            chunk.verify_unencrypted(this.size as usize)?;
+            let mut chunk = DataBlob::from_raw(raw_data)?;
+            chunk.verify_unencrypted(this.size as usize, &this.digest)?;
             // always compute CRC at server side
             chunk.set_crc(chunk.compute_crc());
-            this.store.insert_chunk(&chunk)
+            this.store.insert_chunk(&chunk, &this.digest)
         } {
             Ok(res) => res,
             Err(err) => break err,
@@ -95,8 +95,8 @@ pub fn api_method_upload_fixed_chunk() -> ApiAsyncMethod {
                 .maximum(1024*1024*16)
             )
             .required("encoded-size", IntegerSchema::new("Encoded chunk size.")
-                .minimum((std::mem::size_of::<DataChunkHeader>() as isize)+1)
-                .maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataChunkHeader>() as isize))
+                .minimum((std::mem::size_of::<DataBlobHeader>() as isize)+1)
+                .maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataBlobHeader>() as isize))
             )
     )
 }
@@ -151,8 +151,8 @@ pub fn api_method_upload_dynamic_chunk() -> ApiAsyncMethod {
                 .maximum(1024*1024*16)
             )
             .required("encoded-size", IntegerSchema::new("Encoded chunk size.")
-                .minimum((std::mem::size_of::<DataChunkHeader>() as isize) +1)
-                .maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataChunkHeader>() as isize))
+                .minimum((std::mem::size_of::<DataBlobHeader>() as isize) +1)
+                .maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataBlobHeader>() as isize))
             )
     )
 }

View File

@@ -140,9 +140,6 @@ pub use checksum_writer::*;
 mod chunker;
 pub use chunker::*;
-mod data_chunk;
-pub use data_chunk::*;
 mod data_blob;
 pub use data_blob::*;

View File

@@ -7,7 +7,7 @@ use std::os::unix::io::AsRawFd;
 use serde::Serialize;
 use crate::tools;
-use super::DataChunk;
+use super::DataBlob;
 use crate::server::WorkerTask;
 #[derive(Clone, Serialize)]
@@ -173,7 +173,7 @@ impl ChunkStore {
         Ok(())
     }
-    pub fn read_chunk(&self, digest: &[u8; 32]) -> Result<DataChunk, Error> {
+    pub fn read_chunk(&self, digest: &[u8; 32]) -> Result<DataBlob, Error> {
         let (chunk_path, digest_str) = self.chunk_path(digest);
         let mut file = std::fs::File::open(&chunk_path)
@@ -186,7 +186,7 @@ impl ChunkStore {
             )
         })?;
-        DataChunk::load(&mut file, *digest)
+        DataBlob::load(&mut file)
     }
     pub fn get_chunk_iterator(
@@ -357,11 +357,10 @@ impl ChunkStore {
     pub fn insert_chunk(
         &self,
-        chunk: &DataChunk,
+        chunk: &DataBlob,
+        digest: &[u8; 32],
     ) -> Result<(bool, u64), Error> {
-        let digest = chunk.digest();
         //println!("DIGEST {}", proxmox::tools::digest_to_hex(digest));
         let (chunk_path, digest_str) = self.chunk_path(digest);
@@ -444,12 +443,12 @@ fn test_chunk_store1() {
     let chunk_store = ChunkStore::create("test", &path).unwrap();
-    let chunk = super::DataChunkBuilder::new(&[0u8, 1u8]).build().unwrap();
-    let (exists, _) = chunk_store.insert_chunk(&chunk).unwrap();
+    let (chunk, digest) = super::DataChunkBuilder::new(&[0u8, 1u8]).build().unwrap();
+    let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
     assert!(!exists);
-    let (exists, _) = chunk_store.insert_chunk(&chunk).unwrap();
+    let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
     assert!(exists);
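The updated test shows the new two-value contract from DataChunkBuilder::build(). A full round trip through the store, as a sketch under the same assumptions (hypothetical test body, path as in the existing test):

// Hypothetical round trip: build, insert, read back, verify.
let chunk_store = ChunkStore::create("test", &path).unwrap();
let (blob, digest) = super::DataChunkBuilder::new(b"hello").build().unwrap();
chunk_store.insert_chunk(&blob, &digest).unwrap();
// read_chunk() now returns a digest-less DataBlob; the caller keeps the digest
let blob = chunk_store.read_chunk(&digest).unwrap();
// length and digest are both checked (decompressing first if needed)
blob.verify_unencrypted(5, &digest).unwrap();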

View File

@@ -6,13 +6,22 @@ use proxmox::tools::io::{ReadExt, WriteExt};
 const MAX_BLOB_SIZE: usize = 128*1024*1024;
-use super::*;
+use super::file_formats::*;
+use super::CryptConfig;
+
+/// Encoded data chunk with digest and positional information
+pub struct ChunkInfo {
+    pub chunk: DataBlob,
+    pub digest: [u8; 32],
+    pub chunk_len: u64,
+    pub offset: u64,
+}
 /// Data blob binary storage format
 ///
 /// Data blobs store arbitrary binary data (< 128MB), and can be
-/// compressed and encrypted. A simply binary format is used to store
-/// them on disk or transfer them over the network.
+/// compressed and encrypted (or just signed). A simple binary format
+/// is used to store them on disk or transfer them over the network.
 ///
 /// Please use index files to store large data files (".fidx" or
 /// ".didx").
@@ -255,6 +264,15 @@ impl DataBlob {
         return Ok(blob);
     }
+    /// Load blob from ``reader``
+    pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
+        let mut data = Vec::with_capacity(1024*1024);
+        reader.read_to_end(&mut data)?;
+        Self::from_raw(data)
+    }
     /// Create Instance from raw data
     pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
@@ -290,4 +308,122 @@ impl DataBlob {
         bail!("unable to parse raw blob - wrong magic");
     }
 }
+    /// Verify digest and data length for unencrypted chunks.
+    ///
+    /// To do that, we need to decompress data first. Please note that
+    /// this is not possible for encrypted chunks.
+    pub fn verify_unencrypted(
+        &self,
+        expected_chunk_size: usize,
+        expected_digest: &[u8; 32],
+    ) -> Result<(), Error> {
+        let magic = self.magic();
+        let verify_raw_data = |data: &[u8]| {
+            if expected_chunk_size != data.len() {
+                bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
+            }
+            let digest = openssl::sha::sha256(data);
+            if &digest != expected_digest {
+                bail!("detected chunk with wrong digest.");
+            }
+            Ok(())
+        };
+        if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
+            let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
+            verify_raw_data(&data)?;
+        } else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
+            verify_raw_data(&self.raw_data[12..])?;
+        }
+        Ok(())
+    }
+}
+
+/// Builder for chunk DataBlobs
+///
+/// Main purpose is to centralize digest computation. Digest
+/// computation differs for encrypted chunks, and this interface ensures that
+/// we always compute the correct one.
+pub struct DataChunkBuilder<'a> {
+    config: Option<Arc<CryptConfig>>,
+    orig_data: &'a [u8],
+    digest_computed: bool,
+    digest: [u8; 32],
+    compress: bool,
+}
+
+impl <'a> DataChunkBuilder<'a> {
+    /// Create a new builder instance.
+    pub fn new(orig_data: &'a [u8]) -> Self {
+        Self {
+            orig_data,
+            config: None,
+            digest_computed: false,
+            digest: [0u8; 32],
+            compress: true,
+        }
+    }
+
+    /// Set compression flag.
+    ///
+    /// If true, chunk data is compressed using zstd (level 1).
+    pub fn compress(mut self, value: bool) -> Self {
+        self.compress = value;
+        self
+    }
+
+    /// Set encryption configuration
+    ///
+    /// If set, chunks are encrypted.
+    pub fn crypt_config(mut self, value: Arc<CryptConfig>) -> Self {
+        if self.digest_computed {
+            panic!("unable to set crypt_config after compute_digest().");
+        }
+        self.config = Some(value);
+        self
+    }
+
+    fn compute_digest(&mut self) {
+        if !self.digest_computed {
+            if let Some(ref config) = self.config {
+                self.digest = config.compute_digest(self.orig_data);
+            } else {
+                self.digest = openssl::sha::sha256(self.orig_data);
+            }
+            self.digest_computed = true;
+        }
+    }
+
+    /// Returns the chunk Digest
+    ///
+    /// Note: For encrypted chunks, this needs to be called after
+    /// ``crypt_config``.
+    pub fn digest(&mut self) -> &[u8; 32] {
+        if !self.digest_computed {
+            self.compute_digest();
+        }
+        &self.digest
+    }
+
+    /// Consume self and build the ``DataBlob``.
+    ///
+    /// Returns the blob and the computed digest.
+    pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
+        if !self.digest_computed {
+            self.compute_digest();
+        }
+        let chunk = DataBlob::encode(
+            self.orig_data,
+            self.config,
+            self.compress,
+        )?;
+        Ok((chunk, self.digest))
+    }
 }
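One consequence of the new builder: crypt_config() now takes an Arc<CryptConfig> instead of a borrowed reference, so callers hand the builder a cheap refcount clone. A sketch, mirroring the client-side usage further down (data and crypt_config are assumed to be owned by the caller):

// `data` is the chunk payload, `crypt_config` an Option<Arc<CryptConfig>>.
let mut chunk_builder = DataChunkBuilder::new(&data).compress(true);
if let Some(ref crypt_config) = crypt_config {
    // cloning the Arc only bumps a reference count; the config is shared
    chunk_builder = chunk_builder.crypt_config(crypt_config.clone());
}
// the digest differs for encrypted chunks, so it is computed after
// crypt_config() is set and returned together with the blob
let (chunk, digest) = chunk_builder.build()?;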

View File

@@ -1,350 +0,0 @@ (file deleted; all following lines removed)
use std::convert::TryInto;
use failure::*;
use proxmox::tools::io::{ReadExt, WriteExt};
// FIXME: Move all the magic numbers into a module so we can import super::magic::* instead of
// the everything of everything everywhere.
use super::*;
/// Data chunk with positional information
pub struct ChunkInfo {
pub chunk: DataChunk,
pub chunk_len: u64,
pub offset: u64,
}
/// Data chunk binary storage format
///
/// Data chunks are identified by a unique digest, and can be
/// compressed and encrypted. A simply binary format is used to store
/// them on disk or transfer them over the network.
///
/// Please use the ``DataChunkBuilder`` to create new instances.
pub struct DataChunk {
digest: [u8; 32],
raw_data: Vec<u8>, // tagged, compressed, encryped data
}
impl DataChunk {
/// accessor to raw_data field
pub fn raw_data(&self) -> &[u8] {
&self.raw_data
}
/// return as raw_data
pub fn into_raw(self) -> Vec<u8> {
self.raw_data
}
/// accessor to chunk digest field
pub fn digest(&self) -> &[u8; 32] {
&self.digest
}
/// accessor to chunk type (magic number)
pub fn magic(&self) -> &[u8; 8] {
self.raw_data[0..8].try_into().unwrap()
}
/// accessor to crc32 checksum
pub fn crc(&self) -> u32 {
let crc_o = proxmox::tools::offsetof!(DataChunkHeader, crc);
u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
}
// set the CRC checksum field
pub fn set_crc(&mut self, crc: u32) {
let crc_o = proxmox::tools::offsetof!(DataChunkHeader, crc);
self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
}
/// compute the CRC32 checksum
pub fn compute_crc(&self) -> u32 {
let mut hasher = crc32fast::Hasher::new();
let start = std::mem::size_of::<DataChunkHeader>(); // start after HEAD
hasher.update(&self.raw_data[start..]);
hasher.finalize()
}
/// verify the CRC32 checksum
pub fn verify_crc(&self) -> Result<(), Error> {
let expected_crc = self.compute_crc();
if expected_crc != self.crc() {
bail!("Data chunk has wrong CRC checksum.");
}
Ok(())
}
fn encode(
data: &[u8],
config: Option<&CryptConfig>,
digest: [u8;32],
compress: bool,
) -> Result<Self, Error> {
if let Some(config) = config {
let compr_data;
let (_compress, data, magic) = if compress {
compr_data = zstd::block::compress(data, 1)?;
// Note: We only use compression if result is shorter
if compr_data.len() < data.len() {
(true, &compr_data[..], ENCR_COMPR_CHUNK_MAGIC_1_0)
} else {
(false, data, ENCRYPTED_CHUNK_MAGIC_1_0)
}
} else {
(false, data, ENCRYPTED_CHUNK_MAGIC_1_0)
};
let header_len = std::mem::size_of::<EncryptedDataChunkHeader>();
let mut raw_data = Vec::with_capacity(data.len() + header_len);
let dummy_head = EncryptedDataChunkHeader {
head: DataChunkHeader { magic: [0u8; 8], crc: [0; 4] },
iv: [0u8; 16],
tag: [0u8; 16],
};
unsafe {
raw_data.write_le_value(dummy_head)?;
}
let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
let head = EncryptedDataChunkHeader {
head: DataChunkHeader { magic, crc: [0; 4] }, iv, tag,
};
unsafe {
(&mut raw_data[0..header_len]).write_le_value(head)?;
}
return Ok(DataChunk { digest, raw_data });
} else {
let max_data_len = data.len() + std::mem::size_of::<DataChunkHeader>();
if compress {
let mut comp_data = Vec::with_capacity(max_data_len);
let head = DataChunkHeader {
magic: COMPRESSED_CHUNK_MAGIC_1_0,
crc: [0; 4],
};
unsafe {
comp_data.write_le_value(head)?;
}
zstd::stream::copy_encode(data, &mut comp_data, 1)?;
if comp_data.len() < max_data_len {
let chunk = DataChunk { digest, raw_data: comp_data };
return Ok(chunk);
}
}
let mut raw_data = Vec::with_capacity(max_data_len);
let head = DataChunkHeader {
magic: UNCOMPRESSED_CHUNK_MAGIC_1_0,
crc: [0; 4],
};
unsafe {
raw_data.write_le_value(head)?;
}
raw_data.extend_from_slice(data);
let chunk = DataChunk { digest, raw_data };
return Ok(chunk);
}
}
/// Decode chunk data
pub fn decode(self, config: Option<&CryptConfig>) -> Result<Vec<u8>, Error> {
let magic = self.magic();
if magic == &UNCOMPRESSED_CHUNK_MAGIC_1_0 {
let data_start = std::mem::size_of::<DataChunkHeader>();
return Ok(self.raw_data[data_start..].to_vec());
} else if magic == &COMPRESSED_CHUNK_MAGIC_1_0 {
let data_start = std::mem::size_of::<DataChunkHeader>();
let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
return Ok(data);
} else if magic == &ENCR_COMPR_CHUNK_MAGIC_1_0 || magic == &ENCRYPTED_CHUNK_MAGIC_1_0 {
let header_len = std::mem::size_of::<EncryptedDataChunkHeader>();
let head = unsafe {
(&self.raw_data[..header_len]).read_le_value::<EncryptedDataChunkHeader>()?
};
if let Some(config) = config {
let data = if magic == &ENCR_COMPR_CHUNK_MAGIC_1_0 {
config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
} else {
config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
};
return Ok(data);
} else {
bail!("unable to decrypt chunk - missing CryptConfig");
}
} else {
bail!("Invalid chunk magic number.");
}
}
/// Load chunk data from ``reader``
///
/// Please note that it is impossible to compute the digest for
/// encrypted chunks, so we need to trust and use the provided
/// ``digest``.
pub fn load(reader: &mut dyn std::io::Read, digest: [u8; 32]) -> Result<Self, Error> {
let mut data = Vec::with_capacity(1024*1024);
reader.read_to_end(&mut data)?;
Self::from_raw(data, digest)
}
/// Create Instance from raw data
pub fn from_raw(data: Vec<u8>, digest: [u8;32]) -> Result<Self, Error> {
if data.len() < std::mem::size_of::<DataChunkHeader>() {
bail!("chunk too small ({} bytes).", data.len());
}
let magic = &data[0..8];
if magic == ENCR_COMPR_CHUNK_MAGIC_1_0 || magic == ENCRYPTED_CHUNK_MAGIC_1_0 {
if data.len() < std::mem::size_of::<EncryptedDataChunkHeader>() {
bail!("encrypted chunk too small ({} bytes).", data.len());
}
let chunk = DataChunk { digest, raw_data: data };
Ok(chunk)
} else if magic == COMPRESSED_CHUNK_MAGIC_1_0 || magic == UNCOMPRESSED_CHUNK_MAGIC_1_0 {
let chunk = DataChunk { digest, raw_data: data };
Ok(chunk)
} else {
bail!("unable to parse raw chunk - wrong magic");
}
}
/// Verify digest and data length for unencrypted chunks.
///
/// To do that, we need to decompress data first. Please note that
/// this is noth possible for encrypted chunks.
pub fn verify_unencrypted(&self, expected_chunk_size: usize) -> Result<(), Error> {
let magic = self.magic();
let verify_raw_data = |data: &[u8]| {
if expected_chunk_size != data.len() {
bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
}
let digest = openssl::sha::sha256(data);
if digest != self.digest {
bail!("detected chunk with wrong digest.");
}
Ok(())
};
if magic == &COMPRESSED_CHUNK_MAGIC_1_0 {
let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
verify_raw_data(&data)?;
} else if magic == &UNCOMPRESSED_CHUNK_MAGIC_1_0 {
verify_raw_data(&self.raw_data[12..])?;
}
Ok(())
}
}
/// Builder for DataChunk
///
/// Main purpose is to centralize digest computation. Digest
/// computation differ for encryped chunk, and this interface ensures that
/// we always compute the correct one.
pub struct DataChunkBuilder<'a, 'b> {
config: Option<&'b CryptConfig>,
orig_data: &'a [u8],
digest_computed: bool,
digest: [u8; 32],
compress: bool,
}
impl <'a, 'b> DataChunkBuilder<'a, 'b> {
/// Create a new builder instance.
pub fn new(orig_data: &'a [u8]) -> Self {
Self {
orig_data,
config: None,
digest_computed: false,
digest: [0u8; 32],
compress: true,
}
}
/// Set compression flag.
///
/// If true, chunk data is compressed using zstd (level 1).
pub fn compress(mut self, value: bool) -> Self {
self.compress = value;
self
}
/// Set encryption Configuration
///
/// If set, chunks are encrypted.
pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
if self.digest_computed {
panic!("unable to set crypt_config after compute_digest().");
}
self.config = Some(value);
self
}
fn compute_digest(&mut self) {
if !self.digest_computed {
if let Some(config) = self.config {
self.digest = config.compute_digest(self.orig_data);
} else {
self.digest = openssl::sha::sha256(self.orig_data);
}
self.digest_computed = true;
}
}
/// Returns the chunk Digest
///
/// Note: For encrypted chunks, this needs to be called after
/// ``crypt_config``.
pub fn digest(&mut self) -> &[u8; 32] {
if !self.digest_computed {
self.compute_digest();
}
&self.digest
}
/// Consume self and build the ``DataChunk``.
pub fn build(mut self) -> Result<DataChunk, Error> {
if !self.digest_computed {
self.compute_digest();
}
let chunk = DataChunk::encode(
self.orig_data,
self.config,
self.digest,
self.compress,
)?;
Ok(chunk)
}
}
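For reference, the removed API maps onto the DataBlob equivalents as follows (a summary of this commit, not code from it):

// Old (DataChunk)                       New (DataBlob)
// DataChunk::from_raw(data, digest)  -> DataBlob::from_raw(data)
// DataChunk::load(reader, digest)    -> DataBlob::load(reader)
// chunk.digest()                     -> digest carried separately by the caller
// chunk.verify_unencrypted(size)     -> blob.verify_unencrypted(size, &digest)
// builder.build() -> DataChunk       -> builder.build() -> (DataBlob, [u8; 32])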

View File

@@ -11,7 +11,7 @@ use super::chunk_store::{ChunkStore, GarbageCollectionStatus};
 use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use super::fixed_index::{FixedIndexReader, FixedIndexWriter};
 use super::index::*;
-use super::DataChunk;
+use super::DataBlob;
 use crate::config::datastore;
 use crate::server::WorkerTask;
 use crate::tools;
@@ -290,8 +290,9 @@ impl DataStore {
     pub fn insert_chunk(
         &self,
-        chunk: &DataChunk,
+        chunk: &DataBlob,
+        digest: &[u8; 32],
     ) -> Result<(bool, u64), Error> {
-        self.chunk_store.insert_chunk(chunk)
+        self.chunk_store.insert_chunk(chunk, digest)
     }
 }

View File

@@ -16,7 +16,7 @@ use super::IndexFile;
 use super::chunk_stat::ChunkStat;
 use super::chunk_store::ChunkStore;
 use super::read_chunk::ReadChunk;
-use super::{DataChunk, DataChunkBuilder};
+use super::{DataBlob, DataChunkBuilder};
 use crate::tools;
 /// Header format definition for dynamic index files (`.didx`)
@@ -465,8 +465,8 @@ impl DynamicIndexWriter {
     }
     // fixme: use add_chunk instead?
-    pub fn insert_chunk(&self, chunk: &DataChunk) -> Result<(bool, u64), Error> {
-        self.store.insert_chunk(chunk)
+    pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
+        self.store.insert_chunk(chunk, digest)
     }
     pub fn close(&mut self) -> Result<[u8; 32], Error> {
@@ -581,13 +581,11 @@ impl DynamicChunkWriter {
         self.last_chunk = self.chunk_offset;
-        let chunk = DataChunkBuilder::new(&self.chunk_buffer)
+        let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
             .compress(true)
             .build()?;
-        let digest = chunk.digest();
-        match self.index.insert_chunk(&chunk) {
+        match self.index.insert_chunk(&chunk, &digest) {
             Ok((is_duplicate, compressed_size)) => {
                 self.stat.compressed_size += compressed_size;
@@ -598,7 +596,7 @@ impl DynamicChunkWriter {
         }
         println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
-            (compressed_size*100)/(chunk_size as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
+            (compressed_size*100)/(chunk_size as u64), is_duplicate, proxmox::tools::digest_to_hex(&digest));
         self.index.add_chunk(self.chunk_offset as u64, &digest)?;
         self.chunk_buffer.truncate(0);
         return Ok(());
@@ -620,7 +618,7 @@ impl Write for DynamicChunkWriter {
         let pos = chunker.scan(data);
         if pos > 0 {
-            self.chunk_buffer.extend(&data[0..pos]);
+            self.chunk_buffer.extend_from_slice(&data[0..pos]);
             self.chunk_offset += pos;
             if let Err(err) = self.write_chunk_buffer() {
@@ -630,7 +628,7 @@ impl Write for DynamicChunkWriter {
         } else {
             self.chunk_offset += data.len();
-            self.chunk_buffer.extend(data);
+            self.chunk_buffer.extend_from_slice(data);
             Ok(data.len())
         }
     }
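The extend to extend_from_slice switch is a small idiomatic cleanup: extend_from_slice appends a byte slice directly, while extend accepts any iterator. A minimal standalone illustration (not from the commit):

let mut buf: Vec<u8> = Vec::new();
let data = [1u8, 2, 3, 4];
buf.extend(&data[0..2]);           // iterator-based, works for any IntoIterator
buf.extend_from_slice(&data[2..]); // slice-specific append, idiomatic for &[u8]
assert_eq!(buf, data);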

View File

@@ -2,18 +2,6 @@ use endian_trait::Endian;
 // WARNING: PLEASE DO NOT MODIFY THOSE MAGIC VALUES
-// openssl::sha::sha256(b"Proxmox Backup uncompressed chunk v1.0")[0..8]
-pub const UNCOMPRESSED_CHUNK_MAGIC_1_0: [u8; 8] = [79, 127, 200, 4, 121, 74, 135, 239];
-// openssl::sha::sha256(b"Proxmox Backup encrypted chunk v1.0")[0..8]
-pub const ENCRYPTED_CHUNK_MAGIC_1_0: [u8; 8] = [8, 54, 114, 153, 70, 156, 26, 151];
-// openssl::sha::sha256(b"Proxmox Backup zstd compressed chunk v1.0")[0..8]
-pub const COMPRESSED_CHUNK_MAGIC_1_0: [u8; 8] = [191, 237, 46, 195, 108, 17, 228, 235];
-// openssl::sha::sha256(b"Proxmox Backup zstd compressed encrypted chunk v1.0")[0..8]
-pub const ENCR_COMPR_CHUNK_MAGIC_1_0: [u8; 8] = [9, 40, 53, 200, 37, 150, 90, 196];
 // openssl::sha::sha256(b"Proxmox Backup uncompressed blob v1.0")[0..8]
 pub const UNCOMPRESSED_BLOB_MAGIC_1_0: [u8; 8] = [66, 171, 56, 7, 190, 131, 112, 161];
@@ -87,49 +75,11 @@ pub struct EncryptedDataBlobHeader {
     pub tag: [u8; 16],
 }
-/// Data chunk binary storage format
-///
-/// The format start with a 8 byte magic number to identify the type,
-/// followed by a 4 byte CRC. This CRC is used on the server side to
-/// detect file corruption (computed when upload data), so there is
-/// usually no need to compute it on the client side.
-///
-/// Unencrypted blobs simply contain the CRC, followed by the
-/// (compressed) data.
-///
-/// (MAGIC || CRC32 || Data)
-#[derive(Endian)]
-#[repr(C,packed)]
-pub struct DataChunkHeader {
-    pub magic: [u8; 8],
-    pub crc: [u8; 4],
-}
-/// Encrypted Data chunk binary storage format
-///
-/// The ``DataChunkHeader`` for encrypted chunks additionally contains
-/// a 16 byte IV, followed by a 16 byte Authenticated Encyrypten (AE)
-/// tag, followed by the encrypted data:
-///
-/// (MAGIC || CRC32 || IV || TAG || EncryptedData).
-#[derive(Endian)]
-#[repr(C,packed)]
-pub struct EncryptedDataChunkHeader {
-    pub head: DataChunkHeader,
-    pub iv: [u8; 16],
-    pub tag: [u8; 16],
-}
 /// Header size for different file types
 ///
 /// Panics on unknown magic numbers.
 pub fn header_size(magic: &[u8; 8]) -> usize {
     match magic {
-        &UNCOMPRESSED_CHUNK_MAGIC_1_0 => std::mem::size_of::<DataChunkHeader>(),
-        &COMPRESSED_CHUNK_MAGIC_1_0 => std::mem::size_of::<DataChunkHeader>(),
-        &ENCRYPTED_CHUNK_MAGIC_1_0 => std::mem::size_of::<EncryptedDataChunkHeader>(),
-        &ENCR_COMPR_CHUNK_MAGIC_1_0 => std::mem::size_of::<EncryptedDataChunkHeader>(),
         &UNCOMPRESSED_BLOB_MAGIC_1_0 => std::mem::size_of::<DataBlobHeader>(),
         &COMPRESSED_BLOB_MAGIC_1_0 => std::mem::size_of::<DataBlobHeader>(),
         &ENCRYPTED_BLOB_MAGIC_1_0 => std::mem::size_of::<EncryptedDataBlobHeader>(),
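With the chunk magics gone, header_size() only distinguishes the three blob types. A hedged layout check, assuming DataBlobHeader mirrors the removed DataChunkHeader (packed magic: [u8; 8] plus crc: [u8; 4]) and EncryptedDataBlobHeader adds the 16-byte IV and 16-byte tag shown above:

// Sizes follow from the repr(C, packed) layouts (assumed as described above).
assert_eq!(std::mem::size_of::<DataBlobHeader>(), 8 + 4);                 // magic + crc
assert_eq!(std::mem::size_of::<EncryptedDataBlobHeader>(), 12 + 16 + 16); // + iv + tag
assert_eq!(header_size(&UNCOMPRESSED_BLOB_MAGIC_1_0), 12);
assert_eq!(header_size(&ENCRYPTED_BLOB_MAGIC_1_0), 44);

These sizes are also what the upload API schema above relies on for its encoded-size bounds: header plus at least one payload byte as the minimum, 16 MiB plus the largest (encrypted) header as the maximum.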

View File

@@ -394,12 +394,13 @@ impl FixedIndexWriter {
     let idx = self.check_chunk_alignment(offset, chunk_len)?;
-    let (is_duplicate, compressed_size) = self.store.insert_chunk(&chunk_info.chunk)?;
+    let (is_duplicate, compressed_size) =
+        self.store.insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
     stat.chunk_count += 1;
     stat.compressed_size += compressed_size;
-    let digest = chunk_info.chunk.digest();
+    let digest = &chunk_info.digest;
     println!("ADD CHUNK {} {} {}% {} {}", idx, chunk_len,
         (compressed_size*100)/(chunk_len as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
 use super::datastore::*;
 use super::crypt_config::*;
-use super::data_chunk::*;
+use super::data_blob::*;
 /// The ReadChunk trait allows reading backup data chunks (local or remote)
 pub trait ReadChunk {
@@ -32,13 +32,12 @@ impl ReadChunk for LocalChunkReader {
         let (path, _) = self.store.chunk_path(digest);
         let raw_data = proxmox::tools::fs::file_get_contents(&path)?;
-        let chunk = DataChunk::from_raw(raw_data, *digest)?;
+        let chunk = DataBlob::from_raw(raw_data)?;
         chunk.verify_crc()?;
-        let raw_data = match self.crypt_config {
-            Some(ref crypt_config) => chunk.decode(Some(crypt_config))?,
-            None => chunk.decode(None)?,
-        };
+        let raw_data = chunk.decode(self.crypt_config.clone())?;
+        // fixme: verify digest?
         Ok(raw_data)
     }
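Both chunk readers now carry a fixme about digest verification. A sketch of where that check could live, assuming the trait's single method is fn read_chunk(&mut self, digest: &[u8; 32]) -> Result<Vec<u8>, Error> as the impl above suggests (a hypothetical helper, not part of this commit):

use failure::{bail, Error};

// Caller-side check, generic over ReadChunk (local or remote). Only valid
// for unencrypted chunks, where the digest is the plain SHA-256 of the data.
fn read_verified<R: ReadChunk>(reader: &mut R, digest: &[u8; 32]) -> Result<Vec<u8>, Error> {
    let data = reader.read_chunk(digest)?;
    if &openssl::sha::sha256(&data) != digest {
        bail!("chunk digest mismatch");
    }
    Ok(data)
}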

View File

@@ -678,7 +678,7 @@ impl BackupClient {
         if sign_only {
             DataBlob::create_signed(&data, crypt_config, compress)?
         } else {
-            DataBlob::encode(&data, Some(crypt_config.clone()), compress)?
+            DataBlob::encode(&data, Some(crypt_config), compress)?
         }
     } else {
         DataBlob::encode(&data, None, compress)?
@@ -936,7 +936,7 @@ impl BackupClient {
         .compress(true);
     if let Some(ref crypt_config) = crypt_config {
-        chunk_builder = chunk_builder.crypt_config(crypt_config);
+        chunk_builder = chunk_builder.crypt_config(crypt_config.clone());
     }
     let mut known_chunks = known_chunks.lock().unwrap();
@@ -957,8 +957,9 @@ impl BackupClient {
     known_chunks.insert(*digest);
     future::ready(chunk_builder
         .build()
-        .map(move |chunk| MergedChunkInfo::New(ChunkInfo {
+        .map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
             chunk,
+            digest,
             chunk_len: chunk_len as u64,
             offset,
         }))
@@ -970,7 +971,7 @@ impl BackupClient {
     if let MergedChunkInfo::New(chunk_info) = merged_chunk_info {
         let offset = chunk_info.offset;
-        let digest = *chunk_info.chunk.digest();
+        let digest = chunk_info.digest;
         let digest_str = digest_to_hex(&digest);
         println!("upload new chunk {} ({} bytes, offset {})", digest_str,

View File

@@ -4,7 +4,7 @@ use std::sync::Arc;
 use failure::*;
 use super::BackupReader;
-use crate::backup::{ReadChunk, DataChunk, CryptConfig};
+use crate::backup::{ReadChunk, DataBlob, CryptConfig};
 /// Read chunks from remote host using ``BackupReader``
 pub struct RemoteChunkReader {
@@ -43,13 +43,12 @@ impl ReadChunk for RemoteChunkReader {
     futures::executor::block_on(self.client.download_chunk(&digest, &mut chunk_data))?;
-    let chunk = DataChunk::from_raw(chunk_data, *digest)?;
+    let chunk = DataBlob::from_raw(chunk_data)?;
     chunk.verify_crc()?;
-    let raw_data = match self.crypt_config {
-        Some(ref crypt_config) => chunk.decode(Some(crypt_config))?,
-        None => chunk.decode(None)?,
-    };
+    let raw_data = chunk.decode(self.crypt_config.clone())?;
+    // fixme: verify chunk digest
     if use_cache {
         self.cache.insert(*digest, raw_data.to_vec());