remove DataChunk file format - use DataBlob instead
This commit is contained in:
parent
c26d4b4573
commit
4ee8f53d07
@ -57,14 +57,14 @@ impl Future for UploadChunk {
|
||||
}
|
||||
|
||||
let (is_duplicate, compressed_size) = match proxmox::tools::try_block! {
|
||||
let mut chunk = DataChunk::from_raw(raw_data, this.digest)?;
|
||||
let mut chunk = DataBlob::from_raw(raw_data)?;
|
||||
|
||||
chunk.verify_unencrypted(this.size as usize)?;
|
||||
chunk.verify_unencrypted(this.size as usize, &this.digest)?;
|
||||
|
||||
// always compute CRC at server side
|
||||
chunk.set_crc(chunk.compute_crc());
|
||||
|
||||
this.store.insert_chunk(&chunk)
|
||||
this.store.insert_chunk(&chunk, &this.digest)
|
||||
} {
|
||||
Ok(res) => res,
|
||||
Err(err) => break err,
|
||||
@ -95,8 +95,8 @@ pub fn api_method_upload_fixed_chunk() -> ApiAsyncMethod {
|
||||
.maximum(1024*1024*16)
|
||||
)
|
||||
.required("encoded-size", IntegerSchema::new("Encoded chunk size.")
|
||||
.minimum((std::mem::size_of::<DataChunkHeader>() as isize)+1)
|
||||
.maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataChunkHeader>() as isize))
|
||||
.minimum((std::mem::size_of::<DataBlobHeader>() as isize)+1)
|
||||
.maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataBlobHeader>() as isize))
|
||||
)
|
||||
)
|
||||
}
|
||||
@ -151,8 +151,8 @@ pub fn api_method_upload_dynamic_chunk() -> ApiAsyncMethod {
|
||||
.maximum(1024*1024*16)
|
||||
)
|
||||
.required("encoded-size", IntegerSchema::new("Encoded chunk size.")
|
||||
.minimum((std::mem::size_of::<DataChunkHeader>() as isize) +1)
|
||||
.maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataChunkHeader>() as isize))
|
||||
.minimum((std::mem::size_of::<DataBlobHeader>() as isize) +1)
|
||||
.maximum(1024*1024*16+(std::mem::size_of::<EncryptedDataBlobHeader>() as isize))
|
||||
)
|
||||
)
|
||||
}
|
||||
|
@ -140,9 +140,6 @@ pub use checksum_writer::*;
|
||||
mod chunker;
|
||||
pub use chunker::*;
|
||||
|
||||
mod data_chunk;
|
||||
pub use data_chunk::*;
|
||||
|
||||
mod data_blob;
|
||||
pub use data_blob::*;
|
||||
|
||||
|
@ -7,7 +7,7 @@ use std::os::unix::io::AsRawFd;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::tools;
|
||||
use super::DataChunk;
|
||||
use super::DataBlob;
|
||||
use crate::server::WorkerTask;
|
||||
|
||||
#[derive(Clone, Serialize)]
|
||||
@ -173,7 +173,7 @@ impl ChunkStore {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn read_chunk(&self, digest: &[u8; 32]) -> Result<DataChunk, Error> {
|
||||
pub fn read_chunk(&self, digest: &[u8; 32]) -> Result<DataBlob, Error> {
|
||||
|
||||
let (chunk_path, digest_str) = self.chunk_path(digest);
|
||||
let mut file = std::fs::File::open(&chunk_path)
|
||||
@ -186,7 +186,7 @@ impl ChunkStore {
|
||||
)
|
||||
})?;
|
||||
|
||||
DataChunk::load(&mut file, *digest)
|
||||
DataBlob::load(&mut file)
|
||||
}
|
||||
|
||||
pub fn get_chunk_iterator(
|
||||
@ -357,11 +357,10 @@ impl ChunkStore {
|
||||
|
||||
pub fn insert_chunk(
|
||||
&self,
|
||||
chunk: &DataChunk,
|
||||
chunk: &DataBlob,
|
||||
digest: &[u8; 32],
|
||||
) -> Result<(bool, u64), Error> {
|
||||
|
||||
let digest = chunk.digest();
|
||||
|
||||
//println!("DIGEST {}", proxmox::tools::digest_to_hex(digest));
|
||||
|
||||
let (chunk_path, digest_str) = self.chunk_path(digest);
|
||||
@ -444,12 +443,12 @@ fn test_chunk_store1() {
|
||||
|
||||
let chunk_store = ChunkStore::create("test", &path).unwrap();
|
||||
|
||||
let chunk = super::DataChunkBuilder::new(&[0u8, 1u8]).build().unwrap();
|
||||
let (chunk, digest) = super::DataChunkBuilder::new(&[0u8, 1u8]).build().unwrap();
|
||||
|
||||
let (exists, _) = chunk_store.insert_chunk(&chunk).unwrap();
|
||||
let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
|
||||
assert!(!exists);
|
||||
|
||||
let (exists, _) = chunk_store.insert_chunk(&chunk).unwrap();
|
||||
let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
|
||||
assert!(exists);
|
||||
|
||||
|
||||
|
@ -6,13 +6,22 @@ use proxmox::tools::io::{ReadExt, WriteExt};
|
||||
|
||||
const MAX_BLOB_SIZE: usize = 128*1024*1024;
|
||||
|
||||
use super::*;
|
||||
use super::file_formats::*;
|
||||
use super::CryptConfig;
|
||||
|
||||
/// Encoded data chunk with digest and positional information
|
||||
pub struct ChunkInfo {
|
||||
pub chunk: DataBlob,
|
||||
pub digest: [u8; 32],
|
||||
pub chunk_len: u64,
|
||||
pub offset: u64,
|
||||
}
|
||||
|
||||
/// Data blob binary storage format
|
||||
///
|
||||
/// Data blobs store arbitrary binary data (< 128MB), and can be
|
||||
/// compressed and encrypted. A simple binary format is used to store
|
||||
/// them on disk or transfer them over the network.
|
||||
/// compressed and encrypted (or just signed). A simple binary format
|
||||
/// is used to store them on disk or transfer them over the network.
|
||||
///
|
||||
/// Please use index files to store large data files (".fidx" or
|
||||
/// ".didx").
|
||||
@ -255,6 +264,15 @@ impl DataBlob {
|
||||
return Ok(blob);
|
||||
}
|
||||
|
||||
/// Load blob from ``reader``
|
||||
pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
|
||||
|
||||
let mut data = Vec::with_capacity(1024*1024);
|
||||
reader.read_to_end(&mut data)?;
|
||||
|
||||
Self::from_raw(data)
|
||||
}
|
||||
|
||||
/// Create Instance from raw data
|
||||
pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
|
||||
|
||||
@ -290,4 +308,122 @@ impl DataBlob {
|
||||
bail!("unable to parse raw blob - wrong magic");
|
||||
}
|
||||
}
|
||||
|
||||
/// Verify digest and data length for unencrypted chunks.
|
||||
///
|
||||
/// To do that, we need to decompress data first. Please note that
|
||||
/// this is not possible for encrypted chunks.
|
||||
pub fn verify_unencrypted(
|
||||
&self,
|
||||
expected_chunk_size: usize,
|
||||
expected_digest: &[u8; 32],
|
||||
) -> Result<(), Error> {
|
||||
|
||||
let magic = self.magic();
|
||||
|
||||
let verify_raw_data = |data: &[u8]| {
|
||||
if expected_chunk_size != data.len() {
|
||||
bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
|
||||
}
|
||||
let digest = openssl::sha::sha256(data);
|
||||
if &digest != expected_digest {
|
||||
bail!("detected chunk with wrong digest.");
|
||||
}
|
||||
Ok(())
|
||||
};
|
||||
|
||||
if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
|
||||
let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
|
||||
verify_raw_data(&data)?;
|
||||
} else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
|
||||
verify_raw_data(&self.raw_data[12..])?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for chunk DataBlobs
|
||||
///
|
||||
/// Main purpose is to centralize digest computation. Digest
|
||||
/// computation differs for encrypted chunks, and this interface ensures that
|
||||
/// we always compute the correct one.
|
||||
pub struct DataChunkBuilder<'a> {
|
||||
config: Option<Arc<CryptConfig>>,
|
||||
orig_data: &'a [u8],
|
||||
digest_computed: bool,
|
||||
digest: [u8; 32],
|
||||
compress: bool,
|
||||
}
|
||||
|
||||
impl <'a> DataChunkBuilder<'a> {
|
||||
|
||||
/// Create a new builder instance.
|
||||
pub fn new(orig_data: &'a [u8]) -> Self {
|
||||
Self {
|
||||
orig_data,
|
||||
config: None,
|
||||
digest_computed: false,
|
||||
digest: [0u8; 32],
|
||||
compress: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set compression flag.
|
||||
///
|
||||
/// If true, chunk data is compressed using zstd (level 1).
|
||||
pub fn compress(mut self, value: bool) -> Self {
|
||||
self.compress = value;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set encryption Configuration
|
||||
///
|
||||
/// If set, chunks are encrypted.
|
||||
pub fn crypt_config(mut self, value: Arc<CryptConfig>) -> Self {
|
||||
if self.digest_computed {
|
||||
panic!("unable to set crypt_config after compute_digest().");
|
||||
}
|
||||
self.config = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
fn compute_digest(&mut self) {
|
||||
if !self.digest_computed {
|
||||
if let Some(ref config) = self.config {
|
||||
self.digest = config.compute_digest(self.orig_data);
|
||||
} else {
|
||||
self.digest = openssl::sha::sha256(self.orig_data);
|
||||
}
|
||||
self.digest_computed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the chunk Digest
|
||||
///
|
||||
/// Note: For encrypted chunks, this needs to be called after
|
||||
/// ``crypt_config``.
|
||||
pub fn digest(&mut self) -> &[u8; 32] {
|
||||
if !self.digest_computed {
|
||||
self.compute_digest();
|
||||
}
|
||||
&self.digest
|
||||
}
|
||||
|
||||
/// Consume self and build the ``DataBlob``.
|
||||
///
|
||||
/// Returns the blob and the computed digest.
|
||||
pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
|
||||
if !self.digest_computed {
|
||||
self.compute_digest();
|
||||
}
|
||||
|
||||
let chunk = DataBlob::encode(
|
||||
self.orig_data,
|
||||
self.config,
|
||||
self.compress,
|
||||
)?;
|
||||
|
||||
Ok((chunk, self.digest))
|
||||
}
|
||||
}
|
||||
|
@ -1,350 +0,0 @@
|
||||
use std::convert::TryInto;
|
||||
|
||||
use failure::*;
|
||||
|
||||
use proxmox::tools::io::{ReadExt, WriteExt};
|
||||
|
||||
// FIXME: Move all the magic numbers into a module so we can import super::magic::* instead of
|
||||
// the everything of everything everywhere.
|
||||
use super::*;
|
||||
|
||||
/// Data chunk with positional information
|
||||
pub struct ChunkInfo {
|
||||
pub chunk: DataChunk,
|
||||
pub chunk_len: u64,
|
||||
pub offset: u64,
|
||||
}
|
||||
|
||||
/// Data chunk binary storage format
|
||||
///
|
||||
/// Data chunks are identified by a unique digest, and can be
|
||||
/// compressed and encrypted. A simple binary format is used to store
|
||||
/// them on disk or transfer them over the network.
|
||||
///
|
||||
/// Please use the ``DataChunkBuilder`` to create new instances.
|
||||
pub struct DataChunk {
|
||||
digest: [u8; 32],
|
||||
raw_data: Vec<u8>, // tagged, compressed, encryped data
|
||||
}
|
||||
|
||||
impl DataChunk {
|
||||
|
||||
/// accessor to raw_data field
|
||||
pub fn raw_data(&self) -> &[u8] {
|
||||
&self.raw_data
|
||||
}
|
||||
|
||||
/// return as raw_data
|
||||
pub fn into_raw(self) -> Vec<u8> {
|
||||
self.raw_data
|
||||
}
|
||||
|
||||
/// accessor to chunk digest field
|
||||
pub fn digest(&self) -> &[u8; 32] {
|
||||
&self.digest
|
||||
}
|
||||
|
||||
/// accessor to chunk type (magic number)
|
||||
pub fn magic(&self) -> &[u8; 8] {
|
||||
self.raw_data[0..8].try_into().unwrap()
|
||||
}
|
||||
|
||||
/// accessor to crc32 checksum
|
||||
pub fn crc(&self) -> u32 {
|
||||
let crc_o = proxmox::tools::offsetof!(DataChunkHeader, crc);
|
||||
u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
|
||||
}
|
||||
|
||||
// set the CRC checksum field
|
||||
pub fn set_crc(&mut self, crc: u32) {
|
||||
let crc_o = proxmox::tools::offsetof!(DataChunkHeader, crc);
|
||||
self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
|
||||
}
|
||||
|
||||
/// compute the CRC32 checksum
|
||||
pub fn compute_crc(&self) -> u32 {
|
||||
let mut hasher = crc32fast::Hasher::new();
|
||||
let start = std::mem::size_of::<DataChunkHeader>(); // start after HEAD
|
||||
hasher.update(&self.raw_data[start..]);
|
||||
hasher.finalize()
|
||||
}
|
||||
|
||||
/// verify the CRC32 checksum
|
||||
pub fn verify_crc(&self) -> Result<(), Error> {
|
||||
let expected_crc = self.compute_crc();
|
||||
if expected_crc != self.crc() {
|
||||
bail!("Data chunk has wrong CRC checksum.");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn encode(
|
||||
data: &[u8],
|
||||
config: Option<&CryptConfig>,
|
||||
digest: [u8;32],
|
||||
compress: bool,
|
||||
) -> Result<Self, Error> {
|
||||
|
||||
if let Some(config) = config {
|
||||
|
||||
let compr_data;
|
||||
let (_compress, data, magic) = if compress {
|
||||
compr_data = zstd::block::compress(data, 1)?;
|
||||
// Note: We only use compression if result is shorter
|
||||
if compr_data.len() < data.len() {
|
||||
(true, &compr_data[..], ENCR_COMPR_CHUNK_MAGIC_1_0)
|
||||
} else {
|
||||
(false, data, ENCRYPTED_CHUNK_MAGIC_1_0)
|
||||
}
|
||||
} else {
|
||||
(false, data, ENCRYPTED_CHUNK_MAGIC_1_0)
|
||||
};
|
||||
|
||||
let header_len = std::mem::size_of::<EncryptedDataChunkHeader>();
|
||||
let mut raw_data = Vec::with_capacity(data.len() + header_len);
|
||||
|
||||
let dummy_head = EncryptedDataChunkHeader {
|
||||
head: DataChunkHeader { magic: [0u8; 8], crc: [0; 4] },
|
||||
iv: [0u8; 16],
|
||||
tag: [0u8; 16],
|
||||
};
|
||||
unsafe {
|
||||
raw_data.write_le_value(dummy_head)?;
|
||||
}
|
||||
|
||||
let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
|
||||
|
||||
let head = EncryptedDataChunkHeader {
|
||||
head: DataChunkHeader { magic, crc: [0; 4] }, iv, tag,
|
||||
};
|
||||
|
||||
unsafe {
|
||||
(&mut raw_data[0..header_len]).write_le_value(head)?;
|
||||
}
|
||||
|
||||
return Ok(DataChunk { digest, raw_data });
|
||||
} else {
|
||||
|
||||
let max_data_len = data.len() + std::mem::size_of::<DataChunkHeader>();
|
||||
if compress {
|
||||
let mut comp_data = Vec::with_capacity(max_data_len);
|
||||
|
||||
let head = DataChunkHeader {
|
||||
magic: COMPRESSED_CHUNK_MAGIC_1_0,
|
||||
crc: [0; 4],
|
||||
};
|
||||
unsafe {
|
||||
comp_data.write_le_value(head)?;
|
||||
}
|
||||
|
||||
zstd::stream::copy_encode(data, &mut comp_data, 1)?;
|
||||
|
||||
if comp_data.len() < max_data_len {
|
||||
let chunk = DataChunk { digest, raw_data: comp_data };
|
||||
return Ok(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
let mut raw_data = Vec::with_capacity(max_data_len);
|
||||
|
||||
let head = DataChunkHeader {
|
||||
magic: UNCOMPRESSED_CHUNK_MAGIC_1_0,
|
||||
crc: [0; 4],
|
||||
};
|
||||
unsafe {
|
||||
raw_data.write_le_value(head)?;
|
||||
}
|
||||
raw_data.extend_from_slice(data);
|
||||
|
||||
let chunk = DataChunk { digest, raw_data };
|
||||
return Ok(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode chunk data
|
||||
pub fn decode(self, config: Option<&CryptConfig>) -> Result<Vec<u8>, Error> {
|
||||
|
||||
let magic = self.magic();
|
||||
|
||||
if magic == &UNCOMPRESSED_CHUNK_MAGIC_1_0 {
|
||||
let data_start = std::mem::size_of::<DataChunkHeader>();
|
||||
return Ok(self.raw_data[data_start..].to_vec());
|
||||
} else if magic == &COMPRESSED_CHUNK_MAGIC_1_0 {
|
||||
let data_start = std::mem::size_of::<DataChunkHeader>();
|
||||
let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
|
||||
return Ok(data);
|
||||
} else if magic == &ENCR_COMPR_CHUNK_MAGIC_1_0 || magic == &ENCRYPTED_CHUNK_MAGIC_1_0 {
|
||||
let header_len = std::mem::size_of::<EncryptedDataChunkHeader>();
|
||||
let head = unsafe {
|
||||
(&self.raw_data[..header_len]).read_le_value::<EncryptedDataChunkHeader>()?
|
||||
};
|
||||
|
||||
if let Some(config) = config {
|
||||
let data = if magic == &ENCR_COMPR_CHUNK_MAGIC_1_0 {
|
||||
config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
|
||||
} else {
|
||||
config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
|
||||
};
|
||||
return Ok(data);
|
||||
} else {
|
||||
bail!("unable to decrypt chunk - missing CryptConfig");
|
||||
}
|
||||
} else {
|
||||
bail!("Invalid chunk magic number.");
|
||||
}
|
||||
}
|
||||
|
||||
/// Load chunk data from ``reader``
|
||||
///
|
||||
/// Please note that it is impossible to compute the digest for
|
||||
/// encrypted chunks, so we need to trust and use the provided
|
||||
/// ``digest``.
|
||||
pub fn load(reader: &mut dyn std::io::Read, digest: [u8; 32]) -> Result<Self, Error> {
|
||||
|
||||
let mut data = Vec::with_capacity(1024*1024);
|
||||
reader.read_to_end(&mut data)?;
|
||||
|
||||
Self::from_raw(data, digest)
|
||||
}
|
||||
|
||||
/// Create Instance from raw data
|
||||
pub fn from_raw(data: Vec<u8>, digest: [u8;32]) -> Result<Self, Error> {
|
||||
|
||||
if data.len() < std::mem::size_of::<DataChunkHeader>() {
|
||||
bail!("chunk too small ({} bytes).", data.len());
|
||||
}
|
||||
|
||||
let magic = &data[0..8];
|
||||
|
||||
if magic == ENCR_COMPR_CHUNK_MAGIC_1_0 || magic == ENCRYPTED_CHUNK_MAGIC_1_0 {
|
||||
|
||||
if data.len() < std::mem::size_of::<EncryptedDataChunkHeader>() {
|
||||
bail!("encrypted chunk too small ({} bytes).", data.len());
|
||||
}
|
||||
|
||||
let chunk = DataChunk { digest, raw_data: data };
|
||||
|
||||
Ok(chunk)
|
||||
} else if magic == COMPRESSED_CHUNK_MAGIC_1_0 || magic == UNCOMPRESSED_CHUNK_MAGIC_1_0 {
|
||||
|
||||
let chunk = DataChunk { digest, raw_data: data };
|
||||
|
||||
Ok(chunk)
|
||||
} else {
|
||||
bail!("unable to parse raw chunk - wrong magic");
|
||||
}
|
||||
}
|
||||
|
||||
/// Verify digest and data length for unencrypted chunks.
|
||||
///
|
||||
/// To do that, we need to decompress data first. Please note that
|
||||
/// this is not possible for encrypted chunks.
|
||||
pub fn verify_unencrypted(&self, expected_chunk_size: usize) -> Result<(), Error> {
|
||||
|
||||
let magic = self.magic();
|
||||
|
||||
let verify_raw_data = |data: &[u8]| {
|
||||
if expected_chunk_size != data.len() {
|
||||
bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
|
||||
}
|
||||
let digest = openssl::sha::sha256(data);
|
||||
if digest != self.digest {
|
||||
bail!("detected chunk with wrong digest.");
|
||||
}
|
||||
Ok(())
|
||||
};
|
||||
|
||||
if magic == &COMPRESSED_CHUNK_MAGIC_1_0 {
|
||||
let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
|
||||
verify_raw_data(&data)?;
|
||||
} else if magic == &UNCOMPRESSED_CHUNK_MAGIC_1_0 {
|
||||
verify_raw_data(&self.raw_data[12..])?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for DataChunk
|
||||
///
|
||||
/// Main purpose is to centralize digest computation. Digest
|
||||
/// computation differs for encrypted chunks, and this interface ensures that
|
||||
/// we always compute the correct one.
|
||||
pub struct DataChunkBuilder<'a, 'b> {
|
||||
config: Option<&'b CryptConfig>,
|
||||
orig_data: &'a [u8],
|
||||
digest_computed: bool,
|
||||
digest: [u8; 32],
|
||||
compress: bool,
|
||||
}
|
||||
|
||||
impl <'a, 'b> DataChunkBuilder<'a, 'b> {
|
||||
|
||||
/// Create a new builder instance.
|
||||
pub fn new(orig_data: &'a [u8]) -> Self {
|
||||
Self {
|
||||
orig_data,
|
||||
config: None,
|
||||
digest_computed: false,
|
||||
digest: [0u8; 32],
|
||||
compress: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set compression flag.
|
||||
///
|
||||
/// If true, chunk data is compressed using zstd (level 1).
|
||||
pub fn compress(mut self, value: bool) -> Self {
|
||||
self.compress = value;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set encryption Configuration
|
||||
///
|
||||
/// If set, chunks are encrypted.
|
||||
pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
|
||||
if self.digest_computed {
|
||||
panic!("unable to set crypt_config after compute_digest().");
|
||||
}
|
||||
self.config = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
fn compute_digest(&mut self) {
|
||||
if !self.digest_computed {
|
||||
if let Some(config) = self.config {
|
||||
self.digest = config.compute_digest(self.orig_data);
|
||||
} else {
|
||||
self.digest = openssl::sha::sha256(self.orig_data);
|
||||
}
|
||||
self.digest_computed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the chunk Digest
|
||||
///
|
||||
/// Note: For encrypted chunks, this needs to be called after
|
||||
/// ``crypt_config``.
|
||||
pub fn digest(&mut self) -> &[u8; 32] {
|
||||
if !self.digest_computed {
|
||||
self.compute_digest();
|
||||
}
|
||||
&self.digest
|
||||
}
|
||||
|
||||
/// Consume self and build the ``DataChunk``.
|
||||
pub fn build(mut self) -> Result<DataChunk, Error> {
|
||||
if !self.digest_computed {
|
||||
self.compute_digest();
|
||||
}
|
||||
|
||||
let chunk = DataChunk::encode(
|
||||
self.orig_data,
|
||||
self.config,
|
||||
self.digest,
|
||||
self.compress,
|
||||
)?;
|
||||
|
||||
Ok(chunk)
|
||||
}
|
||||
}
|
@ -11,7 +11,7 @@ use super::chunk_store::{ChunkStore, GarbageCollectionStatus};
|
||||
use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
|
||||
use super::fixed_index::{FixedIndexReader, FixedIndexWriter};
|
||||
use super::index::*;
|
||||
use super::DataChunk;
|
||||
use super::DataBlob;
|
||||
use crate::config::datastore;
|
||||
use crate::server::WorkerTask;
|
||||
use crate::tools;
|
||||
@ -290,8 +290,9 @@ impl DataStore {
|
||||
|
||||
pub fn insert_chunk(
|
||||
&self,
|
||||
chunk: &DataChunk,
|
||||
chunk: &DataBlob,
|
||||
digest: &[u8; 32],
|
||||
) -> Result<(bool, u64), Error> {
|
||||
self.chunk_store.insert_chunk(chunk)
|
||||
self.chunk_store.insert_chunk(chunk, digest)
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ use super::IndexFile;
|
||||
use super::chunk_stat::ChunkStat;
|
||||
use super::chunk_store::ChunkStore;
|
||||
use super::read_chunk::ReadChunk;
|
||||
use super::{DataChunk, DataChunkBuilder};
|
||||
use super::{DataBlob, DataChunkBuilder};
|
||||
use crate::tools;
|
||||
|
||||
/// Header format definition for dynamic index files (`.didx`)
|
||||
@ -465,8 +465,8 @@ impl DynamicIndexWriter {
|
||||
}
|
||||
|
||||
// fixme: use add_chunk instead?
|
||||
pub fn insert_chunk(&self, chunk: &DataChunk) -> Result<(bool, u64), Error> {
|
||||
self.store.insert_chunk(chunk)
|
||||
pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
|
||||
self.store.insert_chunk(chunk, digest)
|
||||
}
|
||||
|
||||
pub fn close(&mut self) -> Result<[u8; 32], Error> {
|
||||
@ -581,13 +581,11 @@ impl DynamicChunkWriter {
|
||||
|
||||
self.last_chunk = self.chunk_offset;
|
||||
|
||||
let chunk = DataChunkBuilder::new(&self.chunk_buffer)
|
||||
let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
|
||||
.compress(true)
|
||||
.build()?;
|
||||
|
||||
let digest = chunk.digest();
|
||||
|
||||
match self.index.insert_chunk(&chunk) {
|
||||
match self.index.insert_chunk(&chunk, &digest) {
|
||||
Ok((is_duplicate, compressed_size)) => {
|
||||
|
||||
self.stat.compressed_size += compressed_size;
|
||||
@ -598,7 +596,7 @@ impl DynamicChunkWriter {
|
||||
}
|
||||
|
||||
println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
|
||||
(compressed_size*100)/(chunk_size as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
|
||||
(compressed_size*100)/(chunk_size as u64), is_duplicate, proxmox::tools::digest_to_hex(&digest));
|
||||
self.index.add_chunk(self.chunk_offset as u64, &digest)?;
|
||||
self.chunk_buffer.truncate(0);
|
||||
return Ok(());
|
||||
@ -620,7 +618,7 @@ impl Write for DynamicChunkWriter {
|
||||
let pos = chunker.scan(data);
|
||||
|
||||
if pos > 0 {
|
||||
self.chunk_buffer.extend(&data[0..pos]);
|
||||
self.chunk_buffer.extend_from_slice(&data[0..pos]);
|
||||
self.chunk_offset += pos;
|
||||
|
||||
if let Err(err) = self.write_chunk_buffer() {
|
||||
@ -630,7 +628,7 @@ impl Write for DynamicChunkWriter {
|
||||
|
||||
} else {
|
||||
self.chunk_offset += data.len();
|
||||
self.chunk_buffer.extend(data);
|
||||
self.chunk_buffer.extend_from_slice(data);
|
||||
Ok(data.len())
|
||||
}
|
||||
}
|
||||
|
@ -2,18 +2,6 @@ use endian_trait::Endian;
|
||||
|
||||
// WARNING: PLEASE DO NOT MODIFY THOSE MAGIC VALUES
|
||||
|
||||
// openssl::sha::sha256(b"Proxmox Backup uncompressed chunk v1.0")[0..8]
|
||||
pub const UNCOMPRESSED_CHUNK_MAGIC_1_0: [u8; 8] = [79, 127, 200, 4, 121, 74, 135, 239];
|
||||
|
||||
// openssl::sha::sha256(b"Proxmox Backup encrypted chunk v1.0")[0..8]
|
||||
pub const ENCRYPTED_CHUNK_MAGIC_1_0: [u8; 8] = [8, 54, 114, 153, 70, 156, 26, 151];
|
||||
|
||||
// openssl::sha::sha256(b"Proxmox Backup zstd compressed chunk v1.0")[0..8]
|
||||
pub const COMPRESSED_CHUNK_MAGIC_1_0: [u8; 8] = [191, 237, 46, 195, 108, 17, 228, 235];
|
||||
|
||||
// openssl::sha::sha256(b"Proxmox Backup zstd compressed encrypted chunk v1.0")[0..8]
|
||||
pub const ENCR_COMPR_CHUNK_MAGIC_1_0: [u8; 8] = [9, 40, 53, 200, 37, 150, 90, 196];
|
||||
|
||||
// openssl::sha::sha256(b"Proxmox Backup uncompressed blob v1.0")[0..8]
|
||||
pub const UNCOMPRESSED_BLOB_MAGIC_1_0: [u8; 8] = [66, 171, 56, 7, 190, 131, 112, 161];
|
||||
|
||||
@ -87,49 +75,11 @@ pub struct EncryptedDataBlobHeader {
|
||||
pub tag: [u8; 16],
|
||||
}
|
||||
|
||||
/// Data chunk binary storage format
|
||||
///
|
||||
/// The format starts with an 8 byte magic number to identify the type,
|
||||
/// followed by a 4 byte CRC. This CRC is used on the server side to
|
||||
/// detect file corruption (computed when uploading data), so there is
|
||||
/// usually no need to compute it on the client side.
|
||||
///
|
||||
/// Unencrypted blobs simply contain the CRC, followed by the
|
||||
/// (compressed) data.
|
||||
///
|
||||
/// (MAGIC || CRC32 || Data)
|
||||
#[derive(Endian)]
|
||||
#[repr(C,packed)]
|
||||
pub struct DataChunkHeader {
|
||||
pub magic: [u8; 8],
|
||||
pub crc: [u8; 4],
|
||||
}
|
||||
|
||||
/// Encrypted Data chunk binary storage format
|
||||
///
|
||||
/// The ``DataChunkHeader`` for encrypted chunks additionally contains
|
||||
/// a 16 byte IV, followed by a 16 byte Authenticated Encryption (AE)
|
||||
/// tag, followed by the encrypted data:
|
||||
///
|
||||
/// (MAGIC || CRC32 || IV || TAG || EncryptedData).
|
||||
#[derive(Endian)]
|
||||
#[repr(C,packed)]
|
||||
pub struct EncryptedDataChunkHeader {
|
||||
pub head: DataChunkHeader,
|
||||
pub iv: [u8; 16],
|
||||
pub tag: [u8; 16],
|
||||
}
|
||||
|
||||
/// Header size for different file types
|
||||
///
|
||||
/// Panics on unknown magic numbers.
|
||||
pub fn header_size(magic: &[u8; 8]) -> usize {
|
||||
match magic {
|
||||
&UNCOMPRESSED_CHUNK_MAGIC_1_0 => std::mem::size_of::<DataChunkHeader>(),
|
||||
&COMPRESSED_CHUNK_MAGIC_1_0 => std::mem::size_of::<DataChunkHeader>(),
|
||||
&ENCRYPTED_CHUNK_MAGIC_1_0 => std::mem::size_of::<EncryptedDataChunkHeader>(),
|
||||
&ENCR_COMPR_CHUNK_MAGIC_1_0 => std::mem::size_of::<EncryptedDataChunkHeader>(),
|
||||
|
||||
&UNCOMPRESSED_BLOB_MAGIC_1_0 => std::mem::size_of::<DataBlobHeader>(),
|
||||
&COMPRESSED_BLOB_MAGIC_1_0 => std::mem::size_of::<DataBlobHeader>(),
|
||||
&ENCRYPTED_BLOB_MAGIC_1_0 => std::mem::size_of::<EncryptedDataBlobHeader>(),
|
||||
|
@ -394,12 +394,13 @@ impl FixedIndexWriter {
|
||||
|
||||
let idx = self.check_chunk_alignment(offset, chunk_len)?;
|
||||
|
||||
let (is_duplicate, compressed_size) = self.store.insert_chunk(&chunk_info.chunk)?;
|
||||
let (is_duplicate, compressed_size) =
|
||||
self.store.insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
|
||||
|
||||
stat.chunk_count += 1;
|
||||
stat.compressed_size += compressed_size;
|
||||
|
||||
let digest = chunk_info.chunk.digest();
|
||||
let digest = &chunk_info.digest;
|
||||
|
||||
println!("ADD CHUNK {} {} {}% {} {}", idx, chunk_len,
|
||||
(compressed_size*100)/(chunk_len as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
|
||||
|
@ -3,7 +3,7 @@ use std::sync::Arc;
|
||||
|
||||
use super::datastore::*;
|
||||
use super::crypt_config::*;
|
||||
use super::data_chunk::*;
|
||||
use super::data_blob::*;
|
||||
|
||||
/// The ReadChunk trait allows reading backup data chunks (local or remote)
|
||||
pub trait ReadChunk {
|
||||
@ -32,13 +32,12 @@ impl ReadChunk for LocalChunkReader {
|
||||
|
||||
let (path, _) = self.store.chunk_path(digest);
|
||||
let raw_data = proxmox::tools::fs::file_get_contents(&path)?;
|
||||
let chunk = DataChunk::from_raw(raw_data, *digest)?;
|
||||
let chunk = DataBlob::from_raw(raw_data)?;
|
||||
chunk.verify_crc()?;
|
||||
|
||||
let raw_data = match self.crypt_config {
|
||||
Some(ref crypt_config) => chunk.decode(Some(crypt_config))?,
|
||||
None => chunk.decode(None)?,
|
||||
};
|
||||
let raw_data = chunk.decode(self.crypt_config.clone())?;
|
||||
|
||||
// fixme: verify digest?
|
||||
|
||||
Ok(raw_data)
|
||||
}
|
||||
|
@ -678,7 +678,7 @@ impl BackupClient {
|
||||
if sign_only {
|
||||
DataBlob::create_signed(&data, crypt_config, compress)?
|
||||
} else {
|
||||
DataBlob::encode(&data, Some(crypt_config.clone()), compress)?
|
||||
DataBlob::encode(&data, Some(crypt_config), compress)?
|
||||
}
|
||||
} else {
|
||||
DataBlob::encode(&data, None, compress)?
|
||||
@ -936,7 +936,7 @@ impl BackupClient {
|
||||
.compress(true);
|
||||
|
||||
if let Some(ref crypt_config) = crypt_config {
|
||||
chunk_builder = chunk_builder.crypt_config(crypt_config);
|
||||
chunk_builder = chunk_builder.crypt_config(crypt_config.clone());
|
||||
}
|
||||
|
||||
let mut known_chunks = known_chunks.lock().unwrap();
|
||||
@ -957,8 +957,9 @@ impl BackupClient {
|
||||
known_chunks.insert(*digest);
|
||||
future::ready(chunk_builder
|
||||
.build()
|
||||
.map(move |chunk| MergedChunkInfo::New(ChunkInfo {
|
||||
.map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
|
||||
chunk,
|
||||
digest,
|
||||
chunk_len: chunk_len as u64,
|
||||
offset,
|
||||
}))
|
||||
@ -970,7 +971,7 @@ impl BackupClient {
|
||||
|
||||
if let MergedChunkInfo::New(chunk_info) = merged_chunk_info {
|
||||
let offset = chunk_info.offset;
|
||||
let digest = *chunk_info.chunk.digest();
|
||||
let digest = chunk_info.digest;
|
||||
let digest_str = digest_to_hex(&digest);
|
||||
|
||||
println!("upload new chunk {} ({} bytes, offset {})", digest_str,
|
||||
|
@ -4,7 +4,7 @@ use std::sync::Arc;
|
||||
use failure::*;
|
||||
|
||||
use super::BackupReader;
|
||||
use crate::backup::{ReadChunk, DataChunk, CryptConfig};
|
||||
use crate::backup::{ReadChunk, DataBlob, CryptConfig};
|
||||
|
||||
/// Read chunks from remote host using ``BackupReader``
|
||||
pub struct RemoteChunkReader {
|
||||
@ -43,13 +43,12 @@ impl ReadChunk for RemoteChunkReader {
|
||||
|
||||
futures::executor::block_on(self.client.download_chunk(&digest, &mut chunk_data))?;
|
||||
|
||||
let chunk = DataChunk::from_raw(chunk_data, *digest)?;
|
||||
let chunk = DataBlob::from_raw(chunk_data)?;
|
||||
chunk.verify_crc()?;
|
||||
|
||||
let raw_data = match self.crypt_config {
|
||||
Some(ref crypt_config) => chunk.decode(Some(crypt_config))?,
|
||||
None => chunk.decode(None)?,
|
||||
};
|
||||
let raw_data = chunk.decode(self.crypt_config.clone())?;
|
||||
|
||||
// fixme: verify chunk digest
|
||||
|
||||
if use_cache {
|
||||
self.cache.insert(*digest, raw_data.to_vec());
|
||||
|
Loading…
Reference in New Issue
Block a user