add pbs-datastore module

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Wolfgang Bumiller 2021-07-06 12:49:10 +02:00
parent 770a36e53a
commit f323e90602
23 changed files with 549 additions and 246 deletions

View File

@ -22,6 +22,7 @@ exclude = [ "build", "debian", "tests/catar_data/test_symlink/symlink1"]
[workspace]
members = [
"pbs-buildcfg",
"pbs-datastore",
"pbs-runtime",
"pbs-tools",
]
@ -94,6 +95,7 @@ proxmox-http = { version = "0.2.1", features = [ "client", "http-helpers", "webs
proxmox-openid = "0.6.0"
pbs-buildcfg = { path = "pbs-buildcfg" }
pbs-datastore = { path = "pbs-datastore" }
pbs-runtime = { path = "pbs-runtime" }
pbs-tools = { path = "pbs-tools" }

View File

@ -32,6 +32,7 @@ RESTORE_BIN := \
SUBCRATES := \
pbs-buildcfg \
pbs-datastore \
pbs-runtime \
pbs-tools

22
pbs-datastore/Cargo.toml Normal file
View File

@ -0,0 +1,22 @@
[package]
name = "pbs-datastore"
version = "0.1.0"
authors = ["Proxmox Support Team <support@proxmox.com>"]
edition = "2018"
description = "low level pbs data storage access"
[dependencies]
anyhow = "1.0"
crc32fast = "1"
endian_trait = { version = "0.6", features = [ "arrays" ] }
nix = "0.19.1"
openssl = "0.10"
serde = { version = "1.0", features = ["derive"] }
zstd = { version = "0.6", features = [ "bindgen" ] }
pathpatterns = "0.1.2"
pxar = { version = "0.10.1", features = [ "tokio-io" ] }
proxmox = { version = "0.11.5", default-features = false, features = [ "api-macro" ] }
pbs-tools = { path = "../pbs-tools" }

View File

@ -9,12 +9,27 @@ use anyhow::{bail, format_err, Error};
use pathpatterns::{MatchList, MatchType};
use proxmox::tools::io::ReadExt;
use crate::backup::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
use crate::pxar::catalog::BackupCatalogWriter;
use crate::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
/// Trait for writing file list catalogs.
///
/// A file list catalog simply stores a directory tree. Such catalogs may be used as index to do a
/// fast search for files.
///
/// Entry names are passed as `CStr`; every method reports failure through `anyhow::Error`.
pub trait BackupCatalogWriter {
/// Start the directory entry `name`.
///
/// NOTE(review): presumably entries added afterwards belong to this directory until the
/// matching `end_directory` call - confirm against the implementors.
fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
/// End a directory; takes no name, so presumably it closes the most recently started one
/// (confirm against the implementors).
fn end_directory(&mut self) -> Result<(), Error>;
/// Add a file entry `name` together with its `size` and `mtime`.
///
/// NOTE(review): units are not visible here (presumably bytes and a Unix timestamp) - confirm.
fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
/// Add a symlink entry `name`; only the name is passed, no link target.
fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a hardlink entry `name`; only the name is passed, no link target.
fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a block device entry `name`.
fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a character device entry `name`.
fn add_char_device(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a FIFO entry `name`.
fn add_fifo(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a socket entry `name`.
fn add_socket(&mut self, name: &CStr) -> Result<(), Error>;
}
#[repr(u8)]
#[derive(Copy,Clone,PartialEq)]
pub(crate) enum CatalogEntryType {
pub enum CatalogEntryType {
Directory = b'd',
File = b'f',
Symlink = b'l',

View File

@ -1,6 +1,6 @@
//! Wrappers for OpenSSL crypto functions
//!
//! We use this to encrypt and decryprt data chunks. Cipher is
//! We use this to encrypt and decrypt data chunks. Cipher is
//! AES_256_GCM, which is fast and provides authenticated encryption.
//!
//! See the Wikipedia article for [Authenticated

View File

@ -1,7 +1,8 @@
use anyhow::{bail, Error};
use std::sync::Arc;
use std::io::{Read, BufRead};
use anyhow::{bail, Error};
use super::CryptConfig;
pub struct CryptReader<R> {

View File

@ -1,7 +1,8 @@
use anyhow::{Error};
use std::sync::Arc;
use std::io::Write;
use anyhow::Error;
use super::CryptConfig;
pub struct CryptWriter<W> {

View File

@ -1,6 +1,7 @@
use anyhow::{bail, Error};
use std::convert::TryInto;
use anyhow::{bail, Error};
use proxmox::tools::io::{ReadExt, WriteExt};
use super::file_formats::*;

View File

@ -0,0 +1,177 @@
use std::io::{BufReader, Read};
use std::sync::Arc;
use anyhow::{bail, format_err, Error};
use proxmox::tools::io::ReadExt;
use crate::checksum_reader::ChecksumReader;
use crate::crypt_config::CryptConfig;
use crate::crypt_reader::CryptReader;
use crate::file_formats::{self, DataBlobHeader};
/// Reader stack of a `DataBlobReader`, one variant per supported blob magic number.
///
/// Every variant keeps the CRC taken from the blob header (`expected_crc`) next to the
/// reader that yields the payload; the innermost layer is always a `ChecksumReader<R>`.
enum BlobReaderState<'reader, R: Read> {
// Plain payload: bytes come straight from the checksum reader.
Uncompressed {
expected_crc: u32,
csum_reader: ChecksumReader<R>,
},
// zstd-compressed payload: the decoder sits on top of the (buffered) checksum reader.
Compressed {
expected_crc: u32,
decompr: zstd::stream::read::Decoder<'reader, BufReader<ChecksumReader<R>>>,
},
// Encrypted payload: the crypt reader sits on top of the buffered checksum reader.
Encrypted {
expected_crc: u32,
decrypt_reader: CryptReader<BufReader<ChecksumReader<R>>>,
},
// Encrypted and compressed payload: checksum -> decrypt -> decompress, from the inside out.
EncryptedCompressed {
expected_crc: u32,
decompr: zstd::stream::read::Decoder<
'reader,
BufReader<CryptReader<BufReader<ChecksumReader<R>>>>,
>,
},
}
/// Read data blobs
///
/// Wraps a reader `R` positioned at the start of a blob. The header is parsed by `new`,
/// the payload is available through the `Read` implementation, and `finish` compares the
/// CRC of the consumed data with the one stored in the header.
pub struct DataBlobReader<'reader, R: Read> {
// Reader stack selected from the magic number found in the blob header.
state: BlobReaderState<'reader, R>,
}
// SAFETY: zstd_safe::DCtx is not sync but we are, since
// the only public interface is on mutable reference
// NOTE(review): `new` is a constructor, `finish` takes `self` by value and `read` takes
// `&mut self`, so no method visible in this file can be called through a shared reference.
unsafe impl<R: Read> Sync for DataBlobReader<'_, R> {}
impl<R: Read> DataBlobReader<'_, R> {
pub fn new(mut reader: R, config: Option<Arc<CryptConfig>>) -> Result<Self, Error> {
let head: DataBlobHeader = unsafe { reader.read_le_value()? };
match head.magic {
file_formats::UNCOMPRESSED_BLOB_MAGIC_1_0 => {
let expected_crc = u32::from_le_bytes(head.crc);
let csum_reader = ChecksumReader::new(reader, None);
Ok(Self {
state: BlobReaderState::Uncompressed {
expected_crc,
csum_reader,
},
})
}
file_formats::COMPRESSED_BLOB_MAGIC_1_0 => {
let expected_crc = u32::from_le_bytes(head.crc);
let csum_reader = ChecksumReader::new(reader, None);
let decompr = zstd::stream::read::Decoder::new(csum_reader)?;
Ok(Self {
state: BlobReaderState::Compressed {
expected_crc,
decompr,
},
})
}
file_formats::ENCRYPTED_BLOB_MAGIC_1_0 => {
let config = config
.ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
let expected_crc = u32::from_le_bytes(head.crc);
let mut iv = [0u8; 16];
let mut expected_tag = [0u8; 16];
reader.read_exact(&mut iv)?;
reader.read_exact(&mut expected_tag)?;
let csum_reader = ChecksumReader::new(reader, None);
let decrypt_reader = CryptReader::new(
BufReader::with_capacity(64 * 1024, csum_reader),
iv,
expected_tag,
config,
)?;
Ok(Self {
state: BlobReaderState::Encrypted {
expected_crc,
decrypt_reader,
},
})
}
file_formats::ENCR_COMPR_BLOB_MAGIC_1_0 => {
let config = config
.ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
let expected_crc = u32::from_le_bytes(head.crc);
let mut iv = [0u8; 16];
let mut expected_tag = [0u8; 16];
reader.read_exact(&mut iv)?;
reader.read_exact(&mut expected_tag)?;
let csum_reader = ChecksumReader::new(reader, None);
let decrypt_reader = CryptReader::new(
BufReader::with_capacity(64 * 1024, csum_reader),
iv,
expected_tag,
config,
)?;
let decompr = zstd::stream::read::Decoder::new(decrypt_reader)?;
Ok(Self {
state: BlobReaderState::EncryptedCompressed {
expected_crc,
decompr,
},
})
}
_ => bail!("got wrong magic number {:?}", head.magic),
}
}
pub fn finish(self) -> Result<R, Error> {
match self.state {
BlobReaderState::Uncompressed {
csum_reader,
expected_crc,
} => {
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
BlobReaderState::Compressed {
expected_crc,
decompr,
} => {
let csum_reader = decompr.finish().into_inner();
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
BlobReaderState::Encrypted {
expected_crc,
decrypt_reader,
} => {
let csum_reader = decrypt_reader.finish()?.into_inner();
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
BlobReaderState::EncryptedCompressed {
expected_crc,
decompr,
} => {
let decrypt_reader = decompr.finish().into_inner();
let csum_reader = decrypt_reader.finish()?.into_inner();
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
}
}
}
impl<R: Read> Read for DataBlobReader<'_, R> {
    /// Read payload bytes from the outermost reader of the current stack.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        // Every variant ends in something implementing `Read`; select it once and
        // delegate the call.
        let source: &mut dyn Read = match &mut self.state {
            BlobReaderState::Uncompressed { csum_reader, .. } => csum_reader,
            BlobReaderState::Compressed { decompr, .. } => decompr,
            BlobReaderState::Encrypted { decrypt_reader, .. } => decrypt_reader,
            BlobReaderState::EncryptedCompressed { decompr, .. } => decompr,
        };
        source.read(buf)
    }
}

View File

@ -1,15 +1,26 @@
use anyhow::{Error};
use std::sync::Arc;
use std::io::{Write, Seek, SeekFrom};
use anyhow::Error;
use proxmox::tools::io::WriteExt;
use std::io::{Seek, SeekFrom, Write};
use std::sync::Arc;
use super::*;
use crate::checksum_writer::ChecksumWriter;
use crate::crypt_config::CryptConfig;
use crate::crypt_writer::CryptWriter;
use crate::file_formats::{self, DataBlobHeader, EncryptedDataBlobHeader};
enum BlobWriterState<'writer, W: Write> {
Uncompressed { csum_writer: ChecksumWriter<W> },
Compressed { compr: zstd::stream::write::Encoder<'writer, ChecksumWriter<W>> },
Encrypted { crypt_writer: CryptWriter<ChecksumWriter<W>> },
EncryptedCompressed { compr: zstd::stream::write::Encoder<'writer, CryptWriter<ChecksumWriter<W>>> },
Uncompressed {
csum_writer: ChecksumWriter<W>,
},
Compressed {
compr: zstd::stream::write::Encoder<'writer, ChecksumWriter<W>>,
},
Encrypted {
crypt_writer: CryptWriter<ChecksumWriter<W>>,
},
EncryptedCompressed {
compr: zstd::stream::write::Encoder<'writer, CryptWriter<ChecksumWriter<W>>>,
},
}
/// Data blob writer
@ -17,33 +28,45 @@ pub struct DataBlobWriter<'writer, W: Write> {
state: BlobWriterState<'writer, W>,
}
impl <W: Write + Seek> DataBlobWriter<'_, W> {
impl<W: Write + Seek> DataBlobWriter<'_, W> {
pub fn new_uncompressed(mut writer: W) -> Result<Self, Error> {
writer.seek(SeekFrom::Start(0))?;
let head = DataBlobHeader { magic: UNCOMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
let head = DataBlobHeader {
magic: file_formats::UNCOMPRESSED_BLOB_MAGIC_1_0,
crc: [0; 4],
};
unsafe {
writer.write_le_value(head)?;
}
let csum_writer = ChecksumWriter::new(writer, None);
Ok(Self { state: BlobWriterState::Uncompressed { csum_writer }})
Ok(Self {
state: BlobWriterState::Uncompressed { csum_writer },
})
}
pub fn new_compressed(mut writer: W) -> Result<Self, Error> {
writer.seek(SeekFrom::Start(0))?;
let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
writer.seek(SeekFrom::Start(0))?;
let head = DataBlobHeader {
magic: file_formats::COMPRESSED_BLOB_MAGIC_1_0,
crc: [0; 4],
};
unsafe {
writer.write_le_value(head)?;
}
let csum_writer = ChecksumWriter::new(writer, None);
let compr = zstd::stream::write::Encoder::new(csum_writer, 1)?;
Ok(Self { state: BlobWriterState::Compressed { compr }})
Ok(Self {
state: BlobWriterState::Compressed { compr },
})
}
pub fn new_encrypted(mut writer: W, config: Arc<CryptConfig>) -> Result<Self, Error> {
writer.seek(SeekFrom::Start(0))?;
let head = EncryptedDataBlobHeader {
head: DataBlobHeader { magic: ENCRYPTED_BLOB_MAGIC_1_0, crc: [0; 4] },
head: DataBlobHeader {
magic: file_formats::ENCRYPTED_BLOB_MAGIC_1_0,
crc: [0; 4],
},
iv: [0u8; 16],
tag: [0u8; 16],
};
@ -52,14 +75,22 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
}
let csum_writer = ChecksumWriter::new(writer, None);
let crypt_writer = CryptWriter::new(csum_writer, config)?;
Ok(Self { state: BlobWriterState::Encrypted { crypt_writer }})
let crypt_writer = CryptWriter::new(csum_writer, config)?;
Ok(Self {
state: BlobWriterState::Encrypted { crypt_writer },
})
}
pub fn new_encrypted_compressed(mut writer: W, config: Arc<CryptConfig>) -> Result<Self, Error> {
pub fn new_encrypted_compressed(
mut writer: W,
config: Arc<CryptConfig>,
) -> Result<Self, Error> {
writer.seek(SeekFrom::Start(0))?;
let head = EncryptedDataBlobHeader {
head: DataBlobHeader { magic: ENCR_COMPR_BLOB_MAGIC_1_0, crc: [0; 4] },
head: DataBlobHeader {
magic: file_formats::ENCR_COMPR_BLOB_MAGIC_1_0,
crc: [0; 4],
},
iv: [0u8; 16],
tag: [0u8; 16],
};
@ -68,9 +99,11 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
}
let csum_writer = ChecksumWriter::new(writer, None);
let crypt_writer = CryptWriter::new(csum_writer, config)?;
let crypt_writer = CryptWriter::new(csum_writer, config)?;
let compr = zstd::stream::write::Encoder::new(crypt_writer, 1)?;
Ok(Self { state: BlobWriterState::EncryptedCompressed { compr }})
Ok(Self {
state: BlobWriterState::EncryptedCompressed { compr },
})
}
pub fn finish(self) -> Result<W, Error> {
@ -78,7 +111,10 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
BlobWriterState::Uncompressed { csum_writer } => {
// write CRC
let (mut writer, crc, _) = csum_writer.finish()?;
let head = DataBlobHeader { magic: UNCOMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
let head = DataBlobHeader {
magic: file_formats::UNCOMPRESSED_BLOB_MAGIC_1_0,
crc: crc.to_le_bytes(),
};
writer.seek(SeekFrom::Start(0))?;
unsafe {
@ -91,7 +127,10 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
let csum_writer = compr.finish()?;
let (mut writer, crc, _) = csum_writer.finish()?;
let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
let head = DataBlobHeader {
magic: file_formats::COMPRESSED_BLOB_MAGIC_1_0,
crc: crc.to_le_bytes(),
};
writer.seek(SeekFrom::Start(0))?;
unsafe {
@ -105,8 +144,12 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
let (mut writer, crc, _) = csum_writer.finish()?;
let head = EncryptedDataBlobHeader {
head: DataBlobHeader { magic: ENCRYPTED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() },
iv, tag,
head: DataBlobHeader {
magic: file_formats::ENCRYPTED_BLOB_MAGIC_1_0,
crc: crc.to_le_bytes(),
},
iv,
tag,
};
writer.seek(SeekFrom::Start(0))?;
unsafe {
@ -120,8 +163,12 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
let (mut writer, crc, _) = csum_writer.finish()?;
let head = EncryptedDataBlobHeader {
head: DataBlobHeader { magic: ENCR_COMPR_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() },
iv, tag,
head: DataBlobHeader {
magic: file_formats::ENCR_COMPR_BLOB_MAGIC_1_0,
crc: crc.to_le_bytes(),
},
iv,
tag,
};
writer.seek(SeekFrom::Start(0))?;
unsafe {
@ -133,39 +180,30 @@ impl <W: Write + Seek> DataBlobWriter<'_, W> {
}
}
impl <W: Write + Seek> Write for DataBlobWriter<'_, W> {
impl<W: Write + Seek> Write for DataBlobWriter<'_, W> {
fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
match self.state {
BlobWriterState::Uncompressed { ref mut csum_writer } => {
csum_writer.write(buf)
}
BlobWriterState::Compressed { ref mut compr } => {
compr.write(buf)
}
BlobWriterState::Encrypted { ref mut crypt_writer } => {
crypt_writer.write(buf)
}
BlobWriterState::EncryptedCompressed { ref mut compr } => {
compr.write(buf)
}
BlobWriterState::Uncompressed {
ref mut csum_writer,
} => csum_writer.write(buf),
BlobWriterState::Compressed { ref mut compr } => compr.write(buf),
BlobWriterState::Encrypted {
ref mut crypt_writer,
} => crypt_writer.write(buf),
BlobWriterState::EncryptedCompressed { ref mut compr } => compr.write(buf),
}
}
fn flush(&mut self) -> Result<(), std::io::Error> {
match self.state {
BlobWriterState::Uncompressed { ref mut csum_writer } => {
csum_writer.flush()
}
BlobWriterState::Compressed { ref mut compr } => {
compr.flush()
}
BlobWriterState::Encrypted { ref mut crypt_writer } => {
crypt_writer.flush()
}
BlobWriterState::EncryptedCompressed { ref mut compr } => {
compr.flush()
}
BlobWriterState::Uncompressed {
ref mut csum_writer,
} => csum_writer.flush(),
BlobWriterState::Compressed { ref mut compr } => compr.flush(),
BlobWriterState::Encrypted {
ref mut crypt_writer,
} => crypt_writer.flush(),
BlobWriterState::EncryptedCompressed { ref mut compr } => compr.flush(),
}
}
}

199
pbs-datastore/src/lib.rs Normal file
View File

@ -0,0 +1,199 @@
//! This module implements the data storage and access layer.
//!
//! # Data formats
//!
//! PBS splits large files into chunks, and stores them deduplicated using
//! a content addressable storage format.
//!
//! Backup snapshots are stored as folders containing a manifest file and
//! potentially one or more index or blob files.
//!
//! The manifest contains hashes of all other files and can be signed by
//! the client.
//!
//! Blob files contain data directly. They are used for config files and
//! the like.
//!
//! Index files are used to reconstruct an original file. They contain a
//! list of SHA256 checksums. The `DynamicIndex*` format is able to deal
//! with dynamic chunk sizes (CT and host backups), whereas the
//! `FixedIndex*` format is an optimization to store a list of equal sized
//! chunks (VMs, whole block devices).
//!
//! A chunk is defined as a binary blob, which is stored inside a
//! [ChunkStore](struct.ChunkStore.html) instead of the backup directory
//! directly, and can be addressed by its SHA256 digest.
//!
//!
//! # Garbage Collection (GC)
//!
//! Deleting backups is as easy as deleting the corresponding .idx files.
//! However, this does not free up any storage, because those files just
//! contain references to chunks.
//!
//! To free up some storage, we run a garbage collection process at
//! regular intervals. The collector uses a mark and sweep approach. In
//! the first phase, it scans all .idx files to mark used chunks. The
//! second phase then removes all unmarked chunks from the store.
//!
//! The locking mechanisms mentioned below make sure that we are the only
//! process running GC. We still want to be able to create backups during
//! GC, so there may be multiple backup threads/tasks running, either
//! started before GC, or while GC is running.
//!
//! ## `atime` based GC
//!
//! The idea here is to mark chunks by updating the `atime` (access
//! timestamp) on the chunk file. This is quite simple and does not need
//! additional RAM.
//!
//! One minor problem is that recent Linux versions use the `relatime`
//! mount flag by default for performance reasons (and we want that). When
//! enabled, `atime` data is written to the disk only if the file has been
//! modified since the `atime` data was last updated (`mtime`), or if the
//! file was last accessed more than a certain amount of time ago (by
//! default 24h). So we may only delete chunks with `atime` older than 24
//! hours.
//!
//! Another problem arises from running backups. The mark phase does not
//! find any chunks from those backups, because there is no .idx file for
//! them (created after the backup). Chunks created or touched by those
//! backups may have an `atime` as old as the start time of those backups.
//! Please note that the backup start time may predate the GC start time.
//! So we may only delete chunks older than the start time of those
//! running backup jobs, which might be more than 24h back (this is the
//! reason why ProcessLocker exclusive locks only have to be exclusive
//! between processes, since within one we can determine the age of the
//! oldest shared lock).
//!
//! ## Store `marks` in RAM using a HASH
//!
//! Might be better. Under investigation.
//!
//!
//! # Locking
//!
//! Since PBS allows multiple potentially interfering operations at the
//! same time (e.g. garbage collect, prune, multiple backup creations
//! (only in separate groups), forget, ...), these need to lock against
//! each other in certain scenarios. There is no overarching global lock
//! though, instead always the finest grained lock possible is used,
//! because running these operations concurrently is treated as a feature
//! on its own.
//!
//! ## Inter-process Locking
//!
//! We need to be able to restart the proxmox-backup service daemons, so
//! that we can update the software without rebooting the host. But such
//! restarts must not abort running backup jobs, so we need to keep the
//! old service running until those jobs are finished. This implies that
//! we need some kind of locking for modifying chunks and indices in the
//! ChunkStore.
//!
//! Please note that it is perfectly valid to have multiple
//! parallel ChunkStore writers, even when they write the same chunk
//! (because the chunk would have the same name and the same data, and
//! writes are completed atomically via a rename). The only problem is
//! garbage collection, because we need to avoid deleting chunks which are
//! still referenced.
//!
//! To do this we use the
//! [ProcessLocker](../tools/struct.ProcessLocker.html).
//!
//! ### ChunkStore-wide
//!
//! * Create Index Files:
//!
//! Acquire shared lock for ChunkStore.
//!
//! Note: When creating .idx files, we create a temporary .tmp file,
//! then do an atomic rename.
//!
//! * Garbage Collect:
//!
//! Acquire exclusive lock for ChunkStore. If we already
//! have a shared lock for the ChunkStore, try to upgrade that
//! lock.
//!
//! Exclusive locks only work _between processes_. It is valid to have an
//! exclusive and one or more shared locks held within one process. Writing
//! chunks within one process is synchronized using the gc_mutex.
//!
//! On server restart, we stop any running GC in the old process to avoid
//! having the exclusive lock held for too long.
//!
//! ## Locking table
//!
//! The table below shows all operations that play a role in locking, and which
//! mechanisms are used to make their concurrent usage safe.
//!
//! | starting ><br>v during | read index file | create index file | GC mark | GC sweep | update manifest | forget | prune | create backup | verify | reader api |
//! |-|-|-|-|-|-|-|-|-|-|-|
//! | **read index file** | / | / | / | / | / | mmap stays valid, oldest_shared_lock prevents GC | see forget column | / | / | / |
//! | **create index file** | / | / | / | / | / | / | / | /, happens at the end, after all chunks are touched | /, only happens without a manifest | / |
//! | **GC mark** | / | Datastore process-lock shared | gc_mutex, exclusive ProcessLocker | gc_mutex | /, GC only cares about index files, not manifests | tells GC about removed chunks | see forget column | /, index files don't exist yet | / | / |
//! | **GC sweep** | / | Datastore process-lock shared | gc_mutex, exclusive ProcessLocker | gc_mutex | / | /, chunks already marked | see forget column | chunks get touched; chunk_store.mutex; oldest PL lock | / | / |
//! | **update manifest** | / | / | / | / | update_manifest lock | update_manifest lock, remove dir under lock | see forget column | /, “write manifest” happens at the end | /, can call “write manifest”, see that column | / |
//! | **forget** | / | / | removed_during_gc mutex is held during unlink | marking done, doesn't matter if forgotten now | update_manifest lock, forget waits for lock | /, unlink is atomic | causes forget to fail, but that's OK | running backup has snapshot flock | /, potentially detects missing folder | shared snap flock |
//! | **prune** | / | / | see forget row | see forget row | see forget row | causes warn in prune, but no error | see forget column | running and last non-running can't be pruned | see forget row | shared snap flock |
//! | **create backup** | / | only time this happens, thus has snapshot flock | / | chunks get touched; chunk_store.mutex; oldest PL lock | no lock, but cannot exist beforehand | snapshot flock, can't be forgotten | running and last non-running can't be pruned | snapshot group flock, only one running per group | /, won't be verified since manifest missing | / |
//! | **verify** | / | / | / | / | see “update manifest” row | /, potentially detects missing folder | see forget column | / | /, but useless (“update manifest” protects itself) | / |
//! | **reader api** | / | / | / | /, open snap can't be forgotten, so ref must exist | / | prevented by shared snap flock | prevented by shared snap flock | / | / | /, lock is shared |
//! * / = no interaction
//! * shared/exclusive from POV of 'starting' process
use anyhow::{format_err, Error};
// Note: .pcat1 => Proxmox Catalog Format version 1
pub const CATALOG_NAME: &str = "catalog.pcat1.didx";
/// Expands to the string literal `"proxmox-backup-protocol-v1"`.
///
/// NOTE(review): presumably a macro rather than a `const` so the literal can be used where
/// a string literal token is required (e.g. `concat!`) - confirm against the call sites.
#[macro_export]
macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 {
() => {
"proxmox-backup-protocol-v1"
};
}
/// Expands to the string literal `"proxmox-backup-reader-protocol-v1"`.
///
/// NOTE(review): presumably a macro rather than a `const` so the literal can be used where
/// a string literal token is required (e.g. `concat!`) - confirm against the call sites.
#[macro_export]
macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 {
() => {
"proxmox-backup-reader-protocol-v1"
};
}
/// Unix system user used by proxmox-backup-proxy
pub const BACKUP_USER_NAME: &str = "backup";
/// Unix system group used by proxmox-backup-proxy
pub const BACKUP_GROUP_NAME: &str = "backup";
/// Look up the passwd entry of the 'backup' user (``getpwnam_r(3)``).
///
/// # Errors
///
/// Fails if the lookup itself reports an error, or if no user named
/// [`BACKUP_USER_NAME`] exists.
pub fn backup_user() -> Result<nix::unistd::User, Error> {
    match nix::unistd::User::from_name(BACKUP_USER_NAME)? {
        Some(user) => Ok(user),
        // The lookup worked, but there is no such account.
        None => Err(format_err!("Unable to lookup backup user.")),
    }
}
/// Return Group info for the 'backup' group (``getgrnam(3)``)
///
/// # Errors
///
/// Fails if the lookup itself reports an error, or if no group named
/// [`BACKUP_GROUP_NAME`] exists.
pub fn backup_group() -> Result<nix::unistd::Group, Error> {
    nix::unistd::Group::from_name(BACKUP_GROUP_NAME)?
        // The lookup worked but found no entry. The message names the *group*; it used to
        // say "user", which pointed at the wrong account database when debugging.
        .ok_or_else(|| format_err!("Unable to lookup backup group."))
}
pub mod catalog;
pub mod checksum_reader;
pub mod checksum_writer;
pub mod chunker;
pub mod crypt_config;
pub mod crypt_reader;
pub mod crypt_writer;
pub mod data_blob;
pub mod data_blob_reader;
pub mod data_blob_writer;
pub mod file_formats;
pub mod index;
pub use checksum_reader::ChecksumReader;
pub use checksum_writer::ChecksumWriter;
pub use chunker::Chunker;
pub use crypt_config::{CryptConfig, CryptMode};
pub use crypt_reader::CryptReader;
pub use crypt_writer::CryptWriter;

View File

@ -7,12 +7,13 @@ use proxmox::api::{api, schema::*};
use proxmox::const_regex;
use proxmox::{IPRE, IPRE_BRACKET, IPV4RE, IPV6RE, IPV4OCTET, IPV6H16, IPV6LS32};
use pbs_datastore::catalog::CatalogEntryType;
use crate::{
backup::{
CryptMode,
Fingerprint,
DirEntryAttribute,
CatalogEntryType,
},
server::UPID,
config::acl::Role,

View File

@ -1,125 +0,0 @@
use anyhow::{bail, format_err, Error};
use std::sync::Arc;
use std::io::{Read, BufReader};
use proxmox::tools::io::ReadExt;
use super::*;
/// Reader stack of a `DataBlobReader`, one variant per supported blob magic number.
///
/// Every variant keeps the CRC taken from the blob header (`expected_crc`) next to the
/// reader that yields the payload; the innermost layer is always a `ChecksumReader<R>`.
enum BlobReaderState<'reader, R: Read> {
// Plain payload: bytes come straight from the checksum reader.
Uncompressed { expected_crc: u32, csum_reader: ChecksumReader<R> },
// zstd-compressed payload: the decoder sits on top of the (buffered) checksum reader.
Compressed { expected_crc: u32, decompr: zstd::stream::read::Decoder<'reader, BufReader<ChecksumReader<R>>> },
// Encrypted payload: the crypt reader sits on top of the buffered checksum reader.
Encrypted { expected_crc: u32, decrypt_reader: CryptReader<BufReader<ChecksumReader<R>>> },
// Encrypted and compressed payload: checksum -> decrypt -> decompress, from the inside out.
EncryptedCompressed { expected_crc: u32, decompr: zstd::stream::read::Decoder<'reader, BufReader<CryptReader<BufReader<ChecksumReader<R>>>>> },
}
/// Read data blobs
///
/// Wraps a reader `R` positioned at the start of a blob. The header is parsed by `new`,
/// the payload is available through the `Read` implementation, and `finish` compares the
/// CRC of the consumed data with the one stored in the header.
pub struct DataBlobReader<'reader, R: Read> {
// Reader stack selected from the magic number found in the blob header.
state: BlobReaderState<'reader, R>,
}
// SAFETY: zstd_safe::DCtx is not sync but we are, since
// the only public interface is on mutable reference
// NOTE(review): `new` is a constructor, `finish` takes `self` by value and `read` takes
// `&mut self`, so no method visible in this file can be called through a shared reference.
unsafe impl<R: Read> Sync for DataBlobReader<'_, R> {}
impl <R: Read> DataBlobReader<'_, R> {
pub fn new(mut reader: R, config: Option<Arc<CryptConfig>>) -> Result<Self, Error> {
let head: DataBlobHeader = unsafe { reader.read_le_value()? };
match head.magic {
UNCOMPRESSED_BLOB_MAGIC_1_0 => {
let expected_crc = u32::from_le_bytes(head.crc);
let csum_reader = ChecksumReader::new(reader, None);
Ok(Self { state: BlobReaderState::Uncompressed { expected_crc, csum_reader }})
}
COMPRESSED_BLOB_MAGIC_1_0 => {
let expected_crc = u32::from_le_bytes(head.crc);
let csum_reader = ChecksumReader::new(reader, None);
let decompr = zstd::stream::read::Decoder::new(csum_reader)?;
Ok(Self { state: BlobReaderState::Compressed { expected_crc, decompr }})
}
ENCRYPTED_BLOB_MAGIC_1_0 => {
let config = config.ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
let expected_crc = u32::from_le_bytes(head.crc);
let mut iv = [0u8; 16];
let mut expected_tag = [0u8; 16];
reader.read_exact(&mut iv)?;
reader.read_exact(&mut expected_tag)?;
let csum_reader = ChecksumReader::new(reader, None);
let decrypt_reader = CryptReader::new(BufReader::with_capacity(64*1024, csum_reader), iv, expected_tag, config)?;
Ok(Self { state: BlobReaderState::Encrypted { expected_crc, decrypt_reader }})
}
ENCR_COMPR_BLOB_MAGIC_1_0 => {
let config = config.ok_or_else(|| format_err!("unable to read encrypted blob without key"))?;
let expected_crc = u32::from_le_bytes(head.crc);
let mut iv = [0u8; 16];
let mut expected_tag = [0u8; 16];
reader.read_exact(&mut iv)?;
reader.read_exact(&mut expected_tag)?;
let csum_reader = ChecksumReader::new(reader, None);
let decrypt_reader = CryptReader::new(BufReader::with_capacity(64*1024, csum_reader), iv, expected_tag, config)?;
let decompr = zstd::stream::read::Decoder::new(decrypt_reader)?;
Ok(Self { state: BlobReaderState::EncryptedCompressed { expected_crc, decompr }})
}
_ => bail!("got wrong magic number {:?}", head.magic)
}
}
pub fn finish(self) -> Result<R, Error> {
match self.state {
BlobReaderState::Uncompressed { csum_reader, expected_crc } => {
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
BlobReaderState::Compressed { expected_crc, decompr } => {
let csum_reader = decompr.finish().into_inner();
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
BlobReaderState::Encrypted { expected_crc, decrypt_reader } => {
let csum_reader = decrypt_reader.finish()?.into_inner();
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
BlobReaderState::EncryptedCompressed { expected_crc, decompr } => {
let decrypt_reader = decompr.finish().into_inner();
let csum_reader = decrypt_reader.finish()?.into_inner();
let (reader, crc, _) = csum_reader.finish()?;
if crc != expected_crc {
bail!("blob crc check failed");
}
Ok(reader)
}
}
}
}
impl<R: Read> Read for DataBlobReader<'_, R> {
    /// Read payload bytes from the outermost reader of the current stack.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        // Every variant ends in something implementing `Read`; select it once and
        // delegate the call.
        let source: &mut dyn Read = match &mut self.state {
            BlobReaderState::Uncompressed { csum_reader, .. } => csum_reader,
            BlobReaderState::Compressed { decompr, .. } => decompr,
            BlobReaderState::Encrypted { decrypt_reader, .. } => decrypt_reader,
            BlobReaderState::EncryptedCompressed { decompr, .. } => decompr,
        };
        source.read(buf)
    }
}

View File

@ -178,44 +178,36 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> {
}
}
mod file_formats;
pub use file_formats::*;
mod manifest;
pub use manifest::*;
mod crypt_config;
pub use crypt_config::*;
pub use pbs_datastore::catalog::*;
pub use pbs_datastore::catalog;
pub use pbs_datastore::file_formats::*;
pub use pbs_datastore::file_formats;
pub use pbs_datastore::index::*;
pub use pbs_datastore::index;
pub use pbs_datastore::crypt_config;
pub use pbs_datastore::crypt_config::*;
pub use pbs_datastore::crypt_reader;
pub use pbs_datastore::crypt_reader::*;
pub use pbs_datastore::crypt_writer;
pub use pbs_datastore::crypt_writer::*;
pub use pbs_datastore::data_blob;
pub use pbs_datastore::data_blob::*;
pub use pbs_datastore::data_blob_reader;
pub use pbs_datastore::data_blob_reader::*;
pub use pbs_datastore::data_blob_writer;
pub use pbs_datastore::data_blob_writer::*;
pub use pbs_datastore::checksum_reader;
pub use pbs_datastore::checksum_reader::*;
pub use pbs_datastore::checksum_writer;
pub use pbs_datastore::checksum_writer::*;
pub use pbs_datastore::chunker;
pub use pbs_datastore::chunker::*;
mod key_derivation;
pub use key_derivation::*;
mod crypt_reader;
pub use crypt_reader::*;
mod crypt_writer;
pub use crypt_writer::*;
mod checksum_reader;
pub use checksum_reader::*;
mod checksum_writer;
pub use checksum_writer::*;
mod chunker;
pub use chunker::*;
mod data_blob;
pub use data_blob::*;
mod data_blob_reader;
pub use data_blob_reader::*;
mod data_blob_writer;
pub use data_blob_writer::*;
mod catalog;
pub use catalog::*;
mod manifest;
pub use manifest::*;
mod chunk_stream;
pub use chunk_stream::*;
@ -229,9 +221,6 @@ pub use read_chunk::*;
mod chunk_store;
pub use chunk_store::*;
mod index;
pub use index::*;
mod fixed_index;
pub use fixed_index::*;

View File

@ -30,6 +30,8 @@ use proxmox::{
};
use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
use pbs_datastore::catalog::BackupCatalogWriter;
use proxmox_backup::tools::{
self,
StdChannelWriter,
@ -38,7 +40,6 @@ use proxmox_backup::tools::{
use proxmox_backup::api2::types::*;
use proxmox_backup::api2::version;
use proxmox_backup::client::*;
use proxmox_backup::pxar::catalog::*;
use proxmox_backup::backup::{
archive_type,
decrypt_key,

View File

@ -1,19 +0,0 @@
//! Trait for file list catalog
//!
//! A file list catalog simply stores a directory tree. Such catalogs
//! may be used as index to do a fast search for files.
use anyhow::{Error};
use std::ffi::CStr;
/// Writer interface for a file list catalog.
///
/// Entry names are passed as `CStr`; every method reports failure through `anyhow::Error`.
pub trait BackupCatalogWriter {
/// Start the directory entry `name`.
///
/// NOTE(review): presumably entries added afterwards belong to this directory until the
/// matching `end_directory` call - confirm against the implementors.
fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
/// End a directory; takes no name, so presumably it closes the most recently started one
/// (confirm against the implementors).
fn end_directory(&mut self) -> Result<(), Error>;
/// Add a file entry `name` together with its `size` and `mtime`.
///
/// NOTE(review): units are not visible here (presumably bytes and a Unix timestamp) - confirm.
fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
/// Add a symlink entry `name`; only the name is passed, no link target.
fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a hardlink entry `name`; only the name is passed, no link target.
fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a block device entry `name`.
fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a character device entry `name`.
fn add_char_device(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a FIFO entry `name`.
fn add_fifo(&mut self, name: &CStr) -> Result<(), Error>;
/// Add a socket entry `name`.
fn add_socket(&mut self, name: &CStr) -> Result<(), Error>;
}

View File

@ -24,9 +24,9 @@ use proxmox::sys::error::SysError;
use proxmox::tools::fd::RawFdNum;
use proxmox::tools::vec;
use pbs_datastore::catalog::BackupCatalogWriter;
use pbs_tools::fs;
use crate::pxar::catalog::BackupCatalogWriter;
use crate::pxar::metadata::errno_is_unsupported;
use crate::pxar::Flags;
use crate::pxar::tools::assert_single_path_component;

View File

@ -47,7 +47,6 @@
//! (user, group, acl, ...) because this is already defined by the
//! linked `ENTRY`.
pub mod catalog;
pub(crate) mod create;
pub(crate) mod dir_stack;
pub(crate) mod extract;