don't require WorkerTask in backup/

To untangle the server code from the actual backup
implementation.
It would be ideal if the whole backup/ dir could become its
own crate with minimal dependencies, certainly without
depending on the actual api server. Such a crate could then
also be reused more easily to build forensic tools for all
the data file types we have in the backup repositories.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
Author: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date:   2020-10-12 11:46:34 +02:00
Parent: d1993187b6
Commit: f6b1d1cc66

5 changed files with 195 additions and 73 deletions

src/backup/verify.rs

@@ -6,9 +6,7 @@ use std::time::Instant;
use anyhow::{bail, format_err, Error};
use crate::{
- server::WorkerTask,
api2::types::*,
- tools::ParallelHandler,
backup::{
DataStore,
DataBlob,
@@ -21,6 +19,10 @@ use crate::{
ArchiveType,
archive_type,
},
+ server::UPID,
+ task::TaskState,
+ task_log,
+ tools::ParallelHandler,
};
fn verify_blob(datastore: Arc<DataStore>, backup_dir: &BackupDir, info: &FileInfo) -> Result<(), Error> {
@@ -51,7 +53,7 @@ fn verify_blob(datastore: Arc<DataStore>, backup_dir: &BackupDir, info: &FileInf
fn rename_corrupted_chunk(
datastore: Arc<DataStore>,
digest: &[u8;32],
- worker: Arc<WorkerTask>,
+ worker: &dyn TaskState,
) {
let (path, digest_str) = datastore.chunk_path(digest);
@@ -64,12 +66,12 @@
match std::fs::rename(&path, &new_path) {
Ok(_) => {
worker.log(format!("corrupted chunk renamed to {:?}", &new_path));
task_log!(worker, "corrupted chunk renamed to {:?}", &new_path);
},
Err(err) => {
match err.kind() {
std::io::ErrorKind::NotFound => { /* ignored */ },
- _ => worker.log(format!("could not rename corrupted chunk {:?} - {}", &path, err))
+ _ => task_log!(worker, "could not rename corrupted chunk {:?} - {}", &path, err)
}
}
};
@@ -81,7 +83,7 @@ fn verify_index_chunks(
verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
corrupt_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
crypt_mode: CryptMode,
- worker: Arc<WorkerTask>,
+ worker: Arc<dyn TaskState + Send + Sync>,
) -> Result<(), Error> {
let errors = Arc::new(AtomicUsize::new(0));
@@ -103,7 +105,7 @@ fn verify_index_chunks(
let chunk_crypt_mode = match chunk.crypt_mode() {
Err(err) => {
corrupt_chunks2.lock().unwrap().insert(digest);
worker2.log(format!("can't verify chunk, unknown CryptMode - {}", err));
task_log!(worker2, "can't verify chunk, unknown CryptMode - {}", err);
errors2.fetch_add(1, Ordering::SeqCst);
return Ok(());
},
@@ -111,19 +113,20 @@
};
if chunk_crypt_mode != crypt_mode {
- worker2.log(format!(
+ task_log!(
+ worker2,
"chunk CryptMode {:?} does not match index CryptMode {:?}",
chunk_crypt_mode,
crypt_mode
- ));
+ );
errors2.fetch_add(1, Ordering::SeqCst);
}
if let Err(err) = chunk.verify_unencrypted(size as usize, &digest) {
corrupt_chunks2.lock().unwrap().insert(digest);
worker2.log(format!("{}", err));
task_log!(worker2, "{}", err);
errors2.fetch_add(1, Ordering::SeqCst);
- rename_corrupted_chunk(datastore2.clone(), &digest, worker2.clone());
+ rename_corrupted_chunk(datastore2.clone(), &digest, &worker2);
} else {
verified_chunks2.lock().unwrap().insert(digest);
}
@@ -134,7 +137,7 @@ fn verify_index_chunks(
for pos in 0..index.index_count() {
- worker.fail_on_abort()?;
+ worker.check_abort()?;
crate::tools::fail_on_shutdown()?;
let info = index.chunk_info(pos).unwrap();
@@ -146,7 +149,7 @@
if corrupt_chunks.lock().unwrap().contains(&info.digest) {
let digest_str = proxmox::tools::digest_to_hex(&info.digest);
worker.log(format!("chunk {} was marked as corrupt", digest_str));
task_log!(worker, "chunk {} was marked as corrupt", digest_str);
errors.fetch_add(1, Ordering::SeqCst);
continue;
}
@@ -154,9 +157,9 @@
match datastore.load_chunk(&info.digest) {
Err(err) => {
corrupt_chunks.lock().unwrap().insert(info.digest);
worker.log(format!("can't verify chunk, load failed - {}", err));
task_log!(worker, "can't verify chunk, load failed - {}", err);
errors.fetch_add(1, Ordering::SeqCst);
- rename_corrupted_chunk(datastore.clone(), &info.digest, worker.clone());
+ rename_corrupted_chunk(datastore.clone(), &info.digest, &worker);
continue;
}
Ok(chunk) => {
@@ -179,8 +182,16 @@
let error_count = errors.load(Ordering::SeqCst);
worker.log(format!(" verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)",
read_bytes_mib, decoded_bytes_mib, elapsed, read_speed, decode_speed, error_count));
task_log!(
worker,
" verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)",
read_bytes_mib,
decoded_bytes_mib,
elapsed,
read_speed,
decode_speed,
error_count,
);
if errors.load(Ordering::SeqCst) > 0 {
bail!("chunks could not be verified");
@@ -195,7 +206,7 @@ fn verify_fixed_index(
info: &FileInfo,
verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
- worker: Arc<WorkerTask>,
+ worker: Arc<dyn TaskState + Send + Sync>,
) -> Result<(), Error> {
let mut path = backup_dir.relative_path();
@@ -212,7 +223,14 @@
bail!("wrong index checksum");
}
- verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, info.chunk_crypt_mode(), worker)
+ verify_index_chunks(
+ datastore,
+ Box::new(index),
+ verified_chunks,
+ corrupt_chunks,
+ info.chunk_crypt_mode(),
+ worker,
+ )
}
fn verify_dynamic_index(
@@ -221,7 +239,7 @@ fn verify_dynamic_index(
info: &FileInfo,
verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
- worker: Arc<WorkerTask>,
+ worker: Arc<dyn TaskState + Send + Sync>,
) -> Result<(), Error> {
let mut path = backup_dir.relative_path();
@@ -238,7 +256,14 @@
bail!("wrong index checksum");
}
- verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, info.chunk_crypt_mode(), worker)
+ verify_index_chunks(
+ datastore,
+ Box::new(index),
+ verified_chunks,
+ corrupt_chunks,
+ info.chunk_crypt_mode(),
+ worker,
+ )
}
/// Verify a single backup snapshot
@@ -255,25 +280,32 @@ pub fn verify_backup_dir(
backup_dir: &BackupDir,
verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
- worker: Arc<WorkerTask>
+ worker: Arc<dyn TaskState + Send + Sync>,
+ upid: UPID,
) -> Result<bool, Error> {
let mut manifest = match datastore.load_manifest(&backup_dir) {
Ok((manifest, _)) => manifest,
Err(err) => {
worker.log(format!("verify {}:{} - manifest load error: {}", datastore.name(), backup_dir, err));
task_log!(
worker,
"verify {}:{} - manifest load error: {}",
datastore.name(),
backup_dir,
err,
);
return Ok(false);
}
};
worker.log(format!("verify {}:{}", datastore.name(), backup_dir));
task_log!(worker, "verify {}:{}", datastore.name(), backup_dir);
let mut error_count = 0;
let mut verify_result = VerifyState::Ok;
for info in manifest.files() {
let result = proxmox::try_block!({
worker.log(format!(" check {}", info.filename));
task_log!(worker, " check {}", info.filename);
match archive_type(&info.filename)? {
ArchiveType::FixedIndex =>
verify_fixed_index(
@@ -297,11 +329,18 @@
}
});
- worker.fail_on_abort()?;
+ worker.check_abort()?;
crate::tools::fail_on_shutdown()?;
if let Err(err) = result {
worker.log(format!("verify {}:{}/{} failed: {}", datastore.name(), backup_dir, info.filename, err));
task_log!(
worker,
"verify {}:{}/{} failed: {}",
datastore.name(),
backup_dir,
info.filename,
err,
);
error_count += 1;
verify_result = VerifyState::Failed;
}
@@ -310,7 +349,7 @@
let verify_state = SnapshotVerifyState {
state: verify_result,
- upid: worker.upid().clone(),
+ upid,
};
manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
datastore.store_manifest(&backup_dir, serde_json::to_value(manifest)?)
@@ -332,19 +371,26 @@ pub fn verify_backup_group(
verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
progress: Option<(usize, usize)>, // (done, snapshot_count)
- worker: Arc<WorkerTask>,
+ worker: Arc<dyn TaskState + Send + Sync>,
+ upid: &UPID,
) -> Result<(usize, Vec<String>), Error> {
let mut errors = Vec::new();
let mut list = match group.list_backups(&datastore.base_path()) {
Ok(list) => list,
Err(err) => {
worker.log(format!("verify group {}:{} - unable to list backups: {}", datastore.name(), group, err));
task_log!(
worker,
"verify group {}:{} - unable to list backups: {}",
datastore.name(),
group,
err,
);
return Ok((0, errors));
}
};
worker.log(format!("verify group {}:{}", datastore.name(), group));
task_log!(worker, "verify group {}:{}", datastore.name(), group);
let (done, snapshot_count) = progress.unwrap_or((0, list.len()));
@@ -352,13 +398,26 @@
BackupInfo::sort_list(&mut list, false); // newest first
for info in list {
count += 1;
- if !verify_backup_dir(datastore.clone(), &info.backup_dir, verified_chunks.clone(), corrupt_chunks.clone(), worker.clone())?{
+ if !verify_backup_dir(
+ datastore.clone(),
+ &info.backup_dir,
+ verified_chunks.clone(),
+ corrupt_chunks.clone(),
+ worker.clone(),
+ upid.clone(),
+ )? {
errors.push(info.backup_dir.to_string());
}
if snapshot_count != 0 {
let pos = done + count;
let percentage = ((pos as f64) * 100.0)/(snapshot_count as f64);
worker.log(format!("percentage done: {:.2}% ({} of {} snapshots)", percentage, pos, snapshot_count));
task_log!(
worker,
"percentage done: {:.2}% ({} of {} snapshots)",
percentage,
pos,
snapshot_count,
);
}
}
@@ -372,8 +431,11 @@
/// Returns
/// - Ok(failed_dirs) where failed_dirs had verification errors
/// - Err(_) if task was aborted
- pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) -> Result<Vec<String>, Error> {
+ pub fn verify_all_backups(
+ datastore: Arc<DataStore>,
+ worker: Arc<dyn TaskState + Send + Sync>,
+ upid: &UPID,
+ ) -> Result<Vec<String>, Error> {
let mut errors = Vec::new();
let mut list = match BackupGroup::list_groups(&datastore.base_path()) {
@@ -382,7 +444,12 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) ->
.filter(|group| !(group.backup_type() == "host" && group.backup_id() == "benchmark"))
.collect::<Vec<BackupGroup>>(),
Err(err) => {
worker.log(format!("verify datastore {} - unable to list backups: {}", datastore.name(), err));
task_log!(
worker,
"verify datastore {} - unable to list backups: {}",
datastore.name(),
err,
);
return Ok(errors);
}
};
@@ -400,7 +467,7 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) ->
// start with 64 chunks since we assume there are few corrupt ones
let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
worker.log(format!("verify datastore {} ({} snapshots)", datastore.name(), snapshot_count));
task_log!(worker, "verify datastore {} ({} snapshots)", datastore.name(), snapshot_count);
let mut done = 0;
for group in list {
@@ -411,6 +478,7 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) ->
corrupt_chunks.clone(),
Some((done, snapshot_count)),
worker.clone(),
+ upid,
)?;
errors.append(&mut group_errors);
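
With the trait in place, anything implementing TaskState can drive verification, which is the point of the commit message above: forensic tooling would not need the API server's WorkerTask. A hypothetical stand-alone caller, assuming the trait sketched earlier and the new verify_all_backups signature from this diff (PrintTask and forensic_verify are illustrative names, not part of the commit):

    use std::sync::Arc;
    use anyhow::Error;

    // Hypothetical stand-in for the server's WorkerTask: logs to stdout
    // and never aborts. Illustrative only; not part of this commit.
    struct PrintTask;

    impl TaskState for PrintTask {
        fn check_abort(&self) -> Result<(), Error> {
            Ok(()) // a forensic one-shot run has nothing to abort
        }

        fn log(&self, _level: log::Level, message: &std::fmt::Arguments) {
            println!("{}", message);
        }
    }

    fn forensic_verify(datastore: Arc<DataStore>, upid: UPID) -> Result<(), Error> {
        let task: Arc<dyn TaskState + Send + Sync> = Arc::new(PrintTask);
        // New signature from this diff: the worker is a trait object, and
        // the UPID (previously pulled out of the WorkerTask) is explicit.
        let failed = verify_all_backups(datastore, task, &upid)?;
        for dir in failed {
            eprintln!("verification failed: {}", dir);
        }
        Ok(())
    }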