verify: improve code reuse, fix filter function

Try to reuse verify_all_backups(), because this function has better
logging and a well-defined snapshot order.
This commit is contained in:
Dietmar Maurer 2020-10-28 12:58:15 +01:00
parent d642802d8c
commit a4915dfc2b
4 changed files with 70 additions and 65 deletions

View File

@ -579,6 +579,7 @@ pub fn verify(
move |worker| {
let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024*16)));
let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
let filter = |_backup_info: &BackupInfo| { true };
let failed_dirs = if let Some(backup_dir) = backup_dir {
let mut res = Vec::new();
@ -602,10 +603,11 @@ pub fn verify(
None,
worker.clone(),
worker.upid(),
&filter,
)?;
failed_dirs
} else {
verify_all_backups(datastore, worker.clone(), worker.upid())?
verify_all_backups(datastore, worker.clone(), worker.upid(), &filter)?
};
if failed_dirs.len() > 0 {
worker.log("Failed to verify following snapshots:");

View File

@ -404,7 +404,7 @@ pub fn verify_backup_dir_with_lock(
/// Returns
/// - Ok((count, failed_dirs)) where failed_dirs had verification errors
/// - Err(_) if task was aborted
pub fn verify_backup_group(
pub fn verify_backup_group<F: Fn(&BackupInfo) -> bool>(
datastore: Arc<DataStore>,
group: &BackupGroup,
verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
@ -412,6 +412,7 @@ pub fn verify_backup_group(
progress: Option<(usize, usize)>, // (done, snapshot_count)
worker: Arc<dyn TaskState + Send + Sync>,
upid: &UPID,
filter: &F,
) -> Result<(usize, Vec<String>), Error> {
let mut errors = Vec::new();
@ -437,6 +438,17 @@ pub fn verify_backup_group(
BackupInfo::sort_list(&mut list, false); // newest first
for info in list {
count += 1;
if filter(&info) == false {
task_log!(
worker,
"SKIPPED: verify {}:{} (already verified)",
datastore.name(),
info.backup_dir,
);
continue;
}
if !verify_backup_dir(
datastore.clone(),
&info.backup_dir,
@ -470,10 +482,11 @@ pub fn verify_backup_group(
/// Returns
/// - Ok(failed_dirs) where failed_dirs had verification errors
/// - Err(_) if task was aborted
pub fn verify_all_backups(
pub fn verify_all_backups<F: Fn(&BackupInfo) -> bool>(
datastore: Arc<DataStore>,
worker: Arc<dyn TaskState + Send + Sync>,
upid: &UPID,
filter: &F,
) -> Result<Vec<String>, Error> {
let mut errors = Vec::new();
@ -518,6 +531,7 @@ pub fn verify_all_backups(
Some((done, snapshot_count)),
worker.clone(),
upid,
filter,
)?;
errors.append(&mut group_errors);

View File

@ -57,7 +57,11 @@ const VERIFY_ERR_TEMPLATE: &str = r###"
Job ID: {{job.id}}
Datastore: {{job.store}}
Verification failed: {{error}}
Verification failed on these snapshots:
{{#each errors}}
{{this}}
{{/each}}
"###;
@ -150,27 +154,31 @@ pub fn send_gc_status(
pub fn send_verify_status(
email: &str,
job: VerificationJobConfig,
result: &Result<(), Error>,
result: &Result<Vec<String>, Error>,
) -> Result<(), Error> {
let text = match result {
Ok(()) => {
Ok(errors) if errors.is_empty() => {
let data = json!({ "job": job });
HANDLEBARS.render("verify_ok_template", &data)?
}
Err(err) => {
let data = json!({ "job": job, "error": err.to_string() });
Ok(errors) => {
let data = json!({ "job": job, "errors": errors });
HANDLEBARS.render("verify_err_template", &data)?
}
Err(_) => {
// aborted job - do not send any email
return Ok(());
}
};
let subject = match result {
Ok(()) => format!(
Ok(errors) if errors.is_empty() => format!(
"Verify Datastore '{}' successful",
job.store,
),
Err(_) => format!(
_ => format!(
"Verify Datastore '{}' failed",
job.store,
),

View File

@ -1,7 +1,4 @@
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use anyhow::{bail, Error};
use anyhow::{format_err, Error};
use crate::{
server::WorkerTask,
@ -11,7 +8,7 @@ use crate::{
backup::{
DataStore,
BackupInfo,
verify_backup_dir,
verify_all_backups,
},
task_log,
};
@ -23,28 +20,36 @@ pub fn do_verification_job(
userid: &Userid,
schedule: Option<String>,
) -> Result<String, Error> {
let datastore = DataStore::lookup_datastore(&verification_job.store)?;
let mut backups_to_verify = BackupInfo::list_backups(&datastore.base_path())?;
if verification_job.ignore_verified.unwrap_or(true) {
backups_to_verify.retain(|backup_info| {
let manifest = match datastore.load_manifest(&backup_info.backup_dir) {
Ok((manifest, _)) => manifest,
Err(_) => return false,
};
let datastore2 = datastore.clone();
let raw_verify_state = manifest.unprotected["verify_state"].clone();
let last_state = match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) {
Ok(last_state) => last_state,
Err(_) => return true,
};
let outdated_after = verification_job.outdated_after.clone();
let ignore_verified = verification_job.ignore_verified.unwrap_or(true);
let now = proxmox::tools::time::epoch_i64();
let days_since_last_verify = (now - last_state.upid.starttime) / 86400;
verification_job.outdated_after.is_some()
&& days_since_last_verify > verification_job.outdated_after.unwrap()
})
}
let filter = move |backup_info: &BackupInfo| {
if !ignore_verified {
return true;
}
let manifest = match datastore2.load_manifest(&backup_info.backup_dir) {
Ok((manifest, _)) => manifest,
Err(_) => return false,
};
let raw_verify_state = manifest.unprotected["verify_state"].clone();
let last_state = match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) {
Ok(last_state) => last_state,
Err(_) => return true,
};
let now = proxmox::tools::time::epoch_i64();
let days_since_last_verify = (now - last_state.upid.starttime) / 86400;
outdated_after
.map(|v| days_since_last_verify > v)
.unwrap_or(true)
};
let email = crate::server::lookup_user_email(userid);
@ -59,42 +64,18 @@ pub fn do_verification_job(
job.start(&worker.upid().to_string())?;
task_log!(worker,"Starting datastore verify job '{}'", job_id);
task_log!(worker,"verifying {} backups", backups_to_verify.len());
if let Some(event_str) = schedule {
task_log!(worker,"task triggered by schedule '{}'", event_str);
}
let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024 * 16)));
let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
let result = proxmox::try_block!({
let mut failed_dirs: Vec<String> = Vec::new();
let result = verify_all_backups(datastore, worker.clone(), worker.upid(), &filter);
let job_result = match result {
Ok(ref errors) if errors.is_empty() => Ok(()),
Ok(_) => Err(format_err!("verification failed - please check the log for details")),
Err(_) => Err(format_err!("verification failed - job aborted")),
};
for backup_info in backups_to_verify {
let verification_result = verify_backup_dir(
datastore.clone(),
&backup_info.backup_dir,
verified_chunks.clone(),
corrupt_chunks.clone(),
worker.clone(),
worker.upid().clone()
);
if let Ok(false) = verification_result {
failed_dirs.push(backup_info.backup_dir.to_string());
} // otherwise successful or aborted
}
if !failed_dirs.is_empty() {
task_log!(worker,"Failed to verify following snapshots:",);
for dir in failed_dirs {
task_log!(worker, "\t{}", dir)
}
bail!("verification failed - please check the log for details");
}
Ok(())
});
let status = worker.create_state(&result);
let status = worker.create_state(&job_result);
match job.finish(status) {
Err(err) => eprintln!(
@ -111,7 +92,7 @@ pub fn do_verification_job(
}
}
result
job_result
},
)?;
Ok(upid_str)