verify: improve code reuse, fix filter function

Try to reuse verify_all_backups(), because this function has better
logging and a well-defined snapshot order.
Dietmar Maurer 2020-10-28 12:58:15 +01:00
parent d642802d8c
commit a4915dfc2b
4 changed files with 70 additions and 65 deletions
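
The change threads a filter callback through the verify entry points: verify_all_backups() and verify_backup_group() now take a closure that decides per snapshot whether it gets verified, and the verify job builds its "skip already verified" logic on top of that. A minimal caller sketch, assuming the surrounding PBS types from the diffs below (DataStore, BackupInfo, TaskState, UPID); the filter body here is only an illustration, not code from this commit:

    // Re-verify only snapshots that carry no verify_state in their manifest.
    let datastore2 = datastore.clone();
    let filter = move |info: &BackupInfo| {
        match datastore2.load_manifest(&info.backup_dir) {
            Ok((manifest, _)) => manifest.unprotected["verify_state"].is_null(),
            Err(_) => false, // unreadable manifest: skip here, the job code decides differently
        }
    };

    let failed_dirs = verify_all_backups(datastore, worker.clone(), worker.upid(), &filter)?;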


@@ -579,6 +579,7 @@ pub fn verify(
         move |worker| {
             let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024*16)));
             let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
+            let filter = |_backup_info: &BackupInfo| { true };
 
             let failed_dirs = if let Some(backup_dir) = backup_dir {
                 let mut res = Vec::new();
@@ -602,10 +603,11 @@ pub fn verify(
                     None,
                     worker.clone(),
                     worker.upid(),
+                    &filter,
                 )?;
                 failed_dirs
             } else {
-                verify_all_backups(datastore, worker.clone(), worker.upid())?
+                verify_all_backups(datastore, worker.clone(), worker.upid(), &filter)?
             };
             if failed_dirs.len() > 0 {
                 worker.log("Failed to verify following snapshots:");


@@ -404,7 +404,7 @@ pub fn verify_backup_dir_with_lock(
 /// Returns
 /// - Ok((count, failed_dirs)) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_backup_group(
+pub fn verify_backup_group<F: Fn(&BackupInfo) -> bool>(
     datastore: Arc<DataStore>,
     group: &BackupGroup,
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
@@ -412,6 +412,7 @@ pub fn verify_backup_group(
     progress: Option<(usize, usize)>, // (done, snapshot_count)
     worker: Arc<dyn TaskState + Send + Sync>,
     upid: &UPID,
+    filter: &F,
 ) -> Result<(usize, Vec<String>), Error> {
 
     let mut errors = Vec::new();
@@ -437,6 +438,17 @@ pub fn verify_backup_group(
     BackupInfo::sort_list(&mut list, false); // newest first
     for info in list {
         count += 1;
+
+        if filter(&info) == false {
+            task_log!(
+                worker,
+                "SKIPPED: verify {}:{} (already verified)",
+                datastore.name(),
+                info.backup_dir,
+            );
+            continue;
+        }
+
         if !verify_backup_dir(
             datastore.clone(),
             &info.backup_dir,
@@ -470,10 +482,11 @@ pub fn verify_backup_group(
 /// Returns
 /// - Ok(failed_dirs) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_all_backups(
+pub fn verify_all_backups<F: Fn(&BackupInfo) -> bool>(
     datastore: Arc<DataStore>,
     worker: Arc<dyn TaskState + Send + Sync>,
     upid: &UPID,
+    filter: &F,
 ) -> Result<Vec<String>, Error> {
 
     let mut errors = Vec::new();
@@ -518,6 +531,7 @@ pub fn verify_all_backups(
             Some((done, snapshot_count)),
             worker.clone(),
             upid,
+            filter,
         )?;
         errors.append(&mut group_errors);
 


@@ -57,7 +57,11 @@ const VERIFY_ERR_TEMPLATE: &str = r###"
 Job ID: {{job.id}}
 Datastore: {{job.store}}
 
-Verification failed: {{error}}
+Verification failed on these snapshots:
+
+{{#each errors}}
+{{this}}
+{{/each}}
 
 "###;
@@ -150,27 +154,31 @@ pub fn send_gc_status(
 pub fn send_verify_status(
     email: &str,
     job: VerificationJobConfig,
-    result: &Result<(), Error>,
+    result: &Result<Vec<String>, Error>,
 ) -> Result<(), Error> {
 
     let text = match result {
-        Ok(()) => {
+        Ok(errors) if errors.is_empty() => {
             let data = json!({ "job": job });
             HANDLEBARS.render("verify_ok_template", &data)?
         }
-        Err(err) => {
-            let data = json!({ "job": job, "error": err.to_string() });
+        Ok(errors) => {
+            let data = json!({ "job": job, "errors": errors });
             HANDLEBARS.render("verify_err_template", &data)?
        }
+        Err(_) => {
+            // aborted job - do not send any email
+            return Ok(());
+        }
     };
 
     let subject = match result {
-        Ok(()) => format!(
+        Ok(errors) if errors.is_empty() => format!(
            "Verify Datastore '{}' successful",
            job.store,
        ),
-        Err(_) => format!(
+        _ => format!(
            "Verify Datastore '{}' failed",
            job.store,
        ),
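
With the changed signature the caller hands over the list of failed snapshots instead of a single error, and an aborted task (Err) sends no mail at all. A hypothetical call for the failure case (the snapshot path is made up; email and job come from the surrounding job code):

    // One snapshot failed verification -> verify_err_template lists it.
    let failed = vec!["vm/100/2020-10-27T12:00:00Z".to_string()];
    send_verify_status(&email, job, &Ok(failed))?;
    // &Ok(Vec::new()) would render verify_ok_template instead;
    // &Err(_) (aborted task) returns early without sending anything.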


@@ -1,7 +1,4 @@
-use std::collections::HashSet;
-use std::sync::{Arc, Mutex};
-
-use anyhow::{bail, Error};
+use anyhow::{format_err, Error};
 
 use crate::{
     server::WorkerTask,
@@ -11,7 +8,7 @@ use crate::{
     backup::{
         DataStore,
         BackupInfo,
-        verify_backup_dir,
+        verify_all_backups,
     },
     task_log,
 };
@@ -23,28 +20,36 @@ pub fn do_verification_job(
     userid: &Userid,
     schedule: Option<String>,
 ) -> Result<String, Error> {
 
     let datastore = DataStore::lookup_datastore(&verification_job.store)?;
+    let datastore2 = datastore.clone();
 
-    let mut backups_to_verify = BackupInfo::list_backups(&datastore.base_path())?;
-    if verification_job.ignore_verified.unwrap_or(true) {
-        backups_to_verify.retain(|backup_info| {
-            let manifest = match datastore.load_manifest(&backup_info.backup_dir) {
-                Ok((manifest, _)) => manifest,
-                Err(_) => return false,
-            };
+    let outdated_after = verification_job.outdated_after.clone();
+    let ignore_verified = verification_job.ignore_verified.unwrap_or(true);
 
-            let raw_verify_state = manifest.unprotected["verify_state"].clone();
-            let last_state = match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) {
-                Ok(last_state) => last_state,
-                Err(_) => return true,
-            };
+    let filter = move |backup_info: &BackupInfo| {
+        if !ignore_verified {
+            return true;
+        }
+        let manifest = match datastore2.load_manifest(&backup_info.backup_dir) {
+            Ok((manifest, _)) => manifest,
+            Err(_) => return false,
+        };
 
-            let now = proxmox::tools::time::epoch_i64();
-            let days_since_last_verify = (now - last_state.upid.starttime) / 86400;
-            verification_job.outdated_after.is_some()
-                && days_since_last_verify > verification_job.outdated_after.unwrap()
-        })
-    }
+        let raw_verify_state = manifest.unprotected["verify_state"].clone();
+        let last_state = match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) {
+            Ok(last_state) => last_state,
+            Err(_) => return true,
+        };
+
+        let now = proxmox::tools::time::epoch_i64();
+        let days_since_last_verify = (now - last_state.upid.starttime) / 86400;
+        outdated_after
+            .map(|v| days_since_last_verify > v)
+            .unwrap_or(true)
+    };
 
     let email = crate::server::lookup_user_email(userid);
@@ -59,42 +64,18 @@ pub fn do_verification_job(
             job.start(&worker.upid().to_string())?;
 
             task_log!(worker,"Starting datastore verify job '{}'", job_id);
-            task_log!(worker,"verifying {} backups", backups_to_verify.len());
 
             if let Some(event_str) = schedule {
                 task_log!(worker,"task triggered by schedule '{}'", event_str);
             }
 
-            let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024 * 16)));
-            let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
-            let result = proxmox::try_block!({
-                let mut failed_dirs: Vec<String> = Vec::new();
-
-                for backup_info in backups_to_verify {
-                    let verification_result = verify_backup_dir(
-                        datastore.clone(),
-                        &backup_info.backup_dir,
-                        verified_chunks.clone(),
-                        corrupt_chunks.clone(),
-                        worker.clone(),
-                        worker.upid().clone()
-                    );
-                    if let Ok(false) = verification_result {
-                        failed_dirs.push(backup_info.backup_dir.to_string());
-                    } // otherwise successful or aborted
-                }
-
-                if !failed_dirs.is_empty() {
-                    task_log!(worker,"Failed to verify following snapshots:",);
-                    for dir in failed_dirs {
-                        task_log!(worker, "\t{}", dir)
-                    }
-                    bail!("verification failed - please check the log for details");
-                }
-
-                Ok(())
-            });
+            let result = verify_all_backups(datastore, worker.clone(), worker.upid(), &filter);
+            let job_result = match result {
+                Ok(ref errors) if errors.is_empty() => Ok(()),
+                Ok(_) => Err(format_err!("verification failed - please check the log for details")),
+                Err(_) => Err(format_err!("verification failed - job aborted")),
+            };
 
-            let status = worker.create_state(&result);
+            let status = worker.create_state(&job_result);
 
             match job.finish(status) {
                 Err(err) => eprintln!(
@@ -111,7 +92,7 @@ pub fn do_verification_job(
                 }
             }
 
-            result
+            job_result
         },
     )?;
 
     Ok(upid_str)
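
For reference, the fixed re-verification check at the end of the new filter closure reduces to this self-contained sketch (the helper name and the numbers are made up for illustration):

    fn needs_reverify(outdated_after: Option<i64>, days_since_last_verify: i64) -> bool {
        // same expression as in the filter closure above
        outdated_after
            .map(|v| days_since_last_verify > v)
            .unwrap_or(true)
    }

    fn main() {
        assert!(needs_reverify(Some(30), 40));  // last verify is older than the limit -> verify again
        assert!(!needs_reverify(Some(30), 10)); // verified recently enough -> skip
        assert!(needs_reverify(None, 1));       // no outdated-after limit configured -> always re-verify
    }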