use jobstate mechanism for verify/garbage_collection schedules
also changes: * correct comment about reset (replace 'sync' with 'action') * check schedule change correctly (only when it is actually changed) with this changes, we can drop the 'lookup_last_worker' method Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
This commit is contained in:
parent
6c25588e63
commit
d7a122a026
|
@ -132,6 +132,8 @@ pub fn create_datastore(param: Value) -> Result<(), Error> {
|
||||||
datastore::save_config(&config)?;
|
datastore::save_config(&config)?;
|
||||||
|
|
||||||
crate::config::jobstate::create_state_file("prune", &datastore.name)?;
|
crate::config::jobstate::create_state_file("prune", &datastore.name)?;
|
||||||
|
crate::config::jobstate::create_state_file("garbage_collection", &datastore.name)?;
|
||||||
|
crate::config::jobstate::create_state_file("verify", &datastore.name)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -313,13 +315,23 @@ pub fn update_datastore(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if gc_schedule.is_some() { data.gc_schedule = gc_schedule; }
|
let mut gc_schedule_changed = false;
|
||||||
|
if gc_schedule.is_some() {
|
||||||
|
gc_schedule_changed = data.gc_schedule != gc_schedule;
|
||||||
|
data.gc_schedule = gc_schedule;
|
||||||
|
}
|
||||||
|
|
||||||
let mut prune_schedule_changed = false;
|
let mut prune_schedule_changed = false;
|
||||||
if prune_schedule.is_some() {
|
if prune_schedule.is_some() {
|
||||||
prune_schedule_changed = true;
|
prune_schedule_changed = data.prune_schedule != prune_schedule;
|
||||||
data.prune_schedule = prune_schedule;
|
data.prune_schedule = prune_schedule;
|
||||||
}
|
}
|
||||||
if verify_schedule.is_some() { data.verify_schedule = verify_schedule; }
|
|
||||||
|
let mut verify_schedule_changed = false;
|
||||||
|
if verify_schedule.is_some() {
|
||||||
|
verify_schedule_changed = data.verify_schedule != verify_schedule;
|
||||||
|
data.verify_schedule = verify_schedule;
|
||||||
|
}
|
||||||
|
|
||||||
if keep_last.is_some() { data.keep_last = keep_last; }
|
if keep_last.is_some() { data.keep_last = keep_last; }
|
||||||
if keep_hourly.is_some() { data.keep_hourly = keep_hourly; }
|
if keep_hourly.is_some() { data.keep_hourly = keep_hourly; }
|
||||||
|
@ -332,12 +344,20 @@ pub fn update_datastore(
|
||||||
|
|
||||||
datastore::save_config(&config)?;
|
datastore::save_config(&config)?;
|
||||||
|
|
||||||
// we want to reset the statefile, to avoid an immediate sync in some cases
|
// we want to reset the statefiles, to avoid an immediate action in some cases
|
||||||
// (e.g. going from monthly to weekly in the second week of the month)
|
// (e.g. going from monthly to weekly in the second week of the month)
|
||||||
|
if gc_schedule_changed {
|
||||||
|
crate::config::jobstate::create_state_file("garbage_collection", &name)?;
|
||||||
|
}
|
||||||
|
|
||||||
if prune_schedule_changed {
|
if prune_schedule_changed {
|
||||||
crate::config::jobstate::create_state_file("prune", &name)?;
|
crate::config::jobstate::create_state_file("prune", &name)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if verify_schedule_changed {
|
||||||
|
crate::config::jobstate::create_state_file("verify", &name)?;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,7 +397,10 @@ pub fn delete_datastore(name: String, digest: Option<String>) -> Result<(), Erro
|
||||||
|
|
||||||
datastore::save_config(&config)?;
|
datastore::save_config(&config)?;
|
||||||
|
|
||||||
crate::config::jobstate::remove_state_file("prune", &name)?;
|
// ignore errors
|
||||||
|
let _ = crate::config::jobstate::remove_state_file("prune", &name);
|
||||||
|
let _ = crate::config::jobstate::remove_state_file("garbage_collection", &name);
|
||||||
|
let _ = crate::config::jobstate::remove_state_file("verify", &name);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -202,40 +202,14 @@ async fn schedule_tasks() -> Result<(), Error> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lookup_last_worker(worker_type: &str, worker_id: &str) -> Result<Option<server::UPID>, Error> {
|
|
||||||
|
|
||||||
let list = proxmox_backup::server::read_task_list()?;
|
|
||||||
|
|
||||||
let mut last: Option<&server::UPID> = None;
|
|
||||||
|
|
||||||
for entry in list.iter() {
|
|
||||||
if entry.upid.worker_type == worker_type {
|
|
||||||
if let Some(ref id) = entry.upid.worker_id {
|
|
||||||
if id == worker_id {
|
|
||||||
match last {
|
|
||||||
Some(ref upid) => {
|
|
||||||
if upid.starttime < entry.upid.starttime {
|
|
||||||
last = Some(&entry.upid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
last = Some(&entry.upid)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(last.cloned())
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async fn schedule_datastore_garbage_collection() {
|
async fn schedule_datastore_garbage_collection() {
|
||||||
|
|
||||||
use proxmox_backup::backup::DataStore;
|
use proxmox_backup::backup::DataStore;
|
||||||
use proxmox_backup::server::{UPID, WorkerTask};
|
use proxmox_backup::server::{UPID, WorkerTask};
|
||||||
use proxmox_backup::config::datastore::{self, DataStoreConfig};
|
use proxmox_backup::config::{
|
||||||
|
jobstate::{self, Job},
|
||||||
|
datastore::{self, DataStoreConfig}
|
||||||
|
};
|
||||||
use proxmox_backup::tools::systemd::time::{
|
use proxmox_backup::tools::systemd::time::{
|
||||||
parse_calendar_event, compute_next_event};
|
parse_calendar_event, compute_next_event};
|
||||||
|
|
||||||
|
@ -291,11 +265,10 @@ async fn schedule_datastore_garbage_collection() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
match lookup_last_worker(worker_type, &store) {
|
match jobstate::last_run_time(worker_type, &store) {
|
||||||
Ok(Some(upid)) => upid.starttime,
|
Ok(time) => time,
|
||||||
Ok(None) => 0,
|
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
eprintln!("lookup_last_job_start failed: {}", err);
|
eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -314,6 +287,11 @@ async fn schedule_datastore_garbage_collection() {
|
||||||
|
|
||||||
if next > now { continue; }
|
if next > now { continue; }
|
||||||
|
|
||||||
|
let mut job = match Job::new(worker_type, &store) {
|
||||||
|
Ok(job) => job,
|
||||||
|
Err(_) => continue, // could not get lock
|
||||||
|
};
|
||||||
|
|
||||||
let store2 = store.clone();
|
let store2 = store.clone();
|
||||||
|
|
||||||
if let Err(err) = WorkerTask::new_thread(
|
if let Err(err) = WorkerTask::new_thread(
|
||||||
|
@ -322,9 +300,20 @@ async fn schedule_datastore_garbage_collection() {
|
||||||
Userid::backup_userid().clone(),
|
Userid::backup_userid().clone(),
|
||||||
false,
|
false,
|
||||||
move |worker| {
|
move |worker| {
|
||||||
|
job.start(&worker.upid().to_string())?;
|
||||||
|
|
||||||
worker.log(format!("starting garbage collection on store {}", store));
|
worker.log(format!("starting garbage collection on store {}", store));
|
||||||
worker.log(format!("task triggered by schedule '{}'", event_str));
|
worker.log(format!("task triggered by schedule '{}'", event_str));
|
||||||
datastore.garbage_collection(&worker)
|
|
||||||
|
let result = datastore.garbage_collection(&worker);
|
||||||
|
|
||||||
|
let status = worker.create_state(&result);
|
||||||
|
|
||||||
|
if let Err(err) = job.finish(status) {
|
||||||
|
eprintln!("could not finish job state for {}: {}", worker_type, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
}
|
}
|
||||||
) {
|
) {
|
||||||
eprintln!("unable to start garbage collection on store {} - {}", store2, err);
|
eprintln!("unable to start garbage collection on store {} - {}", store2, err);
|
||||||
|
@ -482,7 +471,10 @@ async fn schedule_datastore_prune() {
|
||||||
async fn schedule_datastore_verification() {
|
async fn schedule_datastore_verification() {
|
||||||
use proxmox_backup::backup::{DataStore, verify_all_backups};
|
use proxmox_backup::backup::{DataStore, verify_all_backups};
|
||||||
use proxmox_backup::server::{WorkerTask};
|
use proxmox_backup::server::{WorkerTask};
|
||||||
use proxmox_backup::config::datastore::{self, DataStoreConfig};
|
use proxmox_backup::config::{
|
||||||
|
jobstate::{self, Job},
|
||||||
|
datastore::{self, DataStoreConfig}
|
||||||
|
};
|
||||||
use proxmox_backup::tools::systemd::time::{
|
use proxmox_backup::tools::systemd::time::{
|
||||||
parse_calendar_event, compute_next_event};
|
parse_calendar_event, compute_next_event};
|
||||||
|
|
||||||
|
@ -526,16 +518,10 @@ async fn schedule_datastore_verification() {
|
||||||
|
|
||||||
let worker_type = "verify";
|
let worker_type = "verify";
|
||||||
|
|
||||||
let last = match lookup_last_worker(worker_type, &store) {
|
let last = match jobstate::last_run_time(worker_type, &store) {
|
||||||
Ok(Some(upid)) => {
|
Ok(time) => time,
|
||||||
if proxmox_backup::server::worker_is_active_local(&upid) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
upid.starttime
|
|
||||||
}
|
|
||||||
Ok(None) => 0,
|
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
eprintln!("lookup_last_job_start failed: {}", err);
|
eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -553,6 +539,11 @@ async fn schedule_datastore_verification() {
|
||||||
|
|
||||||
if next > now { continue; }
|
if next > now { continue; }
|
||||||
|
|
||||||
|
let mut job = match Job::new(worker_type, &store) {
|
||||||
|
Ok(job) => job,
|
||||||
|
Err(_) => continue, // could not get lock
|
||||||
|
};
|
||||||
|
|
||||||
let worker_id = store.clone();
|
let worker_id = store.clone();
|
||||||
let store2 = store.clone();
|
let store2 = store.clone();
|
||||||
if let Err(err) = WorkerTask::new_thread(
|
if let Err(err) = WorkerTask::new_thread(
|
||||||
|
@ -561,18 +552,29 @@ async fn schedule_datastore_verification() {
|
||||||
Userid::backup_userid().clone(),
|
Userid::backup_userid().clone(),
|
||||||
false,
|
false,
|
||||||
move |worker| {
|
move |worker| {
|
||||||
|
job.start(&worker.upid().to_string())?;
|
||||||
worker.log(format!("starting verification on store {}", store2));
|
worker.log(format!("starting verification on store {}", store2));
|
||||||
worker.log(format!("task triggered by schedule '{}'", event_str));
|
worker.log(format!("task triggered by schedule '{}'", event_str));
|
||||||
if let Ok(failed_dirs) = verify_all_backups(datastore, worker.clone()) {
|
let result = try_block!({
|
||||||
|
let failed_dirs = verify_all_backups(datastore, worker.clone())?;
|
||||||
if failed_dirs.len() > 0 {
|
if failed_dirs.len() > 0 {
|
||||||
worker.log("Failed to verify following snapshots:");
|
worker.log("Failed to verify following snapshots:");
|
||||||
for dir in failed_dirs {
|
for dir in failed_dirs {
|
||||||
worker.log(format!("\t{}", dir));
|
worker.log(format!("\t{}", dir));
|
||||||
}
|
}
|
||||||
bail!("verification failed - please check the log for details");
|
Err(format_err!("verification failed - please check the log for details"))
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let status = worker.create_state(&result);
|
||||||
|
|
||||||
|
if let Err(err) = job.finish(status) {
|
||||||
|
eprintln!("could not finish job state for {}: {}", worker_type, err);
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
|
result
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
eprintln!("unable to start verification on store {} - {}", store, err);
|
eprintln!("unable to start verification on store {} - {}", store, err);
|
||||||
|
|
Loading…
Reference in New Issue