tape/drive: improve tape device locking behaviour
by implementing a custom error type that is either 'TimeOut' or 'Other'. In the api, check in the worker loop for exactly 'TimeOut' errors and continue only then. All other errors lead to a aborted task. Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
This commit is contained in:
parent
5b358ff0b1
commit
e5950360ca
@ -65,6 +65,7 @@ use crate::{
|
||||
drive::{
|
||||
media_changer,
|
||||
lock_tape_device,
|
||||
TapeLockError,
|
||||
set_tape_device_state,
|
||||
},
|
||||
changer::update_changer_online_status,
|
||||
@ -203,12 +204,15 @@ pub fn do_tape_backup_job(
|
||||
// for scheduled tape backup jobs, we wait indefinitely for the lock
|
||||
task_log!(worker, "waiting for drive lock...");
|
||||
loop {
|
||||
if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) {
|
||||
drive_lock = Some(lock);
|
||||
break;
|
||||
} // ignore errors
|
||||
|
||||
worker.check_abort()?;
|
||||
match lock_tape_device(&drive_config, &setup.drive) {
|
||||
Ok(lock) => {
|
||||
drive_lock = Some(lock);
|
||||
break;
|
||||
}
|
||||
Err(TapeLockError::TimeOut) => continue,
|
||||
Err(TapeLockError::Other(err)) => return Err(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
set_tape_device_state(&setup.drive, &worker.upid().to_string())?;
|
||||
|
@ -477,16 +477,34 @@ pub fn request_and_load_media(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum TapeLockError {
|
||||
#[error("timeout while trying to lock")]
|
||||
TimeOut,
|
||||
#[error("{0}")]
|
||||
Other(#[from] Error),
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for TapeLockError {
|
||||
fn from(error: std::io::Error) -> Self {
|
||||
Self::Other(error.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires an exclusive lock for the tape device
|
||||
///
|
||||
/// Basically calls lock_device_path() using the configured drive path.
|
||||
pub fn lock_tape_device(
|
||||
config: &SectionConfigData,
|
||||
drive: &str,
|
||||
) -> Result<DeviceLockGuard, Error> {
|
||||
) -> Result<DeviceLockGuard, TapeLockError> {
|
||||
let path = tape_device_path(config, drive)?;
|
||||
lock_device_path(&path)
|
||||
.map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err))
|
||||
lock_device_path(&path).map_err(|err| match err {
|
||||
TapeLockError::Other(err) => {
|
||||
TapeLockError::Other(format_err!("unable to lock drive '{}' - {}", drive, err))
|
||||
}
|
||||
other => other,
|
||||
})
|
||||
}
|
||||
|
||||
/// Writes the given state for the specified drive
|
||||
@ -555,7 +573,7 @@ pub struct DeviceLockGuard(std::fs::File);
|
||||
//
|
||||
// Uses systemd escape_unit to compute a file name from `device_path`, the try
|
||||
// to lock `/var/lock/<name>`.
|
||||
fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> {
|
||||
fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, TapeLockError> {
|
||||
|
||||
let lock_name = crate::tools::systemd::escape_unit(device_path, true);
|
||||
|
||||
@ -564,7 +582,13 @@ fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> {
|
||||
|
||||
let timeout = std::time::Duration::new(10, 0);
|
||||
let mut file = std::fs::OpenOptions::new().create(true).append(true).open(path)?;
|
||||
proxmox::tools::fs::lock_file(&mut file, true, Some(timeout))?;
|
||||
if let Err(err) = proxmox::tools::fs::lock_file(&mut file, true, Some(timeout)) {
|
||||
if err.kind() == std::io::ErrorKind::Interrupted {
|
||||
return Err(TapeLockError::TimeOut);
|
||||
} else {
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
|
||||
let backup_user = crate::backup::backup_user()?;
|
||||
fchown(file.as_raw_fd(), Some(backup_user.uid), Some(backup_user.gid))?;
|
||||
|
Loading…
Reference in New Issue
Block a user