tape/drive: improve tape device locking behaviour
by implementing a custom error type that is either 'TimeOut' or 'Other'. In the API, check in the worker loop for exactly 'TimeOut' errors and continue only then. All other errors lead to an aborted task. Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
This commit is contained in:
parent
5b358ff0b1
commit
e5950360ca
@ -65,6 +65,7 @@ use crate::{
|
|||||||
drive::{
|
drive::{
|
||||||
media_changer,
|
media_changer,
|
||||||
lock_tape_device,
|
lock_tape_device,
|
||||||
|
TapeLockError,
|
||||||
set_tape_device_state,
|
set_tape_device_state,
|
||||||
},
|
},
|
||||||
changer::update_changer_online_status,
|
changer::update_changer_online_status,
|
||||||
@ -203,12 +204,15 @@ pub fn do_tape_backup_job(
|
|||||||
// for scheduled tape backup jobs, we wait indefinitely for the lock
|
// for scheduled tape backup jobs, we wait indefinitely for the lock
|
||||||
task_log!(worker, "waiting for drive lock...");
|
task_log!(worker, "waiting for drive lock...");
|
||||||
loop {
|
loop {
|
||||||
if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) {
|
|
||||||
drive_lock = Some(lock);
|
|
||||||
break;
|
|
||||||
} // ignore errors
|
|
||||||
|
|
||||||
worker.check_abort()?;
|
worker.check_abort()?;
|
||||||
|
match lock_tape_device(&drive_config, &setup.drive) {
|
||||||
|
Ok(lock) => {
|
||||||
|
drive_lock = Some(lock);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(TapeLockError::TimeOut) => continue,
|
||||||
|
Err(TapeLockError::Other(err)) => return Err(err),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
set_tape_device_state(&setup.drive, &worker.upid().to_string())?;
|
set_tape_device_state(&setup.drive, &worker.upid().to_string())?;
|
||||||
|
@ -477,16 +477,34 @@ pub fn request_and_load_media(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
pub enum TapeLockError {
|
||||||
|
#[error("timeout while trying to lock")]
|
||||||
|
TimeOut,
|
||||||
|
#[error("{0}")]
|
||||||
|
Other(#[from] Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::io::Error> for TapeLockError {
|
||||||
|
fn from(error: std::io::Error) -> Self {
|
||||||
|
Self::Other(error.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Acquires an exclusive lock for the tape device
|
/// Acquires an exclusive lock for the tape device
|
||||||
///
|
///
|
||||||
/// Basically calls lock_device_path() using the configured drive path.
|
/// Basically calls lock_device_path() using the configured drive path.
|
||||||
pub fn lock_tape_device(
|
pub fn lock_tape_device(
|
||||||
config: &SectionConfigData,
|
config: &SectionConfigData,
|
||||||
drive: &str,
|
drive: &str,
|
||||||
) -> Result<DeviceLockGuard, Error> {
|
) -> Result<DeviceLockGuard, TapeLockError> {
|
||||||
let path = tape_device_path(config, drive)?;
|
let path = tape_device_path(config, drive)?;
|
||||||
lock_device_path(&path)
|
lock_device_path(&path).map_err(|err| match err {
|
||||||
.map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err))
|
TapeLockError::Other(err) => {
|
||||||
|
TapeLockError::Other(format_err!("unable to lock drive '{}' - {}", drive, err))
|
||||||
|
}
|
||||||
|
other => other,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Writes the given state for the specified drive
|
/// Writes the given state for the specified drive
|
||||||
@ -555,7 +573,7 @@ pub struct DeviceLockGuard(std::fs::File);
|
|||||||
//
|
//
|
||||||
// Uses systemd escape_unit to compute a file name from `device_path`, then try
|
// Uses systemd escape_unit to compute a file name from `device_path`, then try
|
||||||
// to lock `/var/lock/<name>`.
|
// to lock `/var/lock/<name>`.
|
||||||
fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> {
|
fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, TapeLockError> {
|
||||||
|
|
||||||
let lock_name = crate::tools::systemd::escape_unit(device_path, true);
|
let lock_name = crate::tools::systemd::escape_unit(device_path, true);
|
||||||
|
|
||||||
@ -564,7 +582,13 @@ fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> {
|
|||||||
|
|
||||||
let timeout = std::time::Duration::new(10, 0);
|
let timeout = std::time::Duration::new(10, 0);
|
||||||
let mut file = std::fs::OpenOptions::new().create(true).append(true).open(path)?;
|
let mut file = std::fs::OpenOptions::new().create(true).append(true).open(path)?;
|
||||||
proxmox::tools::fs::lock_file(&mut file, true, Some(timeout))?;
|
if let Err(err) = proxmox::tools::fs::lock_file(&mut file, true, Some(timeout)) {
|
||||||
|
if err.kind() == std::io::ErrorKind::Interrupted {
|
||||||
|
return Err(TapeLockError::TimeOut);
|
||||||
|
} else {
|
||||||
|
return Err(err.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let backup_user = crate::backup::backup_user()?;
|
let backup_user = crate::backup::backup_user()?;
|
||||||
fchown(file.as_raw_fd(), Some(backup_user.uid), Some(backup_user.gid))?;
|
fchown(file.as_raw_fd(), Some(backup_user.uid), Some(backup_user.gid))?;
|
||||||
|
Loading…
Reference in New Issue
Block a user