tape/drive: improve tape device locking behaviour
by implementing a custom error type that is either 'TimeOut' or 'Other'. In the api, check in the worker loop for exactly 'TimeOut' errors and continue only then. All other errors lead to a aborted task. Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
This commit is contained in:
		
				
					committed by
					
						 Thomas Lamprecht
						Thomas Lamprecht
					
				
			
			
				
	
			
			
			
						parent
						
							5b358ff0b1
						
					
				
				
					commit
					e5950360ca
				
			| @ -65,6 +65,7 @@ use crate::{ | ||||
|         drive::{ | ||||
|             media_changer, | ||||
|             lock_tape_device, | ||||
|             TapeLockError, | ||||
|             set_tape_device_state, | ||||
|         }, | ||||
|         changer::update_changer_online_status, | ||||
| @ -203,12 +204,15 @@ pub fn do_tape_backup_job( | ||||
|                     // for scheduled tape backup jobs, we wait indefinitely for the lock | ||||
|                     task_log!(worker, "waiting for drive lock..."); | ||||
|                     loop { | ||||
|                         if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) { | ||||
|                             drive_lock = Some(lock); | ||||
|                             break; | ||||
|                         } // ignore errors | ||||
|  | ||||
|                         worker.check_abort()?; | ||||
|                         match lock_tape_device(&drive_config, &setup.drive) { | ||||
|                             Ok(lock) => { | ||||
|                                 drive_lock = Some(lock); | ||||
|                                 break; | ||||
|                             } | ||||
|                             Err(TapeLockError::TimeOut) => continue, | ||||
|                             Err(TapeLockError::Other(err)) => return Err(err), | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 set_tape_device_state(&setup.drive, &worker.upid().to_string())?; | ||||
|  | ||||
| @ -477,16 +477,34 @@ pub fn request_and_load_media( | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(thiserror::Error, Debug)] | ||||
| pub enum TapeLockError { | ||||
|     #[error("timeout while trying to lock")] | ||||
|     TimeOut, | ||||
|     #[error("{0}")] | ||||
|     Other(#[from] Error), | ||||
| } | ||||
|  | ||||
| impl From<std::io::Error> for TapeLockError { | ||||
|     fn from(error: std::io::Error) -> Self { | ||||
|         Self::Other(error.into()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Acquires an exclusive lock for the tape device | ||||
| /// | ||||
| /// Basically calls lock_device_path() using the configured drive path. | ||||
| pub fn lock_tape_device( | ||||
|     config: &SectionConfigData, | ||||
|     drive: &str, | ||||
| ) -> Result<DeviceLockGuard, Error> { | ||||
| ) -> Result<DeviceLockGuard, TapeLockError> { | ||||
|     let path = tape_device_path(config, drive)?; | ||||
|     lock_device_path(&path) | ||||
|         .map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err)) | ||||
|     lock_device_path(&path).map_err(|err| match err { | ||||
|         TapeLockError::Other(err) => { | ||||
|             TapeLockError::Other(format_err!("unable to lock drive '{}' - {}", drive, err)) | ||||
|         } | ||||
|         other => other, | ||||
|     }) | ||||
| } | ||||
|  | ||||
| /// Writes the given state for the specified drive | ||||
| @ -555,7 +573,7 @@ pub struct DeviceLockGuard(std::fs::File); | ||||
| // | ||||
| // Uses systemd escape_unit to compute a file name from `device_path`, the try | ||||
| // to lock `/var/lock/<name>`. | ||||
| fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> { | ||||
| fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, TapeLockError> { | ||||
|  | ||||
|     let lock_name = crate::tools::systemd::escape_unit(device_path, true); | ||||
|  | ||||
| @ -564,7 +582,13 @@ fn lock_device_path(device_path: &str) -> Result<DeviceLockGuard, Error> { | ||||
|  | ||||
|     let timeout = std::time::Duration::new(10, 0); | ||||
|     let mut file = std::fs::OpenOptions::new().create(true).append(true).open(path)?; | ||||
|     proxmox::tools::fs::lock_file(&mut file, true, Some(timeout))?; | ||||
|     if let Err(err) =  proxmox::tools::fs::lock_file(&mut file, true, Some(timeout)) { | ||||
|         if err.kind() == std::io::ErrorKind::Interrupted { | ||||
|             return Err(TapeLockError::TimeOut); | ||||
|         } else { | ||||
|             return Err(err.into()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let backup_user = crate::backup::backup_user()?; | ||||
|     fchown(file.as_raw_fd(), Some(backup_user.uid), Some(backup_user.gid))?; | ||||
|  | ||||
		Reference in New Issue
	
	Block a user