file-restore: try to kill VM when stale
Helps to clean up a VM that has crashed and is no longer responding to vsock API calls, but still has a running QEMU instance. We always check the process command line to ensure we don't kill an unrelated process that has taken over the PID.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
parent
c4e1af3069
commit
7d39e47182
@ -98,6 +98,7 @@ async fn cleanup_map(map: &mut HashMap<String, VMState>) -> bool {
|
||||
"VM '{}' (pid: {}, cid: {}) was not reachable, removing from map",
|
||||
name, state.pid, state.cid
|
||||
);
|
||||
let _ = super::qemu_helper::try_kill_vm(state.pid);
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,6 +132,7 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
|
||||
Err(err) => {
|
||||
eprintln!("stale VM detected, restarting ({})", err);
|
||||
// VM is dead, restart
|
||||
let _ = super::qemu_helper::try_kill_vm(vm.pid);
|
||||
let vms = start_vm(vm.cid, details).await?;
|
||||
new_cid = vms.cid;
|
||||
state.map.insert(name, vms.clone());
|
||||
|
@ -56,7 +56,7 @@ fn validate_img_existance() -> Result<(), Error> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn try_kill_vm(pid: i32) -> Result<(), Error> {
|
||||
pub fn try_kill_vm(pid: i32) -> Result<(), Error> {
|
||||
let pid = Pid::from_raw(pid);
|
||||
if let Ok(()) = kill(pid, None) {
|
||||
// process is running (and we could kill it), check if it is actually ours
|
||||
|
Loading…
Reference in New Issue
Block a user