file-restore: try to kill VM when stale
Helps to clean up a VM that has crashed and is no longer responding to vsock API calls, but still has a running QEMU instance. We always check the process command line to ensure we don't kill a random process that took over the PID. Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
parent
c4e1af3069
commit
7d39e47182
@ -98,6 +98,7 @@ async fn cleanup_map(map: &mut HashMap<String, VMState>) -> bool {
|
|||||||
"VM '{}' (pid: {}, cid: {}) was not reachable, removing from map",
|
"VM '{}' (pid: {}, cid: {}) was not reachable, removing from map",
|
||||||
name, state.pid, state.cid
|
name, state.pid, state.cid
|
||||||
);
|
);
|
||||||
|
let _ = super::qemu_helper::try_kill_vm(state.pid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -131,6 +132,7 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
|
|||||||
Err(err) => {
|
Err(err) => {
|
||||||
eprintln!("stale VM detected, restarting ({})", err);
|
eprintln!("stale VM detected, restarting ({})", err);
|
||||||
// VM is dead, restart
|
// VM is dead, restart
|
||||||
|
let _ = super::qemu_helper::try_kill_vm(vm.pid);
|
||||||
let vms = start_vm(vm.cid, details).await?;
|
let vms = start_vm(vm.cid, details).await?;
|
||||||
new_cid = vms.cid;
|
new_cid = vms.cid;
|
||||||
state.map.insert(name, vms.clone());
|
state.map.insert(name, vms.clone());
|
||||||
|
@ -56,7 +56,7 @@ fn validate_img_existance() -> Result<(), Error> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_kill_vm(pid: i32) -> Result<(), Error> {
|
pub fn try_kill_vm(pid: i32) -> Result<(), Error> {
|
||||||
let pid = Pid::from_raw(pid);
|
let pid = Pid::from_raw(pid);
|
||||||
if let Ok(()) = kill(pid, None) {
|
if let Ok(()) = kill(pid, None) {
|
||||||
// process is running (and we could kill it), check if it is actually ours
|
// process is running (and we could kill it), check if it is actually ours
|
||||||
|
Loading…
Reference in New Issue
Block a user