file-restore: try to kill VM when stale

Helps to clean up a VM that has crashed, is not responding to vsock API
calls, but still has a running QEMU instance.

We always check the process commandline to ensure we don't kill a random
process that took over the PID.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
Stefan Reiter 2021-05-06 17:26:17 +02:00 committed by Thomas Lamprecht
parent c4e1af3069
commit 7d39e47182
2 changed files with 3 additions and 1 deletions

View File

@ -98,6 +98,7 @@ async fn cleanup_map(map: &mut HashMap<String, VMState>) -> bool {
"VM '{}' (pid: {}, cid: {}) was not reachable, removing from map",
name, state.pid, state.cid
);
let _ = super::qemu_helper::try_kill_vm(state.pid);
}
}
@ -131,6 +132,7 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
Err(err) => {
eprintln!("stale VM detected, restarting ({})", err);
// VM is dead, restart
let _ = super::qemu_helper::try_kill_vm(vm.pid);
let vms = start_vm(vm.cid, details).await?;
new_cid = vms.cid;
state.map.insert(name, vms.clone());

View File

@ -56,7 +56,7 @@ fn validate_img_existance() -> Result<(), Error> {
Ok(())
}
fn try_kill_vm(pid: i32) -> Result<(), Error> {
pub fn try_kill_vm(pid: i32) -> Result<(), Error> {
let pid = Pid::from_raw(pid);
if let Ok(()) = kill(pid, None) {
// process is running (and we could kill it), check if it is actually ours