file-restore: try to kill VM when stale

Helps to clean up a VM that has crashed and is no longer responding to vsock API calls, but still has a running QEMU instance. We always check the process command line to ensure we don't kill a random process that took over the PID. Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-05-06 17:26:17 +02:00
parent c4e1af3069
commit 7d39e47182
2 changed files with 3 additions and 1 deletion
--- a/src/bin/proxmox_file_restore/block_driver_qemu.rs
+++ b/src/bin/proxmox_file_restore/block_driver_qemu.rs
@ -98,6 +98,7 @@ async fn cleanup_map(map: &mut HashMap<String, VMState>) -> bool {
                "VM '{}' (pid: {}, cid: {}) was not reachable, removing from map",
                name, state.pid, state.cid
            );
+            let _ = super::qemu_helper::try_kill_vm(state.pid);
        }
    }

@ -131,6 +132,7 @@ async fn ensure_running(details: &SnapRestoreDetails) -> Result<VsockClient, Err
                Err(err) => {
                    eprintln!("stale VM detected, restarting ({})", err);
                    // VM is dead, restart
+                    let _ = super::qemu_helper::try_kill_vm(vm.pid);
                    let vms = start_vm(vm.cid, details).await?;
                    new_cid = vms.cid;
                    state.map.insert(name, vms.clone());
--- a/src/bin/proxmox_file_restore/qemu_helper.rs
+++ b/src/bin/proxmox_file_restore/qemu_helper.rs
@ -56,7 +56,7 @@ fn validate_img_existance() -> Result<(), Error> {
    Ok(())
 }

-fn try_kill_vm(pid: i32) -> Result<(), Error> {
+pub fn try_kill_vm(pid: i32) -> Result<(), Error> {
    let pid = Pid::from_raw(pid);
    if let Ok(()) = kill(pid, None) {
        // process is running (and we could kill it), check if it is actually ours