tools/daemon: improve reload behaviour

It seems that sometimes the child process's signal gets handled
before the parent process's signal. Systemd then ignores the
child's signal (finished reloading) and only afterwards goes into
the reloading state because of the parent. This will never finish.

Instead, wait for the state to change to 'reloading' after sending
that signal in the parent, and only fork afterwards. This way
we ensure that systemd knows about the reloading before actually trying
to do it.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
Tested-By: Fabian Ebner <f.ebner@proxmox.com>
This commit is contained in:
Dominik Csapak 2020-12-17 15:50:18 +01:00 committed by Thomas Lamprecht
parent 2afdc7f27d
commit 0ec79339f7
1 changed files with 29 additions and 18 deletions

View File

@ -291,6 +291,7 @@ where
if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
log::error!("failed to notify systemd about the state change: {}", e);
}
wait_service_is_state(service_name, "reloading").await?;
if let Err(e) = reloader.take().unwrap().fork_restart() {
log::error!("error during reload: {}", e);
let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
@ -305,7 +306,7 @@ where
// FIXME: this is a hack, replace with sd_notify_barrier when available
if server::is_reload_request() {
wait_service_is_active(service_name).await?;
wait_service_is_not_state(service_name, "reloading").await?;
}
log::info!("daemon shut down...");
@ -313,9 +314,7 @@ where
}
// hack, do not use if unsure!
async fn wait_service_is_active(service: &str) -> Result<(), Error> {
tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
loop {
async fn get_service_state(service: &str) -> Result<String, Error> {
let text = match tokio::process::Command::new("systemctl")
.args(&["is-active", service])
.output()
@ -328,11 +327,23 @@ async fn wait_service_is_active(service: &str) -> Result<(), Error> {
Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
};
if text.trim().trim_start() != "reloading" {
return Ok(());
Ok(text.trim().trim_start().to_string())
}
/// Waits until `get_service_state(service)` reports exactly `state`.
///
/// Sleeps one second before the first query and five seconds between
/// subsequent queries. Any error from `get_service_state` is propagated
/// immediately. There is no upper bound on the total wait time.
async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
    let initial_delay = std::time::Duration::from_secs(1);
    let poll_interval = std::time::Duration::from_secs(5);

    tokio::time::delay_for(initial_delay).await;
    loop {
        let current = get_service_state(service).await?;
        if current == state {
            return Ok(());
        }
        // Not in the requested state yet; back off before asking again.
        tokio::time::delay_for(poll_interval).await;
    }
}
/// Waits until `get_service_state(service)` reports anything other than `state`.
///
/// Sleeps one second before the first query and five seconds between
/// subsequent queries. Any error from `get_service_state` is propagated
/// immediately. There is no upper bound on the total wait time.
async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
    let initial_delay = std::time::Duration::from_secs(1);
    let poll_interval = std::time::Duration::from_secs(5);

    tokio::time::delay_for(initial_delay).await;
    loop {
        let current = get_service_state(service).await?;
        if current != state {
            return Ok(());
        }
        // Still in the state we want to leave; back off before asking again.
        tokio::time::delay_for(poll_interval).await;
    }
}
#[link(name = "systemd")]