rest server: daemon: update PID file before sending MAINPID notification
There is a race upon reload, where it can happen that: 1. systemd forks off /bin/kill -HUP $MAINPID 2. Current instance forks off new one and notifies systemd with the new MAINPID. 3. systemd sets new MAINPID. 4. systemd receives SIGCHLD for the kill process (which is the current control process for the service) and reads the PID of the old instance from the PID file, resetting MAINPID to the PID of the old instance. 5. Old instance exits. 6. systemd receives SIGCHLD for the old instance, reads the PID of the old instance from the PID file once more. systemd sees that the MAINPID matches the child PID and considers the service exited. 7. systemd receivese notification from the new PID and is confused. The service won't get active, because the notification wasn't handled. To fix it, update the PID file before sending the MAINPID notification, similar to what a comment in systemd's src/core/service.c suggests: > /* Forking services may occasionally move to a new PID. > * As long as they update the PID file before exiting the old > * PID, they're fine. */ but for our Type=notify "before sending the notification" rather than "before exiting", because otherwise, the mix-up in 4. could still happen (although it might not actually be problematic without the mix-up in 6., it still seems better to avoid). Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
This commit is contained in:
parent
f4d246072d
commit
e9b9f33aee
|
@ -210,7 +210,9 @@ async fn run() -> Result<(), Error> {
|
|||
|
||||
// then we have to create a daemon that listens, accepts and serves
|
||||
// the api to clients
|
||||
proxmox_rest_server::daemon::create_daemon(([127, 0, 0, 1], 65000).into(), move |listener| {
|
||||
proxmox_rest_server::daemon::create_daemon(
|
||||
([127, 0, 0, 1], 65000).into(),
|
||||
move |listener| {
|
||||
let incoming = hyper::server::conn::AddrIncoming::from_listener(listener)?;
|
||||
|
||||
Ok(async move {
|
||||
|
@ -218,7 +220,9 @@ async fn run() -> Result<(), Error> {
|
|||
|
||||
Ok(())
|
||||
})
|
||||
})
|
||||
},
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
|
|
|
@ -15,6 +15,7 @@ use nix::unistd::{fork, ForkResult};
|
|||
|
||||
use proxmox_io::{ReadExt, WriteExt};
|
||||
use proxmox_sys::fd::{fd_change_cloexec, Fd};
|
||||
use proxmox_sys::fs::CreateOptions;
|
||||
|
||||
// Unfortunately FnBox is nightly-only and Box<FnOnce> is unusable, so just use Box<Fn>...
|
||||
type BoxedStoreFunc = Box<dyn FnMut() -> Result<String, Error> + UnwindSafe + Send>;
|
||||
|
@ -81,7 +82,7 @@ impl Reloader {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fork_restart(self) -> Result<(), Error> {
|
||||
pub fn fork_restart(self, pid_fn: Option<&str>) -> Result<(), Error> {
|
||||
// Get our parameters as Vec<CString>
|
||||
let args = std::env::args_os();
|
||||
let mut new_args = Vec::with_capacity(args.len());
|
||||
|
@ -179,6 +180,16 @@ impl Reloader {
|
|||
}
|
||||
});
|
||||
|
||||
if let Some(pid_fn) = pid_fn {
|
||||
let pid_str = format!("{}\n", child);
|
||||
proxmox_sys::fs::replace_file(
|
||||
pid_fn,
|
||||
pid_str.as_bytes(),
|
||||
CreateOptions::new(),
|
||||
false,
|
||||
)?;
|
||||
}
|
||||
|
||||
if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) {
|
||||
log::error!("failed to notify systemd about the new main pid: {}", e);
|
||||
}
|
||||
|
@ -250,6 +261,7 @@ impl Reloadable for tokio::net::TcpListener {
|
|||
pub async fn create_daemon<F, S>(
|
||||
address: std::net::SocketAddr,
|
||||
create_service: F,
|
||||
pidfn: Option<&str>,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
F: FnOnce(tokio::net::TcpListener) -> Result<S, Error>,
|
||||
|
@ -295,7 +307,7 @@ where
|
|||
log::error!("failed to wait on systemd-processing: {}", e);
|
||||
}
|
||||
|
||||
if let Err(e) = reloader.take().unwrap().fork_restart() {
|
||||
if let Err(e) = reloader.take().unwrap().fork_restart(pidfn) {
|
||||
log::error!("error during reload: {}", e);
|
||||
let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
|
||||
}
|
||||
|
|
|
@ -138,7 +138,9 @@ async fn run() -> Result<(), Error> {
|
|||
)?;
|
||||
|
||||
// http server future:
|
||||
let server = daemon::create_daemon(([127, 0, 0, 1], 82).into(), move |listener| {
|
||||
let server = daemon::create_daemon(
|
||||
([127, 0, 0, 1], 82).into(),
|
||||
move |listener| {
|
||||
let incoming = hyper::server::conn::AddrIncoming::from_listener(listener)?;
|
||||
|
||||
Ok(async {
|
||||
|
@ -150,7 +152,9 @@ async fn run() -> Result<(), Error> {
|
|||
.map_err(Error::from)
|
||||
.await
|
||||
})
|
||||
});
|
||||
},
|
||||
Some(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN),
|
||||
);
|
||||
|
||||
proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
|
||||
|
||||
|
|
|
@ -301,7 +301,9 @@ async fn run() -> Result<(), Error> {
|
|||
Ok(Value::Null)
|
||||
})?;
|
||||
|
||||
let server = daemon::create_daemon(([0, 0, 0, 0, 0, 0, 0, 0], 8007).into(), move |listener| {
|
||||
let server = daemon::create_daemon(
|
||||
([0, 0, 0, 0, 0, 0, 0, 0], 8007).into(),
|
||||
move |listener| {
|
||||
let connections = accept_connections(listener, acceptor, debug);
|
||||
let connections = hyper::server::accept::from_stream(ReceiverStream::new(connections));
|
||||
|
||||
|
@ -314,7 +316,9 @@ async fn run() -> Result<(), Error> {
|
|||
.map_err(Error::from)
|
||||
.await
|
||||
})
|
||||
});
|
||||
},
|
||||
Some(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN),
|
||||
);
|
||||
|
||||
proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
|
||||
|
||||
|
|
Loading…
Reference in New Issue