file-restore-daemon: add watchdog module
Add a watchdog that will automatically shut down the VM after 10 minutes, if no API call is received. Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
parent
dd9cef56fc
commit
a26ebad5f9
|
@ -8,5 +8,8 @@ use proxmox::api::api;
|
|||
pub struct RestoreDaemonStatus {
|
||||
/// VM uptime in seconds
|
||||
pub uptime: i64,
|
||||
/// seconds left until the watchdog auto-shutdown; keep in mind that this is useless when
|
||||
/// 'keep-timeout' is not set on an authenticated call, as the status call then resets the timer before returning the value
|
||||
pub timeout: i64,
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,8 @@ fn main() -> Result<(), Error> {
|
|||
}
|
||||
|
||||
async fn run() -> Result<(), Error> {
|
||||
watchdog_init();
|
||||
|
||||
let auth_config = Arc::new(
|
||||
auth::ticket_auth().map_err(|err| format_err!("reading ticket file failed: {}", err))?,
|
||||
);
|
||||
|
|
|
@ -8,6 +8,8 @@ use proxmox::list_subdirs_api_method;
|
|||
|
||||
use proxmox_backup::api2::types::*;
|
||||
|
||||
use super::{watchdog_remaining, watchdog_ping};
|
||||
|
||||
// NOTE: All API endpoints must have Permission::Superuser, as the configs for authentication do
|
||||
// not exist within the restore VM. Safety is guaranteed by checking a ticket via a custom ApiAuth.
|
||||
|
||||
|
@ -27,22 +29,32 @@ fn read_uptime() -> Result<f32, Error> {
|
|||
}
|
||||
|
||||
#[api(
|
||||
input: {
|
||||
properties: {
|
||||
"keep-timeout": {
|
||||
type: bool,
|
||||
description: "If true, do not reset the watchdog timer on this API call.",
|
||||
default: false,
|
||||
optional: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
access: {
|
||||
description: "Permissions are handled outside restore VM.",
|
||||
permission: &Permission::Superuser,
|
||||
description: "Permissions are handled outside restore VM. This call can be made without a ticket, but keep-timeout is always assumed 'true' then.",
|
||||
permission: &Permission::World,
|
||||
},
|
||||
returns: {
|
||||
type: RestoreDaemonStatus,
|
||||
}
|
||||
)]
|
||||
/// General status information
|
||||
fn status(
|
||||
_param: Value,
|
||||
_info: &ApiMethod,
|
||||
_rpcenv: &mut dyn RpcEnvironment,
|
||||
) -> Result<RestoreDaemonStatus, Error> {
|
||||
fn status(rpcenv: &mut dyn RpcEnvironment, keep_timeout: bool) -> Result<RestoreDaemonStatus, Error> {
|
||||
if !keep_timeout && rpcenv.get_auth_id().is_some() {
|
||||
watchdog_ping();
|
||||
}
|
||||
Ok(RestoreDaemonStatus {
|
||||
uptime: read_uptime()? as i64,
|
||||
timeout: watchdog_remaining(),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -3,3 +3,6 @@ mod api;
|
|||
pub use api::*;
|
||||
|
||||
pub mod auth;
|
||||
|
||||
mod watchdog;
|
||||
pub use watchdog::*;
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
//! Tokio-based watchdog that shuts down the VM if not pinged for TIMEOUT
|
||||
use std::sync::atomic::{AtomicI64, Ordering};
|
||||
use proxmox::tools::time::epoch_i64;
|
||||
|
||||
const TIMEOUT: i64 = 600; // seconds
|
||||
static TRIGGERED: AtomicI64 = AtomicI64::new(0);
|
||||
|
||||
fn handle_expired() -> ! {
|
||||
use nix::sys::reboot;
|
||||
println!("watchdog expired, shutting down");
|
||||
let err = reboot::reboot(reboot::RebootMode::RB_POWER_OFF).unwrap_err();
|
||||
println!("'reboot' syscall failed: {}", err);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
async fn watchdog_loop() {
|
||||
use tokio::time::{sleep, Duration};
|
||||
loop {
|
||||
let remaining = watchdog_remaining();
|
||||
if remaining <= 0 {
|
||||
handle_expired();
|
||||
}
|
||||
sleep(Duration::from_secs(remaining as u64)).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize watchdog
|
||||
pub fn watchdog_init() {
|
||||
watchdog_ping();
|
||||
tokio::spawn(watchdog_loop());
|
||||
}
|
||||
|
||||
/// Trigger watchdog keepalive
|
||||
pub fn watchdog_ping() {
|
||||
TRIGGERED.fetch_max(epoch_i64(), Ordering::AcqRel);
|
||||
}
|
||||
|
||||
/// Returns the remaining time before watchdog expiry in seconds
|
||||
pub fn watchdog_remaining() -> i64 {
|
||||
TIMEOUT - (epoch_i64() - TRIGGERED.load(Ordering::Acquire))
|
||||
}
|
Loading…
Reference in New Issue