file-restore-daemon: watchdog: add inhibit for long downloads

The extract API call may be active for more than the watchdog timeout,
so a simple ping is not enough.

This adds an "inhibit" API, which will stop the watchdog from expiring
as long as at least one WatchdogInhibitor instance is alive. Keep one in
the download task, so it will be dropped once it completes (or errors).

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
Stefan Reiter 2021-05-06 17:26:21 +02:00 committed by Thomas Lamprecht
parent 75f9f40922
commit 1fde4167ea
2 changed files with 29 additions and 3 deletions

View File

@ -25,7 +25,7 @@ use proxmox_backup::tools::{self, fs::read_subdir, zip::zip_directory};
use pxar::encoder::aio::TokioWriter;
use super::{disk::ResolveResult, watchdog_remaining, watchdog_ping};
use super::{disk::ResolveResult, watchdog_remaining, watchdog_inhibit, watchdog_ping};
// NOTE: All API endpoints must have Permission::Superuser, as the configs for authentication do
// not exist within the restore VM. Safety is guaranteed by checking a ticket via a custom ApiAuth.
@ -248,8 +248,10 @@ fn extract(
_info: &ApiMethod,
_rpcenv: Box<dyn RpcEnvironment>,
) -> ApiResponseFuture {
watchdog_ping();
// download can take longer than watchdog timeout, inhibit until done
let _inhibitor = watchdog_inhibit();
async move {
let _inhibitor = _inhibitor;
let path = tools::required_string_param(&param, "path")?;
let mut path = base64::decode(path)?;
if let Some(b'/') = path.last() {
@ -283,6 +285,7 @@ fn extract(
if pxar {
tokio::spawn(async move {
let _inhibitor = _inhibitor;
let result = async move {
// pxar always expects a directory as its root, so to accommodate files as
// well we encode the parent dir with a filter only matching the target instead
@ -340,6 +343,7 @@ fn extract(
});
} else {
tokio::spawn(async move {
let _inhibitor = _inhibitor;
let result = async move {
if vm_path.is_dir() {
zip_directory(&mut writer, &vm_path).await?;

View File

@ -4,6 +4,9 @@ use proxmox::tools::time::epoch_i64;
/// Watchdog timeout; also the value reported by `watchdog_remaining` while inhibited.
const TIMEOUT: i64 = 600; // seconds
/// Timestamp that `watchdog_remaining` compares against `epoch_i64()`.
static TRIGGERED: AtomicI64 = AtomicI64::new(0);
/// Number of currently registered inhibitors: incremented by `watchdog_inhibit`, decremented
/// when a `WatchdogInhibitor` is dropped.
static INHIBITORS: AtomicI64 = AtomicI64::new(0);
/// Guard object handed out by `watchdog_inhibit`.
///
/// It carries no data; its `Drop` implementation pings the watchdog and decrements the
/// `INHIBITORS` counter.
pub struct WatchdogInhibitor {}
fn handle_expired() -> ! {
use nix::sys::reboot;
@ -37,5 +40,24 @@ pub fn watchdog_ping() {
/// Returns the remaining time before watchdog expiry in seconds
///
/// While at least one inhibitor is registered (`INHIBITORS > 0`) the full `TIMEOUT` is
/// reported; otherwise the seconds elapsed since `TRIGGERED` are subtracted from `TIMEOUT`.
pub fn watchdog_remaining() -> i64 {
TIMEOUT - (epoch_i64() - TRIGGERED.load(Ordering::Acquire))
if INHIBITORS.load(Ordering::Acquire) > 0 {
TIMEOUT
} else {
TIMEOUT - (epoch_i64() - TRIGGERED.load(Ordering::Acquire))
}
}
/// Returns an object that inhibits watchdog expiry for its lifetime, it will issue a ping on Drop
///
/// Every call increments the global `INHIBITORS` counter and logs the new count. The counter is
/// only decremented again when the returned [`WatchdogInhibitor`] is dropped, so the caller has
/// to keep the value alive (e.g. move it into the task doing the long-running work) for as long
/// as the watchdog should be held off.
#[must_use = "the watchdog is only inhibited while the returned WatchdogInhibitor is alive"]
pub fn watchdog_inhibit() -> WatchdogInhibitor {
    // fetch_add returns the value *before* the increment, hence the `+ 1` for the log output
    let prev = INHIBITORS.fetch_add(1, Ordering::AcqRel);
    log::info!("Inhibit added: {}", prev + 1);
    WatchdogInhibitor {}
}
impl Drop for WatchdogInhibitor {
    /// Releases this inhibitor: pings the watchdog, then decrements the `INHIBITORS` counter
    /// and logs how many inhibitors remain.
    fn drop(&mut self) {
        // Ping first, decrement second.
        // NOTE(review): presumably this restarts the timeout before the inhibit is lifted,
        // so the watchdog does not see a stale timestamp - confirm against watchdog_ping().
        watchdog_ping();

        // fetch_sub hands back the pre-decrement value; subtract one to get the new count
        let remaining = INHIBITORS.fetch_sub(1, Ordering::AcqRel) - 1;
        log::info!("Inhibit dropped: {}", remaining);
    }
}