proxmox-rrd: use a journal to reduce amount of bytes written

Append pending changes in a simple text-based format that allows for
lockless appends as long as we stay below 4 KiB of data per write.

Apply the journal every 30 minutes and on daemon startup.

Note that we do not ensure that the journal is synced, this is a
performance optimization we can make as the kernel defaults to
writing back in-flight data every 30s (sysctl vm/dirty_expire_centisecs)
anyway, so we lose at most half a minute of data on a crash. Keep in
mind that we normally expose 1 minute as the finest granularity
anyway, so not much is really lost.

Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Dietmar Maurer
2021-10-13 10:24:38 +02:00
committed by Thomas Lamprecht
parent 890b88cbef
commit 1d44f175c6
7 changed files with 262 additions and 61 deletions

View File

@ -74,7 +74,7 @@ async fn run() -> Result<(), Error> {
proxmox_backup::server::create_run_dir()?;
RRD_CACHE.create_rrdb_dir()?;
RRD_CACHE.apply_journal()?;
proxmox_backup::server::jobstate::create_jobstate_dir()?;
proxmox_backup::tape::create_tape_status_dir()?;

View File

@ -889,14 +889,10 @@ async fn command_reopen_auth_logfiles() -> Result<(), Error> {
async fn run_stat_generator() {
let mut count = 0;
loop {
count += 1;
let save = if count >= 6 { count = 0; true } else { false };
let delay_target = Instant::now() + Duration::from_secs(10);
generate_host_stats(save).await;
generate_host_stats().await;
tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
@ -904,19 +900,19 @@ async fn run_stat_generator() {
}
fn rrd_update_gauge(name: &str, value: f64, save: bool) {
if let Err(err) = RRD_CACHE.update_value(name, value, DST::Gauge, save) {
fn rrd_update_gauge(name: &str, value: f64) {
if let Err(err) = RRD_CACHE.update_value(name, value, DST::Gauge) {
eprintln!("rrd::update_value '{}' failed - {}", name, err);
}
}
fn rrd_update_derive(name: &str, value: f64, save: bool) {
if let Err(err) = RRD_CACHE.update_value(name, value, DST::Derive, save) {
fn rrd_update_derive(name: &str, value: f64) {
if let Err(err) = RRD_CACHE.update_value(name, value, DST::Derive) {
eprintln!("rrd::update_value '{}' failed - {}", name, err);
}
}
async fn generate_host_stats(save: bool) {
async fn generate_host_stats() {
use proxmox::sys::linux::procfs::{
read_meminfo, read_proc_stat, read_proc_net_dev, read_loadavg};
@ -924,8 +920,8 @@ async fn generate_host_stats(save: bool) {
match read_proc_stat() {
Ok(stat) => {
rrd_update_gauge("host/cpu", stat.cpu, save);
rrd_update_gauge("host/iowait", stat.iowait_percent, save);
rrd_update_gauge("host/cpu", stat.cpu);
rrd_update_gauge("host/iowait", stat.iowait_percent);
}
Err(err) => {
eprintln!("read_proc_stat failed - {}", err);
@ -934,10 +930,10 @@ async fn generate_host_stats(save: bool) {
match read_meminfo() {
Ok(meminfo) => {
rrd_update_gauge("host/memtotal", meminfo.memtotal as f64, save);
rrd_update_gauge("host/memused", meminfo.memused as f64, save);
rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64, save);
rrd_update_gauge("host/swapused", meminfo.swapused as f64, save);
rrd_update_gauge("host/memtotal", meminfo.memtotal as f64);
rrd_update_gauge("host/memused", meminfo.memused as f64);
rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64);
rrd_update_gauge("host/swapused", meminfo.swapused as f64);
}
Err(err) => {
eprintln!("read_meminfo failed - {}", err);
@ -954,8 +950,8 @@ async fn generate_host_stats(save: bool) {
netin += item.receive;
netout += item.send;
}
rrd_update_derive("host/netin", netin as f64, save);
rrd_update_derive("host/netout", netout as f64, save);
rrd_update_derive("host/netin", netin as f64);
rrd_update_derive("host/netout", netout as f64);
}
Err(err) => {
eprintln!("read_prox_net_dev failed - {}", err);
@ -964,7 +960,7 @@ async fn generate_host_stats(save: bool) {
match read_loadavg() {
Ok(loadavg) => {
rrd_update_gauge("host/loadavg", loadavg.0 as f64, save);
rrd_update_gauge("host/loadavg", loadavg.0 as f64);
}
Err(err) => {
eprintln!("read_loadavg failed - {}", err);
@ -973,7 +969,7 @@ async fn generate_host_stats(save: bool) {
let disk_manager = DiskManage::new();
gather_disk_stats(disk_manager.clone(), Path::new("/"), "host", save);
gather_disk_stats(disk_manager.clone(), Path::new("/"), "host");
match pbs_config::datastore::config() {
Ok((config, _)) => {
@ -984,7 +980,7 @@ async fn generate_host_stats(save: bool) {
let rrd_prefix = format!("datastore/{}", config.name);
let path = std::path::Path::new(&config.path);
gather_disk_stats(disk_manager.clone(), path, &rrd_prefix, save);
gather_disk_stats(disk_manager.clone(), path, &rrd_prefix);
}
}
Err(err) => {
@ -1025,14 +1021,14 @@ fn check_schedule(worker_type: &str, event_str: &str, id: &str) -> bool {
next <= now
}
fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str, save: bool) {
fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str) {
match proxmox_backup::tools::disks::disk_usage(path) {
Ok(status) => {
let rrd_key = format!("{}/total", rrd_prefix);
rrd_update_gauge(&rrd_key, status.total as f64, save);
rrd_update_gauge(&rrd_key, status.total as f64);
let rrd_key = format!("{}/used", rrd_prefix);
rrd_update_gauge(&rrd_key, status.used as f64, save);
rrd_update_gauge(&rrd_key, status.used as f64);
}
Err(err) => {
eprintln!("read disk_usage on {:?} failed - {}", path, err);
@ -1064,17 +1060,17 @@ fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &st
}
if let Some(stat) = device_stat {
let rrd_key = format!("{}/read_ios", rrd_prefix);
rrd_update_derive(&rrd_key, stat.read_ios as f64, save);
rrd_update_derive(&rrd_key, stat.read_ios as f64);
let rrd_key = format!("{}/read_bytes", rrd_prefix);
rrd_update_derive(&rrd_key, (stat.read_sectors*512) as f64, save);
rrd_update_derive(&rrd_key, (stat.read_sectors*512) as f64);
let rrd_key = format!("{}/write_ios", rrd_prefix);
rrd_update_derive(&rrd_key, stat.write_ios as f64, save);
rrd_update_derive(&rrd_key, stat.write_ios as f64);
let rrd_key = format!("{}/write_bytes", rrd_prefix);
rrd_update_derive(&rrd_key, (stat.write_sectors*512) as f64, save);
rrd_update_derive(&rrd_key, (stat.write_sectors*512) as f64);
let rrd_key = format!("{}/io_ticks", rrd_prefix);
rrd_update_derive(&rrd_key, (stat.io_ticks as f64)/1000.0, save);
rrd_update_derive(&rrd_key, (stat.io_ticks as f64)/1000.0);
}
}
Err(err) => {

View File

@ -51,10 +51,13 @@ lazy_static::lazy_static!{
.owner(backup_user.uid)
.group(backup_user.gid);
let apply_interval = 30.0*60.0; // 30 minutes
RRDCache::new(
"/var/lib/proxmox-backup/rrdb",
Some(file_options),
Some(dir_options),
)
apply_interval,
).unwrap()
};
}