src/backup/datastore.rs: generic index_mark_used_chunks implementation, improve GC stats

This commit is contained in:
Dietmar Maurer 2019-07-04 07:57:43 +02:00
parent 86eda3eb0d
commit a660978c9a
5 changed files with 64 additions and 59 deletions

View File

@ -12,20 +12,24 @@ use super::DataChunk;
#[derive(Clone, Serialize)]
pub struct GarbageCollectionStatus {
pub upid: Option<String>,
pub used_bytes: usize,
pub used_chunks: usize,
pub disk_bytes: usize,
pub index_file_count: usize,
pub index_data_bytes: u64,
pub disk_bytes: u64,
pub disk_chunks: usize,
pub removed_bytes: u64,
pub removed_chunks: usize,
}
impl Default for GarbageCollectionStatus {
fn default() -> Self {
GarbageCollectionStatus {
upid: None,
used_bytes: 0,
used_chunks: 0,
index_file_count: 0,
index_data_bytes: 0,
disk_bytes: 0,
disk_chunks: 0,
removed_bytes: 0,
removed_chunks: 0,
}
}
}
@ -143,13 +147,9 @@ impl ChunkStore {
})
}
pub fn touch_chunk(&self, digest:&[u8]) -> Result<(), Error> {
pub fn touch_chunk(&self, digest: &[u8; 32]) -> Result<(), Error> {
let mut chunk_path = self.chunk_dir.clone();
let prefix = digest_to_prefix(&digest);
chunk_path.push(&prefix);
let digest_str = proxmox::tools::digest_to_hex(&digest);
chunk_path.push(&digest_str);
let (chunk_path, _digest_str) = self.chunk_path(digest);
const UTIME_NOW: i64 = ((1 << 30) - 1);
const UTIME_OMIT: i64 = ((1 << 30) - 2);
@ -172,7 +172,7 @@ impl ChunkStore {
Ok(())
}
pub fn read_chunk(&self, digest:&[u8; 32]) -> Result<DataChunk, Error> {
pub fn read_chunk(&self, digest: &[u8; 32]) -> Result<DataChunk, Error> {
let (chunk_path, digest_str) = self.chunk_path(digest);
let mut file = std::fs::File::open(&chunk_path)
@ -302,9 +302,11 @@ impl ChunkStore {
err,
);
}
} else {
status.removed_chunks += 1;
status.removed_bytes += stat.st_size as u64;
} else {
status.disk_chunks += 1;
status.disk_bytes += stat.st_size as usize;
status.disk_bytes += stat.st_size as u64;
}
}
drop(lock);

View File

@ -201,6 +201,28 @@ impl DataStore {
Ok(list)
}
/// Mark every chunk referenced by `index` as used by touching it in the chunk store.
///
/// Also accounts the index in `status`: `index_file_count` is incremented by one and
/// `index_data_bytes` grows by `index.index_bytes()`.
///
/// `file_name` is only used for error reporting.
///
/// # Errors
///
/// Fails if `tools::fail_on_shutdown()` returns an error, if the index yields no digest
/// for a position below `index_count()`, or if a referenced chunk cannot be touched.
fn index_mark_used_chunks<I: IndexFile>(
    &self,
    index: I,
    file_name: &Path, // only used for error reporting
    status: &mut GarbageCollectionStatus,
) -> Result<(), Error> {

    status.index_file_count += 1;
    status.index_data_bytes += index.index_bytes();

    let count = index.index_count();
    for pos in 0..count {
        tools::fail_on_shutdown()?;

        // An inconsistent index must fail this run with a proper error
        // instead of panicking the worker.
        let digest = match index.index_digest(pos) {
            Some(digest) => digest,
            None => bail!("missing digest at position {} of {} in {:?}", pos, count, file_name),
        };

        if let Err(err) = self.chunk_store.touch_chunk(digest) {
            bail!("unable to access chunk {}, required by {:?} - {}",
                  proxmox::tools::digest_to_hex(digest), file_name, err);
        }
    }

    Ok(())
}
fn mark_used_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> {
let image_list = self.list_images()?;
@ -212,10 +234,10 @@ impl DataStore {
if let Some(ext) = path.extension() {
if ext == "fidx" {
let index = self.open_fixed_reader(&path)?;
index.mark_used_chunks(status)?;
self.index_mark_used_chunks(index, &path, status)?;
} else if ext == "didx" {
let index = self.open_dynamic_reader(&path)?;
index.mark_used_chunks(status)?;
self.index_mark_used_chunks(index, &path, status)?;
}
}
}
@ -245,10 +267,14 @@ impl DataStore {
worker.log("Start GC phase2 (sweep unused chunks)");
self.chunk_store.sweep_unused_chunks(oldest_writer, &mut gc_status)?;
worker.log(&format!("Used bytes: {}", gc_status.used_bytes));
worker.log(&format!("Used chunks: {}", gc_status.used_chunks));
worker.log(&format!("Disk bytes: {}", gc_status.disk_bytes));
worker.log(&format!("Removed bytes: {}", gc_status.removed_bytes));
worker.log(&format!("Removed chunks: {}", gc_status.removed_chunks));
worker.log(&format!("Original data bytes: {}", gc_status.index_data_bytes));
let comp_per = (gc_status.disk_bytes*100)/gc_status.index_data_bytes;
worker.log(&format!("Disk bytes: {} ({} %)", gc_status.disk_bytes, comp_per));
worker.log(&format!("Disk chunks: {}", gc_status.disk_chunks));
let avg_chunk = gc_status.index_data_bytes/(gc_status.disk_chunks as u64);
worker.log(&format!("Average chunk size: {}", avg_chunk));
*self.last_gc_status.lock().unwrap() = gc_status;

View File

@ -167,25 +167,7 @@ impl DynamicIndexReader {
slice.try_into().unwrap()
}
pub fn mark_used_chunks(&self, _status: &mut GarbageCollectionStatus) -> Result<(), Error> {
unimplemented!();
}
/*
pub fn mark_used_chunks(&self, _status: &mut GarbageCollectionStatus) -> Result<(), Error> {
for pos in 0..self.index_entries {
tools::fail_on_shutdown()?;
let digest = self.chunk_digest(pos);
if let Err(err) = self.store.touch_chunk(digest) {
bail!("unable to access chunk {}, required by {:?} - {}",
proxmox::tools::digest_to_hex(digest), self.filename, err);
}
}
Ok(())
}
pub fn dump_pxar(&self, mut writer: Box<dyn Write>) -> Result<(), Error> {
for pos in 0..self.index_entries {
@ -200,7 +182,7 @@ impl DynamicIndexReader {
Ok(())
}
*/
*/
fn binary_search(
&self,
@ -243,6 +225,14 @@ impl IndexFile for DynamicIndexReader {
})
}
}
/// Number of data bytes described by this index.
///
/// An index without entries reports 0; otherwise this is whatever `chunk_end`
/// returns for the last entry.
fn index_bytes(&self) -> u64 {
    match self.index_entries {
        0 => 0,
        entries => self.chunk_end((entries - 1) as usize),
    }
}
}
pub struct BufferedDynamicReader<S> {

View File

@ -138,27 +138,6 @@ impl FixedIndexReader {
Ok(())
}
pub fn mark_used_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> {
if self.index == std::ptr::null_mut() { bail!("detected closed index file."); }
status.used_bytes += self.index_length * self.chunk_size;
status.used_chunks += self.index_length;
for pos in 0..self.index_length {
tools::fail_on_shutdown()?;
let digest = self.index_digest(pos).unwrap();
if let Err(err) = self.store.touch_chunk(digest) {
bail!("unable to access chunk {}, required by {:?} - {}",
proxmox::tools::digest_to_hex(digest), self.filename, err);
}
}
Ok(())
}
pub fn print_info(&self) {
println!("Filename: {:?}", self.filename);
println!("Size: {}", self.size);
@ -180,6 +159,10 @@ impl IndexFile for FixedIndexReader {
Some(unsafe { std::mem::transmute(self.index.add(pos*32)) })
}
}
/// Number of data bytes described by this index: `index_length` chunks of
/// `chunk_size` bytes each.
fn index_bytes(&self) -> u64 {
    // Widen both operands first, so the product cannot wrap on targets where
    // their native type is narrower than 64 bits.
    (self.index_length as u64) * (self.chunk_size as u64)
}
}
pub struct FixedIndexWriter {

View File

@ -2,9 +2,13 @@ use failure::*;
use futures::*;
use bytes::{Bytes, BytesMut};
/// Trait to get digest list from index files
///
/// To allow easy iteration over all used chunks.
pub trait IndexFile: Send {
/// Number of digest entries in the index; positions `0..index_count()` are
/// the ones callers iterate over.
fn index_count(&self) -> usize;
/// Digest stored at position `pos`, or `None` when no digest is available
/// for that position.
fn index_digest(&self, pos: usize) -> Option<&[u8; 32]>;
/// Number of data bytes the index describes (reported by garbage collection
/// as the original data size).
fn index_bytes(&self) -> u64;
}
/// Encode digest list from an `IndexFile` into a binary stream