From 77703d95aa6464054048fbfb4614c492ec80fd6a Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Wed, 2 Jan 2019 14:27:04 +0100 Subject: [PATCH] implement garbage collection for .aidx files --- src/backup/archive_index.rs | 116 +++++++++++++++++++++++++++++++++++- src/backup/chunk_store.rs | 4 +- src/backup/datastore.rs | 23 ++++++- 3 files changed, 137 insertions(+), 6 deletions(-) diff --git a/src/backup/archive_index.rs b/src/backup/archive_index.rs index 075d4e45..2b9a5ef3 100644 --- a/src/backup/archive_index.rs +++ b/src/backup/archive_index.rs @@ -19,6 +19,120 @@ pub struct ArchiveIndexHeader { reserved: [u8; 4056], // overall size is one page (4096 bytes) } + +pub struct ArchiveIndexReader<'a> { + store: &'a ChunkStore, + file: File, + size: usize, + filename: PathBuf, + index: *const u8, + index_entries: usize, + uuid: [u8; 16], + ctime: u64, +} + +impl <'a> Drop for ArchiveIndexReader<'a> { + + fn drop(&mut self) { + if let Err(err) = self.unmap() { + eprintln!("Unable to unmap file {:?} - {}", self.filename, err); + } + } +} + +impl <'a> ArchiveIndexReader<'a> { + + pub fn open(store: &'a ChunkStore, path: &Path) -> Result { + + let full_path = store.relative_path(path); + + let mut file = std::fs::File::open(&full_path)?; + + let header_size = std::mem::size_of::(); + + // todo: use static assertion when available in rust + if header_size != 4096 { bail!("got unexpected header size for {:?}", path); } + + let mut buffer = vec![0u8; header_size]; + file.read_exact(&mut buffer)?; + + let header = unsafe { &mut * (buffer.as_ptr() as *mut ArchiveIndexHeader) }; + + if header.magic != *b"PROXMOX-AIDX" { + bail!("got unknown magic number for {:?}", path); + } + + let version = u32::from_le(header.version); + if version != 1 { + bail!("got unsupported version number ({}) for {:?}", version, path); + } + + let ctime = u64::from_le(header.ctime); + + let rawfd = file.as_raw_fd(); + + let stat = match nix::sys::stat::fstat(rawfd) { + Ok(stat) => stat, + Err(err) => bail!("fstat {:?} failed - {}", path, err), + }; + + let size = stat.st_size as usize; + + let index_size = (size - header_size); + if (index_size % 40) != 0 { + bail!("got unexpected file size for {:?}", path); + } + + let data = unsafe { nix::sys::mman::mmap( + std::ptr::null_mut(), + index_size, + nix::sys::mman::ProtFlags::PROT_READ, + nix::sys::mman::MapFlags::MAP_PRIVATE, + rawfd, + header_size as i64) }? as *const u8; + + + Ok(Self { + store, + filename: full_path, + file, + size, + index: data, + index_entries: index_size/40, + ctime, + uuid: header.uuid, + }) + } + + fn unmap(&mut self) -> Result<(), Error> { + + if self.index == std::ptr::null_mut() { return Ok(()); } + + if let Err(err) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, self.size) } { + bail!("unmap file {:?} failed - {}", self.filename, err); + } + + self.index = std::ptr::null_mut(); + + Ok(()) + } + + pub fn mark_used_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> { + + for pos in 0..self.index_entries { + let offset = unsafe { *(self.index.add(pos*40) as *const u64) }; + let digest = unsafe { std::slice::from_raw_parts(self.index.add(pos*40+8), 32) }; + + if let Err(err) = self.store.touch_chunk(digest) { + bail!("unable to access chunk {}, required by {:?} - {}", + digest_to_hex(digest), self.filename, err); + } + } + Ok(()) + } +} + + pub struct ArchiveIndexWriter<'a> { store: &'a ChunkStore, chunker: Chunker, @@ -126,7 +240,7 @@ impl <'a> ArchiveIndexWriter<'a> { match self.store.insert_chunk(&self.chunk_buffer) { Ok((is_duplicate, digest)) => { - println!("ADD CHUNK {} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, digest_to_hex(&digest)); + println!("ADD CHUNK {:016x} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, digest_to_hex(&digest)); self.writer.write(unsafe { &std::mem::transmute::(self.chunk_offset as u64) })?; self.writer.write(&digest)?; self.chunk_buffer.truncate(0); diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs index 1adb54a7..6530f5fc 100644 --- a/src/backup/chunk_store.rs +++ b/src/backup/chunk_store.rs @@ -212,7 +212,7 @@ impl ChunkStore { Ok(()) } - pub fn sweep_used_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> { + pub fn sweep_unused_chunks(&self, status: &mut GarbageCollectionStatus) -> Result<(), Error> { use nix::fcntl::OFlag; use nix::sys::stat::Mode; @@ -299,7 +299,7 @@ impl ChunkStore { bail!("Atomic rename on store '{}' failed for chunk {} - {}", self.name, digest_str, err); } - println!("PATH {:?}", chunk_path); + //println!("PATH {:?}", chunk_path); drop(lock); diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs index db78c5c4..40f577f8 100644 --- a/src/backup/datastore.rs +++ b/src/backup/datastore.rs @@ -87,6 +87,13 @@ impl DataStore { Ok(index) } + pub fn open_archive_reader>(&self, filename: P) -> Result { + + let index = ArchiveIndexReader::open(&self.chunk_store, filename.as_ref())?; + + Ok(index) + } + pub fn list_images(&self) -> Result, Error> { let base = self.chunk_store.base_path(); @@ -99,6 +106,8 @@ impl DataStore { if let Some(ext) = path.extension() { if ext == "iidx" { list.push(path); + } else if ext == "aidx" { + list.push(path); } } } @@ -112,8 +121,16 @@ impl DataStore { let image_list = self.list_images()?; for path in image_list { - let index = self.open_image_reader(path)?; - index.mark_used_chunks(status)?; + if let Some(ext) = path.extension() { + if ext == "iidx" { + let index = self.open_image_reader(path)?; + index.mark_used_chunks(status)?; + } else if ext == "aidx" { + let index = self.open_archive_reader(path)?; + index.mark_used_chunks(status)?; + } + } + } Ok(()) @@ -131,7 +148,7 @@ impl DataStore { self.mark_used_chunks(&mut gc_status)?; println!("Start GC phase2 (sweep unused chunks)"); - self.chunk_store.sweep_used_chunks(&mut gc_status)?; + self.chunk_store.sweep_unused_chunks(&mut gc_status)?; println!("Used bytes: {}", gc_status.used_bytes); println!("Used chunks: {}", gc_status.used_chunks);