use failure::*; use super::chunk_store::*; use super::chunker::*; use std::sync::Arc; use std::io::{Read, Write, BufWriter}; use std::fs::File; use std::path::{Path, PathBuf}; use std::os::unix::io::AsRawFd; use uuid::Uuid; //use chrono::{Local, TimeZone}; #[repr(C)] pub struct ArchiveIndexHeader { pub magic: [u8; 12], pub version: u32, pub uuid: [u8; 16], pub ctime: u64, reserved: [u8; 4056], // overall size is one page (4096 bytes) } pub struct ArchiveIndexReader { store: Arc, _file: File, size: usize, filename: PathBuf, index: *const u8, index_entries: usize, uuid: [u8; 16], ctime: u64, } impl Drop for ArchiveIndexReader { fn drop(&mut self) { if let Err(err) = self.unmap() { eprintln!("Unable to unmap file {:?} - {}", self.filename, err); } } } impl ArchiveIndexReader { pub fn open(store: Arc, path: &Path) -> Result { let full_path = store.relative_path(path); let mut file = std::fs::File::open(&full_path)?; let header_size = std::mem::size_of::(); // todo: use static assertion when available in rust if header_size != 4096 { bail!("got unexpected header size for {:?}", path); } let mut buffer = vec![0u8; header_size]; file.read_exact(&mut buffer)?; let header = unsafe { &mut * (buffer.as_ptr() as *mut ArchiveIndexHeader) }; if header.magic != *b"PROXMOX-AIDX" { bail!("got unknown magic number for {:?}", path); } let version = u32::from_le(header.version); if version != 1 { bail!("got unsupported version number ({}) for {:?}", version, path); } let ctime = u64::from_le(header.ctime); let rawfd = file.as_raw_fd(); let stat = match nix::sys::stat::fstat(rawfd) { Ok(stat) => stat, Err(err) => bail!("fstat {:?} failed - {}", path, err), }; let size = stat.st_size as usize; let index_size = size - header_size; if (index_size % 40) != 0 { bail!("got unexpected file size for {:?}", path); } let data = unsafe { nix::sys::mman::mmap( std::ptr::null_mut(), index_size, nix::sys::mman::ProtFlags::PROT_READ, nix::sys::mman::MapFlags::MAP_PRIVATE, rawfd, header_size as i64) }? as *const u8; Ok(Self { store, filename: full_path, _file: file, size, index: data, index_entries: index_size/40, ctime, uuid: header.uuid, }) } fn unmap(&mut self) -> Result<(), Error> { if self.index == std::ptr::null_mut() { return Ok(()); } if let Err(err) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, self.index_entries*40) } { bail!("unmap file {:?} failed - {}", self.filename, err); } self.index = std::ptr::null_mut(); Ok(()) } #[inline] fn chunk_end(&self, pos: usize) -> u64 { if pos >= self.index_entries { panic!("chunk index out of range"); } unsafe { *(self.index.add(pos*40) as *const u64) } } #[inline] fn chunk_digest(&self, pos: usize) -> &[u8] { if pos >= self.index_entries { panic!("chunk index out of range"); } unsafe { std::slice::from_raw_parts(self.index.add(pos*40+8), 32) } } pub fn mark_used_chunks(&self, _status: &mut GarbageCollectionStatus) -> Result<(), Error> { for pos in 0..self.index_entries { let digest = self.chunk_digest(pos); if let Err(err) = self.store.touch_chunk(digest) { bail!("unable to access chunk {}, required by {:?} - {}", digest_to_hex(digest), self.filename, err); } } Ok(()) } pub fn dump_catar(&self, mut writer: Box) -> Result<(), Error> { let mut buffer = Vec::with_capacity(1024*1024); for pos in 0..self.index_entries { let _end = self.chunk_end(pos); let digest = self.chunk_digest(pos); //println!("Dump {:08x}", end ); self.store.read_chunk(digest, &mut buffer)?; writer.write_all(&buffer)?; } Ok(()) } fn binary_search( &self, start_idx: usize, start: u64, end_idx: usize, end: u64, offset: u64 ) -> Result { if (offset >= end) || (offset < start) { bail!("offset out of range"); } if end_idx == start_idx { return Ok(start_idx); // found } let middle_idx = (start_idx + end_idx)/2; let middle_end = self.chunk_end(middle_idx); if offset < middle_end { return self.binary_search(start_idx, start, middle_idx, middle_end, offset); } else { return self.binary_search(middle_idx + 1, middle_end, end_idx, end, offset); } } } pub struct BufferedArchiveReader<'a> { index: &'a ArchiveIndexReader, archive_size: u64, read_buffer: Vec, buffered_chunk_idx: usize, buffered_chunk_start: u64, read_offset: u64, } impl <'a> BufferedArchiveReader<'a> { pub fn new(index: &'a ArchiveIndexReader) -> Self { let archive_size = index.chunk_end(index.index_entries - 1); Self { index: index, archive_size: archive_size, read_buffer: Vec::with_capacity(1024*1024), buffered_chunk_idx: 0, buffered_chunk_start: 0, read_offset: 0, } } pub fn archive_size(&self) -> u64 { self.archive_size } fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> { let index = self.index; let end = index.chunk_end(idx); let digest = index.chunk_digest(idx); index.store.read_chunk(digest, &mut self.read_buffer)?; self.buffered_chunk_idx = idx; self.buffered_chunk_start = end - (self.read_buffer.len() as u64); //println!("BUFFER {} {}", self.buffered_chunk_start, end); Ok(()) } } impl <'a> crate::tools::BufferedReader for BufferedArchiveReader<'a> { fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> { if offset == self.archive_size { return Ok(&self.read_buffer[0..0]); } let buffer_len = self.read_buffer.len(); let index = self.index; // optimization for sequential read if buffer_len > 0 && ((self.buffered_chunk_idx + 1) < index.index_entries) && (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) { let next_idx = self.buffered_chunk_idx + 1; let next_end = index.chunk_end(next_idx); if offset < next_end { self.buffer_chunk(next_idx)?; let buffer_offset = (offset - self.buffered_chunk_start) as usize; return Ok(&self.read_buffer[buffer_offset..]); } } if (buffer_len == 0) || (offset < self.buffered_chunk_start) || (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) { let end_idx = index.index_entries - 1; let end = index.chunk_end(end_idx); let idx = index.binary_search(0, 0, end_idx, end, offset)?; self.buffer_chunk(idx)?; } let buffer_offset = (offset - self.buffered_chunk_start) as usize; Ok(&self.read_buffer[buffer_offset..]) } } impl <'a> std::io::Read for BufferedArchiveReader<'a> { fn read(&mut self, buf: &mut [u8]) -> Result { use std::io::{Error, ErrorKind}; use crate::tools::BufferedReader; let data = match self.buffered_read(self.read_offset) { Ok(v) => v, Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())), }; let n = if data.len() > buf.len() { buf.len() } else { data.len() }; unsafe { std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr(), n); } self.read_offset += n as u64; return Ok(n); } } impl <'a> std::io::Seek for BufferedArchiveReader<'a> { fn seek(&mut self, pos: std::io::SeekFrom) -> Result { use std::io::{SeekFrom}; let new_offset = match pos { SeekFrom::Start(start_offset) => start_offset as i64, SeekFrom::End(end_offset) => (self.archive_size as i64)+ end_offset, SeekFrom::Current(offset) => (self.read_offset as i64) + offset, }; use std::io::{Error, ErrorKind}; if (new_offset < 0) || (new_offset > (self.archive_size as i64)) { return Err(Error::new( ErrorKind::Other, format!("seek is out of range {} ([0..{}])", new_offset, self.archive_size))); } self.read_offset = new_offset as u64; Ok(self.read_offset) } } pub struct ArchiveIndexWriter { store: Arc, chunker: Chunker, writer: BufWriter, closed: bool, filename: PathBuf, tmp_filename: PathBuf, uuid: [u8; 16], ctime: u64, chunk_offset: usize, last_chunk: usize, chunk_buffer: Vec, } impl Drop for ArchiveIndexWriter { fn drop(&mut self) { let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors } } impl ArchiveIndexWriter { pub fn create(store: Arc, path: &Path, chunk_size: usize) -> Result { let full_path = store.relative_path(path); let mut tmp_path = full_path.clone(); tmp_path.set_extension("tmp_aidx"); let file = std::fs::OpenOptions::new() .create(true).truncate(true) .read(true) .write(true) .open(&tmp_path)?; let mut writer = BufWriter::with_capacity(1024*1024, file); let header_size = std::mem::size_of::(); // todo: use static assertion when available in rust if header_size != 4096 { panic!("got unexpected header size"); } let ctime = std::time::SystemTime::now().duration_since( std::time::SystemTime::UNIX_EPOCH)?.as_secs(); let uuid = Uuid::new_v4(); let mut buffer = vec![0u8; header_size]; let header = crate::tools::map_struct_mut::(&mut buffer)?; header.magic = *b"PROXMOX-AIDX"; header.version = u32::to_le(1); header.ctime = u64::to_le(ctime); header.uuid = *uuid.as_bytes(); writer.write_all(&buffer)?; Ok(Self { store, chunker: Chunker::new(chunk_size), writer: writer, closed: false, filename: full_path, tmp_filename: tmp_path, ctime, uuid: *uuid.as_bytes(), chunk_offset: 0, last_chunk: 0, chunk_buffer: Vec::with_capacity(chunk_size*4), }) } pub fn close(&mut self) -> Result<(), Error> { if self.closed { bail!("cannot close already closed archive index file {:?}", self.filename); } self.closed = true; self.write_chunk_buffer()?; self.writer.flush()?; // fixme: if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { bail!("Atomic rename file {:?} failed - {}", self.filename, err); } Ok(()) } fn write_chunk_buffer(&mut self) -> Result<(), std::io::Error> { use std::io::{Error, ErrorKind}; let chunk_size = self.chunk_buffer.len(); if chunk_size == 0 { return Ok(()); } let expected_chunk_size = self.chunk_offset - self.last_chunk; if expected_chunk_size != self.chunk_buffer.len() { return Err(Error::new( ErrorKind::Other, format!("wrong chunk size {} != {}", expected_chunk_size, chunk_size))); } self.last_chunk = self.chunk_offset; match self.store.insert_chunk(&self.chunk_buffer) { Ok((is_duplicate, digest)) => { println!("ADD CHUNK {:016x} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, digest_to_hex(&digest)); self.writer.write(unsafe { &std::mem::transmute::(self.chunk_offset as u64) })?; self.writer.write(&digest)?; self.chunk_buffer.truncate(0); return Ok(()); } Err(err) => { self.chunk_buffer.truncate(0); return Err(Error::new(ErrorKind::Other, err.to_string())); } } } } impl Write for ArchiveIndexWriter { fn write(&mut self, data: &[u8]) -> std::result::Result { let chunker = &mut self.chunker; let pos = chunker.scan(data); if pos > 0 { self.chunk_buffer.extend(&data[0..pos]); self.chunk_offset += pos; self.write_chunk_buffer()?; Ok(pos) } else { self.chunk_offset += data.len(); self.chunk_buffer.extend(data); Ok(data.len()) } } fn flush(&mut self) -> std::result::Result<(), std::io::Error> { use std::io::{Error, ErrorKind}; Err(Error::new(ErrorKind::Other, "please use close() instead of flush()")) } }