src/backup/catalog.rs: store u64 compressed
This way catalogs get about 1/3 smaller.
This commit is contained in:
parent
bb19af7302
commit
12a1975af9
@ -1,7 +1,6 @@
|
|||||||
use failure::*;
|
use failure::*;
|
||||||
use std::ffi::{CStr, CString};
|
use std::ffi::{CStr, CString};
|
||||||
use std::os::unix::ffi::OsStringExt;
|
use std::os::unix::ffi::OsStringExt;
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::io::{Read, Write, Seek, SeekFrom};
|
use std::io::{Read, Write, Seek, SeekFrom};
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
@ -37,68 +36,71 @@ impl DirInfo {
|
|||||||
DirInfo::new(CString::new(b"/".to_vec()).unwrap())
|
DirInfo::new(CString::new(b"/".to_vec()).unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_entry(data: &mut Vec<u8>, entry: &DirEntry, pos: u64) {
|
fn encode_entry<W: Write>(
|
||||||
|
writer: &mut W,
|
||||||
|
entry: &DirEntry,
|
||||||
|
pos: u64,
|
||||||
|
) -> Result<(), Error> {
|
||||||
match entry {
|
match entry {
|
||||||
DirEntry::Directory { name, start } => {
|
DirEntry::Directory { name, start } => {
|
||||||
data.push(CatalogEntryType::Directory as u8);
|
writer.write_all(&[CatalogEntryType::Directory as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
data.extend_from_slice(&(pos-start).to_le_bytes());
|
catalog_encode_u64(writer, pos - start)?;
|
||||||
}
|
}
|
||||||
DirEntry::File { name, size, mtime } => {
|
DirEntry::File { name, size, mtime } => {
|
||||||
data.push(CatalogEntryType::File as u8);
|
writer.write_all(&[CatalogEntryType::File as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
data.extend_from_slice(&size.to_le_bytes());
|
catalog_encode_u64(writer, *size)?;
|
||||||
data.extend_from_slice(&mtime.to_le_bytes());
|
catalog_encode_u64(writer, *mtime)?;
|
||||||
}
|
}
|
||||||
DirEntry::Symlink { name } => {
|
DirEntry::Symlink { name } => {
|
||||||
data.push(CatalogEntryType::Symlink as u8);
|
writer.write_all(&[CatalogEntryType::Symlink as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
}
|
}
|
||||||
DirEntry::Hardlink { name } => {
|
DirEntry::Hardlink { name } => {
|
||||||
data.push(CatalogEntryType::Hardlink as u8);
|
writer.write_all(&[CatalogEntryType::Hardlink as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
}
|
}
|
||||||
DirEntry::BlockDevice { name } => {
|
DirEntry::BlockDevice { name } => {
|
||||||
data.push(CatalogEntryType::BlockDevice as u8);
|
writer.write_all(&[CatalogEntryType::BlockDevice as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
}
|
}
|
||||||
DirEntry::CharDevice { name } => {
|
DirEntry::CharDevice { name } => {
|
||||||
data.push(CatalogEntryType::CharDevice as u8);
|
writer.write_all(&[CatalogEntryType::CharDevice as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
}
|
}
|
||||||
DirEntry::Fifo { name } => {
|
DirEntry::Fifo { name } => {
|
||||||
data.push(CatalogEntryType::Fifo as u8);
|
writer.write_all(&[CatalogEntryType::Fifo as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
}
|
}
|
||||||
DirEntry::Socket { name } => {
|
DirEntry::Socket { name } => {
|
||||||
data.push(CatalogEntryType::Socket as u8);
|
writer.write_all(&[CatalogEntryType::Socket as u8])?;
|
||||||
data.extend_from_slice(&(name.len() as u32).to_le_bytes());
|
catalog_encode_u64(writer, name.len() as u64)?;
|
||||||
data.extend_from_slice(name);
|
writer.write_all(name)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode(self, start: u64) -> Result<(CString, Vec<u8>), Error> {
|
fn encode(self, start: u64) -> Result<(CString, Vec<u8>), Error> {
|
||||||
let mut table = Vec::new();
|
let mut table = Vec::new();
|
||||||
let count: u32 = self.entries.len().try_into()?;
|
catalog_encode_u64(&mut table, self.entries.len() as u64)?;
|
||||||
for entry in self.entries {
|
for entry in self.entries {
|
||||||
Self::encode_entry(&mut table, &entry, start);
|
Self::encode_entry(&mut table, &entry, start)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let data = Vec::new();
|
let mut data = Vec::new();
|
||||||
let mut writer = std::io::Cursor::new(data);
|
catalog_encode_u64(&mut data, table.len() as u64)?;
|
||||||
let size: u32 = (4 + 4 + table.len()).try_into()?;
|
data.extend_from_slice(&table);
|
||||||
writer.write_all(&size.to_le_bytes())?;
|
|
||||||
writer.write_all(&count.to_le_bytes())?;
|
Ok((self.name, data))
|
||||||
writer.write_all(&table)?;
|
|
||||||
Ok((self.name, writer.into_inner()))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,7 +255,7 @@ impl <R: Read + Seek> CatalogReader<R> {
|
|||||||
Self { reader }
|
Self { reader }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn next_byte<C: Read>(mut reader: C) -> Result<u8, std::io::Error> {
|
fn next_byte<C: Read>(reader: &mut C) -> Result<u8, std::io::Error> {
|
||||||
let mut buf = [0u8; 1];
|
let mut buf = [0u8; 1];
|
||||||
reader.read_exact(&mut buf)?;
|
reader.read_exact(&mut buf)?;
|
||||||
Ok(buf[0])
|
Ok(buf[0])
|
||||||
@ -272,21 +274,21 @@ impl <R: Read + Seek> CatalogReader<R> {
|
|||||||
|
|
||||||
self.reader.seek(SeekFrom::Start(start))?;
|
self.reader.seek(SeekFrom::Start(start))?;
|
||||||
|
|
||||||
let size = unsafe { self.reader.read_le_value::<u32>()? } as usize;
|
let size = catalog_decode_u64(&mut self.reader)?;
|
||||||
|
|
||||||
if size < 8 { bail!("got small directory size {}", size) };
|
if size < 1 { bail!("got small directory size {}", size) };
|
||||||
|
|
||||||
let data = self.reader.read_exact_allocated(size - 4)?;
|
let data = self.reader.read_exact_allocated(size as usize)?;
|
||||||
|
|
||||||
let mut cursor = &data[..];
|
let mut cursor = &data[..];
|
||||||
|
|
||||||
let entries = unsafe { cursor.read_le_value::<u32>()? };
|
let entries = catalog_decode_u64(&mut cursor)?;
|
||||||
|
|
||||||
//println!("TEST {} {} size {}", start, entries, size);
|
//println!("TEST {} {} size {}", start, entries, size);
|
||||||
|
|
||||||
for _ in 0..entries {
|
for _ in 0..entries {
|
||||||
let etype = CatalogEntryType::try_from(Self::next_byte(&mut cursor)?)?;
|
let etype = CatalogEntryType::try_from(Self::next_byte(&mut cursor)?)?;
|
||||||
let name_len = unsafe { cursor.read_le_value::<u32>()? };
|
let name_len = catalog_decode_u64(&mut cursor)?;
|
||||||
let name = cursor.read_exact_allocated(name_len as usize)?;
|
let name = cursor.read_exact_allocated(name_len as usize)?;
|
||||||
|
|
||||||
let mut path = std::path::PathBuf::from(prefix);
|
let mut path = std::path::PathBuf::from(prefix);
|
||||||
@ -295,7 +297,7 @@ impl <R: Read + Seek> CatalogReader<R> {
|
|||||||
match etype {
|
match etype {
|
||||||
CatalogEntryType::Directory => {
|
CatalogEntryType::Directory => {
|
||||||
println!("{} {:?}", char::from(etype as u8), path);
|
println!("{} {:?}", char::from(etype as u8), path);
|
||||||
let offset = unsafe { cursor.read_le_value::<u64>()? };
|
let offset = catalog_decode_u64(&mut cursor)?;
|
||||||
if offset > start {
|
if offset > start {
|
||||||
bail!("got wrong directory offset ({} > {})", offset, start);
|
bail!("got wrong directory offset ({} > {})", offset, start);
|
||||||
}
|
}
|
||||||
@ -303,8 +305,8 @@ impl <R: Read + Seek> CatalogReader<R> {
|
|||||||
self.dump_dir(&path, pos)?;
|
self.dump_dir(&path, pos)?;
|
||||||
}
|
}
|
||||||
CatalogEntryType::File => {
|
CatalogEntryType::File => {
|
||||||
let size = unsafe { cursor.read_le_value::<u64>()? };
|
let size = catalog_decode_u64(&mut cursor)?;
|
||||||
let mtime = unsafe { cursor.read_le_value::<u64>()? };
|
let mtime = catalog_decode_u64(&mut cursor)?;
|
||||||
|
|
||||||
let dt = Local.timestamp(mtime as i64, 0);
|
let dt = Local.timestamp(mtime as i64, 0);
|
||||||
|
|
||||||
@ -325,3 +327,75 @@ impl <R: Read + Seek> CatalogReader<R> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Serialize u64 as short, variable length byte sequence
|
||||||
|
///
|
||||||
|
/// Stores 7 bits per byte, Bit 8 indicates the end of the sequence (when not set).
|
||||||
|
/// We limit values to a maximum of 2^63.
|
||||||
|
pub fn catalog_encode_u64<W: Write>(writer: &mut W, v: u64) -> Result<(), Error> {
|
||||||
|
let mut enc = Vec::new();
|
||||||
|
|
||||||
|
if (v & (1<<63)) != 0 { bail!("catalog_encode_u64 failed - value >= 2^63"); }
|
||||||
|
let mut d = v;
|
||||||
|
loop {
|
||||||
|
if d < 128 {
|
||||||
|
enc.push(d as u8);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
enc.push((128 | (d & 127)) as u8);
|
||||||
|
d = d >> 7;
|
||||||
|
}
|
||||||
|
writer.write_all(&enc)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deserialize u64 from variable length byte sequence
|
||||||
|
///
|
||||||
|
/// We currently read maximal 9 bytes, which give a maximum of 63 bits.
|
||||||
|
pub fn catalog_decode_u64<R: Read>(reader: &mut R) -> Result<u64, Error> {
|
||||||
|
|
||||||
|
let mut v: u64 = 0;
|
||||||
|
let mut buf = [0u8];
|
||||||
|
|
||||||
|
for i in 0..9 { // only allow 9 bytes (63 bits)
|
||||||
|
if buf.is_empty() {
|
||||||
|
bail!("decode_u64 failed - unexpected EOB");
|
||||||
|
}
|
||||||
|
reader.read_exact(&mut buf)?;
|
||||||
|
let t = buf[0];
|
||||||
|
if t < 128 {
|
||||||
|
v |= (t as u64) << (i*7);
|
||||||
|
return Ok(v);
|
||||||
|
} else {
|
||||||
|
v |= ((t & 127) as u64) << (i*7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bail!("decode_u64 failed - missing end marker");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Round-trip checks for `catalog_encode_u64` / `catalog_decode_u64`.
#[test]
fn test_catalog_u64_encoder() {
    /// Encode `value`, decode the bytes again and require a lossless round trip.
    fn test_encode_decode(value: u64) {
        let mut data = Vec::new();
        catalog_encode_u64(&mut data, value).unwrap();

        let slice = &mut &data[..];
        let decoded = catalog_decode_u64(slice).unwrap();

        assert_eq!(decoded, value);
        // The decoder must consume exactly the bytes the encoder produced.
        assert!(slice.is_empty());
    }

    // Boundaries between one-byte and two-byte encodings.
    test_encode_decode(0);
    test_encode_decode(126);
    test_encode_decode(127);
    test_encode_decode(128);

    test_encode_decode((1 << 12) - 1);
    test_encode_decode((1 << 20) - 1);
    test_encode_decode((1 << 50) - 1);

    // Largest value the format accepts (9 bytes).
    test_encode_decode((1 << 63) - 1);

    // Values with bit 63 set are rejected by the encoder.
    let mut sink: Vec<u8> = Vec::new();
    assert!(catalog_encode_u64(&mut sink, 1 << 63).is_err());
}
|
||||||
|
Loading…
Reference in New Issue
Block a user