diff --git a/src/tools.rs b/src/tools.rs index 1837c0e0..5a9f020a 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -36,6 +36,7 @@ pub mod logrotate; pub mod loopdev; pub mod fuse_loop; pub mod socket; +pub mod zip; mod parallel_handler; pub use parallel_handler::*; diff --git a/src/tools/zip.rs b/src/tools/zip.rs new file mode 100644 index 00000000..3248239f --- /dev/null +++ b/src/tools/zip.rs @@ -0,0 +1,518 @@ +//! ZIP Helper +//! +//! Provides an interface to create a ZIP File from ZipEntries +//! for a more detailed description of the ZIP format, see: +//! https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + +use std::convert::TryInto; +use std::ffi::OsString; +use std::io; +use std::mem::size_of; +use std::os::unix::ffi::OsStrExt; +use std::path::{Component, Path, PathBuf}; + +use anyhow::{Error, Result}; +use endian_trait::Endian; +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; + +use crc32fast::Hasher; +use proxmox::tools::time::gmtime; +use proxmox::tools::byte_buffer::ByteBuffer; + +const LOCAL_FH_SIG: u32 = 0x04034B50; +const LOCAL_FF_SIG: u32 = 0x08074B50; +const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50; +const END_OF_CENTRAL_DIR: u32 = 0x06054B50; +const VERSION_NEEDED: u16 = 0x002d; +const VERSION_MADE_BY: u16 = 0x032d; + +const ZIP64_EOCD_RECORD: u32 = 0x06064B50; +const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50; + +// bits for time: +// 0-4: day of the month (1-31) +// 5-8: month: (1 = jan, etc.) +// 9-15: year offset from 1980 +// +// bits for date: +// 0-4: second / 2 +// 5-10: minute (0-59) +// 11-15: hour (0-23) +// +// see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime +fn epoch_to_dos(epoch: i64) -> (u16, u16) { + let gmtime = match gmtime(epoch) { + Ok(gmtime) => gmtime, + Err(_) => return (0, 0), + }; + + let seconds = (gmtime.tm_sec / 2) & 0b11111; + let minutes = gmtime.tm_min & 0xb111111; + let hours = gmtime.tm_hour & 0b11111; + let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16; + + let date: u16 = if gmtime.tm_year > (2108 - 1900) || gmtime.tm_year < (1980 - 1900) { + 0 + } else { + let day = gmtime.tm_mday & 0b11111; + let month = (gmtime.tm_mon + 1) & 0b1111; + let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111; + ((year << 9) | (month << 5) | (day)) as u16 + }; + + (date, time) +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64Field { + field_type: u16, + field_size: u16, + uncompressed_size: u64, + compressed_size: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64FieldWithOffset { + field_type: u16, + field_size: u16, + uncompressed_size: u64, + compressed_size: u64, + offset: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct LocalFileHeader { + signature: u32, + version_needed: u16, + flags: u16, + compression: u16, + time: u16, + date: u16, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, + filename_len: u16, + extra_field_len: u16, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct LocalFileFooter { + signature: u32, + crc32: u32, + compressed_size: u64, + uncompressed_size: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct CentralDirectoryFileHeader { + signature: u32, + version_made_by: u16, + version_needed: u16, + flags: u16, + compression: u16, + time: u16, + date: u16, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, + filename_len: u16, + extra_field_len: u16, + comment_len: u16, + start_disk: u16, + internal_flags: u16, + external_flags: u32, + offset: u32, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct EndOfCentralDir { + signature: u32, + disk_number: u16, + start_disk: u16, + disk_record_count: u16, + total_record_count: u16, + directory_size: u32, + directory_offset: u32, + comment_len: u16, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64EOCDRecord { + signature: u32, + field_size: u64, + version_made_by: u16, + version_needed: u16, + disk_number: u32, + disk_number_central_dir: u32, + disk_record_count: u64, + total_record_count: u64, + directory_size: u64, + directory_offset: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64EOCDLocator { + signature: u32, + disk_number: u32, + offset: u64, + disk_count: u32, +} + +async fn write_struct(output: &mut T, data: E) -> io::Result<()> +where + T: AsyncWrite + ?Sized + Unpin, + E: Endian, +{ + let data = data.to_le(); + + let data = unsafe { + std::slice::from_raw_parts( + &data as *const E as *const u8, + core::mem::size_of_val(&data), + ) + }; + output.write_all(data).await +} + +/// Represents an Entry in a ZIP File +/// +/// used to add to a ZipEncoder +pub struct ZipEntry { + filename: OsString, + mtime: i64, + mode: u16, + crc32: u32, + uncompressed_size: u64, + compressed_size: u64, + offset: u64, + is_file: bool, +} + +impl ZipEntry { + /// Creates a new ZipEntry + /// + /// if is_file is false the path will contain an trailing separator, + /// so that the zip file understands that it is a directory + pub fn new>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self { + let mut relpath = PathBuf::new(); + + for comp in path.as_ref().components() { + if let Component::Normal(_) = comp { + relpath.push(comp); + } + } + + if !is_file { + relpath.push(""); // adds trailing slash + } + + Self { + filename: relpath.into(), + crc32: 0, + mtime, + mode, + uncompressed_size: 0, + compressed_size: 0, + offset: 0, + is_file, + } + } + + async fn write_local_header(&self, mut buf: &mut W) -> io::Result + where + W: AsyncWrite + Unpin + ?Sized, + { + let filename = self.filename.as_bytes(); + let filename_len = filename.len(); + let header_size = size_of::(); + let zip_field_size = size_of::(); + let size: usize = header_size + filename_len + zip_field_size; + + let (date, time) = epoch_to_dos(self.mtime); + + write_struct( + &mut buf, + LocalFileHeader { + signature: LOCAL_FH_SIG, + version_needed: 0x2d, + flags: 1 << 3, + compression: 0, + time, + date, + crc32: 0, + compressed_size: 0xFFFFFFFF, + uncompressed_size: 0xFFFFFFFF, + filename_len: filename_len as u16, + extra_field_len: zip_field_size as u16, + }, + ) + .await?; + + buf.write_all(filename).await?; + + write_struct( + &mut buf, + Zip64Field { + field_type: 0x0001, + field_size: 2 * 8, + uncompressed_size: 0, + compressed_size: 0, + }, + ) + .await?; + + Ok(size) + } + + async fn write_data_descriptor( + &self, + mut buf: &mut W, + ) -> io::Result { + let size = size_of::(); + + write_struct( + &mut buf, + LocalFileFooter { + signature: LOCAL_FF_SIG, + crc32: self.crc32, + compressed_size: self.compressed_size, + uncompressed_size: self.uncompressed_size, + }, + ) + .await?; + + Ok(size) + } + + async fn write_central_directory_header( + &self, + mut buf: &mut W, + ) -> io::Result { + let filename = self.filename.as_bytes(); + let filename_len = filename.len(); + let header_size = size_of::(); + let zip_field_size = size_of::(); + let size: usize = header_size + filename_len + zip_field_size; + + let (date, time) = epoch_to_dos(self.mtime); + + write_struct( + &mut buf, + CentralDirectoryFileHeader { + signature: CENTRAL_DIRECTORY_FH_SIG, + version_made_by: VERSION_MADE_BY, + version_needed: VERSION_NEEDED, + flags: 1 << 3, + compression: 0, + time, + date, + crc32: self.crc32, + compressed_size: 0xFFFFFFFF, + uncompressed_size: 0xFFFFFFFF, + filename_len: filename_len as u16, + extra_field_len: zip_field_size as u16, + comment_len: 0, + start_disk: 0, + internal_flags: 0, + external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4, + offset: 0xFFFFFFFF, + }, + ) + .await?; + + buf.write_all(filename).await?; + + write_struct( + &mut buf, + Zip64FieldWithOffset { + field_type: 1, + field_size: 3 * 8, + uncompressed_size: self.uncompressed_size, + compressed_size: self.compressed_size, + offset: self.offset, + }, + ) + .await?; + + Ok(size) + } +} + +/// Wraps a writer that implements AsyncWrite for creating a ZIP archive +/// +/// This will create a ZIP archive on the fly with files added with +/// 'add_entry'. To Finish the file, call 'finish' +/// Example: +/// ```no_run +/// use proxmox_backup::tools::zip::*; +/// use tokio::fs::File; +/// +/// #[tokio::async] +/// async fn main() -> std::io::Result<()> { +/// let target = File::open("foo.zip").await?; +/// let mut source = File::open("foo.txt").await?; +/// +/// let mut zip = ZipEncoder::new(target); +/// zip.add_entry(ZipEntry { +/// "foo.txt", +/// 0, +/// 0o100755, +/// true, +/// }, source).await?; +/// +/// zip.finish().await? +/// +/// Ok(()) +/// } +/// ``` +pub struct ZipEncoder +where + W: AsyncWrite + Unpin, +{ + byte_count: usize, + files: Vec, + target: W, + buf: ByteBuffer, +} + +impl ZipEncoder { + pub fn new(target: W) -> Self { + Self { + byte_count: 0, + files: Vec::new(), + target, + buf: ByteBuffer::with_capacity(1024*1024), + } + } + + pub async fn add_entry( + &mut self, + mut entry: ZipEntry, + content: Option, + ) -> Result<(), Error> { + entry.offset = self.byte_count.try_into()?; + self.byte_count += entry.write_local_header(&mut self.target).await?; + if let Some(mut content) = content { + let mut hasher = Hasher::new(); + let mut size = 0; + loop { + + let count = self.buf.read_from_async(&mut content).await?; + + // end of file + if count == 0 { + break; + } + + size += count; + hasher.update(&self.buf); + self.target.write_all(&self.buf).await?; + self.buf.consume(count); + } + + self.byte_count += size; + entry.compressed_size = size.try_into()?; + entry.uncompressed_size = size.try_into()?; + entry.crc32 = hasher.finalize(); + } + self.byte_count += entry.write_data_descriptor(&mut self.target).await?; + + self.files.push(entry); + + Ok(()) + } + + async fn write_eocd( + &mut self, + central_dir_size: usize, + central_dir_offset: usize, + ) -> Result<(), Error> { + let entrycount = self.files.len(); + + let mut count = entrycount as u16; + let mut directory_size = central_dir_size as u32; + let mut directory_offset = central_dir_offset as u32; + + if central_dir_size > u32::MAX as usize + || central_dir_offset > u32::MAX as usize + || entrycount > u16::MAX as usize + { + count = 0xFFFF; + directory_size = 0xFFFFFFFF; + directory_offset = 0xFFFFFFFF; + + write_struct( + &mut self.target, + Zip64EOCDRecord { + signature: ZIP64_EOCD_RECORD, + field_size: 44, + version_made_by: VERSION_MADE_BY, + version_needed: VERSION_NEEDED, + disk_number: 0, + disk_number_central_dir: 0, + disk_record_count: entrycount.try_into()?, + total_record_count: entrycount.try_into()?, + directory_size: central_dir_size.try_into()?, + directory_offset: central_dir_offset.try_into()?, + }, + ) + .await?; + + let locator_offset = central_dir_offset + central_dir_size; + + write_struct( + &mut self.target, + Zip64EOCDLocator { + signature: ZIP64_EOCD_LOCATOR, + disk_number: 0, + offset: locator_offset.try_into()?, + disk_count: 1, + }, + ) + .await?; + } + + write_struct( + &mut self.target, + EndOfCentralDir { + signature: END_OF_CENTRAL_DIR, + disk_number: 0, + start_disk: 0, + disk_record_count: count, + total_record_count: count, + directory_size, + directory_offset, + comment_len: 0, + }, + ) + .await?; + + Ok(()) + } + + pub async fn finish(&mut self) -> Result<(), Error> { + let central_dir_offset = self.byte_count; + let mut central_dir_size = 0; + + for file in &self.files { + central_dir_size += file + .write_central_directory_header(&mut self.target) + .await?; + } + + self.write_eocd(central_dir_size, central_dir_offset) + .await?; + + self.target.flush().await?; + + Ok(()) + } +}