//! ZIP Helper //! //! Provides an interface to create a ZIP File from ZipEntries //! for a more detailed description of the ZIP format, see: //! https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT use std::convert::TryInto; use std::ffi::OsString; use std::io; use std::mem::size_of; use std::os::unix::ffi::OsStrExt; use std::path::{Component, Path, PathBuf}; use anyhow::{Error, Result}; use endian_trait::Endian; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use crc32fast::Hasher; use proxmox::tools::time::gmtime; use proxmox::tools::byte_buffer::ByteBuffer; const LOCAL_FH_SIG: u32 = 0x04034B50; const LOCAL_FF_SIG: u32 = 0x08074B50; const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50; const END_OF_CENTRAL_DIR: u32 = 0x06054B50; const VERSION_NEEDED: u16 = 0x002d; const VERSION_MADE_BY: u16 = 0x032d; const ZIP64_EOCD_RECORD: u32 = 0x06064B50; const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50; // bits for time: // 0-4: day of the month (1-31) // 5-8: month: (1 = jan, etc.) // 9-15: year offset from 1980 // // bits for date: // 0-4: second / 2 // 5-10: minute (0-59) // 11-15: hour (0-23) // // see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime fn epoch_to_dos(epoch: i64) -> (u16, u16) { let gmtime = match gmtime(epoch) { Ok(gmtime) => gmtime, Err(_) => return (0, 0), }; let seconds = (gmtime.tm_sec / 2) & 0b11111; let minutes = gmtime.tm_min & 0xb111111; let hours = gmtime.tm_hour & 0b11111; let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16; let date: u16 = if gmtime.tm_year > (2108 - 1900) || gmtime.tm_year < (1980 - 1900) { 0 } else { let day = gmtime.tm_mday & 0b11111; let month = (gmtime.tm_mon + 1) & 0b1111; let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111; ((year << 9) | (month << 5) | (day)) as u16 }; (date, time) } #[derive(Endian)] #[repr(C, packed)] struct Zip64Field { field_type: u16, field_size: u16, uncompressed_size: u64, compressed_size: u64, } #[derive(Endian)] #[repr(C, packed)] struct Zip64FieldWithOffset { field_type: u16, field_size: u16, uncompressed_size: u64, compressed_size: u64, offset: u64, } #[derive(Endian)] #[repr(C, packed)] struct LocalFileHeader { signature: u32, version_needed: u16, flags: u16, compression: u16, time: u16, date: u16, crc32: u32, compressed_size: u32, uncompressed_size: u32, filename_len: u16, extra_field_len: u16, } #[derive(Endian)] #[repr(C, packed)] struct LocalFileFooter { signature: u32, crc32: u32, compressed_size: u64, uncompressed_size: u64, } #[derive(Endian)] #[repr(C, packed)] struct CentralDirectoryFileHeader { signature: u32, version_made_by: u16, version_needed: u16, flags: u16, compression: u16, time: u16, date: u16, crc32: u32, compressed_size: u32, uncompressed_size: u32, filename_len: u16, extra_field_len: u16, comment_len: u16, start_disk: u16, internal_flags: u16, external_flags: u32, offset: u32, } #[derive(Endian)] #[repr(C, packed)] struct EndOfCentralDir { signature: u32, disk_number: u16, start_disk: u16, disk_record_count: u16, total_record_count: u16, directory_size: u32, directory_offset: u32, comment_len: u16, } #[derive(Endian)] #[repr(C, packed)] struct Zip64EOCDRecord { signature: u32, field_size: u64, version_made_by: u16, version_needed: u16, disk_number: u32, disk_number_central_dir: u32, disk_record_count: u64, total_record_count: u64, directory_size: u64, directory_offset: u64, } #[derive(Endian)] #[repr(C, packed)] struct Zip64EOCDLocator { signature: u32, disk_number: u32, offset: u64, disk_count: u32, } async fn write_struct(output: &mut T, data: E) -> io::Result<()> where T: AsyncWrite + ?Sized + Unpin, E: Endian, { let data = data.to_le(); let data = unsafe { std::slice::from_raw_parts( &data as *const E as *const u8, core::mem::size_of_val(&data), ) }; output.write_all(data).await } /// Represents an Entry in a ZIP File /// /// used to add to a ZipEncoder pub struct ZipEntry { filename: OsString, mtime: i64, mode: u16, crc32: u32, uncompressed_size: u64, compressed_size: u64, offset: u64, is_file: bool, } impl ZipEntry { /// Creates a new ZipEntry /// /// if is_file is false the path will contain an trailing separator, /// so that the zip file understands that it is a directory pub fn new>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self { let mut relpath = PathBuf::new(); for comp in path.as_ref().components() { if let Component::Normal(_) = comp { relpath.push(comp); } } if !is_file { relpath.push(""); // adds trailing slash } Self { filename: relpath.into(), crc32: 0, mtime, mode, uncompressed_size: 0, compressed_size: 0, offset: 0, is_file, } } async fn write_local_header(&self, mut buf: &mut W) -> io::Result where W: AsyncWrite + Unpin + ?Sized, { let filename = self.filename.as_bytes(); let filename_len = filename.len(); let header_size = size_of::(); let zip_field_size = size_of::(); let size: usize = header_size + filename_len + zip_field_size; let (date, time) = epoch_to_dos(self.mtime); write_struct( &mut buf, LocalFileHeader { signature: LOCAL_FH_SIG, version_needed: 0x2d, flags: 1 << 3, compression: 0, time, date, crc32: 0, compressed_size: 0xFFFFFFFF, uncompressed_size: 0xFFFFFFFF, filename_len: filename_len as u16, extra_field_len: zip_field_size as u16, }, ) .await?; buf.write_all(filename).await?; write_struct( &mut buf, Zip64Field { field_type: 0x0001, field_size: 2 * 8, uncompressed_size: 0, compressed_size: 0, }, ) .await?; Ok(size) } async fn write_data_descriptor( &self, mut buf: &mut W, ) -> io::Result { let size = size_of::(); write_struct( &mut buf, LocalFileFooter { signature: LOCAL_FF_SIG, crc32: self.crc32, compressed_size: self.compressed_size, uncompressed_size: self.uncompressed_size, }, ) .await?; Ok(size) } async fn write_central_directory_header( &self, mut buf: &mut W, ) -> io::Result { let filename = self.filename.as_bytes(); let filename_len = filename.len(); let header_size = size_of::(); let zip_field_size = size_of::(); let size: usize = header_size + filename_len + zip_field_size; let (date, time) = epoch_to_dos(self.mtime); write_struct( &mut buf, CentralDirectoryFileHeader { signature: CENTRAL_DIRECTORY_FH_SIG, version_made_by: VERSION_MADE_BY, version_needed: VERSION_NEEDED, flags: 1 << 3, compression: 0, time, date, crc32: self.crc32, compressed_size: 0xFFFFFFFF, uncompressed_size: 0xFFFFFFFF, filename_len: filename_len as u16, extra_field_len: zip_field_size as u16, comment_len: 0, start_disk: 0, internal_flags: 0, external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4, offset: 0xFFFFFFFF, }, ) .await?; buf.write_all(filename).await?; write_struct( &mut buf, Zip64FieldWithOffset { field_type: 1, field_size: 3 * 8, uncompressed_size: self.uncompressed_size, compressed_size: self.compressed_size, offset: self.offset, }, ) .await?; Ok(size) } } /// Wraps a writer that implements AsyncWrite for creating a ZIP archive /// /// This will create a ZIP archive on the fly with files added with /// 'add_entry'. To Finish the file, call 'finish' /// Example: /// ```no_run /// use proxmox_backup::tools::zip::*; /// use tokio::fs::File; /// /// #[tokio::async] /// async fn main() -> std::io::Result<()> { /// let target = File::open("foo.zip").await?; /// let mut source = File::open("foo.txt").await?; /// /// let mut zip = ZipEncoder::new(target); /// zip.add_entry(ZipEntry { /// "foo.txt", /// 0, /// 0o100755, /// true, /// }, source).await?; /// /// zip.finish().await? /// /// Ok(()) /// } /// ``` pub struct ZipEncoder where W: AsyncWrite + Unpin, { byte_count: usize, files: Vec, target: W, buf: ByteBuffer, } impl ZipEncoder { pub fn new(target: W) -> Self { Self { byte_count: 0, files: Vec::new(), target, buf: ByteBuffer::with_capacity(1024*1024), } } pub async fn add_entry( &mut self, mut entry: ZipEntry, content: Option, ) -> Result<(), Error> { entry.offset = self.byte_count.try_into()?; self.byte_count += entry.write_local_header(&mut self.target).await?; if let Some(mut content) = content { let mut hasher = Hasher::new(); let mut size = 0; loop { let count = self.buf.read_from_async(&mut content).await?; // end of file if count == 0 { break; } size += count; hasher.update(&self.buf); self.target.write_all(&self.buf).await?; self.buf.consume(count); } self.byte_count += size; entry.compressed_size = size.try_into()?; entry.uncompressed_size = size.try_into()?; entry.crc32 = hasher.finalize(); } self.byte_count += entry.write_data_descriptor(&mut self.target).await?; self.files.push(entry); Ok(()) } async fn write_eocd( &mut self, central_dir_size: usize, central_dir_offset: usize, ) -> Result<(), Error> { let entrycount = self.files.len(); let mut count = entrycount as u16; let mut directory_size = central_dir_size as u32; let mut directory_offset = central_dir_offset as u32; if central_dir_size > u32::MAX as usize || central_dir_offset > u32::MAX as usize || entrycount > u16::MAX as usize { count = 0xFFFF; directory_size = 0xFFFFFFFF; directory_offset = 0xFFFFFFFF; write_struct( &mut self.target, Zip64EOCDRecord { signature: ZIP64_EOCD_RECORD, field_size: 44, version_made_by: VERSION_MADE_BY, version_needed: VERSION_NEEDED, disk_number: 0, disk_number_central_dir: 0, disk_record_count: entrycount.try_into()?, total_record_count: entrycount.try_into()?, directory_size: central_dir_size.try_into()?, directory_offset: central_dir_offset.try_into()?, }, ) .await?; let locator_offset = central_dir_offset + central_dir_size; write_struct( &mut self.target, Zip64EOCDLocator { signature: ZIP64_EOCD_LOCATOR, disk_number: 0, offset: locator_offset.try_into()?, disk_count: 1, }, ) .await?; } write_struct( &mut self.target, EndOfCentralDir { signature: END_OF_CENTRAL_DIR, disk_number: 0, start_disk: 0, disk_record_count: count, total_record_count: count, directory_size, directory_offset, comment_len: 0, }, ) .await?; Ok(()) } pub async fn finish(&mut self) -> Result<(), Error> { let central_dir_offset = self.byte_count; let mut central_dir_size = 0; for file in &self.files { central_dir_size += file .write_central_directory_header(&mut self.target) .await?; } self.write_eocd(central_dir_size, central_dir_offset) .await?; self.target.flush().await?; Ok(()) } }