pxar/extract: add sequential variant of extract_sub_dir
extract_sub_dir_seq, together with seq_files_extractor, allows extracting files from a pxar Decoder, in addition to the existing Accessor-based variant. To facilitate code re-use, some helper functions are factored out in the process.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
committed by Thomas Lamprecht
parent 801ec1dbf9
commit edf0940649

@@ -16,9 +16,10 @@ use nix::fcntl::OFlag;
 use nix::sys::stat::Mode;
 
 use pathpatterns::{MatchEntry, MatchList, MatchType};
-use pxar::format::Device;
-use pxar::Metadata;
 use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
+use pxar::decoder::aio::Decoder;
+use pxar::format::Device;
+use pxar::{Entry, EntryKind, Metadata};
 
 use proxmox::c_result;
 use proxmox::tools::{
@@ -93,8 +94,6 @@ where
     let mut err_path_stack = vec![OsString::from("/")];
     let mut current_match = options.extract_match_default;
     while let Some(entry) = decoder.next() {
-        use pxar::EntryKind;
-
         let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?;
 
         let file_name_os = entry.file_name();
@@ -556,7 +555,6 @@ where
     T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
     W: tokio::io::AsyncWrite + Unpin + Send + 'static,
 {
-    use pxar::EntryKind;
     Box::pin(async move {
         let metadata = file.entry().metadata();
         let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf();
@@ -616,10 +614,42 @@ where
     })
 }
 
+fn get_extractor<DEST>(destination: DEST, metadata: Metadata) -> Result<Extractor, Error>
+where
+    DEST: AsRef<Path>,
+{
+    create_path(
+        &destination,
+        None,
+        Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
+    )
+    .map_err(|err| {
+        format_err!(
+            "error creating directory {:?}: {}",
+            destination.as_ref(),
+            err
+        )
+    })?;
+
+    let dir = Dir::open(
+        destination.as_ref(),
+        OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
+        Mode::empty(),
+    )
+    .map_err(|err| {
+        format_err!(
+            "unable to open target directory {:?}: {}",
+            destination.as_ref(),
+            err,
+        )
+    })?;
+
+    Ok(Extractor::new(dir, metadata, false, Flags::DEFAULT))
+}
+
 pub async fn extract_sub_dir<T, DEST, PATH>(
     destination: DEST,
-    mut decoder: Accessor<T>,
+    decoder: Accessor<T>,
     path: PATH,
     verbose: bool,
 ) -> Result<(), Error>
@@ -630,111 +660,205 @@ where
 {
     let root = decoder.open_root().await?;
 
-    create_path(
-        &destination,
-        None,
-        Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
-    )
-    .map_err(|err| format_err!("error creating directory {:?}: {}", destination.as_ref(), err))?;
-
-    let dir = Dir::open(
-        destination.as_ref(),
-        OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
-        Mode::empty(),
-    )
-    .map_err(|err| format_err!("unable to open target directory {:?}: {}", destination.as_ref(), err,))?;
-
-    let mut extractor =  Extractor::new(
-        dir,
+    let mut extractor = get_extractor(
+        destination,
         root.lookup_self().await?.entry().metadata().clone(),
-        false,
-        Flags::DEFAULT,
-    );
+    )?;
 
     let file = root
-        .lookup(&path).await?
+        .lookup(&path)
+        .await?
         .ok_or(format_err!("error opening '{:?}'", path.as_ref()))?;
 
-    recurse_files_extractor(&mut extractor, &mut decoder, file, verbose).await
+    recurse_files_extractor(&mut extractor, file, verbose).await
 }
 
-fn recurse_files_extractor<'a, T>(
+pub async fn extract_sub_dir_seq<S, DEST>(
+    destination: DEST,
+    mut decoder: Decoder<S>,
+    verbose: bool,
+) -> Result<(), Error>
+where
+    S: pxar::decoder::SeqRead + Unpin + Send + 'static,
+    DEST: AsRef<Path>,
+{
+    decoder.enable_goodbye_entries(true);
+    let root = match decoder.next().await {
+        Some(Ok(root)) => root,
+        Some(Err(err)) => bail!("error getting root entry from pxar: {}", err),
+        None => bail!("cannot extract empty archive"),
+    };
+
+    let mut extractor = get_extractor(destination, root.metadata().clone())?;
+
+    if let Err(err) = seq_files_extractor(&mut extractor, decoder, verbose).await {
+        eprintln!("error extracting pxar archive: {}", err);
+    }
+
+    Ok(())
+}
+
+fn extract_special(
+    extractor: &mut Extractor,
+    entry: &Entry,
+    file_name: &CStr,
+) -> Result<(), Error> {
+    let metadata = entry.metadata();
+    match entry.kind() {
+        EntryKind::Symlink(link) => {
+            extractor.extract_symlink(file_name, metadata, link.as_ref())?;
+        }
+        EntryKind::Hardlink(link) => {
+            extractor.extract_hardlink(file_name, link.as_os_str())?;
+        }
+        EntryKind::Device(dev) => {
+            if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
+                extractor.extract_device(file_name, metadata, dev)?;
+            }
+        }
+        EntryKind::Fifo => {
+            if extractor.contains_flags(Flags::WITH_FIFOS) {
+                extractor.extract_special(file_name, metadata, 0)?;
+            }
+        }
+        EntryKind::Socket => {
+            if extractor.contains_flags(Flags::WITH_SOCKETS) {
+                extractor.extract_special(file_name, metadata, 0)?;
+            }
+        }
+        _ => bail!("extract_special used with unsupported entry kind"),
+    }
+    Ok(())
+}
+
+fn get_filename(entry: &Entry) -> Result<(OsString, CString), Error> {
+    let file_name_os = entry.file_name().to_owned();
+
+    // safety check: a file entry in an archive must never contain slashes:
+    if file_name_os.as_bytes().contains(&b'/') {
+        bail!("archive file entry contains slashes, which is invalid and a security concern");
+    }
+
+    let file_name = CString::new(file_name_os.as_bytes())
+        .map_err(|_| format_err!("encountered file name with null-bytes"))?;
+
+    Ok((file_name_os, file_name))
+}
+
+async fn recurse_files_extractor<'a, T>(
     extractor: &'a mut Extractor,
-    decoder: &'a mut Accessor<T>,
     file: FileEntry<T>,
     verbose: bool,
-) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>>
+) -> Result<(), Error>
 where
     T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
 {
-    use pxar::EntryKind;
-    Box::pin(async move {
-        let metadata = file.entry().metadata();
-        let file_name_os = file.file_name();
-
-        // safety check: a file entry in an archive must never contain slashes:
-        if file_name_os.as_bytes().contains(&b'/') {
-            bail!("archive file entry contains slashes, which is invalid and a security concern");
-        }
-
-        let file_name = CString::new(file_name_os.as_bytes())
-            .map_err(|_| format_err!("encountered file name with null-bytes"))?;
-
-        if verbose {
-            eprintln!("extracting: {}", file.path().display());
-        }
-
-        match file.kind() {
-            EntryKind::Directory => {
-                extractor
-                    .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
-                    .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
-
-                let dir = file.enter_directory().await?;
-                let mut readdir = dir.read_dir();
-                while let Some(entry) = readdir.next().await {
-                    let entry = entry?.decode_entry().await?;
-                    let filename = entry.path().to_path_buf();
-
-                    // log errors and continue
-                    if let Err(err) = recurse_files_extractor(extractor, decoder, entry, verbose).await {
-                        eprintln!("error extracting {:?}: {}", filename.display(), err);
-                    }
-                }
-                extractor.leave_directory()?;
-            }
-            EntryKind::Symlink(link) => {
-                extractor.extract_symlink(&file_name, metadata, link.as_ref())?;
-            }
-            EntryKind::Hardlink(link) => {
-                extractor.extract_hardlink(&file_name, link.as_os_str())?;
-            }
-            EntryKind::Device(dev) => {
-                if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
-                    extractor.extract_device(&file_name, metadata, dev)?;
-                }
-            }
-            EntryKind::Fifo => {
-                if extractor.contains_flags(Flags::WITH_FIFOS) {
-                    extractor.extract_special(&file_name, metadata, 0)?;
-                }
-            }
-            EntryKind::Socket => {
-                if extractor.contains_flags(Flags::WITH_SOCKETS) {
-                    extractor.extract_special(&file_name, metadata, 0)?;
-                }
-            }
-            EntryKind::File { size, .. } => extractor.async_extract_file(
-                &file_name,
-                metadata,
-                *size,
-                &mut file.contents().await.map_err(|_| {
-                    format_err!("found regular file entry without contents in archive")
-                })?,
-            ).await?,
-            EntryKind::GoodbyeTable => {}, // ignore
-        }
-        Ok(())
-    })
+    let entry = file.entry();
+    let metadata = entry.metadata();
+    let (file_name_os, file_name) = get_filename(entry)?;
+
+    if verbose {
+        eprintln!("extracting: {}", file.path().display());
+    }
+
+    match file.kind() {
+        EntryKind::Directory => {
+            extractor
+                .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
+                .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
+
+            let dir = file.enter_directory().await?;
+            let mut seq_decoder = dir.decode_full().await?;
+            seq_decoder.enable_goodbye_entries(true);
+            seq_files_extractor(extractor, seq_decoder, verbose).await?;
+            extractor.leave_directory()?;
+        }
+        EntryKind::File { size, .. } => {
+            extractor
+                .async_extract_file(
+                    &file_name,
+                    metadata,
+                    *size,
+                    &mut file.contents().await.map_err(|_| {
+                        format_err!("found regular file entry without contents in archive")
+                    })?,
+                )
+                .await?
+        }
+        EntryKind::GoodbyeTable => {} // ignore
+        _ => extract_special(extractor, entry, &file_name)?,
+    }
+    Ok(())
 }
 
+async fn seq_files_extractor<'a, T>(
+    extractor: &'a mut Extractor,
+    mut decoder: pxar::decoder::aio::Decoder<T>,
+    verbose: bool,
+) -> Result<(), Error>
+where
+    T: pxar::decoder::SeqRead,
+{
+    let mut dir_level = 0;
+    loop {
+        let entry = match decoder.next().await {
+            Some(entry) => entry?,
+            None => return Ok(()),
+        };
+
+        let metadata = entry.metadata();
+        let (file_name_os, file_name) = get_filename(&entry)?;
+
+        if verbose && !matches!(entry.kind(), EntryKind::GoodbyeTable) {
+            eprintln!("extracting: {}", entry.path().display());
+        }
+
+        if let Err(err) = async {
+            match entry.kind() {
+                EntryKind::Directory => {
+                    dir_level += 1;
+                    extractor
+                        .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
+                        .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
+                }
+                EntryKind::File { size, .. } => {
+                    extractor
+                        .async_extract_file(
+                            &file_name,
+                            metadata,
+                            *size,
+                            &mut decoder.contents().ok_or_else(|| {
+                                format_err!("found regular file entry without contents in archive")
+                            })?,
+                        )
+                        .await?
+                }
+                EntryKind::GoodbyeTable => {
+                    dir_level -= 1;
+                    extractor.leave_directory()?;
+                }
+                _ => extract_special(extractor, &entry, &file_name)?,
+            }
+            Ok(()) as Result<(), Error>
+        }
+        .await
+        {
+            let display = entry.path().display().to_string();
+            eprintln!(
+                "error extracting {}: {}",
+                if matches!(entry.kind(), EntryKind::GoodbyeTable) {
+                    "<directory>"
+                } else {
+                    &display
                },
+                err
+            );
+        }
+
+        if dir_level < 0 {
+            // we've encountered one Goodbye more then Directory, meaning we've left the dir we
+            // started in - exit early, otherwise the extractor might panic
+            return Ok(());
+        }
+    }
+}

@@ -59,7 +59,10 @@ mod flags;
 pub use flags::Flags;
 
 pub use create::{create_archive, PxarCreateOptions};
-pub use extract::{create_zip, extract_archive, extract_sub_dir, ErrorHandler, PxarExtractOptions};
+pub use extract::{
+    create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
+    PxarExtractOptions,
+};
 
 /// The format requires to build sorted directory lookup tables in
 /// memory, so we restrict the number of allowed entries to limit
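
For context, a minimal usage sketch of the new sequential entry point, as a caller inside the repository might write it. The wrapper function, its name, and the assumption that the module is reachable as crate::pxar (per the re-export in the second hunk) are illustrative only; just extract_sub_dir_seq and its signature come from the diff above, and how the Decoder is obtained is deliberately left open.

    use std::path::Path;

    use anyhow::Error;
    use pxar::decoder::aio::Decoder;

    // Hypothetical caller: extract a sequentially streamed pxar archive into
    // `target`. The Decoder is assumed to already wrap some SeqRead source
    // (e.g. a pipe or network stream); constructing it is not shown here.
    async fn restore_sub_dir_stream<S>(target: &Path, decoder: Decoder<S>) -> Result<(), Error>
    where
        S: pxar::decoder::SeqRead + Unpin + Send + 'static,
    {
        // `true` prints each extracted entry to stderr, matching the verbose
        // flag of the existing extract_sub_dir().
        crate::pxar::extract_sub_dir_seq(target, decoder, true).await
    }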