pxar/extract: add sequential variant of extract_sub_dir

extract_sub_dir_seq, together with seq_files_extractor, allows extracting
files from a pxar Decoder, in addition to the existing option of using an
Accessor. To facilitate code re-use, some helper functions are factored
out in the process.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
Stefan Reiter 2021-03-31 12:22:00 +02:00 committed by Thomas Lamprecht
parent 801ec1dbf9
commit edf0940649
2 changed files with 224 additions and 97 deletions

View File

@ -16,9 +16,10 @@ use nix::fcntl::OFlag;
use nix::sys::stat::Mode; use nix::sys::stat::Mode;
use pathpatterns::{MatchEntry, MatchList, MatchType}; use pathpatterns::{MatchEntry, MatchList, MatchType};
use pxar::format::Device;
use pxar::Metadata;
use pxar::accessor::aio::{Accessor, FileContents, FileEntry}; use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
use pxar::decoder::aio::Decoder;
use pxar::format::Device;
use pxar::{Entry, EntryKind, Metadata};
use proxmox::c_result; use proxmox::c_result;
use proxmox::tools::{ use proxmox::tools::{
@ -93,8 +94,6 @@ where
let mut err_path_stack = vec![OsString::from("/")]; let mut err_path_stack = vec![OsString::from("/")];
let mut current_match = options.extract_match_default; let mut current_match = options.extract_match_default;
while let Some(entry) = decoder.next() { while let Some(entry) = decoder.next() {
use pxar::EntryKind;
let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?; let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?;
let file_name_os = entry.file_name(); let file_name_os = entry.file_name();
@ -556,7 +555,6 @@ where
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static, T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
W: tokio::io::AsyncWrite + Unpin + Send + 'static, W: tokio::io::AsyncWrite + Unpin + Send + 'static,
{ {
use pxar::EntryKind;
Box::pin(async move { Box::pin(async move {
let metadata = file.entry().metadata(); let metadata = file.entry().metadata();
let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf(); let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf();
@ -616,10 +614,42 @@ where
}) })
} }
fn get_extractor<DEST>(destination: DEST, metadata: Metadata) -> Result<Extractor, Error>
where
DEST: AsRef<Path>,
{
create_path(
&destination,
None,
Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
)
.map_err(|err| {
format_err!(
"error creating directory {:?}: {}",
destination.as_ref(),
err
)
})?;
let dir = Dir::open(
destination.as_ref(),
OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
Mode::empty(),
)
.map_err(|err| {
format_err!(
"unable to open target directory {:?}: {}",
destination.as_ref(),
err,
)
})?;
Ok(Extractor::new(dir, metadata, false, Flags::DEFAULT))
}
pub async fn extract_sub_dir<T, DEST, PATH>( pub async fn extract_sub_dir<T, DEST, PATH>(
destination: DEST, destination: DEST,
mut decoder: Accessor<T>, decoder: Accessor<T>,
path: PATH, path: PATH,
verbose: bool, verbose: bool,
) -> Result<(), Error> ) -> Result<(), Error>
@ -630,47 +660,79 @@ where
{ {
let root = decoder.open_root().await?; let root = decoder.open_root().await?;
create_path( let mut extractor = get_extractor(
&destination, destination,
None,
Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
)
.map_err(|err| format_err!("error creating directory {:?}: {}", destination.as_ref(), err))?;
let dir = Dir::open(
destination.as_ref(),
OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
Mode::empty(),
)
.map_err(|err| format_err!("unable to open target directory {:?}: {}", destination.as_ref(), err,))?;
let mut extractor = Extractor::new(
dir,
root.lookup_self().await?.entry().metadata().clone(), root.lookup_self().await?.entry().metadata().clone(),
false, )?;
Flags::DEFAULT,
);
let file = root let file = root
.lookup(&path).await? .lookup(&path)
.await?
.ok_or(format_err!("error opening '{:?}'", path.as_ref()))?; .ok_or(format_err!("error opening '{:?}'", path.as_ref()))?;
recurse_files_extractor(&mut extractor, &mut decoder, file, verbose).await recurse_files_extractor(&mut extractor, file, verbose).await
} }
fn recurse_files_extractor<'a, T>( pub async fn extract_sub_dir_seq<S, DEST>(
extractor: &'a mut Extractor, destination: DEST,
decoder: &'a mut Accessor<T>, mut decoder: Decoder<S>,
file: FileEntry<T>,
verbose: bool, verbose: bool,
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>> ) -> Result<(), Error>
where where
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static, S: pxar::decoder::SeqRead + Unpin + Send + 'static,
DEST: AsRef<Path>,
{ {
use pxar::EntryKind; decoder.enable_goodbye_entries(true);
Box::pin(async move { let root = match decoder.next().await {
let metadata = file.entry().metadata(); Some(Ok(root)) => root,
let file_name_os = file.file_name(); Some(Err(err)) => bail!("error getting root entry from pxar: {}", err),
None => bail!("cannot extract empty archive"),
};
let mut extractor = get_extractor(destination, root.metadata().clone())?;
if let Err(err) = seq_files_extractor(&mut extractor, decoder, verbose).await {
eprintln!("error extracting pxar archive: {}", err);
}
Ok(())
}
/// Extracts an archive entry that is neither a regular file nor a directory.
///
/// Symlinks and hardlinks are always extracted. Device nodes, FIFOs and sockets are only
/// extracted when the extractor reports the matching feature flag (`WITH_DEVICE_NODES`,
/// `WITH_FIFOS`, `WITH_SOCKETS`); without the flag they are skipped and `Ok(())` is returned.
///
/// # Errors
///
/// Propagates errors from the extractor and fails for every entry kind not listed above.
fn extract_special(
    extractor: &mut Extractor,
    entry: &Entry,
    file_name: &CStr,
) -> Result<(), Error> {
    let metadata = entry.metadata();

    match entry.kind() {
        EntryKind::Symlink(target) => {
            extractor.extract_symlink(file_name, metadata, target.as_ref())?;
        }
        EntryKind::Hardlink(target) => {
            extractor.extract_hardlink(file_name, target.as_os_str())?;
        }
        EntryKind::Device(device) => {
            if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
                extractor.extract_device(file_name, metadata, device)?;
            }
        }
        // FIFOs and sockets are created by the same call and differ only in the flag that
        // gates them.
        EntryKind::Fifo | EntryKind::Socket => {
            let required = if matches!(entry.kind(), EntryKind::Fifo) {
                Flags::WITH_FIFOS
            } else {
                Flags::WITH_SOCKETS
            };
            if extractor.contains_flags(required) {
                extractor.extract_special(file_name, metadata, 0)?;
            }
        }
        _ => bail!("extract_special used with unsupported entry kind"),
    }

    Ok(())
}
fn get_filename(entry: &Entry) -> Result<(OsString, CString), Error> {
let file_name_os = entry.file_name().to_owned();
// safety check: a file entry in an archive must never contain slashes: // safety check: a file entry in an archive must never contain slashes:
if file_name_os.as_bytes().contains(&b'/') { if file_name_os.as_bytes().contains(&b'/') {
@ -680,6 +742,21 @@ where
let file_name = CString::new(file_name_os.as_bytes()) let file_name = CString::new(file_name_os.as_bytes())
.map_err(|_| format_err!("encountered file name with null-bytes"))?; .map_err(|_| format_err!("encountered file name with null-bytes"))?;
Ok((file_name_os, file_name))
}
async fn recurse_files_extractor<'a, T>(
extractor: &'a mut Extractor,
file: FileEntry<T>,
verbose: bool,
) -> Result<(), Error>
where
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
{
let entry = file.entry();
let metadata = entry.metadata();
let (file_name_os, file_name) = get_filename(entry)?;
if verbose { if verbose {
eprintln!("extracting: {}", file.path().display()); eprintln!("extracting: {}", file.path().display());
} }
@ -691,50 +768,97 @@ where
.map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?; .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
let dir = file.enter_directory().await?; let dir = file.enter_directory().await?;
let mut readdir = dir.read_dir(); let mut seq_decoder = dir.decode_full().await?;
while let Some(entry) = readdir.next().await { seq_decoder.enable_goodbye_entries(true);
let entry = entry?.decode_entry().await?; seq_files_extractor(extractor, seq_decoder, verbose).await?;
let filename = entry.path().to_path_buf();
// log errors and continue
if let Err(err) = recurse_files_extractor(extractor, decoder, entry, verbose).await {
eprintln!("error extracting {:?}: {}", filename.display(), err);
}
}
extractor.leave_directory()?; extractor.leave_directory()?;
} }
EntryKind::Symlink(link) => { EntryKind::File { size, .. } => {
extractor.extract_symlink(&file_name, metadata, link.as_ref())?; extractor
} .async_extract_file(
EntryKind::Hardlink(link) => {
extractor.extract_hardlink(&file_name, link.as_os_str())?;
}
EntryKind::Device(dev) => {
if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
extractor.extract_device(&file_name, metadata, dev)?;
}
}
EntryKind::Fifo => {
if extractor.contains_flags(Flags::WITH_FIFOS) {
extractor.extract_special(&file_name, metadata, 0)?;
}
}
EntryKind::Socket => {
if extractor.contains_flags(Flags::WITH_SOCKETS) {
extractor.extract_special(&file_name, metadata, 0)?;
}
}
EntryKind::File { size, .. } => extractor.async_extract_file(
&file_name, &file_name,
metadata, metadata,
*size, *size,
&mut file.contents().await.map_err(|_| { &mut file.contents().await.map_err(|_| {
format_err!("found regular file entry without contents in archive") format_err!("found regular file entry without contents in archive")
})?, })?,
).await?, )
EntryKind::GoodbyeTable => {}, // ignore .await?
}
EntryKind::GoodbyeTable => {} // ignore
_ => extract_special(extractor, entry, &file_name)?,
} }
Ok(()) Ok(())
})
} }
async fn seq_files_extractor<'a, T>(
extractor: &'a mut Extractor,
mut decoder: pxar::decoder::aio::Decoder<T>,
verbose: bool,
) -> Result<(), Error>
where
T: pxar::decoder::SeqRead,
{
let mut dir_level = 0;
loop {
let entry = match decoder.next().await {
Some(entry) => entry?,
None => return Ok(()),
};
let metadata = entry.metadata();
let (file_name_os, file_name) = get_filename(&entry)?;
if verbose && !matches!(entry.kind(), EntryKind::GoodbyeTable) {
eprintln!("extracting: {}", entry.path().display());
}
if let Err(err) = async {
match entry.kind() {
EntryKind::Directory => {
dir_level += 1;
extractor
.enter_directory(file_name_os.to_owned(), metadata.clone(), true)
.map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
}
EntryKind::File { size, .. } => {
extractor
.async_extract_file(
&file_name,
metadata,
*size,
&mut decoder.contents().ok_or_else(|| {
format_err!("found regular file entry without contents in archive")
})?,
)
.await?
}
EntryKind::GoodbyeTable => {
dir_level -= 1;
extractor.leave_directory()?;
}
_ => extract_special(extractor, &entry, &file_name)?,
}
Ok(()) as Result<(), Error>
}
.await
{
let display = entry.path().display().to_string();
eprintln!(
"error extracting {}: {}",
if matches!(entry.kind(), EntryKind::GoodbyeTable) {
"<directory>"
} else {
&display
},
err
);
}
if dir_level < 0 {
// we've encountered one Goodbye more then Directory, meaning we've left the dir we
// started in - exit early, otherwise the extractor might panic
return Ok(());
}
}
}

View File

@ -59,7 +59,10 @@ mod flags;
pub use flags::Flags; pub use flags::Flags;
pub use create::{create_archive, PxarCreateOptions}; pub use create::{create_archive, PxarCreateOptions};
pub use extract::{create_zip, extract_archive, extract_sub_dir, ErrorHandler, PxarExtractOptions}; pub use extract::{
create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
PxarExtractOptions,
};
/// The format requires to build sorted directory lookup tables in /// The format requires to build sorted directory lookup tables in
/// memory, so we restrict the number of allowed entries to limit /// memory, so we restrict the number of allowed entries to limit