pxar: encoder: limit max number of entries held at once in memory during archive creation

Limit the total number of entries held in memory at once, and thereby the
approximate memory consumption, instead of enforcing the limit per directory
as before. This makes more sense, as it bounds not only the width but also the
depth of the directory tree.

Further, instead of hardcoding this value, allow it to be passed as an
additional optional parameter 'entries-max'.
This makes it possible to create archives of directories containing a large
number of entries by raising the limit.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
Christian Ebner <c.ebner@proxmox.com>, 2020-01-10 12:50:06 +01:00 (committed by Dietmar Maurer)
parent 141304d64e
commit 6fc053ed85
6 changed files with 54 additions and 13 deletions
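The idea of the change, as a self-contained sketch (plain std Rust with illustrative names, not the actual pxar encoder): a single counter is charged while a directory's entry list is buffered and released only once that directory has been fully processed, so entries held anywhere along the current descent path count against the same budget.

use std::{fs, io, path::Path};

fn walk(path: &Path, counter: &mut usize, max: usize) -> io::Result<()> {
    // Collect this directory's entries, charging each against the global budget.
    let mut names = Vec::new();
    for entry in fs::read_dir(path)? {
        let entry = entry?;
        *counter += 1;
        if *counter > max {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("exceeded max number of entries (> {})", max),
            ));
        }
        names.push((entry.path(), entry.file_type()?.is_dir()));
    }
    let num_entries = names.len();
    // Recurse while this directory's entries are still accounted for,
    // so the limit caps the depth of the buffered tree, not just its width.
    for (name, is_dir) in names {
        if is_dir {
            walk(&name, counter, max)?;
        }
    }
    *counter -= num_entries; // directory fully processed: release its share
    Ok(())
}

fn main() -> io::Result<()> {
    walk(Path::new("."), &mut 0, 1024 * 1024)
}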


@@ -231,9 +231,17 @@ async fn backup_directory<P: AsRef<Path>>(
     skip_lost_and_found: bool,
     crypt_config: Option<Arc<CryptConfig>>,
     catalog: Arc<Mutex<CatalogWriter<SenderWriter>>>,
+    entries_max: usize,
 ) -> Result<BackupStats, Error> {
-    let pxar_stream = PxarBackupStream::open(dir_path.as_ref(), device_set, verbose, skip_lost_and_found, catalog)?;
+    let pxar_stream = PxarBackupStream::open(
+        dir_path.as_ref(),
+        device_set,
+        verbose,
+        skip_lost_and_found,
+        catalog,
+        entries_max,
+    )?;
 
     let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size);
     let (mut tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks
@@ -776,6 +784,12 @@ fn spawn_catalog_upload(
                schema: CHUNK_SIZE_SCHEMA,
                optional: true,
            },
+           "entries-max": {
+               type: Integer,
+               description: "Max number of entries to hold in memory.",
+               optional: true,
+               default: pxar::ENCODER_MAX_ENTRIES as isize,
+           },
        }
    }
 )]
@@ -812,6 +826,8 @@ async fn create_backup(
     let include_dev = param["include-dev"].as_array();
+    let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64);
+
     let mut devices = if all_file_systems { None } else { Some(HashSet::new()) };
 
     if let Some(include_dev) = include_dev {
@@ -960,6 +976,7 @@ async fn create_backup(
                     skip_lost_and_found,
                     crypt_config.clone(),
                     catalog.clone(),
+                    entries_max as usize,
                 ).await?;
                 manifest.add_file(target, stats.size, stats.csum)?;
                 catalog.lock().unwrap().end_directory()?;
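For illustration, the new optional parameter resolves exactly like the lookup in create_backup() above, falling back to the pxar default when absent. A minimal sketch (assumes the serde_json crate; the request body is made up):

fn main() {
    const ENCODER_MAX_ENTRIES: usize = 1024 * 1024; // default, see the pxar hunk below

    let param = serde_json::json!({ "entries-max": 4_194_304u64 });
    let entries_max = param["entries-max"]
        .as_u64()
        .unwrap_or(ENCODER_MAX_ENTRIES as u64);
    assert_eq!(entries_max, 4_194_304);
}

On the command line this presumably surfaces as an --entries-max flag via the usual schema-to-CLI mapping.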


@@ -178,6 +178,7 @@ fn create_archive(
     let no_sockets = param["no-sockets"].as_bool().unwrap_or(false);
     let empty = Vec::new();
     let exclude_pattern = param["exclude"].as_array().unwrap_or(&empty);
+    let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64);
 
     let devices = if all_file_systems { None } else { Some(HashSet::new()) };
@@ -232,6 +233,7 @@ fn create_archive(
         false,
         feature_flags,
         pattern_list,
+        entries_max as usize,
     )?;
 
     writer.flush()?;
@@ -342,6 +344,15 @@ const API_METHOD_CREATE_ARCHIVE: ApiMethod = ApiMethod::new(
            &StringSchema::new("Path or pattern matching files to restore.").schema()
            ).schema()
        ),
+       (
+           "entries-max",
+           true,
+           &IntegerSchema::new("Max number of entries loaded at once into memory")
+               .default(pxar::ENCODER_MAX_ENTRIES as isize)
+               .minimum(0)
+               .maximum(std::isize::MAX)
+               .schema()
+       ),
    ]),
    )
 );


@@ -48,6 +48,7 @@ impl PxarBackupStream {
         verbose: bool,
         skip_lost_and_found: bool,
         catalog: Arc<Mutex<CatalogWriter<W>>>,
+        entries_max: usize,
     ) -> Result<Self, Error> {
 
         let (rx, tx) = nix::unistd::pipe()?;
@@ -73,6 +74,7 @@ impl PxarBackupStream {
                 skip_lost_and_found,
                 pxar::flags::DEFAULT,
                 exclude_pattern,
+                entries_max,
             ) {
                 let mut error = error2.lock().unwrap();
                 *error = Some(err.to_string());
@@ -95,12 +97,13 @@ impl PxarBackupStream {
         verbose: bool,
         skip_lost_and_found: bool,
         catalog: Arc<Mutex<CatalogWriter<W>>>,
+        entries_max: usize,
     ) -> Result<Self, Error> {
 
         let dir = nix::dir::Dir::open(dirname, OFlag::O_DIRECTORY, Mode::empty())?;
         let path = std::path::PathBuf::from(dirname);
 
-        Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog)
+        Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog, entries_max)
     }
 }
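For context, the stream pairs a blocking encoder thread with a pipe that an async reader drains; a rough std-only sketch of that producer/consumer shape, with a bounded buffer playing the role of the 10-chunk channel in backup_directory() above (illustrative names, not the proxmox-backup API):

use std::io::{self, Write};
use std::sync::mpsc::{sync_channel, SyncSender};
use std::thread;

// An io::Write adapter that hands finished buffers to a bounded channel,
// blocking the producer whenever the consumer falls behind.
struct ChannelWriter(SyncSender<Vec<u8>>);

impl Write for ChannelWriter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0
            .send(buf.to_vec())
            .map_err(|_| io::Error::new(io::ErrorKind::BrokenPipe, "receiver dropped"))?;
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

fn main() {
    let (tx, rx) = sync_channel(10); // bounded, like the 10-chunk buffer
    let producer = thread::spawn(move || {
        let mut w = ChannelWriter(tx);
        // stand-in for the encoder writing archive bytes
        w.write_all(b"archive bytes...").unwrap();
    });
    let total: usize = rx.iter().map(|chunk: Vec<u8>| chunk.len()).sum();
    producer.join().unwrap();
    println!("received {} bytes", total);
}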


@@ -29,11 +29,6 @@ use crate::tools::acl;
 use crate::tools::fs;
 use crate::tools::xattr;
 
-/// The format requires to build sorted directory lookup tables in
-/// memory, so we restrict the number of allowed entries to limit
-/// maximum memory usage.
-pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024;
-
 #[derive(Eq, PartialEq, Hash)]
 struct HardLinkInfo {
     st_dev: u64,
@@ -55,6 +50,8 @@ pub struct Encoder<'a, W: Write, C: BackupCatalogWriter> {
     // Flags signaling features supported by the filesystem
     fs_feature_flags: u64,
     hardlinks: HashMap<HardLinkInfo, (PathBuf, u64)>,
+    entry_counter: usize,
+    entry_max: usize,
 }
 
 impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
@@ -82,6 +79,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         skip_lost_and_found: bool, // fixme: should be a feature flag ??
         feature_flags: u64,
         mut excludes: Vec<MatchPattern>,
+        entry_max: usize,
     ) -> Result<(), Error> {
         const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024;
@@ -126,6 +124,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
             feature_flags,
             fs_feature_flags,
             hardlinks: HashMap::new(),
+            entry_counter: 0,
+            entry_max,
         };
 
         if verbose {
@@ -762,14 +762,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                         self.full_path().join(filename_osstr)
                     );
                 }
-                (_, child_pattern) => name_list.push((filename, stat, child_pattern)),
+                (_, child_pattern) => {
+                    self.entry_counter += 1;
+                    name_list.push((filename, stat, child_pattern));
+                }
             }
 
-            if name_list.len() > MAX_DIRECTORY_ENTRIES {
+            if self.entry_counter > self.entry_max {
                 bail!(
-                    "too many directory items in {:?} (> {})",
-                    self.full_path(),
-                    MAX_DIRECTORY_ENTRIES
+                    "exceeded max number of entries (> {})",
+                    self.entry_max
                 );
             }
         }
@@ -778,8 +780,9 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         }
 
         name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
+        let num_entries = name_list.len();
 
-        let mut goodbye_items = Vec::with_capacity(name_list.len());
+        let mut goodbye_items = Vec::with_capacity(num_entries);
 
         for (filename, stat, exclude_list) in name_list {
             let start_pos = self.writer_pos;
@@ -1049,6 +1052,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         let goodbye_offset = self.writer_pos - dir_start_pos;
         self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?;
+        self.entry_counter -= num_entries;
 
         //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos);
         Ok(())
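Worth noting in the hunk above: entry_counter is decremented only after the directory's goodbye table has been written, so a directory's entries stay accounted for while any of its subdirectories are still being encoded. The counter therefore tracks the peak number of simultaneously buffered entries along the descent path, which is why the new limit bounds depth as well as width.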


@@ -256,3 +256,8 @@ pub fn check_ca_header<T>(head: &PxarHeader, htype: u64) -> Result<(), Error> {
 
     Ok(())
 }
+
+/// The format requires to build sorted directory lookup tables in
+/// memory, so we restrict the number of allowed entries to limit
+/// maximum memory usage.
+pub const ENCODER_MAX_ENTRIES: usize = 1024 * 1024;
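For a rough sense of scale (an estimate, not part of the commit): each buffered entry holds a filename, stat data and eventually a goodbye-table item, plausibly a few hundred bytes in total, so the default of 1024 * 1024 entries corresponds to a worst case on the order of a few hundred MiB.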


@@ -37,6 +37,7 @@ fn run_test(dir_name: &str) -> Result<(), Error> {
         false,
         flags::DEFAULT,
         Vec::new(),
+        ENCODER_MAX_ENTRIES,
     )?;
 
     Command::new("cmp")