tape restore: avoid multiple stat calls for the same chunk

This commit is contained in:
Dietmar Maurer 2021-04-16 13:17:17 +02:00
parent 1369bcdbba
commit 28570d19a6
3 changed files with 38 additions and 9 deletions

View File

@ -1,6 +1,7 @@
use std::panic::UnwindSafe;
use std::path::Path;
use std::sync::Arc;
use std::collections::HashMap;
use anyhow::{bail, format_err, Error};
use serde_json::Value;
@ -1334,7 +1335,8 @@ pub fn catalog_media(
drive.rewind()?;
drive.read_label()?; // skip over labels - we already read them above
restore_media(&worker, &mut drive, &media_id, None, verbose)?;
let mut checked_chunks = HashMap::new();
restore_media(&worker, &mut drive, &media_id, None, &mut checked_chunks, verbose)?;
Ok(())
},

View File

@ -335,6 +335,8 @@ pub fn restore(
}
}
let mut checked_chunks_map = HashMap::new();
for media_id in media_id_list.iter() {
request_and_restore_media(
&worker,
@ -342,6 +344,7 @@ pub fn restore(
&drive_config,
&drive,
&store_map,
&mut checked_chunks_map,
&auth_id,
&notify_user,
&owner,
@ -375,6 +378,7 @@ pub fn request_and_restore_media(
drive_config: &SectionConfigData,
drive_name: &str,
store_map: &DataStoreMap,
checked_chunks_map: &mut HashMap<String, HashSet<[u8;32]>>,
authid: &Authid,
notify_user: &Option<Userid>,
owner: &Option<Authid>,
@ -416,6 +420,7 @@ pub fn request_and_restore_media(
&mut drive,
&info,
Some((&store_map, restore_owner)),
checked_chunks_map,
false,
)
}
@ -428,6 +433,7 @@ pub fn restore_media(
drive: &mut Box<dyn TapeDriver>,
media_id: &MediaId,
target: Option<(&DataStoreMap, &Authid)>,
checked_chunks_map: &mut HashMap<String, HashSet<[u8;32]>>,
verbose: bool,
) -> Result<(), Error> {
@ -451,7 +457,7 @@ pub fn restore_media(
Ok(reader) => reader,
};
restore_archive(worker, reader, current_file_number, target, &mut catalog, verbose)?;
restore_archive(worker, reader, current_file_number, target, &mut catalog, checked_chunks_map, verbose)?;
}
MediaCatalog::finish_temporary_database(status_path, &media_id.label.uuid, true)?;
@ -465,6 +471,7 @@ fn restore_archive<'a>(
current_file_number: u64,
target: Option<(&DataStoreMap, &Authid)>,
catalog: &mut MediaCatalog,
checked_chunks_map: &mut HashMap<String, HashSet<[u8;32]>>,
verbose: bool,
) -> Result<(), Error> {
let header: MediaContentHeader = unsafe { reader.read_le_value()? };
@ -490,6 +497,8 @@ fn restore_archive<'a>(
let datastore_name = archive_header.store;
let snapshot = archive_header.snapshot;
let checked_chunks = checked_chunks_map.entry(datastore_name.clone()).or_insert(HashSet::new());
task_log!(worker, "File {}: snapshot archive {}:{}", current_file_number, datastore_name, snapshot);
let backup_dir: BackupDir = snapshot.parse()?;
@ -516,7 +525,7 @@ fn restore_archive<'a>(
if is_new {
task_log!(worker, "restore snapshot {}", backup_dir);
match restore_snapshot_archive(worker, reader, &path, &datastore) {
match restore_snapshot_archive(worker, reader, &path, &datastore, checked_chunks) {
Err(err) => {
std::fs::remove_dir_all(&path)?;
bail!("restore snapshot {} failed - {}", backup_dir, err);
@ -565,7 +574,11 @@ fn restore_archive<'a>(
.and_then(|t| t.0.get_datastore(&source_datastore));
if datastore.is_some() || target.is_none() {
if let Some(chunks) = restore_chunk_archive(worker, reader, datastore, verbose)? {
let checked_chunks = checked_chunks_map
.entry(datastore.map(|d| d.name()).unwrap_or("_unused_").to_string())
.or_insert(HashSet::new());
if let Some(chunks) = restore_chunk_archive(worker, reader, datastore, checked_chunks, verbose)? {
catalog.start_chunk_archive(
Uuid::from(header.uuid),
current_file_number,
@ -607,10 +620,11 @@ fn restore_chunk_archive<'a>(
worker: &WorkerTask,
reader: Box<dyn 'a + TapeRead>,
datastore: Option<&DataStore>,
checked_chunks: &mut HashSet<[u8;32]>,
verbose: bool,
) -> Result<Option<Vec<[u8;32]>>, Error> {
let mut chunks = Vec::new();
let mut chunks = Vec::new();
let mut decoder = ChunkArchiveDecoder::new(reader);
@ -654,6 +668,7 @@ fn restore_chunk_archive<'a>(
} else if verbose {
task_log!(worker, "Found existing chunk: {}", proxmox::tools::digest_to_hex(&digest));
}
checked_chunks.insert(digest.clone());
} else if verbose {
task_log!(worker, "Found chunk: {}", proxmox::tools::digest_to_hex(&digest));
}
@ -668,10 +683,11 @@ fn restore_snapshot_archive<'a>(
reader: Box<dyn 'a + TapeRead>,
snapshot_path: &Path,
datastore: &DataStore,
checked_chunks: &mut HashSet<[u8;32]>,
) -> Result<bool, Error> {
let mut decoder = pxar::decoder::sync::Decoder::from_std(reader)?;
match try_restore_snapshot_archive(worker, &mut decoder, snapshot_path, datastore) {
match try_restore_snapshot_archive(worker, &mut decoder, snapshot_path, datastore, checked_chunks) {
Ok(()) => Ok(true),
Err(err) => {
let reader = decoder.input();
@ -697,6 +713,7 @@ fn try_restore_snapshot_archive<R: pxar::decoder::SeqRead>(
decoder: &mut pxar::decoder::sync::Decoder<R>,
snapshot_path: &Path,
datastore: &DataStore,
checked_chunks: &mut HashSet<[u8;32]>,
) -> Result<(), Error> {
let _root = match decoder.next() {
@ -787,13 +804,13 @@ fn try_restore_snapshot_archive<R: pxar::decoder::SeqRead>(
let index = DynamicIndexReader::open(&archive_path)?;
let (csum, size) = index.compute_csum();
manifest.verify_file(&item.filename, &csum, size)?;
datastore.fast_index_verification(&index)?;
datastore.fast_index_verification(&index, checked_chunks)?;
}
ArchiveType::FixedIndex => {
let index = FixedIndexReader::open(&archive_path)?;
let (csum, size) = index.compute_csum();
manifest.verify_file(&item.filename, &csum, size)?;
datastore.fast_index_verification(&index)?;
datastore.fast_index_verification(&index, checked_chunks)?;
}
ArchiveType::Blob => {
let mut tmpfile = std::fs::File::open(&archive_path)?;

View File

@ -154,10 +154,18 @@ impl DataStore {
}
/// Fast index verification - only check if chunks exists
pub fn fast_index_verification(&self, index: &dyn IndexFile) -> Result<(), Error> {
pub fn fast_index_verification(
&self,
index: &dyn IndexFile,
checked: &mut HashSet<[u8;32]>,
) -> Result<(), Error> {
for pos in 0..index.index_count() {
let info = index.chunk_info(pos).unwrap();
if checked.contains(&info.digest) {
continue;
}
self.stat_chunk(&info.digest).
map_err(|err| {
format_err!(
@ -166,6 +174,8 @@ impl DataStore {
err,
)
})?;
checked.insert(info.digest);
}
Ok(())