use anyhow::{bail, format_err, Error};
use std::sync::{Arc, Mutex};
use std::collections::HashMap;
use nix::dir::Dir;

use ::serde::{Serialize};
use serde_json::{json, Value};

use proxmox::tools::digest_to_hex;
use proxmox::tools::fs::{replace_file, CreateOptions};
use proxmox::api::{RpcEnvironment, RpcEnvironmentType};

use crate::api2::types::Authid;
use crate::backup::*;
use crate::server::WorkerTask;
use crate::server::formatter::*;
use hyper::{Body, Response};

#[derive(Copy, Clone, Serialize)]
struct UploadStatistic {
    count: u64,
    size: u64,
    compressed_size: u64,
    duplicates: u64,
}

impl UploadStatistic {
    fn new() -> Self {
        Self {
            count: 0,
            size: 0,
            compressed_size: 0,
            duplicates: 0,
        }
    }
}

impl std::ops::Add for UploadStatistic {
    type Output = Self;

    fn add(self, other: Self) -> Self {
        Self {
            count: self.count + other.count,
            size: self.size + other.size,
            compressed_size: self.compressed_size + other.compressed_size,
            duplicates: self.duplicates + other.duplicates,
        }
    }
}
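
// Illustrative sketch (not part of the original code): a minimal, test-only
// module showing how the `Add` impl above folds per-writer statistics into a
// backup-wide total, mirroring `state.backup_stat = state.backup_stat + data.upload_stat`
// in the writer close methods further down.
#[cfg(test)]
mod upload_statistic_add_sketch {
    use super::UploadStatistic;

    #[test]
    fn add_sums_every_field() {
        let fixed = UploadStatistic { count: 2, size: 8192, compressed_size: 4096, duplicates: 1 };
        let dynamic = UploadStatistic { count: 3, size: 12288, compressed_size: 6144, duplicates: 0 };

        let total = fixed + dynamic;

        assert_eq!(total.count, 5);
        assert_eq!(total.size, 20480);
        assert_eq!(total.compressed_size, 10240);
        assert_eq!(total.duplicates, 1);
    }
}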

struct DynamicWriterState {
    name: String,
    index: DynamicIndexWriter,
    offset: u64,
    chunk_count: u64,
    upload_stat: UploadStatistic,
}

struct FixedWriterState {
    name: String,
    index: FixedIndexWriter,
    size: usize,
    chunk_size: u32,
    chunk_count: u64,
    small_chunk_count: usize, // allow 0..1 small chunks (last chunk may be smaller)
    upload_stat: UploadStatistic,
    incremental: bool,
}

// Note: we deliberately do not re-verify that registered chunks still exist on
// disk. The per-datastore flock (held shared by backup writers and exclusively
// by garbage collection) already guarantees that GC cannot sweep chunks
// referenced by a running backup, even across a proxy reload, so the finish
// step needs no extra verification. An opt-in "verify after finish" option is
// planned instead (see #2988).

// key=digest, value=length
type KnownChunksMap = HashMap<[u8;32], u32>;

struct SharedBackupState {
    finished: bool,
    uid_counter: usize,
    file_counter: usize, // successfully uploaded files
    dynamic_writers: HashMap<usize, DynamicWriterState>,
    fixed_writers: HashMap<usize, FixedWriterState>,
    known_chunks: KnownChunksMap,
    backup_size: u64, // sums up size of all files
    backup_stat: UploadStatistic,
}

impl SharedBackupState {

    // Raise error if finished flag is set
    fn ensure_unfinished(&self) -> Result<(), Error> {
        if self.finished {
            bail!("backup already marked as finished.");
        }
        Ok(())
    }

    // Get a unique integer ID
    pub fn next_uid(&mut self) -> usize {
        self.uid_counter += 1;
        self.uid_counter
    }
}
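
// Illustrative sketch (not part of the original code): exercises the uid
// counter and the finished-flag guard on an otherwise empty `SharedBackupState`.
#[cfg(test)]
mod shared_backup_state_sketch {
    use super::*;
    use std::collections::HashMap;

    fn empty_state() -> SharedBackupState {
        SharedBackupState {
            finished: false,
            uid_counter: 0,
            file_counter: 0,
            dynamic_writers: HashMap::new(),
            fixed_writers: HashMap::new(),
            known_chunks: HashMap::new(),
            backup_size: 0,
            backup_stat: UploadStatistic::new(),
        }
    }

    #[test]
    fn uid_counter_is_monotonic() {
        let mut state = empty_state();
        assert_eq!(state.next_uid(), 1);
        assert_eq!(state.next_uid(), 2);
    }

    #[test]
    fn finished_state_rejects_further_changes() {
        let mut state = empty_state();
        assert!(state.ensure_unfinished().is_ok());
        state.finished = true;
        assert!(state.ensure_unfinished().is_err());
    }
}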

/// `RpcEnvironment` implementation for backup service
#[derive(Clone)]
pub struct BackupEnvironment {
    env_type: RpcEnvironmentType,
    result_attributes: Value,
    auth_id: Authid,
    pub debug: bool,
    pub formatter: &'static OutputFormatter,
    pub worker: Arc<WorkerTask>,
    pub datastore: Arc<DataStore>,
    pub backup_dir: BackupDir,
    pub last_backup: Option<BackupInfo>,
    state: Arc<Mutex<SharedBackupState>>
}

impl BackupEnvironment {
    pub fn new(
        env_type: RpcEnvironmentType,
        auth_id: Authid,
        worker: Arc<WorkerTask>,
        datastore: Arc<DataStore>,
        backup_dir: BackupDir,
    ) -> Self {

        let state = SharedBackupState {
            finished: false,
            uid_counter: 0,
            file_counter: 0,
            dynamic_writers: HashMap::new(),
            fixed_writers: HashMap::new(),
            known_chunks: HashMap::new(),
            backup_size: 0,
            backup_stat: UploadStatistic::new(),
        };

        Self {
            result_attributes: json!({}),
            env_type,
            auth_id,
            worker,
            datastore,
            debug: false,
            formatter: &JSON_FORMATTER,
            backup_dir,
            last_backup: None,
            state: Arc::new(Mutex::new(state)),
        }
    }

    /// Register a Chunk with associated length.
    ///
    /// We do not fully trust clients, so a client may only use registered
    /// chunks. Please use this method to register chunks from previous backups.
    pub fn register_chunk(&self, digest: [u8; 32], length: u32) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        state.known_chunks.insert(digest, length);

        Ok(())
    }
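
    // Illustrative call order (sketch only; `env`, `digest` and `length` are
    // hypothetical values, typically taken from the previous snapshot's index):
    //
    //     env.register_chunk(digest, length)?;               // announce a known chunk
    //     assert_eq!(env.lookup_chunk(&digest), Some(length));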

    /// Register fixed length chunks after upload.
    ///
    /// Like `register_chunk()`, but additionally record statistics for
    /// the fixed index writer.
    pub fn register_fixed_chunk(
        &self,
        wid: usize,
        digest: [u8; 32],
        size: u32,
        compressed_size: u32,
        is_duplicate: bool,
    ) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let mut data = match state.fixed_writers.get_mut(&wid) {
            Some(data) => data,
            None => bail!("fixed writer '{}' not registered", wid),
        };

        if size > data.chunk_size {
            bail!("fixed writer '{}' - got large chunk ({} > {})", data.name, size, data.chunk_size);
        }

        if size < data.chunk_size {
            data.small_chunk_count += 1;
            if data.small_chunk_count > 1 {
                bail!("fixed writer '{}' - detected multiple end chunks (chunk size too small)", data.name);
            }
        }

        // record statistics
        data.upload_stat.count += 1;
        data.upload_stat.size += size as u64;
        data.upload_stat.compressed_size += compressed_size as u64;
        if is_duplicate { data.upload_stat.duplicates += 1; }

        // register chunk
        state.known_chunks.insert(digest, size);

        Ok(())
    }

    /// Register dynamic length chunks after upload.
    ///
    /// Like `register_chunk()`, but additionally record statistics for
    /// the dynamic index writer.
    pub fn register_dynamic_chunk(
        &self,
        wid: usize,
        digest: [u8; 32],
        size: u32,
        compressed_size: u32,
        is_duplicate: bool,
    ) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let mut data = match state.dynamic_writers.get_mut(&wid) {
            Some(data) => data,
            None => bail!("dynamic writer '{}' not registered", wid),
        };

        // record statistics
        data.upload_stat.count += 1;
        data.upload_stat.size += size as u64;
        data.upload_stat.compressed_size += compressed_size as u64;
        if is_duplicate { data.upload_stat.duplicates += 1; }

        // register chunk
        state.known_chunks.insert(digest, size);

        Ok(())
    }

    pub fn lookup_chunk(&self, digest: &[u8; 32]) -> Option<u32> {
        let state = self.state.lock().unwrap();

        match state.known_chunks.get(digest) {
            Some(len) => Some(*len),
            None => None,
        }
    }

    /// Store the writer with a unique ID
    pub fn register_dynamic_writer(&self, index: DynamicIndexWriter, name: String) -> Result<usize, Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let uid = state.next_uid();

        state.dynamic_writers.insert(uid, DynamicWriterState {
            index, name, offset: 0, chunk_count: 0, upload_stat: UploadStatistic::new(),
        });

        Ok(uid)
    }
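
    // Illustrative lifecycle of a dynamic (content-chunked) archive upload
    // (sketch only; `env`, `index`, `digest`, offsets and sizes are hypothetical):
    //
    //     let wid = env.register_dynamic_writer(index, "catalog.pcat1.didx".to_string())?;
    //     env.register_dynamic_chunk(wid, digest, size, compressed_size, false)?;
    //     env.dynamic_writer_append_chunk(wid, offset, size, &digest)?;
    //     env.dynamic_writer_close(wid, chunk_count, total_size, csum)?;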

    /// Store the writer with a unique ID
    pub fn register_fixed_writer(&self, index: FixedIndexWriter, name: String, size: usize, chunk_size: u32, incremental: bool) -> Result<usize, Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let uid = state.next_uid();

        state.fixed_writers.insert(uid, FixedWriterState {
            index, name, chunk_count: 0, size, chunk_size, small_chunk_count: 0, upload_stat: UploadStatistic::new(), incremental,
        });

        Ok(uid)
    }
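
    // Illustrative lifecycle of a fixed-size (image) archive upload
    // (sketch only; `env`, `index` and the chunk values are hypothetical):
    //
    //     let wid = env.register_fixed_writer(index, "drive-scsi0.img.fidx".to_string(), image_size, chunk_size, false)?;
    //     env.register_fixed_chunk(wid, digest, size, compressed_size, false)?;
    //     env.fixed_writer_append_chunk(wid, offset, size, &digest)?;
    //     env.fixed_writer_close(wid, chunk_count, image_size as u64, csum)?;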

    /// Append chunk to dynamic writer
    pub fn dynamic_writer_append_chunk(&self, wid: usize, offset: u64, size: u32, digest: &[u8; 32]) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let mut data = match state.dynamic_writers.get_mut(&wid) {
            Some(data) => data,
            None => bail!("dynamic writer '{}' not registered", wid),
        };

        if data.offset != offset {
            bail!("dynamic writer '{}' append chunk failed - got strange chunk offset ({} != {})",
                  data.name, data.offset, offset);
        }

        data.offset += size as u64;
        data.chunk_count += 1;

        data.index.add_chunk(data.offset, digest)?;

        Ok(())
    }

    /// Append chunk to fixed writer
    pub fn fixed_writer_append_chunk(&self, wid: usize, offset: u64, size: u32, digest: &[u8; 32]) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let mut data = match state.fixed_writers.get_mut(&wid) {
            Some(data) => data,
            None => bail!("fixed writer '{}' not registered", wid),
        };

        let end = (offset as usize) + (size as usize);
        let idx = data.index.check_chunk_alignment(end, size as usize)?;

        data.chunk_count += 1;

        data.index.add_digest(idx, digest)?;

        Ok(())
    }

    fn log_upload_stat(&self, archive_name: &str, csum: &[u8; 32], uuid: &[u8; 16], size: u64, chunk_count: u64, upload_stat: &UploadStatistic) {
        self.log(format!("Upload statistics for '{}'", archive_name));
        self.log(format!("UUID: {}", digest_to_hex(uuid)));
        self.log(format!("Checksum: {}", digest_to_hex(csum)));
        self.log(format!("Size: {}", size));
        self.log(format!("Chunk count: {}", chunk_count));

        if size == 0 || chunk_count == 0 {
            return;
        }

        self.log(format!("Upload size: {} ({}%)", upload_stat.size, (upload_stat.size*100)/size));

        // account for zero chunk, which might be uploaded but never used
        let client_side_duplicates = if chunk_count < upload_stat.count {
            0
        } else {
            chunk_count - upload_stat.count
        };

        let server_side_duplicates = upload_stat.duplicates;

        if (client_side_duplicates + server_side_duplicates) > 0 {
            let per = (client_side_duplicates + server_side_duplicates)*100/chunk_count;
            self.log(format!("Duplicates: {}+{} ({}%)", client_side_duplicates, server_side_duplicates, per));
        }

        if upload_stat.size > 0 {
            self.log(format!("Compression: {}%", (upload_stat.compressed_size*100)/upload_stat.size));
        }
    }
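
    // Worked example for the statistics above (illustrative numbers): with
    // chunk_count = 1000 index entries, upload_stat.count = 800 uploaded chunks
    // and upload_stat.duplicates = 50, the client skipped 1000 - 800 = 200
    // chunks it already knew, the server deduplicated another 50, and the log
    // reports "Duplicates: 200+50 (25%)". With upload_stat.size = 400 MiB and
    // upload_stat.compressed_size = 100 MiB, "Compression: 25%" is logged.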

    /// Close dynamic writer
    pub fn dynamic_writer_close(&self, wid: usize, chunk_count: u64, size: u64, csum: [u8; 32]) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let mut data = match state.dynamic_writers.remove(&wid) {
            Some(data) => data,
            None => bail!("dynamic writer '{}' not registered", wid),
        };

        if data.chunk_count != chunk_count {
            bail!("dynamic writer '{}' close failed - unexpected chunk count ({} != {})", data.name, data.chunk_count, chunk_count);
        }

        if data.offset != size {
            bail!("dynamic writer '{}' close failed - unexpected file size ({} != {})", data.name, data.offset, size);
        }

        let uuid = data.index.uuid;

        let expected_csum = data.index.close()?;

        if csum != expected_csum {
            bail!("dynamic writer '{}' close failed - got unexpected checksum", data.name);
        }

        self.log_upload_stat(&data.name, &csum, &uuid, size, chunk_count, &data.upload_stat);

        state.file_counter += 1;
        state.backup_size += size;
        state.backup_stat = state.backup_stat + data.upload_stat;

        Ok(())
    }

    /// Close fixed writer
    pub fn fixed_writer_close(&self, wid: usize, chunk_count: u64, size: u64, csum: [u8; 32]) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        let mut data = match state.fixed_writers.remove(&wid) {
            Some(data) => data,
            None => bail!("fixed writer '{}' not registered", wid),
        };

        if data.chunk_count != chunk_count {
            bail!("fixed writer '{}' close failed - received wrong number of chunks ({} != {})", data.name, data.chunk_count, chunk_count);
        }

        if !data.incremental {
            let expected_count = data.index.index_length();

            if chunk_count != (expected_count as u64) {
                bail!("fixed writer '{}' close failed - unexpected chunk count ({} != {})", data.name, expected_count, chunk_count);
            }

            if size != (data.size as u64) {
                bail!("fixed writer '{}' close failed - unexpected file size ({} != {})", data.name, data.size, size);
            }
        }

        let uuid = data.index.uuid;
        let expected_csum = data.index.close()?;

        if csum != expected_csum {
            bail!("fixed writer '{}' close failed - got unexpected checksum", data.name);
        }

        self.log_upload_stat(&data.name, &expected_csum, &uuid, size, chunk_count, &data.upload_stat);

        state.file_counter += 1;
        state.backup_size += size;
        state.backup_stat = state.backup_stat + data.upload_stat;

        Ok(())
    }

    pub fn add_blob(&self, file_name: &str, data: Vec<u8>) -> Result<(), Error> {

        let mut path = self.datastore.base_path();
        path.push(self.backup_dir.relative_path());
        path.push(file_name);

        let blob_len = data.len();
        let orig_len = data.len(); // fixme:

        // always verify blob/CRC at server side
        let blob = DataBlob::load_from_reader(&mut &data[..])?;

        let raw_data = blob.raw_data();
        replace_file(&path, raw_data, CreateOptions::new())?;

        self.log(format!("add blob {:?} ({} bytes, comp: {})", path, orig_len, blob_len));

        let mut state = self.state.lock().unwrap();
        state.file_counter += 1;
        state.backup_size += orig_len as u64;
        state.backup_stat.size += blob_len as u64;

        Ok(())
    }

    /// Mark backup as finished
    pub fn finish_backup(&self) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();

        state.ensure_unfinished()?;

        // test if all writers are correctly closed
        if !state.dynamic_writers.is_empty() || !state.fixed_writers.is_empty() {
            bail!("found open index writer - unable to finish backup");
        }

        if state.file_counter == 0 {
            bail!("backup does not contain valid files (file count == 0)");
        }

        // check for valid manifest and store stats
        let stats = serde_json::to_value(state.backup_stat)?;
        self.datastore.update_manifest(&self.backup_dir, |manifest| {
            manifest.unprotected["chunk_upload_stats"] = stats;
        }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;

        if let Some(base) = &self.last_backup {
            let path = self.datastore.snapshot_path(&base.backup_dir);
            if !path.exists() {
                bail!(
                    "base snapshot {} was removed during backup, cannot finish as chunks might be missing",
                    base.backup_dir
                );
            }
        }

        // marks the backup as successful
        state.finished = true;

        Ok(())
    }

    /// If verify-new is set on the datastore, this will run a new verify task
    /// for the backup. If not, this will return and also drop the passed lock
    /// immediately.
    pub fn verify_after_complete(&self, snap_lock: Dir) -> Result<(), Error> {
        self.ensure_finished()?;

        if !self.datastore.verify_new() {
            // no verify requested, do nothing
            return Ok(());
        }

        let worker_id = format!("{}:{}/{}/{:08X}",
            self.datastore.name(),
            self.backup_dir.group().backup_type(),
            self.backup_dir.group().backup_id(),
            self.backup_dir.backup_time());

        let datastore = self.datastore.clone();
        let backup_dir = self.backup_dir.clone();

        WorkerTask::new_thread(
            "verify",
            Some(worker_id),
            self.auth_id.clone(),
            false,
            move |worker| {
                worker.log("Automatically verifying newly added snapshot");

                let verify_worker = crate::backup::VerifyWorker::new(worker.clone(), datastore);
                if !verify_backup_dir_with_lock(
                    &verify_worker,
                    &backup_dir,
                    worker.upid().clone(),
                    None,
                    snap_lock,
                )? {
                    bail!("verification failed - please check the log for details");
                }

                Ok(())
            },
        ).map(|_| ())
    }
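
    // Illustrative sequence at the end of an upload session (sketch only; `env`
    // and `snap_lock` are hypothetical, the snapshot lock is acquired by the caller):
    //
    //     env.finish_backup()?;                    // marks the session as finished
    //     env.verify_after_complete(snap_lock)?;   // no-op unless verify-new is set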

    pub fn log<S: AsRef<str>>(&self, msg: S) {
        self.worker.log(msg);
    }

    pub fn debug<S: AsRef<str>>(&self, msg: S) {
        if self.debug { self.worker.log(msg); }
    }

    pub fn format_response(&self, result: Result<Value, Error>) -> Response<Body> {
        match result {
            Ok(data) => (self.formatter.format_data)(data, self),
            Err(err) => (self.formatter.format_error)(err),
        }
    }

    /// Raise error if finished flag is not set
    pub fn ensure_finished(&self) -> Result<(), Error> {
        let state = self.state.lock().unwrap();
        if !state.finished {
            bail!("backup ended but finished flag is not set.");
        }
        Ok(())
    }

    /// Return true if the finished flag is set
    pub fn finished(&self) -> bool {
        let state = self.state.lock().unwrap();
        state.finished
    }

    /// Remove complete backup
    pub fn remove_backup(&self) -> Result<(), Error> {
        let mut state = self.state.lock().unwrap();
        state.finished = true;

        self.datastore.remove_backup_dir(&self.backup_dir, true)?;

        Ok(())
    }
}

impl RpcEnvironment for BackupEnvironment {

    fn result_attrib_mut(&mut self) -> &mut Value {
        &mut self.result_attributes
    }

    fn result_attrib(&self) -> &Value {
        &self.result_attributes
    }

    fn env_type(&self) -> RpcEnvironmentType {
        self.env_type
    }

    fn set_auth_id(&mut self, _auth_id: Option<String>) {
        panic!("unable to change auth_id");
    }

    fn get_auth_id(&self) -> Option<String> {
        Some(self.auth_id.to_string())
    }
}

impl AsRef<BackupEnvironment> for dyn RpcEnvironment {
    fn as_ref(&self) -> &BackupEnvironment {
        self.as_any().downcast_ref::<BackupEnvironment>().unwrap()
    }
}

impl AsRef<BackupEnvironment> for Box<dyn RpcEnvironment> {
    fn as_ref(&self) -> &BackupEnvironment {
        self.as_any().downcast_ref::<BackupEnvironment>().unwrap()
    }
}
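
// Illustrative use of the `AsRef` impls above from an API handler (sketch only;
// `rpcenv` is the hypothetical `&mut dyn RpcEnvironment` passed to the handler,
// and the message text is an arbitrary example):
//
//     let env: &BackupEnvironment = rpcenv.as_ref();
//     env.debug("handler obtained the backup environment via downcast");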