From d32a8652bd81e69be90ce351a778a47da750b786 Mon Sep 17 00:00:00 2001
From: Stefan Reiter
Date: Wed, 31 Mar 2021 12:21:54 +0200
Subject: [PATCH] file-restore-daemon: add disk module

Includes functionality for scanning and referring to partitions on
attached disks (i.e. snapshot images).

Fairly modular structure, so adding ZFS/LVM/etc... support in the future
should be easy.

The path is encoded as "/disk/bucket/component/path/to/file", e.g.
"/drive-scsi0/part/0/etc/passwd". See the comments for further
explanations on the design.

Signed-off-by: Stefan Reiter
---
 src/bin/proxmox-restore-daemon.rs      |  16 +-
 src/bin/proxmox_restore_daemon/disk.rs | 329 +++++++++++++++++++++++++
 src/bin/proxmox_restore_daemon/mod.rs  |   3 +
 3 files changed, 347 insertions(+), 1 deletion(-)
 create mode 100644 src/bin/proxmox_restore_daemon/disk.rs

diff --git a/src/bin/proxmox-restore-daemon.rs b/src/bin/proxmox-restore-daemon.rs
index 6b453ad3..a2701b7c 100644
--- a/src/bin/proxmox-restore-daemon.rs
+++ b/src/bin/proxmox-restore-daemon.rs
@@ -1,13 +1,14 @@
 ///! Daemon binary to run inside a micro-VM for secure single file restore of disk images
 use anyhow::{bail, format_err, Error};
 use log::error;
+use lazy_static::lazy_static;
 
 use std::os::unix::{
     io::{FromRawFd, RawFd},
     net,
 };
 use std::path::Path;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
@@ -26,6 +27,13 @@ pub const MAX_PENDING: usize = 32;
 /// Will be present in base initramfs
 pub const VM_DETECT_FILE: &str = "/restore-vm-marker";
 
+lazy_static! {
+    /// The current disks state. Use for accessing data on the attached snapshots.
+    pub static ref DISK_STATE: Arc<Mutex<DiskState>> = {
+        Arc::new(Mutex::new(DiskState::scan().unwrap()))
+    };
+}
+
 /// This is expected to be run by 'proxmox-file-restore' within a mini-VM
 fn main() -> Result<(), Error> {
     if !Path::new(VM_DETECT_FILE).exists() {
@@ -41,6 +49,12 @@ fn main() -> Result<(), Error> {
         .write_style(env_logger::WriteStyle::Never)
         .init();
 
+    // scan all attached disks now, before starting the API
+    // this will panic and stop the VM if anything goes wrong
+    {
+        let _disk_state = DISK_STATE.lock().unwrap();
+    }
+
     proxmox_backup::tools::runtime::main(run())
 }
 
diff --git a/src/bin/proxmox_restore_daemon/disk.rs b/src/bin/proxmox_restore_daemon/disk.rs
new file mode 100644
index 00000000..f9d7c8aa
--- /dev/null
+++ b/src/bin/proxmox_restore_daemon/disk.rs
@@ -0,0 +1,329 @@
+//! Low-level disk (image) access functions for file restore VMs.
+use anyhow::{bail, format_err, Error};
+use lazy_static::lazy_static;
+use log::{info, warn};
+
+use std::collections::HashMap;
+use std::fs::{create_dir_all, File};
+use std::io::{BufRead, BufReader};
+use std::path::{Component, Path, PathBuf};
+
+use proxmox::const_regex;
+use proxmox::tools::fs;
+use proxmox_backup::api2::types::BLOCKDEVICE_NAME_REGEX;
+
+const_regex! {
+    VIRTIO_PART_REGEX = r"^vd[a-z]+(\d+)$";
+}
+
+lazy_static! {
+    static ref FS_OPT_MAP: HashMap<&'static str, &'static str> = {
+        let mut m = HashMap::new();
+
+        // otherwise ext complains about mounting read-only
+        m.insert("ext2", "noload");
+        m.insert("ext3", "noload");
+        m.insert("ext4", "noload");
+
+        // ufs2 is used as default since FreeBSD 5.0 released in 2003, so let's assume that
+        // whatever the user is trying to restore is not using anything older...
+        m.insert("ufs", "ufstype=ufs2");
+
+        m
+    };
+}
+
+pub enum ResolveResult {
+    Path(PathBuf),
+    BucketTypes(Vec<&'static str>),
+    BucketComponents(Vec<String>),
+}
+
+struct PartitionBucketData {
+    dev_node: String,
+    number: i32,
+    mountpoint: Option<PathBuf>,
+}
+
+/// A "Bucket" represents a mapping found on a disk, e.g. a partition, a zfs dataset or an LV. A
+/// uniquely identifying path to a file then consists of four components:
+/// "/disk/bucket/component/path"
+/// where
+///   disk: fidx file name
+///   bucket: bucket type
+///   component: identifier of the specific bucket
+///   path: relative path of the file on the filesystem indicated by the other parts, may contain
+///         more subdirectories
+/// e.g.: "/drive-scsi0/part/0/etc/passwd"
+enum Bucket {
+    Partition(PartitionBucketData),
+}
+
+impl Bucket {
+    fn filter_mut<'a, A: AsRef<str>, B: AsRef<str>>(
+        haystack: &'a mut Vec<Bucket>,
+        ty: A,
+        comp: B,
+    ) -> Option<&'a mut Bucket> {
+        let ty = ty.as_ref();
+        let comp = comp.as_ref();
+        haystack.iter_mut().find(|b| match b {
+            Bucket::Partition(data) => ty == "part" && comp.parse::<i32>().unwrap() == data.number,
+        })
+    }
+
+    fn type_string(&self) -> &'static str {
+        match self {
+            Bucket::Partition(_) => "part",
+        }
+    }
+
+    fn component_string(&self) -> String {
+        match self {
+            Bucket::Partition(data) => data.number.to_string(),
+        }
+    }
+}
+
+/// Functions related to the local filesystem. This mostly exists so we can use 'supported_fs' in
+/// try_mount while a Bucket is still mutably borrowed from DiskState.
+struct Filesystems {
+    supported_fs: Vec<String>,
+}
+
+impl Filesystems {
+    fn scan() -> Result<Self, Error> {
+        // detect kernel supported filesystems
+        let mut supported_fs = Vec::new();
+        for f in BufReader::new(File::open("/proc/filesystems")?)
+            .lines()
+            .filter_map(Result::ok)
+        {
+            // ZFS is treated specially, don't attempt to do a regular mount with it
+            let f = f.trim();
+            if !f.starts_with("nodev") && f != "zfs" {
+                supported_fs.push(f.to_owned());
+            }
+        }
+
+        Ok(Self { supported_fs })
+    }
+
+    fn ensure_mounted(&self, bucket: &mut Bucket) -> Result<PathBuf, Error> {
+        match bucket {
+            Bucket::Partition(data) => {
+                // regular data partition à la "/dev/vdxN"
+                if let Some(mp) = &data.mountpoint {
+                    return Ok(mp.clone());
+                }
+
+                let mp = format!("/mnt{}/", data.dev_node);
+                self.try_mount(&data.dev_node, &mp)?;
+                let mp = PathBuf::from(mp);
+                data.mountpoint = Some(mp.clone());
+                Ok(mp)
+            }
+        }
+    }
+
+    fn try_mount(&self, source: &str, target: &str) -> Result<(), Error> {
+        use nix::mount::*;
+
+        create_dir_all(target)?;
+
+        // try all supported fs until one works - this is the way Busybox's 'mount' does it too:
+        // https://git.busybox.net/busybox/tree/util-linux/mount.c?id=808d93c0eca49e0b22056e23d965f0d967433fbb#n2152
+        // note that ZFS is intentionally left out (see scan())
+        let flags =
+            MsFlags::MS_RDONLY | MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV;
+        for fs in &self.supported_fs {
+            let fs: &str = fs.as_ref();
+            let opts = FS_OPT_MAP.get(fs).copied();
+            match mount(Some(source), target, Some(fs), flags, opts) {
+                Ok(()) => {
+                    info!("mounting '{}' succeeded, fstype: '{}'", source, fs);
+                    return Ok(());
+                }
+                Err(err) => {
+                    warn!("mount error on '{}' ({}) - {}", source, fs, err);
+                }
+            }
+        }
+
+        bail!("all mounts failed or no supported file system")
+    }
+}
+
+pub struct DiskState {
+    filesystems: Filesystems,
+    disk_map: HashMap<String, Vec<Bucket>>,
+}
+
+impl DiskState {
+    /// Scan all disks for supported buckets.
+    pub fn scan() -> Result<Self, Error> {
+        // create mapping for virtio drives and .fidx files (via serial description)
+        // note: disks::DiskManager relies on udev, which we don't have
+        let mut disk_map = HashMap::new();
+        for entry in proxmox_backup::tools::fs::scan_subdir(
+            libc::AT_FDCWD,
+            "/sys/block",
+            &BLOCKDEVICE_NAME_REGEX,
+        )?
+        .filter_map(Result::ok)
+        {
+            let name = unsafe { entry.file_name_utf8_unchecked() };
+            if !name.starts_with("vd") {
+                continue;
+            }
+
+            let sys_path: &str = &format!("/sys/block/{}", name);
+
+            let serial = fs::file_read_string(&format!("{}/serial", sys_path));
+            let fidx = match serial {
+                Ok(serial) => serial,
+                Err(err) => {
+                    warn!("disk '{}': could not read serial file - {}", name, err);
+                    continue;
+                }
+            };
+
+            let mut parts = Vec::new();
+            for entry in proxmox_backup::tools::fs::scan_subdir(
+                libc::AT_FDCWD,
+                sys_path,
+                &VIRTIO_PART_REGEX,
+            )?
+            .filter_map(Result::ok)
+            {
+                let part_name = unsafe { entry.file_name_utf8_unchecked() };
+                let devnode = format!("/dev/{}", part_name);
+                let part_path = format!("/sys/block/{}/{}", name, part_name);
+
+                // create partition device node for further use
+                let dev_num_str = fs::file_read_firstline(&format!("{}/dev", part_path))?;
+                let (major, minor) = dev_num_str.split_at(dev_num_str.find(':').unwrap());
+                Self::mknod_blk(&devnode, major.parse()?, minor[1..].trim_end().parse()?)?;
+
+                let number = fs::file_read_firstline(&format!("{}/partition", part_path))?
+                    .trim()
+                    .parse::<i32>()?;
+
+                info!(
+                    "drive '{}' ('{}'): found partition '{}' ({})",
+                    name, fidx, devnode, number
+                );
+
+                let bucket = Bucket::Partition(PartitionBucketData {
+                    dev_node: devnode,
+                    mountpoint: None,
+                    number,
+                });
+
+                parts.push(bucket);
+            }
+
+            disk_map.insert(fidx.to_owned(), parts);
+        }
+
+        Ok(Self {
+            filesystems: Filesystems::scan()?,
+            disk_map,
+        })
+    }
+
+    /// Given a path like "/drive-scsi0.img.fidx/part/0/etc/passwd", this will mount the first
+    /// partition of 'drive-scsi0' on-demand (i.e. if not already mounted) and return a path
+    /// pointing to the requested file locally, e.g. "/mnt/vda1/etc/passwd", which can be used to
+    /// read the file. Given a partial path, i.e. only "/drive-scsi0.img.fidx" or
+    /// "/drive-scsi0.img.fidx/part", it will return a list of available bucket types or bucket
+    /// components respectively
+    pub fn resolve(&mut self, path: &Path) -> Result<ResolveResult, Error> {
+        let mut cmp = path.components().peekable();
+        match cmp.peek() {
+            Some(Component::RootDir) | Some(Component::CurDir) => {
+                cmp.next();
+            }
+            None => bail!("empty path cannot be resolved to file location"),
+            _ => {}
+        }
+
+        let req_fidx = match cmp.next() {
+            Some(Component::Normal(x)) => x.to_string_lossy(),
+            _ => bail!("no or invalid image in path"),
+        };
+
+        let buckets = match self.disk_map.get_mut(req_fidx.as_ref()) {
+            Some(x) => x,
+            None => bail!("given image '{}' not found", req_fidx),
+        };
+
+        let bucket_type = match cmp.next() {
+            Some(Component::Normal(x)) => x.to_string_lossy(),
+            Some(c) => bail!("invalid bucket in path: {:?}", c),
+            None => {
+                // list bucket types available
+                let mut types = buckets
+                    .iter()
+                    .map(|b| b.type_string())
+                    .collect::<Vec<&'static str>>();
+                // dedup requires duplicates to be consecutive, which is the case - see scan()
+                types.dedup();
+                return Ok(ResolveResult::BucketTypes(types));
+            }
+        };
+
+        let component = match cmp.next() {
+            Some(Component::Normal(x)) => x.to_string_lossy(),
+            Some(c) => bail!("invalid bucket component in path: {:?}", c),
+            None => {
+                // list bucket components available
+                let comps = buckets
+                    .iter()
+                    .filter(|b| b.type_string() == bucket_type)
+                    .map(Bucket::component_string)
+                    .collect();
+                return Ok(ResolveResult::BucketComponents(comps));
+            }
+        };
+
+        let mut bucket = match Bucket::filter_mut(buckets, &bucket_type, &component) {
+            Some(bucket) => bucket,
+            None => bail!(
+                "bucket/component path not found: {}/{}/{}",
+                req_fidx,
+                bucket_type,
+                component
+            ),
+        };
+
+        // bucket found, check mount
+        let mountpoint = self
+            .filesystems
+            .ensure_mounted(&mut bucket)
+            .map_err(|err| {
+                format_err!(
+                    "mounting '{}/{}/{}' failed: {}",
+                    req_fidx,
+                    bucket_type,
+                    component,
+                    err
+                )
+            })?;
+
+        let mut local_path = PathBuf::new();
+        local_path.push(mountpoint);
+        for rem in cmp {
+            local_path.push(rem);
+        }
+
+        Ok(ResolveResult::Path(local_path))
+    }
+
+    fn mknod_blk(path: &str, maj: u64, min: u64) -> Result<(), Error> {
+        use nix::sys::stat;
+        let dev = stat::makedev(maj, min);
+        stat::mknod(path, stat::SFlag::S_IFBLK, stat::Mode::S_IRWXU, dev)?;
+        Ok(())
+    }
+}
diff --git a/src/bin/proxmox_restore_daemon/mod.rs b/src/bin/proxmox_restore_daemon/mod.rs
index 3b52cf06..58e2bb6e 100644
--- a/src/bin/proxmox_restore_daemon/mod.rs
+++ b/src/bin/proxmox_restore_daemon/mod.rs
@@ -6,3 +6,6 @@ pub mod auth;
 
 mod watchdog;
 pub use watchdog::*;
+
+mod disk;
+pub use disk::*;