tools: add helpful vector and read operations

After importing the I/O ops trait via:
    use crate::tools::io::ops::*;

Instead of:
    let mut buffer = vec![0u8; 65536];
    file.read_exact(&mut buffer)?;
use:
    let buffer = file.read_exact_allocated(65536)?;

After importing the vector helpers via:
    use crate::tools::vec::{self, ops::*};

For a buffer which *could* be uninitialized but you prefer
zero-initialization anyway for security reasons, instead of:
    let mut buffer = vec![0u8; len];
use:
    let mut buffer = vec::undefined(len);
which zero-initializes, but, if the `valgrind` feature flag
is enabled, marks the vector as having undefined contents,
so reading from it will cause valgrind errors.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Wolfgang Bumiller 2019-05-22 14:43:24 +02:00
parent 8bea85b42e
commit 9cdda3f7c7
6 changed files with 721 additions and 0 deletions

View File

@ -47,6 +47,11 @@ pam = "0.7"
zstd = "0.4"
xdg = "2.2"
mio = "0.6"
valgrind_request = { version = "1.1", optional = true }
[features]
default = []
valgrind = ["valgrind_request"]
[replace]
"zstd-sys:1.4.8" = { path = "zstd-sys" }

View File

@ -37,6 +37,8 @@ pub mod read;
pub mod write;
pub mod acl;
pub mod xattr;
pub mod vec;
pub mod io;
mod process_locker;
pub use process_locker::*;

303
src/tools/io.rs Normal file
View File

@ -0,0 +1,303 @@
//! Module providing I/O helpers (sync and async).
//!
//! The [`ops`](io::ops) module provides helper traits for types implementing [`Read`](std::io::Read).
//!
//! The top-level functions of this module are standalone implementations of functionality which
//! is actually intended to be available as methods on types implementing `AsyncRead`. Without
//! async/await, however, they cannot be methods, because the returned futures would then have
//! non-static lifetimes.
//!
//! ```
//! use std::io;
//!
//! use crate::tools::io::read_exact_allocated;
//! use crate::tools::vec::{self, ops::*};
//!
//! // Currently usable:
//! fn do_something() -> impl Future<Item = Vec<u8>, Error = io::Error> {
//! tokio::fs::File::open("some.file")
//! .and_then(|file| read_exact_allocated(file, 1024))
//! .and_then(|(file, mut buffer)| {
//! do_something_with(&buffer);
//! // append more data:
//! tokio::io::read_exact(file, unsafe { buffer.grow_uninitialized(1024) })
//! })
//! .and_then(|(_file, bigger_buffer)| {
//! use_the(bigger_buffer);
//! Ok(bigger_buffer)
//! });
//! }
//!
//! // Future async/await variant:
//! async fn do_something() -> Vec<u8> {
//! let mut file = tokio::fs::File::open("some.file").await?;
//! let mut buffer = file.read_exact_allocated(1024).await?;
//! do_something_with(buffer);
//! file.append_to_vec(&mut buffer, 1024).await?;
//! buffer
//! }
//! ```
use std::io;
use futures::Future;
use futures::{Async, Poll};
use tokio::io::AsyncRead;
use crate::tools::vec::{self, ops::*};
pub mod ops;
/// Build a future that reads exactly `size` bytes from `reader` into a freshly allocated buffer.
///
/// The buffer is not allocated here; the returned future allocates it the first time it is
/// polled. On success the future yields a tuple of the reader and the `Vec<u8>` holding the data.
///
/// Example:
/// ```
/// tokio::fs::File::open("some.file")
///     .and_then(|file| read_exact_allocated(file, 1024))
///     .and_then(|(_file, data)| {
///         use_the(data);
///         Ok(())
///     })
/// ```
pub fn read_exact_allocated<R: AsyncRead>(reader: R, size: usize) -> ReadExactAllocated<R> {
    let input = Some(reader);
    // No buffer yet: it is created lazily on the first poll.
    let buffer = None;
    ReadExactAllocated(input, buffer, size)
}
/// A future returned by [`read_exact_allocated`].
///
/// Tuple fields, in order:
/// * the reader, wrapped in an `Option` so it can be moved out when the future resolves,
/// * the buffer, `None` until the first poll allocates it,
/// * a counter which holds the requested size before the first poll and the current read
///   position afterwards.
pub struct ReadExactAllocated<R: AsyncRead>(Option<R>, Option<Vec<u8>>, usize);
impl<R: AsyncRead> Future for ReadExactAllocated<R> {
    type Item = (R, Vec<u8>);
    type Error = io::Error;

    /// Drive the read until the buffer is full.
    ///
    /// Resolves to the reader and the filled buffer. Fails with `UnexpectedEof` if the reader
    /// reports end-of-file before the buffer is full, or with whatever error the reader returns.
    ///
    /// # Panics
    ///
    /// Panics with "polled after ready" if polled again after it resolved or failed.
    fn poll(&mut self) -> Poll<Self::Item, Self::Error> {
        assert!(self.0.is_some(), "polled after ready");

        // allocation happens on first poll:
        if self.1.is_none() {
            self.1 = Some(unsafe { vec::uninitialized(self.2) });
            // now self.2 is the position:
            self.2 = 0;
        }

        let mut buffer = self.1.take().unwrap();

        // Looping on "position < length" (instead of unconditionally) makes a zero-sized request
        // complete immediately rather than mistaking the empty read for EOF.
        while self.2 < buffer.len() {
            match self.0.as_mut().unwrap().poll_read(&mut buffer[self.2..]) {
                Ok(Async::Ready(0)) => {
                    self.0 = None;
                    return Err(io::Error::from(io::ErrorKind::UnexpectedEof));
                }
                Ok(Async::Ready(some)) => self.2 += some,
                Ok(Async::NotReady) => {
                    // Park the buffer so the next poll continues where we stopped.
                    self.1 = Some(buffer);
                    return Ok(Async::NotReady);
                }
                Err(err) => {
                    self.0 = None;
                    return Err(err);
                }
            }
        }

        // Moving the reader out (rather than clearing it first) both hands it to the caller and
        // marks this future as finished.
        Ok(Async::Ready((self.0.take().unwrap(), buffer)))
    }
}
/// Build a future which appends at most `size` bytes to a vector, growing it as needed.
///
/// The vector is grown right away, as if by a single `.reserve(size)` call, and the future then
/// fills it with as much data as one read call provides. Afterwards the vector's length is
/// adjusted to cover only the bytes actually read.
///
/// The future yields a tuple of the reader, the vector and the number of bytes read.
///
/// Example:
/// ```
/// tokio::fs::File::open("some.file")
///     .and_then(|file| append_to_vec(file, Vec::new(), 1024))
///     .and_then(|(_file, data, size)| {
///         assert!(data.len() == size);
///         println!("Actually got {} bytes of data.", size);
///         use_the(data);
///         Ok(())
///     })
/// ```
pub fn append_to_vec<R, V>(reader: R, mut vector: V, size: usize) -> AppendToVec<R, V>
where
    R: AsyncRead,
    V: AsMut<Vec<u8>>,
{
    // Remember where the existing data ends; the read will start writing there.
    let start = vector.as_mut().len();

    // Make room for the incoming bytes; they stay uninitialized until the read fills them.
    unsafe {
        vector.as_mut().grow_uninitialized(size);
    }

    let input = Some(reader);
    let output = Some(vector);
    AppendToVec(input, output, start)
}
/// A future returned by [`append_to_vec`].
///
/// Tuple fields, in order:
/// * the reader, wrapped in an `Option` so it can be moved out when the future resolves,
/// * the (already grown) vector, wrapped in an `Option` for the same reason,
/// * the offset within the vector at which the appended data starts.
pub struct AppendToVec<R, V>(Option<R>, Option<V>, usize)
where
    R: AsyncRead,
    V: AsMut<Vec<u8>>;
impl<R, V> Future for AppendToVec<R, V>
where
    R: AsyncRead,
    V: AsMut<Vec<u8>>,
{
    type Item = (R, V, usize);
    type Error = io::Error;

    /// Perform a single read into the space reserved at the end of the vector.
    ///
    /// Resolves to the reader, the vector (shrunk to the data actually present) and the number
    /// of bytes read. Panics with "polled after ready" when polled after completion or failure.
    fn poll(&mut self) -> Poll<Self::Item, Self::Error> {
        assert!(self.0.is_some() && self.1.is_some(), "polled after ready");

        let mut output = self.1.take().unwrap();
        let start = self.2;
        let polled = self
            .0
            .as_mut()
            .unwrap()
            .poll_read(&mut output.as_mut()[start..]);

        match polled {
            Ok(Async::Ready(count)) => {
                // Drop the part of the reserved space which was not filled.
                unsafe {
                    output.as_mut().set_len(start + count);
                }
                let reader = self.0.take().unwrap();
                Ok(Async::Ready((reader, output, count)))
            }
            Ok(Async::NotReady) => {
                // Keep the vector around for the next poll.
                self.1 = Some(output);
                Ok(Async::NotReady)
            }
            Err(err) => {
                self.0 = None;
                Err(err)
            }
        }
    }
}
/// Build a future which appends exactly `size` bytes to a vector, growing it as needed.
///
/// The vector is grown right away, as if by a single `.reserve(size)` call, and the future then
/// fills it with as much data as requested. If not enough data is available, the future fails
/// with an [`io::Error`](std::io::Error) of kind
/// [`ErrorKind::UnexpectedEof`](std::io::ErrorKind::UnexpectedEof).
///
/// The future yields a tuple of the reader and the vector.
///
/// Example:
/// ```
/// tokio::fs::File::open("some.file")
///     .and_then(|file| append_exact_to_vec(file, Vec::new(), 1024))
///     .and_then(|(_file, data)| {
///         assert!(data.len() == 1024);
///         use_the(data);
///         Ok(())
///     })
/// ```
pub fn append_exact_to_vec<R, V>(reader: R, mut vector: V, size: usize) -> AppendExactToVec<R, V>
where
    R: AsyncRead,
    V: AsMut<Vec<u8>>,
{
    // Remember where the existing data ends; the read will start writing there.
    let start = vector.as_mut().len();

    // Make room for the incoming bytes; they stay uninitialized until the read fills them.
    unsafe {
        vector.as_mut().grow_uninitialized(size);
    }

    let input = Some(reader);
    let output = Some(vector);
    AppendExactToVec(input, output, start)
}
/// A future returned by [`append_exact_to_vec`].
///
/// Tuple fields, in order:
/// * the reader, wrapped in an `Option` so it can be moved out when the future resolves,
/// * the (already grown) vector, wrapped in an `Option` for the same reason,
/// * the current write position within the vector; it starts at the vector's original length
///   and advances with every successful read.
pub struct AppendExactToVec<R, V>(Option<R>, Option<V>, usize)
where
    R: AsyncRead,
    V: AsMut<Vec<u8>>;
impl<R, V> Future for AppendExactToVec<R, V>
where
    R: AsyncRead,
    V: AsMut<Vec<u8>>,
{
    type Item = (R, V);
    type Error = io::Error;

    /// Drive the read until the space reserved at the end of the vector is completely filled.
    ///
    /// Resolves to the reader and the vector. Fails with `UnexpectedEof` if the reader reports
    /// end-of-file before all requested bytes arrived, or with whatever error the reader returns.
    ///
    /// # Panics
    ///
    /// Panics with "polled after ready" if polled again after it resolved or failed.
    fn poll(&mut self) -> Poll<Self::Item, Self::Error> {
        assert!(self.0.is_some() && self.1.is_some(), "polled after ready");

        let mut output = self.1.take().unwrap();

        // Looping on "position < length" (instead of unconditionally) makes a zero-sized request
        // complete immediately rather than mistaking the empty read for EOF.
        while self.2 < output.as_mut().len() {
            match self.0.as_mut().unwrap().poll_read(&mut output.as_mut()[self.2..]) {
                Ok(Async::Ready(0)) => {
                    self.0 = None;
                    return Err(io::Error::from(io::ErrorKind::UnexpectedEof));
                }
                Ok(Async::Ready(some)) => self.2 += some,
                Ok(Async::NotReady) => {
                    // Park the vector so the next poll continues where we stopped.
                    self.1 = Some(output);
                    return Ok(Async::NotReady);
                }
                Err(err) => {
                    self.0 = None;
                    return Err(err);
                }
            }
        }

        // Moving the reader out (rather than clearing it first) both hands it to the caller and
        // marks this future as finished.
        Ok(Async::Ready((self.0.take().unwrap(), output)))
    }
}
/*
* TODO: A trait such as the one below is only useful inside `async fn`, so this part will have to
* wait...
*
* When we have async/await we can finish this and move it into io/async_read.rs
/// Some additional related functionality for types implementing `AsyncRead`. Note that most of
/// these methods map to functions from the [`io`](super::io) module, which are standalone
/// variants.
///
/// This trait only works with standard futures or as part of `poll_fn` bodies, due to it requiring
/// non-static lifetimes on futures.
pub trait AsyncReadExtOps: AsyncRead + Sized {
/// Read data into a newly allocated vector. This is a shortcut for:
/// ```
/// let mut data = Vec::with_capacity(len);
/// unsafe {
/// data.set_len(len);
/// }
/// reader.read_exact(&mut data)
/// ```
///
/// With this trait, we just use:
/// ```
/// use crate::tools::vec::ops::*;
///
/// let data = reader.read_exact_allocated(len).await?;
/// ```
fn read_exact_allocated(&mut self, size: usize) -> ReadExactAllocated<&mut Self> {
ReadExactAllocated(crate::tools::io::read_exact_allocated(self, size))
}
}
impl<T: AsyncRead + Sized> AsyncReadExtOps for T {
}
pub struct ReadExactAllocated<R: AsyncRead>(crate::tools::io::ReadExactAllocated<R>);
impl<R: AsyncRead> futures::Future for ReadExactAllocated<R> {
type Item = Vec<u8>;
type Error = io::Error;
fn poll(&mut self) -> futures::Poll<Self::Item, Self::Error> {
let (_this, data) = futures::try_ready!(self.0.poll());
Ok(futures::Async::Ready(data))
}
}
*/

203
src/tools/io/ops.rs Normal file
View File

@ -0,0 +1,203 @@
//! This module provides additional operations for handling byte buffers for types implementing
//! [`Read`](std::io::Read).
//!
//! See the [`ReadExtOps`](ops::ReadExtOps) trait for examples.
use std::io;
use endian_trait::Endian;
use crate::tools::vec::{self, ops::*};
/// Adds some additional related functionality for types implementing [`Read`](std::io::Read).
///
/// Particularly for reading into a newly allocated buffer, appending to a `Vec<u8>` or reading
/// values of a specific endianness (types implementing [`Endian`]).
///
/// Examples:
/// ```
/// use crate::tools::io::ops::*;
///
/// let mut file = std::fs::File::open("some.data")?;
///
/// // read some bytes into a newly allocated Vec<u8>:
/// let mut data = file.read_exact_allocated(header.data_size as usize)?;
///
/// // appending data to a vector:
/// let actually_appended = file.append_to_vec(&mut data, length)?; // .read() version
/// file.append_exact_to_vec(&mut data, length)?; // .read_exact() version
/// ```
///
/// Or for reading values of a defined representation and endianness:
///
/// ```
/// #[derive(Endian)]
/// #[repr(C)]
/// struct Header {
///     version: u16,
///     data_size: u16,
/// }
///
/// // We have given `Header` a proper binary representation via `#[repr]`, so this is safe:
/// let header: Header = unsafe { file.read_le_value()? };
/// let mut blob = file.read_exact_allocated(header.data_size as usize)?;
/// ```
///
/// [`Endian`]: https://docs.rs/endian_trait/0.6/endian_trait/trait.Endian.html
pub trait ReadExtOps {
    /// Read data into a newly allocated vector. This is a shortcut for:
    /// ```
    /// let mut data = Vec::with_capacity(len);
    /// unsafe {
    ///     data.set_len(len);
    /// }
    /// reader.read_exact(&mut data)?;
    /// ```
    ///
    /// With this trait, we just use:
    /// ```
    /// use crate::tools::io::ops::*;
    ///
    /// let data = reader.read_exact_allocated(len)?;
    /// ```
    fn read_exact_allocated(&mut self, size: usize) -> io::Result<Vec<u8>>;

    /// Append data to a vector, growing it as necessary. Returns the amount of data appended.
    ///
    /// This is the `.read()` flavor: at most `size` bytes are appended, possibly fewer.
    fn append_to_vec(&mut self, out: &mut Vec<u8>, size: usize) -> io::Result<usize>;

    /// Append an exact amount of data to a vector, growing it as necessary.
    ///
    /// This is the `.read_exact()` flavor: either all `size` bytes are appended or an error is
    /// returned.
    fn append_exact_to_vec(&mut self, out: &mut Vec<u8>, size: usize) -> io::Result<()>;

    /// Read a value with host endianness.
    ///
    /// This is limited to types implementing the [`Endian`] trait under the assumption that
    /// this is only done for types which are supposed to be read/writable directly.
    ///
    /// # Safety
    ///
    /// There's no way to directly depend on a type having a specific `#[repr(...)]`, therefore
    /// this is considered unsafe: the bytes read from the input are used as the value's
    /// in-memory representation, so the caller has to make sure `T` has a well-defined layout
    /// for which this is valid.
    ///
    /// ```
    /// use crate::tools::io::ops::*;
    ///
    /// #[derive(Endian)]
    /// #[repr(C, packed)]
    /// struct Data {
    ///     value: u16,
    ///     count: u32,
    /// }
    ///
    /// let mut file = std::fs::File::open("my-raw.dat")?;
    /// // We know `Data` has a safe binary representation (#[repr(C, packed)]), so we can
    /// // safely use our helper:
    /// let data: Data = unsafe { file.read_host_value()? };
    /// ```
    ///
    /// [`Endian`]: https://docs.rs/endian_trait/0.6/endian_trait/trait.Endian.html
    unsafe fn read_host_value<T: Endian>(&mut self) -> io::Result<T>;

    /// Read a little endian value.
    ///
    /// The return type is required to implement the [`Endian`] trait, and we make the
    /// assumption that this is only done for types which are supposed to be read/writable
    /// directly.
    ///
    /// # Safety
    ///
    /// There's no way to directly depend on a type having a specific `#[repr(...)]`, therefore
    /// this is considered unsafe. The same requirements as for
    /// [`read_host_value`](ReadExtOps::read_host_value) apply.
    ///
    /// ```
    /// use crate::tools::io::ops::*;
    ///
    /// #[derive(Endian)]
    /// #[repr(C, packed)]
    /// struct Data {
    ///     value: u16,
    ///     count: u32,
    /// }
    ///
    /// let mut file = std::fs::File::open("my-little-endian.dat")?;
    /// // We know `Data` has a safe binary representation (#[repr(C, packed)]), so we can
    /// // safely use our helper:
    /// let data: Data = unsafe { file.read_le_value()? };
    /// ```
    ///
    /// [`Endian`]: https://docs.rs/endian_trait/0.6/endian_trait/trait.Endian.html
    unsafe fn read_le_value<T: Endian>(&mut self) -> io::Result<T>;

    /// Read a big endian value.
    ///
    /// The return type is required to implement the [`Endian`] trait, and we make the
    /// assumption that this is only done for types which are supposed to be read/writable
    /// directly.
    ///
    /// # Safety
    ///
    /// There's no way to directly depend on a type having a specific `#[repr(...)]`, therefore
    /// this is considered unsafe. The same requirements as for
    /// [`read_host_value`](ReadExtOps::read_host_value) apply.
    ///
    /// ```
    /// use crate::tools::io::ops::*;
    ///
    /// #[derive(Endian)]
    /// #[repr(C, packed)]
    /// struct Data {
    ///     value: u16,
    ///     count: u32,
    /// }
    ///
    /// let mut file = std::fs::File::open("my-big-endian.dat")?;
    /// // We know `Data` has a safe binary representation (#[repr(C, packed)]), so we can
    /// // safely use our helper:
    /// let data: Data = unsafe { file.read_be_value()? };
    /// ```
    ///
    /// [`Endian`]: https://docs.rs/endian_trait/0.6/endian_trait/trait.Endian.html
    unsafe fn read_be_value<T: Endian>(&mut self) -> io::Result<T>;
}
impl<R: io::Read> ReadExtOps for R {
fn read_exact_allocated(&mut self, size: usize) -> io::Result<Vec<u8>> {
let mut out = unsafe { vec::uninitialized(size) };
self.read_exact(&mut out)?;
Ok(out)
}
fn append_to_vec(&mut self, out: &mut Vec<u8>, size: usize) -> io::Result<usize> {
let pos = out.len();
unsafe {
out.grow_uninitialized(size);
}
let got = self.read(&mut out[pos..])?;
unsafe {
out.set_len(pos + got);
}
Ok(got)
}
fn append_exact_to_vec(&mut self, out: &mut Vec<u8>, size: usize) -> io::Result<()> {
let pos = out.len();
unsafe {
out.grow_uninitialized(size);
}
self.read_exact(&mut out[pos..])?;
Ok(())
}
unsafe fn read_host_value<T: Endian>(&mut self) -> io::Result<T> {
let mut value: T = std::mem::uninitialized();
self.read_exact(std::slice::from_raw_parts_mut(
&mut value as *mut T as *mut u8,
std::mem::size_of::<T>(),
))?;
Ok(value)
}
unsafe fn read_le_value<T: Endian>(&mut self) -> io::Result<T> {
Ok(self.read_host_value::<T>()?.
from_le()
)
}
unsafe fn read_be_value<T: Endian>(&mut self) -> io::Result<T> {
Ok(self.read_host_value::<T>()?
.from_be()
)
}
}

118
src/tools/vec.rs Normal file
View File

@ -0,0 +1,118 @@
//! Byte vector helpers.
//!
//! We have a lot of I/O code such as:
//! ```
//! let mut buffer = vec![0u8; header_size];
//! file.read_exact(&mut buffer)?;
//! ```
//! (We even have this case with a 4M buffer!)
//!
//! This needlessly initializes the buffer to zero (which not only wastes time (an insane amount of
//! time on debug builds, actually) but also prevents tools such as valgrind from pointing out
//! access to actually uninitialized data, which may hide bugs...)
//!
//! This module provides some helpers for this kind of code. Many of these are supposed to stay on
//! a lower level, with I/O helpers for types implementing [`Read`](std::io::Read) being available
//! in the [`tools::io`](crate::tools::io) module.
//!
//! Examples:
//! ```
//! use crate::tools::vec::{self, ops::*};
//!
//! let mut buffer = vec::undefined(size); // A zero-initialized buffer with valgrind support
//!
//! let mut buffer = unsafe { vec::uninitialized(size) }; // an actually uninitialized buffer
//! vec::clear(&mut buffer); // zero out an &mut [u8]
//!
//! vec::clear(unsafe {
//! buffer.grow_uninitialized(more) // grow the buffer with uninitialized bytes
//! });
//! ```
pub mod ops;
/// Allocate a byte vector of `len` bytes without initializing its contents.
///
/// This is just a shortcut for:
/// ```
/// let mut v = Vec::with_capacity(len);
/// unsafe {
///     v.set_len(len);
/// }
/// ```
///
/// # Safety
///
/// The returned vector's contents are uninitialized; the caller must write to the bytes before
/// reading them.
#[inline]
pub unsafe fn uninitialized(len: usize) -> Vec<u8> {
    let mut buffer: Vec<u8> = Vec::with_capacity(len);
    // The capacity is at least `len`, so the new length stays within the allocation.
    buffer.set_len(len);
    buffer
}
/// Overwrite every byte of `data` with zero.
#[inline]
pub fn clear(data: &mut [u8]) {
    let start = data.as_mut_ptr();
    let count = data.len();
    // SAFETY: `start` and `count` describe exactly the exclusively borrowed slice `data`.
    unsafe { std::ptr::write_bytes(start, 0, count) };
}
/// Create a newly allocated, zero initialized byte vector of `len` bytes.
///
/// Uses the standard `vec!` macro, so no `unsafe` code and no separate clearing pass over
/// uninitialized memory is needed.
#[inline]
pub fn zeroed(len: usize) -> Vec<u8> {
    vec![0u8; len]
}
/// Create a newly allocated byte vector of `len` bytes with "undefined" content.
///
/// The bytes are always zero initialized. When the crate is built with the `valgrind` feature,
/// the buffer is additionally marked as undefined for valgrind, which helps with debugging code
/// that reads the buffer before filling it.
#[inline]
pub fn undefined(len: usize) -> Vec<u8> {
    let out = zeroed(len);

    // Only compiled in with the `valgrind` feature; the request's return value is not needed.
    #[cfg(feature = "valgrind")]
    vg::make_slice_undefined(&out[..]);

    out
}
// Valgrind client-request helpers; only compiled when the `valgrind` feature is enabled.
#[cfg(feature = "valgrind")]
mod vg {
    // Value type used by the `valgrind_request` crate for request arguments and results.
    type ValgrindValue = valgrind_request::Value;

    /// Mark a memory region as undefined when using valgrind, causing it to treat read access to
    /// it as error.
    ///
    /// `addr` and `len` describe the region. Returns whatever the client request returns.
    #[inline]
    pub(crate) fn make_mem_undefined(addr: *const u8, len: usize) -> ValgrindValue {
        // Request code: the characters 'M' and 'C' in the two upper bytes, plus 1.
        // NOTE(review): presumably mirrors memcheck's `VG_USERREQ__MAKE_MEM_UNDEFINED` — confirm
        // against valgrind's `memcheck.h`.
        const MAKE_MEM_UNDEFINED: ValgrindValue =
            (((b'M' as ValgrindValue) << 24) | ((b'C' as ValgrindValue) << 16)) + 1;
        unsafe {
            valgrind_request::do_client_request(
                // First argument is presumably the default result used when not running under
                // valgrind — TODO confirm with the `valgrind_request` documentation.
                0,
                &[
                    MAKE_MEM_UNDEFINED,
                    addr as usize as ValgrindValue,
                    len as ValgrindValue,
                    // Remaining request slots are unused.
                    0, 0, 0,
                ],
            )
        }
    }

    /// Mark a slice of bytes as undefined when using valgrind, causing it to treat read access to
    /// it as error.
    ///
    /// Convenience wrapper passing the slice's pointer and length to [`make_mem_undefined`].
    #[inline]
    pub(crate) fn make_slice_undefined(data: &[u8]) -> ValgrindValue {
        make_mem_undefined(data.as_ptr(), data.len())
    }
}

90
src/tools/vec/ops.rs Normal file
View File

@ -0,0 +1,90 @@
//! This module provides additional operations for `Vec<u8>`.
//!
//! Example:
//! ```
//! use crate::tools::vec::{self, ops::*};
//!
//! fn append_1024_to_vec<T: Read>(input: T, buffer: &mut Vec<u8>) -> std::io::Result<()> {
//! input.read_exact(unsafe { buffer.grow_uninitialized(1024) })
//! }
//! ```
/// Extra byte vector operations which come in handy for I/O code.
///
/// Example:
/// ```
/// use crate::tools::vec::{self, ops::*};
///
/// let mut data = file.read_exact_allocated(1024)?;
/// do_something();
/// file.read_exact(unsafe {
///     data.grow_uninitialized(1024)
/// })?;
/// ```
///
/// Note that for the case where `grow_uninitialized()` is directly followed by a `read_exact()`
/// call, the [`ReadExtOps`] trait provides a safe alternative:
/// ```
/// file.append_exact_to_vec(&mut data, 1024)?;
/// ```
///
/// [`ReadExtOps`]: crate::tools::io::ops::ReadExtOps
pub trait VecU8ExtOps {
    /// Extend a vector by `more` bytes without initializing them. In contrast to a plain
    /// `reserve`, the length is updated as well, so the new bytes become part of the slice.
    ///
    /// This is a shortcut for:
    /// ```
    /// vec.reserve(more);
    /// let total = vec.len() + more;
    /// unsafe {
    ///     vec.set_len(total);
    /// }
    /// ```
    ///
    /// The return value is a mutable slice covering only the newly added space, which allows
    /// inline use:
    /// ```
    /// file.read_exact(unsafe { buffer.grow_uninitialized(1024) })?;
    /// ```
    ///
    /// For exactly this case, however, the even shorter variant from the [`ReadExtOps`] trait is
    /// recommended:
    /// ```
    /// // use crate::tools::io::ops::ReadExtOps;
    /// file.append_exact_to_vec(&mut buffer, 1024)?;
    /// ```
    ///
    /// # Safety
    ///
    /// The added bytes are uninitialized; the caller must write to them before reading them.
    ///
    /// [`ReadExtOps`]: crate::tools::io::ops::ReadExtOps
    unsafe fn grow_uninitialized(&mut self, more: usize) -> &mut [u8];

    /// Set a vector's length to `total` without initializing any added bytes. This is a shortcut
    /// for:
    /// ```
    /// if new_size <= vec.len() {
    ///     vec.truncate(new_size);
    /// } else {
    ///     unsafe {
    ///         vec.grow_uninitialized(new_size - vec.len());
    ///     }
    /// }
    /// ```
    ///
    /// # Safety
    ///
    /// When the vector grows, the added bytes are uninitialized; the caller must write to them
    /// before reading them.
    unsafe fn resize_uninitialized(&mut self, total: usize);
}

impl VecU8ExtOps for Vec<u8> {
    unsafe fn grow_uninitialized(&mut self, more: usize) -> &mut [u8] {
        let start = self.len();
        self.reserve(more);
        self.set_len(start + more);
        &mut self[start..]
    }

    unsafe fn resize_uninitialized(&mut self, new_size: usize) {
        let current = self.len();
        if new_size > current {
            self.grow_uninitialized(new_size - current);
        } else {
            self.truncate(new_size);
        }
    }
}