proxmox-backup/proxmox-rest-server/src/rest.rs

771 lines
25 KiB
Rust
Raw Normal View History

use std::collections::HashMap;
use std::future::Future;
2019-10-26 09:36:01 +00:00
use std::hash::BuildHasher;
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll};
use anyhow::{bail, format_err, Error};
2019-11-22 12:02:05 +00:00
use futures::future::{self, FutureExt, TryFutureExt};
use futures::stream::TryStreamExt;
use hyper::body::HttpBody;
use hyper::header::{self, HeaderMap};
use hyper::http::request::Parts;
use hyper::{Body, Request, Response, StatusCode};
use lazy_static::lazy_static;
use regex::Regex;
use serde_json::Value;
use tokio::fs::File;
use tokio::time::Instant;
use url::form_urlencoded;
2021-09-30 11:49:29 +00:00
use tower_service::Service;
use proxmox_router::{
check_api_permission, ApiHandler, ApiMethod, HttpError, Permission, RpcEnvironment,
RpcEnvironmentType, UserInformation,
};
use proxmox_router::http_err;
use proxmox_schema::{
parse_parameter_strings, parse_simple_value, verify_json_object, ObjectSchemaType,
ParameterSchema,
};
use proxmox_http::client::RateLimitedStream;
use proxmox_async::compression::{DeflateEncoder, Level};
use proxmox_async::stream::AsyncReaderStream;
use crate::{
ApiConfig, FileLogger, AuthError, RestEnvironment, CompressionMethod,
normalize_uri_path, formatter::*,
};
// C-ABI declaration of `tzset()` (presumably the libc function - confirm against the link
// setup). This file calls it after handling API methods flagged with `reload_timezone`.
extern "C" {
fn tzset();
}
2019-02-01 08:54:56 +00:00
// Response extension holding the authentication ID as a string. `handle_request` attaches it
// to API responses and `log_response` reads it for the access log entry.
struct AuthStringExtension(String);
/// [UserInformation] implementation that grants nothing.
///
/// Used as the placeholder user information until a request has been authenticated.
struct EmptyUserInformation {}

impl UserInformation for EmptyUserInformation {
    /// Nobody is a superuser.
    fn is_superuser(&self, _userid: &str) -> bool {
        false
    }

    /// Nobody is a member of any group.
    fn is_group_member(&self, _userid: &str, _group: &str) -> bool {
        false
    }

    /// No privileges on any path.
    fn lookup_privs(&self, _userid: &str, _path: &[&str]) -> u64 {
        0
    }
}
/// REST server implementation (configured with [ApiConfig])
2021-09-30 11:49:29 +00:00
///
/// This struct implements the [Service] trait in order to use it with
/// [hyper::server::Builder::serve].
2018-11-15 09:18:01 +00:00
pub struct RestServer {
api_config: Arc<ApiConfig>,
2018-11-15 09:18:01 +00:00
}
server: rest: implement max URI path and query length request limits Add a generous limit now and return the correct error (414 URI Too Long). Otherwise we could get pretty large GET requests, 64 KiB and possibly bigger (at 64 KiB my simple curl test failed due to shell/curl limitations). For now allow 3072 characters as combined length of URI path and query. This conforms with the HTTP/1.1 RFCs (e.g., RFC 7231, 6.5.12 and RFC 2616, 3.2.1) which do not specify any limits, upper or lower, but require that all server accessible resources must be reachable without getting 414, which is normally fulfilled as we have various length limits for stuff which could be in an URI, in place, e.g.: * user id: max. 64 chars * datastore: max. 32 chars The only known problematic API endpoint is the catalog one, used in the GUI's pxar file browser: GET /api2/json/admin/datastore/<id>/catalog?..&filepath=<path> The <path> is the encoded archive path, and can be arbitrarily long. But, this is a flawed design, as even without this new limit one can easily generate archives which cannot be browsed anymore, as hyper only accepts requests with max. 64 KiB in the URI. So rather, we should move that to a GET-as-POST call, which has no such limitations (and would not need to base32 encode the path). Note: This change was inspired by adding a request access log, which profits from such limits as we can then rely on certain atomicity guarantees when writing requests to the log. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2020-10-15 15:49:16 +00:00
// Maximum combined length of URI path and query. `handle_request` answers longer requests
// with 414 (URI Too Long), and `log_response` cuts the logged path to this many bytes.
const MAX_URI_QUERY_LENGTH: usize = 3072;
// Size threshold (32 KiB) for static files: `handle_static_file_download` reads smaller files
// in one go and streams everything else. Also passed to `DeflateEncoder::compress_vec`.
const CHUNK_SIZE_LIMIT: u64 = 32 * 1024;
server: rest: implement max URI path and query length request limits Add a generous limit now and return the correct error (414 URI Too Long). Otherwise we could get pretty large GET requests, 64 KiB and possibly bigger (at 64 KiB my simple curl test failed due to shell/curl limitations). For now allow 3072 characters as combined length of URI path and query. This conforms with the HTTP/1.1 RFCs (e.g., RFC 7231, 6.5.12 and RFC 2616, 3.2.1) which do not specify any limits, upper or lower, but require that all server accessible resources must be reachable without getting 414, which is normally fulfilled as we have various length limits for stuff which could be in an URI, in place, e.g.: * user id: max. 64 chars * datastore: max. 32 chars The only known problematic API endpoint is the catalog one, used in the GUI's pxar file browser: GET /api2/json/admin/datastore/<id>/catalog?..&filepath=<path> The <path> is the encoded archive path, and can be arbitrarily long. But, this is a flawed design, as even without this new limit one can easily generate archives which cannot be browsed anymore, as hyper only accepts requests with max. 64 KiB in the URI. So rather, we should move that to a GET-as-POST call, which has no such limitations (and would not need to base32 encode the path). Note: This change was inspired by adding a request access log, which profits from such limits as we can then rely on certain atomicity guarantees when writing requests to the log. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2020-10-15 15:49:16 +00:00
2018-11-15 09:18:01 +00:00
impl RestServer {
2021-09-30 11:49:29 +00:00
/// Creates a new instance.
2018-11-15 09:18:01 +00:00
pub fn new(api_config: ApiConfig) -> Self {
Self {
api_config: Arc::new(api_config),
}
2018-11-15 09:18:01 +00:00
}
}
impl Service<&Pin<Box<tokio_openssl::SslStream<RateLimitedStream<tokio::net::TcpStream>>>>>
for RestServer
{
type Response = ApiService;
type Error = Error;
type Future = Pin<Box<dyn Future<Output = Result<ApiService, Error>> + Send>>;
fn poll_ready(&mut self, _cx: &mut Context) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
fn call(
&mut self,
ctx: &Pin<Box<tokio_openssl::SslStream<RateLimitedStream<tokio::net::TcpStream>>>>,
) -> Self::Future {
match ctx.get_ref().peer_addr() {
Err(err) => future::err(format_err!("unable to get peer address - {}", err)).boxed(),
Ok(peer) => future::ok(ApiService {
peer,
api_config: self.api_config.clone(),
})
.boxed(),
}
}
}
2021-09-30 11:49:29 +00:00
impl Service<&Pin<Box<tokio_openssl::SslStream<tokio::net::TcpStream>>>>
for RestServer
{
type Response = ApiService;
type Error = Error;
type Future = Pin<Box<dyn Future<Output = Result<ApiService, Error>> + Send>>;
fn poll_ready(&mut self, _cx: &mut Context) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
fn call(
&mut self,
ctx: &Pin<Box<tokio_openssl::SslStream<tokio::net::TcpStream>>>,
) -> Self::Future {
match ctx.get_ref().peer_addr() {
Err(err) => future::err(format_err!("unable to get peer address - {}", err)).boxed(),
Ok(peer) => future::ok(ApiService {
peer,
api_config: self.api_config.clone(),
})
.boxed(),
2019-07-03 10:00:43 +00:00
}
2018-11-15 09:18:01 +00:00
}
}
2021-09-30 11:49:29 +00:00
impl Service<&hyper::server::conn::AddrStream> for RestServer {
type Response = ApiService;
type Error = Error;
type Future = Pin<Box<dyn Future<Output = Result<ApiService, Error>> + Send>>;
fn poll_ready(&mut self, _cx: &mut Context) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
fn call(&mut self, ctx: &hyper::server::conn::AddrStream) -> Self::Future {
let peer = ctx.remote_addr();
future::ok(ApiService {
peer,
api_config: self.api_config.clone(),
})
.boxed()
}
}
2021-09-30 11:49:29 +00:00
impl Service<&tokio::net::UnixStream> for RestServer {
type Response = ApiService;
type Error = Error;
type Future = Pin<Box<dyn Future<Output = Result<ApiService, Error>> + Send>>;
fn poll_ready(&mut self, _cx: &mut Context) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
fn call(&mut self, _ctx: &tokio::net::UnixStream) -> Self::Future {
// TODO: Find a way to actually represent the vsock peer in the ApiService struct - for now
// it doesn't really matter, so just use a fake IP address
let fake_peer = "0.0.0.0:807".parse().unwrap();
future::ok(ApiService {
peer: fake_peer,
api_config: self.api_config.clone(),
})
.boxed()
}
}
// Helper [Service] containing the peer Address
//
// The lower level connection [Service] implementation on
// [RestServer] extracts the peer address and return an [ApiService].
//
// Rust wants this type 'pub' here (else we get 'private type `ApiService`
// in public interface'). The type is still private because the crate does
// not export it.
2018-11-15 09:18:01 +00:00
pub struct ApiService {
pub peer: std::net::SocketAddr,
2018-11-15 09:18:01 +00:00
pub api_config: Arc<ApiConfig>,
}
fn log_response(
logfile: Option<&Arc<Mutex<FileLogger>>>,
peer: &std::net::SocketAddr,
method: hyper::Method,
path_query: &str,
resp: &Response<Body>,
user_agent: Option<String>,
) {
if resp.extensions().get::<NoLogExtension>().is_some() {
return;
};
server: rest: implement max URI path and query length request limits Add a generous limit now and return the correct error (414 URI Too Long). Otherwise we could to pretty larger GET requests, 64 KiB and possible bigger (at 64 KiB my simple curl test failed due to shell/curl limitations). For now allow a 3072 characters as combined length of URI path and query. This is conform with the HTTP/1.1 RFCs (e.g., RFC 7231, 6.5.12 and RFC 2616, 3.2.1) which do not specify any limits, upper or lower, but require that all server accessible resources mus be reachable without getting 414, which is normally fulfilled as we have various length limits for stuff which could be in an URI, in place, e.g.: * user id: max. 64 chars * datastore: max. 32 chars The only known problematic API endpoint is the catalog one, used in the GUI's pxar file browser: GET /api2/json/admin/datastore/<id>/catalog?..&filepath=<path> The <path> is the encoded archive path, and can be arbitrary long. But, this is a flawed design, as even without this new limit one can easily generate archives which cannot be browsed anymore, as hyper only accepts requests with max. 64 KiB in the URI. So rather, we should move that to a GET-as-POST call, which has no such limitations (and would not need to base32 encode the path). Note: This change was inspired by adding a request access log, which profits from such limits as we can then rely on certain atomicity guarantees when writing requests to the log. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2020-10-15 15:49:16 +00:00
// we also log URL-to-long requests, so avoid message bigger than PIPE_BUF (4k on Linux)
// to profit from atomicty guarantees for O_APPEND opened logfiles
let path = &path_query[..MAX_URI_QUERY_LENGTH.min(path_query.len())];
server: rest: implement max URI path and query length request limits Add a generous limit now and return the correct error (414 URI Too Long). Otherwise we could to pretty larger GET requests, 64 KiB and possible bigger (at 64 KiB my simple curl test failed due to shell/curl limitations). For now allow a 3072 characters as combined length of URI path and query. This is conform with the HTTP/1.1 RFCs (e.g., RFC 7231, 6.5.12 and RFC 2616, 3.2.1) which do not specify any limits, upper or lower, but require that all server accessible resources mus be reachable without getting 414, which is normally fulfilled as we have various length limits for stuff which could be in an URI, in place, e.g.: * user id: max. 64 chars * datastore: max. 32 chars The only known problematic API endpoint is the catalog one, used in the GUI's pxar file browser: GET /api2/json/admin/datastore/<id>/catalog?..&filepath=<path> The <path> is the encoded archive path, and can be arbitrary long. But, this is a flawed design, as even without this new limit one can easily generate archives which cannot be browsed anymore, as hyper only accepts requests with max. 64 KiB in the URI. So rather, we should move that to a GET-as-POST call, which has no such limitations (and would not need to base32 encode the path). Note: This change was inspired by adding a request access log, which profits from such limits as we can then rely on certain atomicity guarantees when writing requests to the log. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2020-10-15 15:49:16 +00:00
let status = resp.status();
if !(status.is_success() || status.is_informational()) {
let reason = status.canonical_reason().unwrap_or("unknown reason");
let message = match resp.extensions().get::<ErrorMessageExtension>() {
Some(data) => &data.0,
None => "request failed",
};
log::error!(
"{} {}: {} {}: [client {}] {}",
method.as_str(),
path,
status.as_str(),
reason,
peer,
message
);
}
if let Some(logfile) = logfile {
let auth_id = match resp.extensions().get::<AuthStringExtension>() {
Some(AuthStringExtension(auth_id)) => auth_id.clone(),
None => "-".to_string(),
};
let now = proxmox_time::epoch_i64();
// time format which apache/nginx use (by default), copied from pve-http-server
let datetime = proxmox_time::strftime_local("%d/%m/%Y:%H:%M:%S %z", now)
.unwrap_or_else(|_| "-".to_string());
logfile.lock().unwrap().log(format!(
"{} - {} [{}] \"{} {}\" {} {} {}",
peer.ip(),
auth_id,
datetime,
method.as_str(),
path,
status.as_str(),
resp.body().size_hint().lower(),
user_agent.unwrap_or_else(|| "-".to_string()),
));
}
}
fn get_proxied_peer(headers: &HeaderMap) -> Option<std::net::SocketAddr> {
lazy_static! {
static ref RE: Regex = Regex::new(r#"for="([^"]+)""#).unwrap();
}
let forwarded = headers.get(header::FORWARDED)?.to_str().ok()?;
let capture = RE.captures(&forwarded)?;
let rhost = capture.get(1)?.as_str();
rhost.parse().ok()
}
/// Returns the `User-Agent` header, shortened to at most 128 bytes.
///
/// Returns `None` if the header is missing or cannot be converted to a string.
fn get_user_agent(headers: &HeaderMap) -> Option<String> {
    let header_value = headers.get(header::USER_AGENT)?;

    match header_value.to_str() {
        Ok(text) => {
            let mut agent = String::from(text);
            agent.truncate(128);
            Some(agent)
        }
        Err(_) => None,
    }
}
2021-09-30 11:49:29 +00:00
impl Service<Request<Body>> for ApiService {
type Response = Response<Body>;
type Error = Error;
#[allow(clippy::type_complexity)]
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
fn poll_ready(&mut self, _cx: &mut Context) -> Poll<Result<(), Self::Error>> {
Poll::Ready(Ok(()))
}
2018-11-15 09:18:01 +00:00
fn call(&mut self, req: Request<Body>) -> Self::Future {
let path = req.uri().path_and_query().unwrap().as_str().to_owned();
let method = req.method().clone();
let user_agent = get_user_agent(req.headers());
let config = Arc::clone(&self.api_config);
let peer = match get_proxied_peer(req.headers()) {
Some(proxied_peer) => proxied_peer,
None => self.peer,
};
async move {
let response = match handle_request(Arc::clone(&config), req, &peer).await {
Ok(response) => response,
2018-11-15 09:18:01 +00:00
Err(err) => {
let (err, code) = match err.downcast_ref::<HttpError>() {
Some(apierr) => (apierr.message.clone(), apierr.code),
_ => (err.to_string(), StatusCode::BAD_REQUEST),
};
Response::builder()
.status(code)
.extension(ErrorMessageExtension(err.to_string()))
.body(err.into())?
2018-11-15 09:18:01 +00:00
}
};
let logger = config.get_access_log();
log_response(logger, &peer, method, &path, &response, user_agent);
Ok(response)
}
.boxed()
2018-11-15 09:18:01 +00:00
}
}
2019-11-22 17:44:14 +00:00
/// Collects all string parameters of a request and parses them against `param_schema`.
///
/// Parameters are gathered in this order: the form encoded body data (`form`), the URI query
/// string (without the extjs "disable cache" parameter `_dc`) and finally the parameters
/// extracted from the URI path (`uri_param`).
fn parse_query_parameters<S: 'static + BuildHasher + Send>(
    param_schema: ParameterSchema,
    form: &str, // x-www-form-urlencoded body data
    parts: &Parts,
    uri_param: &HashMap<String, String, S>,
) -> Result<Value, Error> {
    let mut param_list: Vec<(String, String)> = Vec::new();

    if !form.is_empty() {
        param_list.extend(form_urlencoded::parse(form.as_bytes()).into_owned());
    }

    if let Some(query_str) = parts.uri.query() {
        param_list.extend(
            form_urlencoded::parse(query_str.as_bytes())
                .into_owned()
                // skip extjs "disable cache" parameter
                .filter(|(key, _value)| key.as_str() != "_dc"),
        );
    }

    param_list.extend(
        uri_param
            .iter()
            .map(|(key, value)| (key.clone(), value.clone())),
    );

    let params = parse_parameter_strings(&param_list, param_schema, true)?;

    Ok(params)
}
async fn get_request_parameters<S: 'static + BuildHasher + Send>(
param_schema: ParameterSchema,
parts: Parts,
req_body: Body,
2019-10-26 09:36:01 +00:00
uri_param: HashMap<String, String, S>,
2019-11-22 12:02:05 +00:00
) -> Result<Value, Error> {
let mut is_json = false;
if let Some(value) = parts.headers.get(header::CONTENT_TYPE) {
match value.to_str().map(|v| v.split(';').next()) {
Ok(Some("application/x-www-form-urlencoded")) => {
is_json = false;
}
Ok(Some("application/json")) => {
is_json = true;
}
2019-11-22 12:02:05 +00:00
_ => bail!("unsupported content type {:?}", value.to_str()),
}
}
let body = TryStreamExt::map_err(req_body, |err| {
http_err!(BAD_REQUEST, "Problems reading request body: {}", err)
})
.try_fold(Vec::new(), |mut acc, chunk| async move {
// FIXME: max request body size?
if acc.len() + chunk.len() < 64 * 1024 {
acc.extend_from_slice(&*chunk);
Ok(acc)
} else {
Err(http_err!(BAD_REQUEST, "Request body too large"))
}
})
.await?;
let utf8_data =
std::str::from_utf8(&body).map_err(|err| format_err!("Request body not uft8: {}", err))?;
2019-11-22 12:02:05 +00:00
if is_json {
2019-11-22 17:44:14 +00:00
let mut params: Value = serde_json::from_str(utf8_data)?;
2019-11-22 12:02:05 +00:00
for (k, v) in uri_param {
if let Some((_optional, prop_schema)) = param_schema.lookup(&k) {
2019-11-22 12:02:05 +00:00
params[&k] = parse_simple_value(&v, prop_schema)?;
}
2019-11-22 12:02:05 +00:00
}
verify_json_object(&params, &param_schema)?;
2019-11-22 12:02:05 +00:00
return Ok(params);
2019-11-22 17:44:14 +00:00
} else {
parse_query_parameters(param_schema, utf8_data, &parts, &uri_param)
2019-11-22 12:02:05 +00:00
}
}
// Marker response extension: `log_response` returns without logging anything when a response
// carries it. `proxy_protected_request` sets it on the responses it forwards.
struct NoLogExtension();
2019-11-22 12:02:05 +00:00
async fn proxy_protected_request(
2019-02-01 08:54:56 +00:00
info: &'static ApiMethod,
mut parts: Parts,
req_body: Body,
peer: &std::net::SocketAddr,
2019-11-22 12:02:05 +00:00
) -> Result<Response<Body>, Error> {
let mut uri_parts = parts.uri.clone().into_parts();
uri_parts.scheme = Some(http::uri::Scheme::HTTP);
uri_parts.authority = Some(http::uri::Authority::from_static("127.0.0.1:82"));
let new_uri = http::Uri::from_parts(uri_parts).unwrap();
parts.uri = new_uri;
let mut request = Request::from_parts(parts, req_body);
request.headers_mut().insert(
header::FORWARDED,
format!("for=\"{}\";", peer).parse().unwrap(),
);
2019-11-22 12:02:05 +00:00
let reload_timezone = info.reload_timezone;
let resp = hyper::client::Client::new()
.request(request)
.map_err(Error::from)
.map_ok(|mut resp| {
resp.extensions_mut().insert(NoLogExtension());
resp
2019-11-22 12:02:05 +00:00
})
.await?;
if reload_timezone {
unsafe {
tzset();
}
}
2019-11-22 12:02:05 +00:00
Ok(resp)
}
pub(crate) async fn handle_api_request<Env: RpcEnvironment, S: 'static + BuildHasher + Send>(
mut rpcenv: Env,
2018-11-15 09:25:59 +00:00
info: &'static ApiMethod,
formatter: &'static dyn OutputFormatter,
parts: Parts,
req_body: Body,
2019-10-26 09:36:01 +00:00
uri_param: HashMap<String, String, S>,
2019-11-22 12:02:05 +00:00
) -> Result<Response<Body>, Error> {
let delay_unauth_time = std::time::Instant::now() + std::time::Duration::from_millis(3000);
let compression = extract_compression_method(&parts.headers);
2019-11-22 17:44:14 +00:00
let result = match info.handler {
ApiHandler::AsyncHttp(handler) => {
2019-11-22 17:44:14 +00:00
let params = parse_query_parameters(info.parameters, "", &parts, &uri_param)?;
(handler)(parts, req_body, params, info, Box::new(rpcenv)).await
}
ApiHandler::Sync(handler) => {
let params =
get_request_parameters(info.parameters, parts, req_body, uri_param).await?;
(handler)(params, info, &mut rpcenv).map(|data| formatter.format_data(data, &rpcenv))
2019-11-22 17:44:14 +00:00
}
ApiHandler::Async(handler) => {
let params =
get_request_parameters(info.parameters, parts, req_body, uri_param).await?;
(handler)(params, info, &mut rpcenv)
.await
.map(|data| formatter.format_data(data, &rpcenv))
}
2019-11-22 17:44:14 +00:00
};
let mut resp = match result {
2019-11-22 17:44:14 +00:00
Ok(resp) => resp,
2019-11-22 12:02:05 +00:00
Err(err) => {
if let Some(httperr) = err.downcast_ref::<HttpError>() {
if httperr.code == StatusCode::UNAUTHORIZED {
tokio::time::sleep_until(Instant::from_std(delay_unauth_time)).await;
2019-11-22 12:02:05 +00:00
}
2019-02-01 08:54:56 +00:00
}
formatter.format_error(err)
2019-11-22 12:02:05 +00:00
}
};
2019-02-01 08:54:56 +00:00
let resp = match compression {
Some(CompressionMethod::Deflate) => {
resp.headers_mut().insert(
header::CONTENT_ENCODING,
CompressionMethod::Deflate.content_encoding(),
);
resp.map(|body| {
Body::wrap_stream(DeflateEncoder::with_quality(
TryStreamExt::map_err(body, |err| {
proxmox::io_format_err!("error during compression: {}", err)
}),
server: rest: switch from fastest to default deflate compression level I made some comparision with bombardier[0], the one listed here are 30s looped requests with two concurrent clients: [ static download of ext-all.js ]: lvl avg / stdev / max none 1.98 MiB 100 % 5.17ms / 1.30ms / 32.38ms fastest 813.14 KiB 42 % 20.53ms / 2.85ms / 58.71ms default 626.35 KiB 30 % 39.70ms / 3.98ms / 85.47ms [ deterministic (pre-defined data), but real API call ]: lvl avg / stdev / max none 129.09 KiB 100 % 2.70ms / 471.58us / 26.93ms fastest 42.12 KiB 33 % 3.47ms / 606.46us / 32.42ms default 34.82 KiB 27 % 4.28ms / 737.99us / 33.75ms The reduction is quite better with default, but it's also slower, but only when testing over unconstrained network. For real world scenarios where compression actually matters, e.g., when using a spotty train connection, we will be faster again with better compression. A GPRS limited connection (Firefox developer console) requires the following load (until the DOMContentLoaded event triggered) times: lvl t x faster none 9m 18.6s x 1.0 fastest 3m 20.0s x 2.8 default 2m 30.0s x 3.7 So for worst case using sligthly more CPU time on the server has a tremendous effect on the client load time. Using a more realistical example and limiting for "Good 2G" gives: none 1m 1.8s x 1.0 fastest 22.6s x 2.7 default 16.6s x 3.7 16s is somewhat OK, >1m just isn't... So, use default level to ensure we get bearable load times on clients, and if we want to improve transmission size AND speed then we could always use a in-memory cache, only a few MiB would be required for the compressable static files we server. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-04-07 15:12:01 +00:00
Level::Default,
))
})
}
None => resp,
};
if info.reload_timezone {
unsafe {
tzset();
}
}
2019-11-22 12:02:05 +00:00
Ok(resp)
}
/// Maps the extension of `filename` to a content type.
///
/// Returns the media type and a flag telling whether the content should NOT be compressed
/// (`true` for formats which are already compressed, like images or archives). Files without
/// extension, with a non UTF-8 extension or with an unknown extension are served as
/// `application/octet-stream` and may be compressed. The lookup is case sensitive.
fn extension_to_content_type(filename: &Path) -> (&'static str, bool) {
    match filename.extension().and_then(|ext| ext.to_str()) {
        Some("css") => ("text/css", false),
        Some("html") => ("text/html", false),
        Some("js") => ("application/javascript", false),
        Some("json") => ("application/json", false),
        Some("map") => ("application/json", false),
        Some("png") => ("image/png", true),
        Some("ico") => ("image/x-icon", true),
        Some("gif") => ("image/gif", true),
        Some("svg") => ("image/svg+xml", false),
        Some("jar") => ("application/java-archive", true),
        Some("woff") => ("application/font-woff", true),
        Some("woff2") => ("application/font-woff2", true),
        // the registered media type is "font-sfnt" (the old spelling "snft" was a typo)
        Some("ttf") => ("application/font-sfnt", true),
        Some("pdf") => ("application/pdf", true),
        Some("epub") => ("application/epub+zip", true),
        Some("mp3") => ("audio/mpeg", true),
        Some("oga") => ("audio/ogg", true),
        Some("tgz") => ("application/x-compressed-tar", true),
        _ => ("application/octet-stream", false),
    }
}
async fn simple_static_file_download(
filename: PathBuf,
content_type: &'static str,
compression: Option<CompressionMethod>,
) -> Result<Response<Body>, Error> {
use tokio::io::AsyncReadExt;
let mut file = File::open(filename)
.await
.map_err(|err| http_err!(BAD_REQUEST, "File open failed: {}", err))?;
let mut data: Vec<u8> = Vec::new();
let mut response = match compression {
Some(CompressionMethod::Deflate) => {
server: rest: switch from fastest to default deflate compression level I made some comparision with bombardier[0], the one listed here are 30s looped requests with two concurrent clients: [ static download of ext-all.js ]: lvl avg / stdev / max none 1.98 MiB 100 % 5.17ms / 1.30ms / 32.38ms fastest 813.14 KiB 42 % 20.53ms / 2.85ms / 58.71ms default 626.35 KiB 30 % 39.70ms / 3.98ms / 85.47ms [ deterministic (pre-defined data), but real API call ]: lvl avg / stdev / max none 129.09 KiB 100 % 2.70ms / 471.58us / 26.93ms fastest 42.12 KiB 33 % 3.47ms / 606.46us / 32.42ms default 34.82 KiB 27 % 4.28ms / 737.99us / 33.75ms The reduction is quite better with default, but it's also slower, but only when testing over unconstrained network. For real world scenarios where compression actually matters, e.g., when using a spotty train connection, we will be faster again with better compression. A GPRS limited connection (Firefox developer console) requires the following load (until the DOMContentLoaded event triggered) times: lvl t x faster none 9m 18.6s x 1.0 fastest 3m 20.0s x 2.8 default 2m 30.0s x 3.7 So for worst case using sligthly more CPU time on the server has a tremendous effect on the client load time. Using a more realistical example and limiting for "Good 2G" gives: none 1m 1.8s x 1.0 fastest 22.6s x 2.7 default 16.6s x 3.7 16s is somewhat OK, >1m just isn't... So, use default level to ensure we get bearable load times on clients, and if we want to improve transmission size AND speed then we could always use a in-memory cache, only a few MiB would be required for the compressable static files we server. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-04-07 15:12:01 +00:00
let mut enc = DeflateEncoder::with_quality(data, Level::Default);
enc.compress_vec(&mut file, CHUNK_SIZE_LIMIT as usize)
.await?;
let mut response = Response::new(enc.into_inner().into());
response.headers_mut().insert(
header::CONTENT_ENCODING,
CompressionMethod::Deflate.content_encoding(),
);
response
}
None => {
file.read_to_end(&mut data)
.await
.map_err(|err| http_err!(BAD_REQUEST, "File read failed: {}", err))?;
Response::new(data.into())
}
};
response.headers_mut().insert(
header::CONTENT_TYPE,
header::HeaderValue::from_static(content_type),
);
Ok(response)
}
async fn chuncked_static_file_download(
filename: PathBuf,
content_type: &'static str,
compression: Option<CompressionMethod>,
) -> Result<Response<Body>, Error> {
let mut resp = Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, content_type);
let file = File::open(filename)
.await
.map_err(|err| http_err!(BAD_REQUEST, "File open failed: {}", err))?;
let body = match compression {
Some(CompressionMethod::Deflate) => {
resp = resp.header(
header::CONTENT_ENCODING,
CompressionMethod::Deflate.content_encoding(),
);
Body::wrap_stream(DeflateEncoder::with_quality(
AsyncReaderStream::new(file),
server: rest: switch from fastest to default deflate compression level I made some comparision with bombardier[0], the one listed here are 30s looped requests with two concurrent clients: [ static download of ext-all.js ]: lvl avg / stdev / max none 1.98 MiB 100 % 5.17ms / 1.30ms / 32.38ms fastest 813.14 KiB 42 % 20.53ms / 2.85ms / 58.71ms default 626.35 KiB 30 % 39.70ms / 3.98ms / 85.47ms [ deterministic (pre-defined data), but real API call ]: lvl avg / stdev / max none 129.09 KiB 100 % 2.70ms / 471.58us / 26.93ms fastest 42.12 KiB 33 % 3.47ms / 606.46us / 32.42ms default 34.82 KiB 27 % 4.28ms / 737.99us / 33.75ms The reduction is quite better with default, but it's also slower, but only when testing over unconstrained network. For real world scenarios where compression actually matters, e.g., when using a spotty train connection, we will be faster again with better compression. A GPRS limited connection (Firefox developer console) requires the following load (until the DOMContentLoaded event triggered) times: lvl t x faster none 9m 18.6s x 1.0 fastest 3m 20.0s x 2.8 default 2m 30.0s x 3.7 So for worst case using sligthly more CPU time on the server has a tremendous effect on the client load time. Using a more realistical example and limiting for "Good 2G" gives: none 1m 1.8s x 1.0 fastest 22.6s x 2.7 default 16.6s x 3.7 16s is somewhat OK, >1m just isn't... So, use default level to ensure we get bearable load times on clients, and if we want to improve transmission size AND speed then we could always use a in-memory cache, only a few MiB would be required for the compressable static files we server. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-04-07 15:12:01 +00:00
Level::Default,
))
}
None => Body::wrap_stream(AsyncReaderStream::new(file)),
};
Ok(resp.body(body).unwrap())
}
/// Serves the static file `filename`.
///
/// Content type and compressibility are derived from the file extension. Files smaller than
/// `CHUNK_SIZE_LIMIT` are sent from memory, everything else is streamed.
async fn handle_static_file_download(
    filename: PathBuf,
    compression: Option<CompressionMethod>,
) -> Result<Response<Body>, Error> {
    let metadata = tokio::fs::metadata(&filename)
        .await
        .map_err(|err| http_err!(BAD_REQUEST, "File access problems: {}", err))?;

    let (content_type, nocomp) = extension_to_content_type(&filename);
    // never compress formats which are compressed already
    let compression = match nocomp {
        true => None,
        false => compression,
    };

    if metadata.len() >= CHUNK_SIZE_LIMIT {
        chuncked_static_file_download(filename, content_type, compression).await
    } else {
        simple_static_file_download(filename, content_type, compression).await
    }
}
// FIXME: support handling multiple compression methods
/// Returns the first entry of the `Accept-Encoding` header that parses as a
/// [CompressionMethod], or `None` if there is no such entry (or no usable header).
fn extract_compression_method(headers: &http::HeaderMap) -> Option<CompressionMethod> {
    let encodings = headers.get(header::ACCEPT_ENCODING)?.to_str().ok()?;

    encodings
        .split(&[',', ' '][..])
        .find_map(|encoding| encoding.parse().ok())
}
async fn handle_request(
api: Arc<ApiConfig>,
req: Request<Body>,
peer: &std::net::SocketAddr,
) -> Result<Response<Body>, Error> {
let (parts, body) = req.into_parts();
let method = parts.method.clone();
let (path, components) = normalize_uri_path(parts.uri.path())?;
let comp_len = components.len();
server: rest: implement max URI path and query length request limits Add a generous limit now and return the correct error (414 URI Too Long). Otherwise we could to pretty larger GET requests, 64 KiB and possible bigger (at 64 KiB my simple curl test failed due to shell/curl limitations). For now allow a 3072 characters as combined length of URI path and query. This is conform with the HTTP/1.1 RFCs (e.g., RFC 7231, 6.5.12 and RFC 2616, 3.2.1) which do not specify any limits, upper or lower, but require that all server accessible resources mus be reachable without getting 414, which is normally fulfilled as we have various length limits for stuff which could be in an URI, in place, e.g.: * user id: max. 64 chars * datastore: max. 32 chars The only known problematic API endpoint is the catalog one, used in the GUI's pxar file browser: GET /api2/json/admin/datastore/<id>/catalog?..&filepath=<path> The <path> is the encoded archive path, and can be arbitrary long. But, this is a flawed design, as even without this new limit one can easily generate archives which cannot be browsed anymore, as hyper only accepts requests with max. 64 KiB in the URI. So rather, we should move that to a GET-as-POST call, which has no such limitations (and would not need to base32 encode the path). Note: This change was inspired by adding a request access log, which profits from such limits as we can then rely on certain atomicity guarantees when writing requests to the log. Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2020-10-15 15:49:16 +00:00
let query = parts.uri.query().unwrap_or_default();
if path.len() + query.len() > MAX_URI_QUERY_LENGTH {
return Ok(Response::builder()
.status(StatusCode::URI_TOO_LONG)
.body("".into())
.unwrap());
}
let env_type = api.env_type();
let mut rpcenv = RestEnvironment::new(env_type, Arc::clone(&api));
2019-01-27 09:18:52 +00:00
rpcenv.set_client_ip(Some(*peer));
2019-01-31 11:22:00 +00:00
let delay_unauth_time = std::time::Instant::now() + std::time::Duration::from_millis(3000);
let access_forbidden_time = std::time::Instant::now() + std::time::Duration::from_millis(500);
2019-01-31 11:22:00 +00:00
if comp_len >= 1 && components[0] == "api2" {
if comp_len >= 2 {
let format = components[1];
2019-11-22 12:02:05 +00:00
let formatter: &dyn OutputFormatter = match format {
"json" => JSON_FORMATTER,
"extjs" => EXTJS_FORMATTER,
_ => bail!("Unsupported output format '{}'.", format),
2018-12-05 11:42:25 +00:00
};
2018-11-16 08:15:33 +00:00
let mut uri_param = HashMap::new();
let api_method = api.find_method(&components[2..], method.clone(), &mut uri_param);
2018-11-16 08:15:33 +00:00
let mut auth_required = true;
if let Some(api_method) = api_method {
if let Permission::World = *api_method.access.permission {
auth_required = false; // no auth for endpoints with World permission
}
}
let mut user_info: Box<dyn UserInformation + Send + Sync> = Box::new(EmptyUserInformation {});
if auth_required {
match api.check_auth(&parts.headers, &method).await {
Ok((authid, info)) => {
rpcenv.set_auth_id(Some(authid));
user_info = info;
}
Err(auth_err) => {
let err = match auth_err {
AuthError::Generic(err) => err,
AuthError::NoData => {
format_err!("no authentication credentials provided.")
}
};
// fixme: log Username??
rpcenv.log_failed_auth(None, &err.to_string());
// always delay unauthorized calls by 3 seconds (from start of request)
let err = http_err!(UNAUTHORIZED, "authentication failed - {}", err);
tokio::time::sleep_until(Instant::from_std(delay_unauth_time)).await;
return Ok(formatter.format_error(err));
}
2019-01-31 11:22:00 +00:00
}
}
match api_method {
2019-11-21 08:36:41 +00:00
None => {
let err = http_err!(NOT_FOUND, "Path '{}' not found.", path);
return Ok(formatter.format_error(err));
}
2019-11-21 08:36:41 +00:00
Some(api_method) => {
let auth_id = rpcenv.get_auth_id();
let user_info = user_info;
if !check_api_permission(
api_method.access.permission,
auth_id.as_deref(),
&uri_param,
user_info.as_ref(),
) {
let err = http_err!(FORBIDDEN, "permission check failed");
tokio::time::sleep_until(Instant::from_std(access_forbidden_time)).await;
return Ok(formatter.format_error(err));
2020-04-16 08:01:59 +00:00
}
let result = if api_method.protected && env_type == RpcEnvironmentType::PUBLIC {
proxy_protected_request(api_method, parts, body, peer).await
} else {
handle_api_request(rpcenv, api_method, formatter, parts, body, uri_param)
.await
};
let mut response = match result {
Ok(resp) => resp,
Err(err) => formatter.format_error(err),
};
if let Some(auth_id) = auth_id {
response.extensions_mut().insert(AuthStringExtension(auth_id));
}
return Ok(response);
}
}
}
} else {
// not Auth required for accessing files!
if method != hyper::Method::GET {
2019-11-22 12:02:05 +00:00
bail!("Unsupported HTTP method {}", method);
}
if comp_len == 0 {
match api.check_auth(&parts.headers, &method).await {
Ok((auth_id, _user_info)) => {
rpcenv.set_auth_id(Some(auth_id));
return Ok(api.get_index(rpcenv, parts).await);
}
Err(AuthError::Generic(_)) => {
tokio::time::sleep_until(Instant::from_std(delay_unauth_time)).await;
}
Err(AuthError::NoData) => {}
}
return Ok(api.get_index(rpcenv, parts).await);
} else {
let filename = api.find_alias(&components);
let compression = extract_compression_method(&parts.headers);
return handle_static_file_download(filename, compression).await;
}
}
Err(http_err!(NOT_FOUND, "Path '{}' not found.", path))
}