Adds GET /api/v1/admin/system returning disk (scoped to storage_dir via statvfs), memory, CPU, and a server-side alerts array that fires at ≥90% disk or memory usage. Disk uses nix::sys::statvfs directly rather than sysinfo's Disks API to avoid mountpoint-matching gymnastics for the storage_dir. A new `Storage::local_root() -> Option<&Path>` trait method exposes the root; the default returns None so a future S3Storage gets `disk: null` in the response instead of fabricated numbers. CPU is sampled inline (refresh → 250ms sleep → refresh → read) so the endpoint adds 250ms of latency per call. No background cache yet — admin traffic is low-volume and the moving parts aren't worth it until polling shows up. Alerts are evaluated server-side so the frontend can render them without re-implementing the thresholds.
164 lines
4.6 KiB
Rust
164 lines
4.6 KiB
Rust
//! System metrics for the admin dashboard.
//!
//! Disk is `statvfs(storage_dir)` so the number reflects the volume the
//! app actually writes to (not the root filesystem of the host). When the
//! storage backend doesn't expose a local path (e.g. a future S3 impl)
//! the disk fields are `null` rather than fabricated.
//!
//! Memory and CPU come from `sysinfo`. CPU requires two refreshes with
//! at least 200ms between them to compute a meaningful delta; the
//! handler eats the 250ms wall-clock cost on each request. Admin
//! traffic is low-volume so a background cache isn't worth the moving
//! parts yet — revisit if polling becomes frequent.
|
|
|
|
use std::path::Path;
|
|
use std::time::Duration;
|
|
|
|
use axum::extract::State;
|
|
use axum::routing::get;
|
|
use axum::{Json, Router};
|
|
use serde::Serialize;
|
|
use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind, System};
|
|
|
|
use crate::app::AppState;
|
|
use crate::auth::extractor::RequireAdmin;
|
|
use crate::error::AppResult;
|
|
|
|
/// Usage percentage at or above which (`>=`) the handler emits a warning
/// alert; applied to both the disk and the memory `percent_used` values.
const ALERT_THRESHOLD_PERCENT: f64 = 90.0;
|
|
|
|
pub fn routes() -> Router<AppState> {
|
|
Router::new().route("/admin/system", get(system))
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
pub struct SystemStats {
|
|
pub disk: Option<DiskStats>,
|
|
pub memory: MemoryStats,
|
|
pub cpu: CpuStats,
|
|
pub alerts: Vec<Alert>,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
pub struct DiskStats {
|
|
pub total_bytes: u64,
|
|
pub used_bytes: u64,
|
|
pub free_bytes: u64,
|
|
pub percent_used: f64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
pub struct MemoryStats {
|
|
pub total_bytes: u64,
|
|
pub used_bytes: u64,
|
|
pub percent_used: f64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
pub struct CpuStats {
|
|
pub percent_used: f64,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
pub struct Alert {
|
|
pub level: AlertLevel,
|
|
pub message: String,
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Clone, Copy)]
|
|
#[serde(rename_all = "snake_case")]
|
|
pub enum AlertLevel {
|
|
Warning,
|
|
}
|
|
|
|
async fn system(
|
|
State(state): State<AppState>,
|
|
_admin: RequireAdmin,
|
|
) -> AppResult<Json<SystemStats>> {
|
|
let disk = state.storage.local_root().and_then(disk_stats_for);
|
|
let (memory, cpu) = memory_and_cpu().await;
|
|
let mut alerts = Vec::new();
|
|
if let Some(d) = &disk {
|
|
if d.percent_used >= ALERT_THRESHOLD_PERCENT {
|
|
alerts.push(Alert {
|
|
level: AlertLevel::Warning,
|
|
message: format!(
|
|
"disk near full ({:.0}% used)",
|
|
d.percent_used
|
|
),
|
|
});
|
|
}
|
|
}
|
|
if memory.percent_used >= ALERT_THRESHOLD_PERCENT {
|
|
alerts.push(Alert {
|
|
level: AlertLevel::Warning,
|
|
message: format!(
|
|
"memory near full ({:.0}% used)",
|
|
memory.percent_used
|
|
),
|
|
});
|
|
}
|
|
Ok(Json(SystemStats {
|
|
disk,
|
|
memory,
|
|
cpu,
|
|
alerts,
|
|
}))
|
|
}
|
|
|
|
fn disk_stats_for(root: &Path) -> Option<DiskStats> {
|
|
let s = nix::sys::statvfs::statvfs(root).ok()?;
|
|
// statvfs reports `f_frsize * f_blocks` for total bytes. `f_bavail`
|
|
// is "free to non-root callers" which is what an operator actually
|
|
// cares about — `f_bfree` includes blocks reserved for root.
|
|
let block = s.fragment_size();
|
|
let total = block * s.blocks();
|
|
let avail = block * s.blocks_available();
|
|
let used = total.saturating_sub(avail);
|
|
let percent_used = if total > 0 {
|
|
(used as f64) * 100.0 / (total as f64)
|
|
} else {
|
|
0.0
|
|
};
|
|
Some(DiskStats {
|
|
total_bytes: total,
|
|
used_bytes: used,
|
|
free_bytes: avail,
|
|
percent_used,
|
|
})
|
|
}
|
|
|
|
async fn memory_and_cpu() -> (MemoryStats, CpuStats) {
|
|
// sysinfo's CPU sampling needs two refreshes with a delay between
|
|
// them — the first seeds the delta counters, the second measures.
|
|
// We do this once per request; admin traffic is low enough that the
|
|
// 250ms cost is invisible.
|
|
let mut sys = System::new_with_specifics(
|
|
RefreshKind::new()
|
|
.with_cpu(CpuRefreshKind::everything())
|
|
.with_memory(MemoryRefreshKind::everything()),
|
|
);
|
|
sys.refresh_cpu_all();
|
|
// Yield the runtime instead of blocking it for the gap.
|
|
tokio::time::sleep(Duration::from_millis(250)).await;
|
|
sys.refresh_cpu_all();
|
|
sys.refresh_memory();
|
|
|
|
let total = sys.total_memory();
|
|
let used = sys.used_memory();
|
|
let mem_pct = if total > 0 {
|
|
(used as f64) * 100.0 / (total as f64)
|
|
} else {
|
|
0.0
|
|
};
|
|
let memory = MemoryStats {
|
|
total_bytes: total,
|
|
used_bytes: used,
|
|
percent_used: mem_pct,
|
|
};
|
|
|
|
let cpu = CpuStats {
|
|
percent_used: sys.global_cpu_usage() as f64,
|
|
};
|
|
(memory, cpu)
|
|
}
|