//! System metrics for the admin dashboard. //! //! Disk is `statvfs(storage_dir)` so the number reflects the volume the //! app actually writes to (not the root filesystem of the host). When the //! storage backend doesn't expose a local path (e.g. a future S3 impl) //! the disk fields are `null` rather than fabricated. //! //! Memory and CPU come from `sysinfo`. CPU requires two refreshes with //! at least 200ms between them to compute a meaningful delta; the //! handler eats the 250ms wall-clock cost on each request. Admin //! traffic is low-volume so a background cache isn't worth the moving //! parts yet — revisit if polling becomes frequent. use std::path::Path; use std::time::Duration; use axum::extract::State; use axum::routing::get; use axum::{Json, Router}; use serde::Serialize; use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind, System}; use crate::app::AppState; use crate::auth::extractor::RequireAdmin; use crate::error::AppResult; const ALERT_THRESHOLD_PERCENT: f64 = 90.0; pub fn routes() -> Router { Router::new().route("/admin/system", get(system)) } #[derive(Debug, Serialize)] pub struct SystemStats { pub disk: Option, pub memory: MemoryStats, pub cpu: CpuStats, pub alerts: Vec, } #[derive(Debug, Serialize)] pub struct DiskStats { pub total_bytes: u64, pub used_bytes: u64, pub free_bytes: u64, pub percent_used: f64, } #[derive(Debug, Serialize)] pub struct MemoryStats { pub total_bytes: u64, pub used_bytes: u64, pub percent_used: f64, } #[derive(Debug, Serialize)] pub struct CpuStats { pub percent_used: f64, } #[derive(Debug, Serialize)] pub struct Alert { pub level: AlertLevel, pub message: String, } #[derive(Debug, Serialize, Clone, Copy)] #[serde(rename_all = "snake_case")] pub enum AlertLevel { Warning, } async fn system( State(state): State, _admin: RequireAdmin, ) -> AppResult> { let disk = state.storage.local_root().and_then(disk_stats_for); let (memory, cpu) = memory_and_cpu().await; let mut alerts = Vec::new(); if let Some(d) = &disk { if d.percent_used >= ALERT_THRESHOLD_PERCENT { alerts.push(Alert { level: AlertLevel::Warning, message: format!( "disk near full ({:.0}% used)", d.percent_used ), }); } } if memory.percent_used >= ALERT_THRESHOLD_PERCENT { alerts.push(Alert { level: AlertLevel::Warning, message: format!( "memory near full ({:.0}% used)", memory.percent_used ), }); } Ok(Json(SystemStats { disk, memory, cpu, alerts, })) } fn disk_stats_for(root: &Path) -> Option { let s = nix::sys::statvfs::statvfs(root).ok()?; // statvfs reports `f_frsize * f_blocks` for total bytes. `f_bavail` // is "free to non-root callers" which is what an operator actually // cares about — `f_bfree` includes blocks reserved for root. let block = s.fragment_size(); let total = block * s.blocks(); let avail = block * s.blocks_available(); let used = total.saturating_sub(avail); let percent_used = if total > 0 { (used as f64) * 100.0 / (total as f64) } else { 0.0 }; Some(DiskStats { total_bytes: total, used_bytes: used, free_bytes: avail, percent_used, }) } async fn memory_and_cpu() -> (MemoryStats, CpuStats) { // sysinfo's CPU sampling needs two refreshes with a delay between // them — the first seeds the delta counters, the second measures. // We do this once per request; admin traffic is low enough that the // 250ms cost is invisible. let mut sys = System::new_with_specifics( RefreshKind::new() .with_cpu(CpuRefreshKind::everything()) .with_memory(MemoryRefreshKind::everything()), ); sys.refresh_cpu_all(); // Yield the runtime instead of blocking it for the gap. tokio::time::sleep(Duration::from_millis(250)).await; sys.refresh_cpu_all(); sys.refresh_memory(); let total = sys.total_memory(); let used = sys.used_memory(); let mem_pct = if total > 0 { (used as f64) * 100.0 / (total as f64) } else { 0.0 }; let memory = MemoryStats { total_bytes: total, used_bytes: used, percent_used: mem_pct, }; let cpu = CpuStats { percent_used: sys.global_cpu_usage() as f64, }; (memory, cpu) }