//! Orphan `*.tmp.*` blob sweeper (v1.1.6, v1.1.5 follow-up). //! //! The files repo writes blobs atomically: it streams into a //! `.tmp.-` temp file, fsyncs, then renames to the final //! `` path. A crash between create and rename leaves an orphan temp //! file that is never read and never reclaimed. This sweeper deletes //! those: every `PICLOUD_FILES_ORPHAN_SWEEP_INTERVAL_SEC` (default 6h) it //! walks `/files/` and unlinks any `*.tmp.*` file older than //! `PICLOUD_FILES_ORPHAN_TMP_TTL_SEC` (default 1h). //! //! Deliberately bounded: it does NOT cross-check on-disk files against DB //! rows (the full reconciling sweeper is v1.3+). It only targets the temp //! files, which are unambiguously orphans once past the TTL — no live //! writer keeps one around for an hour. use std::path::{Path, PathBuf}; use std::time::{Duration, SystemTime}; const ENV_INTERVAL: &str = "PICLOUD_FILES_ORPHAN_SWEEP_INTERVAL_SEC"; const ENV_TMP_TTL: &str = "PICLOUD_FILES_ORPHAN_TMP_TTL_SEC"; const DEFAULT_INTERVAL_SECS: u64 = 21_600; // 6h const DEFAULT_TMP_TTL_SECS: u64 = 3_600; // 1h /// Marker that identifies a temp blob (`.tmp.-`). A final /// blob is named just `` (a UUID), so it never contains this. const TMP_MARKER: &str = ".tmp."; #[derive(Debug, Default, Clone, Copy)] pub struct SweepStats { pub dirs_walked: u64, pub files_deleted: u64, pub bytes_reclaimed: u64, } /// Spawn the periodic orphan sweep. Spawned at startup alongside the /// cron scheduler and the realtime/cache GC tasks. pub fn spawn_files_orphan_sweep(files_root: PathBuf) { let interval = Duration::from_secs(read_secs(ENV_INTERVAL, DEFAULT_INTERVAL_SECS)); let ttl = Duration::from_secs(read_secs(ENV_TMP_TTL, DEFAULT_TMP_TTL_SECS)); tokio::spawn(async move { let mut ticker = tokio::time::interval(interval); ticker.tick().await; // skip the immediate first fire loop { ticker.tick().await; let root = files_root.clone(); // Blocking filesystem walk off the async worker. let stats = tokio::task::spawn_blocking(move || sweep_orphan_tmp_files(&root, ttl)) .await .unwrap_or_default(); tracing::info!( dirs_walked = stats.dirs_walked, files_deleted = stats.files_deleted, bytes_reclaimed = stats.bytes_reclaimed, "files orphan sweep complete" ); } }); } /// Walk `/files/` and delete `*.tmp.*` files older than /// `ttl`. Missing root is not an error (returns zeroed stats). Pure + /// synchronous so it's unit-testable without a runtime. #[must_use] pub fn sweep_orphan_tmp_files(files_root: &Path, ttl: Duration) -> SweepStats { let mut stats = SweepStats::default(); let blobs_dir = files_root.join("files"); if !blobs_dir.is_dir() { return stats; } let now = SystemTime::now(); walk(&blobs_dir, ttl, now, &mut stats); stats } fn walk(dir: &Path, ttl: Duration, now: SystemTime, stats: &mut SweepStats) { stats.dirs_walked += 1; let Ok(entries) = std::fs::read_dir(dir) else { return; }; for entry in entries.flatten() { let Ok(ft) = entry.file_type() else { continue; }; let path = entry.path(); if ft.is_dir() { walk(&path, ttl, now, stats); continue; } if !ft.is_file() { continue; } if !entry.file_name().to_string_lossy().contains(TMP_MARKER) { continue; } let Ok(meta) = entry.metadata() else { continue; }; let age = meta .modified() .ok() .and_then(|m| now.duration_since(m).ok()) .unwrap_or(Duration::ZERO); if age >= ttl { let size = meta.len(); if std::fs::remove_file(&path).is_ok() { stats.files_deleted += 1; stats.bytes_reclaimed += size; } } } } fn read_secs(key: &str, default: u64) -> u64 { match std::env::var(key) { Err(_) => default, Ok(v) => match v.parse::() { Ok(n) if n > 0 => n, _ => { tracing::warn!(env = key, value = %v, "invalid; using default"); default } }, } } #[cfg(test)] mod tests { use super::*; use std::sync::atomic::{AtomicU64, Ordering}; static SEQ: AtomicU64 = AtomicU64::new(0); fn tmp_root() -> PathBuf { let n = SEQ.fetch_add(1, Ordering::Relaxed); let dir = std::env::temp_dir().join(format!("picloud-sweep-test-{}-{n}", std::process::id())); std::fs::create_dir_all(dir.join("files").join("ab")).unwrap(); dir } fn touch(path: &Path) { std::fs::write(path, b"x").unwrap(); } #[test] fn deletes_old_tmp_files() { let root = tmp_root(); let tmp = root.join("files/ab/uuid.tmp.123-0"); touch(&tmp); // ttl 0 → any tmp file counts as old. let stats = sweep_orphan_tmp_files(&root, Duration::ZERO); assert_eq!(stats.files_deleted, 1); assert!(!tmp.exists()); assert!(stats.bytes_reclaimed >= 1); } #[test] fn keeps_young_tmp_files() { let root = tmp_root(); let tmp = root.join("files/ab/uuid.tmp.123-0"); touch(&tmp); // Large TTL → the just-created file is too young to reap. let stats = sweep_orphan_tmp_files(&root, Duration::from_secs(3600)); assert_eq!(stats.files_deleted, 0); assert!(tmp.exists()); } #[test] fn keeps_non_tmp_files() { let root = tmp_root(); let blob = root.join("files/ab/0123456789abcdef"); touch(&blob); let stats = sweep_orphan_tmp_files(&root, Duration::ZERO); assert_eq!(stats.files_deleted, 0); assert!(blob.exists()); } #[test] fn missing_root_does_not_panic() { let root = std::env::temp_dir().join("picloud-sweep-nonexistent-xyz"); let stats = sweep_orphan_tmp_files(&root, Duration::ZERO); assert_eq!(stats.files_deleted, 0); assert_eq!(stats.dirs_walked, 0); } }