Server-side realtime SSE on per-app pub/sub topics, plus the three
v1.1.5 follow-ups and the version bumps.
Realtime:
- topics registry (0021) + admin endpoints + Capability::AppTopicManage
(-> app:admin; no new scope).
- GET /realtime/topics/{topic} SSE endpoint (orchestrator-core data
plane): Host -> app, RealtimeAuthority gate (404 missing/internal,
401 bad/absent token), broadcast::Receiver stream + heartbeat.
- RealtimeBroadcaster / RealtimeEvent / RealtimeAuthority traits
(picloud-shared); InProcessBroadcaster + GC (orchestrator-core);
DB-backed RealtimeAuthorityImpl (manager-core). Publish path fans out
to in-process subscribers after the durable outbox commit (best-effort,
panic-isolated).
- HMAC subscriber tokens (subscriber_token.rs) + app_secrets table (0022)
+ pubsub::subscriber_token SDK (schema 1.6 -> 1.7). TTL clamp + env
overrides.
- Dashboard Topics tab (register/list/edit/delete, prominent external
badge, flip confirmation).
v1.1.5 follow-ups:
- Empty blobs accepted (NewFile/FileUpdate::validate) + round-trip test.
- Orphan *.tmp.* sweeper (spawn_files_orphan_sweep).
- Dispatcher e2e tests, one per trigger kind (DATABASE_URL-gated).
Versions: workspace 1.1.6, SDK 1.7, dashboard 0.12.0. Schema-snapshot
golden re-blessed.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
186 lines
6.2 KiB
Rust
186 lines
6.2 KiB
Rust
//! Orphan `*.tmp.*` blob sweeper (v1.1.6, v1.1.5 follow-up).
//!
//! The files repo writes blobs atomically: it streams into a
//! `<id>.tmp.<pid>-<seq>` temp file, fsyncs, then renames to the final
//! `<id>` path. A crash between create and rename leaves an orphan temp
//! file that is never read and never reclaimed. This sweeper deletes
//! those: every `PICLOUD_FILES_ORPHAN_SWEEP_INTERVAL_SEC` (default 6h) it
//! walks `<root>/files/` and unlinks any `*.tmp.*` file older than
//! `PICLOUD_FILES_ORPHAN_TMP_TTL_SEC` (default 1h).
//!
//! Deliberately bounded: it does NOT cross-check on-disk files against DB
//! rows (the full reconciling sweeper is v1.3+). It only targets the temp
//! files, which are unambiguously orphans once past the TTL — no live
//! writer keeps one around for an hour.
|
|
|
|
use std::path::{Path, PathBuf};
|
|
use std::time::{Duration, SystemTime};
|
|
|
|
/// Environment variable that overrides the sweep period, in whole seconds.
const ENV_INTERVAL: &str = "PICLOUD_FILES_ORPHAN_SWEEP_INTERVAL_SEC";
/// Environment variable that overrides the minimum age, in whole seconds,
/// a temp file must reach before it is deleted.
const ENV_TMP_TTL: &str = "PICLOUD_FILES_ORPHAN_TMP_TTL_SEC";
/// Default sweep period: 6 hours.
const DEFAULT_INTERVAL_SECS: u64 = 6 * 60 * 60;
/// Default temp-file TTL: 1 hour.
const DEFAULT_TMP_TTL_SECS: u64 = 60 * 60;

/// Marker that identifies a temp blob (`<id>.tmp.<pid>-<seq>`). A final
/// blob is named just `<id>` (a UUID), so it never contains this.
const TMP_MARKER: &str = ".tmp.";
|
|
|
|
/// Counters produced by one orphan-sweep pass.
///
/// All fields start at zero (`Default`) and only ever grow during a pass.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct SweepStats {
    /// Directories the walk entered, counted on entry — a directory that
    /// then fails to list is still included.
    pub dirs_walked: u64,
    /// Temp files that were successfully unlinked.
    pub files_deleted: u64,
    /// Sum of the reported sizes (bytes) of the files counted in
    /// `files_deleted`.
    pub bytes_reclaimed: u64,
}
|
|
|
|
/// Spawn the periodic orphan sweep. Spawned at startup alongside the
|
|
/// cron scheduler and the realtime/cache GC tasks.
|
|
pub fn spawn_files_orphan_sweep(files_root: PathBuf) {
|
|
let interval = Duration::from_secs(read_secs(ENV_INTERVAL, DEFAULT_INTERVAL_SECS));
|
|
let ttl = Duration::from_secs(read_secs(ENV_TMP_TTL, DEFAULT_TMP_TTL_SECS));
|
|
tokio::spawn(async move {
|
|
let mut ticker = tokio::time::interval(interval);
|
|
ticker.tick().await; // skip the immediate first fire
|
|
loop {
|
|
ticker.tick().await;
|
|
let root = files_root.clone();
|
|
// Blocking filesystem walk off the async worker.
|
|
let stats = tokio::task::spawn_blocking(move || sweep_orphan_tmp_files(&root, ttl))
|
|
.await
|
|
.unwrap_or_default();
|
|
tracing::info!(
|
|
dirs_walked = stats.dirs_walked,
|
|
files_deleted = stats.files_deleted,
|
|
bytes_reclaimed = stats.bytes_reclaimed,
|
|
"files orphan sweep complete"
|
|
);
|
|
}
|
|
});
|
|
}
|
|
|
|
/// Walk `<files_root>/files/` and delete `*.tmp.*` files older than
|
|
/// `ttl`. Missing root is not an error (returns zeroed stats). Pure +
|
|
/// synchronous so it's unit-testable without a runtime.
|
|
#[must_use]
|
|
pub fn sweep_orphan_tmp_files(files_root: &Path, ttl: Duration) -> SweepStats {
|
|
let mut stats = SweepStats::default();
|
|
let blobs_dir = files_root.join("files");
|
|
if !blobs_dir.is_dir() {
|
|
return stats;
|
|
}
|
|
let now = SystemTime::now();
|
|
walk(&blobs_dir, ttl, now, &mut stats);
|
|
stats
|
|
}
|
|
|
|
fn walk(dir: &Path, ttl: Duration, now: SystemTime, stats: &mut SweepStats) {
|
|
stats.dirs_walked += 1;
|
|
let Ok(entries) = std::fs::read_dir(dir) else {
|
|
return;
|
|
};
|
|
for entry in entries.flatten() {
|
|
let Ok(ft) = entry.file_type() else {
|
|
continue;
|
|
};
|
|
let path = entry.path();
|
|
if ft.is_dir() {
|
|
walk(&path, ttl, now, stats);
|
|
continue;
|
|
}
|
|
if !ft.is_file() {
|
|
continue;
|
|
}
|
|
if !entry.file_name().to_string_lossy().contains(TMP_MARKER) {
|
|
continue;
|
|
}
|
|
let Ok(meta) = entry.metadata() else {
|
|
continue;
|
|
};
|
|
let age = meta
|
|
.modified()
|
|
.ok()
|
|
.and_then(|m| now.duration_since(m).ok())
|
|
.unwrap_or(Duration::ZERO);
|
|
if age >= ttl {
|
|
let size = meta.len();
|
|
if std::fs::remove_file(&path).is_ok() {
|
|
stats.files_deleted += 1;
|
|
stats.bytes_reclaimed += size;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn read_secs(key: &str, default: u64) -> u64 {
|
|
match std::env::var(key) {
|
|
Err(_) => default,
|
|
Ok(v) => match v.parse::<u64>() {
|
|
Ok(n) if n > 0 => n,
|
|
_ => {
|
|
tracing::warn!(env = key, value = %v, "invalid; using default");
|
|
default
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Per-process counter so tests running in parallel get distinct roots.
    static SEQ: AtomicU64 = AtomicU64::new(0);

    /// Scratch files root containing `files/ab/`, removed again on drop so
    /// repeated test runs do not accumulate directories under the OS temp dir.
    struct TestRoot(PathBuf);

    impl TestRoot {
        fn new() -> Self {
            let n = SEQ.fetch_add(1, Ordering::Relaxed);
            let dir = std::env::temp_dir()
                .join(format!("picloud-sweep-test-{}-{n}", std::process::id()));
            std::fs::create_dir_all(dir.join("files").join("ab")).unwrap();
            Self(dir)
        }

        /// The root to hand to `sweep_orphan_tmp_files`.
        fn path(&self) -> &Path {
            &self.0
        }

        /// Path of a blob named `name` inside the `files/ab/` shard.
        fn blob(&self, name: &str) -> PathBuf {
            self.0.join("files").join("ab").join(name)
        }
    }

    impl Drop for TestRoot {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask a test result.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Create a one-byte file at `path`.
    fn touch(path: &Path) {
        std::fs::write(path, b"x").unwrap();
    }

    #[test]
    fn deletes_old_tmp_files() {
        let root = TestRoot::new();
        let tmp = root.blob("uuid.tmp.123-0");
        touch(&tmp);
        // ttl 0 → any tmp file counts as old.
        let stats = sweep_orphan_tmp_files(root.path(), Duration::ZERO);
        assert_eq!(stats.files_deleted, 1);
        assert!(!tmp.exists());
        assert_eq!(stats.bytes_reclaimed, 1);
        // `files/` plus the nested `ab/` shard.
        assert_eq!(stats.dirs_walked, 2);
    }

    #[test]
    fn keeps_young_tmp_files() {
        let root = TestRoot::new();
        let tmp = root.blob("uuid.tmp.123-0");
        touch(&tmp);
        // Large TTL → the just-created file is too young to reap.
        let stats = sweep_orphan_tmp_files(root.path(), Duration::from_secs(3600));
        assert_eq!(stats.files_deleted, 0);
        assert_eq!(stats.bytes_reclaimed, 0);
        assert!(tmp.exists());
    }

    #[test]
    fn keeps_non_tmp_files() {
        let root = TestRoot::new();
        let blob = root.blob("0123456789abcdef");
        touch(&blob);
        let stats = sweep_orphan_tmp_files(root.path(), Duration::ZERO);
        assert_eq!(stats.files_deleted, 0);
        assert!(blob.exists());
    }

    #[test]
    fn missing_root_does_not_panic() {
        // Per-process name so a stray directory left by something else
        // cannot make this test walk a real tree.
        let root = std::env::temp_dir().join(format!(
            "picloud-sweep-nonexistent-{}-{}",
            std::process::id(),
            SEQ.fetch_add(1, Ordering::Relaxed)
        ));
        let stats = sweep_orphan_tmp_files(&root, Duration::ZERO);
        assert_eq!(stats.files_deleted, 0);
        assert_eq!(stats.dirs_walked, 0);
    }
}
|