feat(v1.1.5): files SDK + files:* triggers

Filesystem-backed blob storage as the fifth concrete trigger kind.

- `files::collection(c).{create,head,get,update,delete,list}` Rhai SDK
  (blob in/out; metadata maps; missing-field throws naming the field).
- `FilesService` trait in picloud-shared; `FsFilesRepo` (atomic
  write: temp→fsync→rename→fsync-dir→DB; single-pass SHA-256;
  checksum-verified reads → Corrupted) + `FilesServiceImpl` in
  manager-core. Metadata in Postgres (0018), bytes on disk under
  PICLOUD_FILES_ROOT with 0o700 shard dirs.
- `files:*` trigger kind via the Layout-E pattern (0019: widen both
  CHECKs + files_trigger_details), TriggerEvent::Files (metadata only,
  no bytes), emit_files fan-out, dispatcher arm, admin endpoint
  POST /triggers/files (reuses validate_trigger_target).
- AppFilesRead/AppFilesWrite capabilities → script:read/script:write
  (seven-scope commitment held). AppPubsubPublish reserved for v1.1.6.
- Admin files API (list + delete) + dashboard Files view per app.

Cross-app isolation keyed on cx.app_id at every layer. ~45 new tests
(service in-memory, fs tempdir, bridge integration). No DB required
for the suite. publish_ephemeral and the orphan sweep stay deferred.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
MechaCat02
2026-06-03 21:18:17 +02:00
parent 03d03ea6e7
commit 6e132b6ee0
29 changed files with 3599 additions and 31 deletions

339
crates/shared/src/files.rs Normal file
View File

@@ -0,0 +1,339 @@
//! `FilesService` — the v1.1.5 filesystem-backed blob store contract.
//!
//! Lives in `picloud-shared` (not `executor-core`) so the Rhai bridge,
//! the manager-core filesystem+Postgres impl, and any in-memory test
//! impl can all depend on the same trait without dragging
//! `executor-core` into a Postgres or filesystem dependency.
//!
//! Implementations MUST derive every storage `app_id` from `cx.app_id`
//! — never from a script-passed argument. That is the cross-app
//! isolation boundary; see `docs/sdk-shape.md`.
//!
//! `FilesService` is collection-scoped: scripts get a handle via
//! `files::collection(name)` and call
//! `create`/`head`/`get`/`update`/`delete`/`list` on it. The blob bytes
//! never travel through Postgres or through trigger payloads — the row
//! is metadata + a SHA-256 checksum; the bytes live on the filesystem.
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use uuid::Uuid;
use crate::SdkCallCx;
/// POSIX-portable filename cap (255 bytes). Enforced against
/// `str::len()`, so the limit counts UTF-8 bytes, not characters.
pub const MAX_FILE_NAME_BYTES: usize = 255;
/// Cap on the whole `content_type` string, in bytes (127), enforced
/// against `str::len()`.
///
/// NOTE(review): RFC 6838 §4.2 limits the type name and the subtype name
/// to 127 characters *each*, so a syntactically valid `type/subtype` can
/// be longer than this cap — confirm the tighter whole-string limit is
/// intended.
pub const MAX_CONTENT_TYPE_BYTES: usize = 127;
/// Payload for `create` — a brand-new blob. The id is server-generated
/// (a UUID); scripts never supply it.
///
/// All three fields are required: [`NewFile::validate`] rejects a blank
/// `name` or `content_type` and an empty `data`.
#[derive(Debug, Clone)]
pub struct NewFile {
/// File name; must be non-blank and at most [`MAX_FILE_NAME_BYTES`] bytes.
pub name: String,
/// Media type; must be non-blank and at most
/// [`MAX_CONTENT_TYPE_BYTES`] bytes.
pub content_type: String,
/// Blob content; must be non-empty and no longer than the size cap
/// passed to [`NewFile::validate`].
pub data: Vec<u8>,
}
/// Payload for `update` — replacement bytes plus optional metadata. If
/// `name` / `content_type` are `None` the prior values are kept.
///
/// Replacement bytes are mandatory: [`FileUpdate::validate`] rejects an
/// empty `data`.
#[derive(Debug, Clone)]
pub struct FileUpdate {
/// Replacement content; must be non-empty and no longer than the size
/// cap passed to [`FileUpdate::validate`].
pub data: Vec<u8>,
/// New file name, or `None` to keep the existing one. When supplied it
/// must be non-blank and at most [`MAX_FILE_NAME_BYTES`] bytes.
pub name: Option<String>,
/// New media type, or `None` to keep the existing one. When supplied it
/// must be non-blank and at most [`MAX_CONTENT_TYPE_BYTES`] bytes.
pub content_type: Option<String>,
}
/// File metadata as scripts and triggers see it. Serialized into
/// `ServiceEvent.payload` (the blob bytes are NOT included — files are
/// too big to ship through trigger payloads), and surfaced to Rhai by
/// `head` / `list`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileMeta {
/// Server-generated file id (the value `FilesService::create` returns).
pub id: Uuid,
/// Name of the collection the file belongs to.
pub collection: String,
/// File name as supplied on create / update.
pub name: String,
/// Media type as supplied on create / update.
pub content_type: String,
/// Content size — presumably in bytes; confirm against the repo impl.
pub size: u64,
/// Lowercase hex SHA-256 of the content.
pub checksum: String,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last-update timestamp (UTC).
/// NOTE(review): whether every `update` bumps this is decided by the
/// implementation and is not visible from this type.
pub updated_at: DateTime<Utc>,
}
/// One page of file metadata from `FilesService::list`. `next_cursor`
/// is `Some` when more pages exist, `None` when exhausted.
#[derive(Debug, Clone)]
pub struct FilesListPage {
/// Metadata entries in this page (no blob bytes).
pub files: Vec<FileMeta>,
/// Opaque token to pass back as `cursor` for the next page; `None`
/// once the listing is exhausted.
pub next_cursor: Option<String>,
}
/// Collection-scoped blob-store contract shared by the Rhai bridge and
/// its backing implementations.
///
/// Every method takes the caller's `SdkCallCx` plus a collection name.
/// Implementations MUST derive the storage `app_id` from `cx.app_id`,
/// never from a script-supplied argument — that is the cross-app
/// isolation boundary.
///
/// NOTE(review): ids are accepted as `&str` although `create` returns a
/// `Uuid`; how a string that is not a valid UUID is treated is left to
/// the implementation and is not visible from this trait — confirm.
#[async_trait]
pub trait FilesService: Send + Sync {
/// Create a new blob; returns its server-generated id. Throws on a
/// missing required field, an over-limit blob, or an invalid
/// collection name.
async fn create(
&self,
cx: &SdkCallCx,
collection: &str,
new: NewFile,
) -> Result<Uuid, FilesError>;
/// Metadata only — no body read. `None` if the file is missing.
async fn head(
&self,
cx: &SdkCallCx,
collection: &str,
id: &str,
) -> Result<Option<FileMeta>, FilesError>;
/// Full content. `None` if missing. Verifies the stored checksum
/// against the bytes on disk and returns `FilesError::Corrupted`
/// when they diverge.
async fn get(
&self,
cx: &SdkCallCx,
collection: &str,
id: &str,
) -> Result<Option<Vec<u8>>, FilesError>;
/// Replace content (and optionally metadata). Throws `NotFound`
/// when the file doesn't exist.
async fn update(
&self,
cx: &SdkCallCx,
collection: &str,
id: &str,
upd: FileUpdate,
) -> Result<(), FilesError>;
/// Delete by id; returns whether the file was present, so deleting a
/// missing file is `Ok(false)` rather than an error.
async fn delete(&self, cx: &SdkCallCx, collection: &str, id: &str) -> Result<bool, FilesError>;
/// Cursor-paginated metadata listing (same shape as KV's list).
/// Pass `None` as `cursor` for the first page, then the previous
/// page's `next_cursor` for each subsequent page.
///
/// NOTE(review): the accepted range / clamping of `limit` is not
/// specified by this trait — confirm against the implementation.
async fn list(
&self,
cx: &SdkCallCx,
collection: &str,
cursor: Option<&str>,
limit: u32,
) -> Result<FilesListPage, FilesError>;
}
/// Failure modes surfaced to the Rhai bridge. The bridge converts each
/// to a Rhai runtime error string; the discriminants exist so internal
/// callers (admin endpoints, tests) can react more precisely.
#[derive(Debug, Error)]
pub enum FilesError {
/// Empty collection name, or one containing a path separator / `..`
/// / NUL — rejected at the SDK boundary per `docs/sdk-shape.md`.
#[error("invalid collection name: {0}")]
InvalidCollection(String),
/// A required field on `create` was missing or empty. The string
/// names the field (`name` / `content_type` / `data`).
#[error("missing required field: {0}")]
MissingField(&'static str),
/// Blob exceeds the per-file size cap (default 100 MB,
/// `PICLOUD_FILES_MAX_FILE_SIZE_BYTES`).
#[error("file too large: {size} bytes exceeds limit of {limit} bytes")]
TooLarge { size: usize, limit: usize },
/// Filename exceeds `MAX_FILE_NAME_BYTES`.
#[error("file name too long: {0} bytes exceeds 255")]
NameTooLong(usize),
/// Content-type exceeds `MAX_CONTENT_TYPE_BYTES`.
#[error("content_type too long: {0} bytes exceeds 127")]
ContentTypeTooLong(usize),
/// `update` on a non-existent file.
#[error("file not found")]
NotFound,
/// The bytes on disk no longer match the stored checksum — the
/// filesystem corrupted or a backup was misconfigured. The operator
/// decides what to do with the metadata-vs-bytes mismatch; the repo
/// does NOT auto-delete.
#[error("file content corrupted (checksum mismatch)")]
Corrupted,
/// Caller principal lacked the required capability. Only raised when
/// `cx.principal.is_some()` — scripts running with `principal: None`
/// (public HTTP) operate under script-as-gate semantics and skip
/// the capability check.
#[error("forbidden")]
Forbidden,
/// Anything else — Postgres unavailable, filesystem I/O error, etc.
#[error("files backend error: {0}")]
Backend(String),
}
impl NewFile {
    /// Checks a `create` payload at the SDK boundary: required fields
    /// first, then the name / content-type length caps, then the blob
    /// size against `max_size`.
    ///
    /// An empty `data` is reported as a missing `data` field (v1.1.5
    /// behaviour — see HANDBACK §7).
    ///
    /// # Errors
    ///
    /// Returns the [`FilesError`] belonging to the first check that
    /// fails, in the order listed above.
    pub fn validate(&self, max_size: usize) -> Result<(), FilesError> {
        // Presence problems are reported before any length problem, in
        // field order: name, content_type, data.
        let presence = [
            ("name", self.name.trim().is_empty()),
            ("content_type", self.content_type.trim().is_empty()),
            ("data", self.data.is_empty()),
        ];
        for (field, absent) in presence {
            if absent {
                return Err(FilesError::MissingField(field));
            }
        }

        // Length caps are measured in bytes (`str::len`).
        let name_len = self.name.len();
        if name_len > MAX_FILE_NAME_BYTES {
            return Err(FilesError::NameTooLong(name_len));
        }
        let content_type_len = self.content_type.len();
        if content_type_len > MAX_CONTENT_TYPE_BYTES {
            return Err(FilesError::ContentTypeTooLong(content_type_len));
        }

        // The blob size cap is checked last; a blob of exactly
        // `max_size` bytes is accepted.
        match self.data.len() {
            size if size > max_size => Err(FilesError::TooLarge {
                size,
                limit: max_size,
            }),
            _ => Ok(()),
        }
    }
}
impl FileUpdate {
    /// Checks an `update` payload: the replacement bytes must be
    /// present, any supplied `name` / `content_type` must be non-blank
    /// and within its length cap, and the bytes must fit in `max_size`.
    ///
    /// Metadata left as `None` is not inspected.
    ///
    /// # Errors
    ///
    /// Returns the [`FilesError`] belonging to the first check that
    /// fails, in this order: empty `data`, `name`, `content_type`,
    /// oversized `data`.
    pub fn validate(&self, max_size: usize) -> Result<(), FilesError> {
        let size = self.data.len();
        if size == 0 {
            return Err(FilesError::MissingField("data"));
        }

        // For each optional field, blankness is reported before length.
        match self.name.as_deref() {
            Some(name) if name.trim().is_empty() => {
                return Err(FilesError::MissingField("name"));
            }
            Some(name) if name.len() > MAX_FILE_NAME_BYTES => {
                return Err(FilesError::NameTooLong(name.len()));
            }
            _ => {}
        }
        match self.content_type.as_deref() {
            Some(ct) if ct.trim().is_empty() => {
                return Err(FilesError::MissingField("content_type"));
            }
            Some(ct) if ct.len() > MAX_CONTENT_TYPE_BYTES => {
                return Err(FilesError::ContentTypeTooLong(ct.len()));
            }
            _ => {}
        }

        // The size cap comes last; exactly `max_size` bytes is accepted.
        if size > max_size {
            return Err(FilesError::TooLarge {
                size,
                limit: max_size,
            });
        }
        Ok(())
    }
}
/// Reject a collection name that is empty or could escape the per-app
/// files tree. UUID-shaped ids never produce traversal paths, but
/// collection names come from scripts so they're validated defensively
/// at both the SDK boundary and the repo.
///
/// A bare `.` is rejected as well: it contains no separator, yet as a
/// path component it resolves to the parent directory instead of naming
/// a directory of its own. Names that merely start with a dot (for
/// example `.hidden`) remain valid.
///
/// # Errors
///
/// Returns [`FilesError::InvalidCollection`] when the name is empty, is
/// exactly `.`, or contains `/`, `\`, `..`, or a NUL byte.
pub fn validate_collection(collection: &str) -> Result<(), FilesError> {
    if collection.is_empty() {
        return Err(FilesError::InvalidCollection("must not be empty".into()));
    }
    if collection == "." {
        return Err(FilesError::InvalidCollection("must not be '.'".into()));
    }
    // Separators (both flavours) and NUL are single characters; `..` is
    // matched as a substring, so `a..b` is rejected too.
    let has_forbidden_char = collection.contains(|c: char| matches!(c, '/' | '\\' | '\0'));
    if has_forbidden_char || collection.contains("..") {
        return Err(FilesError::InvalidCollection(format!(
            "collection {collection:?} must not contain '/', '\\', '..', or NUL"
        )));
    }
    Ok(())
}
/// Stub used by the test harness so executor-core integration tests
/// (which don't touch files) can construct a `Services` bundle without
/// a filesystem or Postgres. Every call returns
/// `FilesError::Backend("files is not wired in")` so accidental use
/// surfaces clearly.
///
/// The type is a zero-sized unit struct, so it is `Copy` and can be built
/// with `NoopFilesService` or `NoopFilesService::default()`.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopFilesService;
impl NoopFilesService {
    /// The single error every stubbed operation fails with.
    fn not_wired() -> FilesError {
        FilesError::Backend("files is not wired in".into())
    }
}

/// Every operation ignores its arguments and fails with the same
/// `FilesError::Backend`, so a test that reaches the files service by
/// accident gets an explicit error instead of silently succeeding.
#[async_trait]
impl FilesService for NoopFilesService {
    /// Always fails; nothing is created.
    async fn create(
        &self,
        _cx: &SdkCallCx,
        _collection: &str,
        _new: NewFile,
    ) -> Result<Uuid, FilesError> {
        Err(Self::not_wired())
    }

    /// Always fails; no metadata lookup happens.
    async fn head(
        &self,
        _cx: &SdkCallCx,
        _collection: &str,
        _id: &str,
    ) -> Result<Option<FileMeta>, FilesError> {
        Err(Self::not_wired())
    }

    /// Always fails; no bytes are read.
    async fn get(
        &self,
        _cx: &SdkCallCx,
        _collection: &str,
        _id: &str,
    ) -> Result<Option<Vec<u8>>, FilesError> {
        Err(Self::not_wired())
    }

    /// Always fails; nothing is replaced.
    async fn update(
        &self,
        _cx: &SdkCallCx,
        _collection: &str,
        _id: &str,
        _upd: FileUpdate,
    ) -> Result<(), FilesError> {
        Err(Self::not_wired())
    }

    /// Always fails; nothing is removed.
    async fn delete(
        &self,
        _cx: &SdkCallCx,
        _collection: &str,
        _id: &str,
    ) -> Result<bool, FilesError> {
        Err(Self::not_wired())
    }

    /// Always fails; no page is produced.
    async fn list(
        &self,
        _cx: &SdkCallCx,
        _collection: &str,
        _cursor: Option<&str>,
        _limit: u32,
    ) -> Result<FilesListPage, FilesError> {
        Err(Self::not_wired())
    }
}