feat(v1.1.5): files SDK + files:* triggers
Filesystem-backed blob storage, plus `files:*` as the fifth concrete trigger kind.
- `files::collection(c).{create,head,get,update,delete,list}` Rhai SDK
(blob in/out; metadata maps; missing-field throws naming the field).
- `FilesService` trait in picloud-shared; `FsFilesRepo` (atomic
write: temp→fsync→rename→fsync-dir→DB; single-pass SHA-256;
checksum-verified reads → Corrupted) + `FilesServiceImpl` in
manager-core. Metadata in Postgres (0018), bytes on disk under
PICLOUD_FILES_ROOT with 0o700 shard dirs.
- `files:*` trigger kind via the Layout-E pattern (0019: widen both
CHECKs + files_trigger_details), TriggerEvent::Files (metadata only,
no bytes), emit_files fan-out, dispatcher arm, admin endpoint
POST /triggers/files (reuses validate_trigger_target).
- AppFilesRead/AppFilesWrite capabilities → script:read/script:write
(seven-scope commitment held). AppPubsubPublish reserved for v1.1.6.
- Admin files API (list + delete) + dashboard Files view per app.
Cross-app isolation keyed on cx.app_id at every layer. ~45 new tests
(service in-memory, fs tempdir, bridge integration). No DB required
for the suite. publish_ephemeral and the orphan sweep stay deferred.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
339
crates/shared/src/files.rs
Normal file
339
crates/shared/src/files.rs
Normal file
@@ -0,0 +1,339 @@
|
||||
//! `FilesService` — the v1.1.5 filesystem-backed blob store contract.
|
||||
//!
|
||||
//! Lives in `picloud-shared` (not `executor-core`) so the Rhai bridge,
|
||||
//! the manager-core filesystem+Postgres impl, and any in-memory test
|
||||
//! impl can all depend on the same trait without dragging
|
||||
//! `executor-core` into a Postgres or filesystem dependency.
|
||||
//!
|
||||
//! Implementations MUST derive every storage `app_id` from `cx.app_id`
|
||||
//! — never from a script-passed argument. That is the cross-app
|
||||
//! isolation boundary; see `docs/sdk-shape.md`.
|
||||
//!
|
||||
//! `FilesService` is collection-scoped: scripts get a handle via
|
||||
//! `files::collection(name)` and call
|
||||
//! `create`/`head`/`get`/`update`/`delete`/`list` on it. The blob bytes
|
||||
//! never travel through Postgres or through trigger payloads — the row
|
||||
//! is metadata + a SHA-256 checksum; the bytes live on the filesystem.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::SdkCallCx;
|
||||
|
||||
/// Upper bound on a file name, measured in bytes (255), chosen as a
/// portable filename cap.
pub const MAX_FILE_NAME_BYTES: usize = 255;

/// Upper bound on a content-type string, measured in bytes (127).
///
/// RFC 6838 §4.2 limits each of the type and subtype names to 127
/// characters; this cap applies that figure to the whole string.
pub const MAX_CONTENT_TYPE_BYTES: usize = 127;
|
||||
|
||||
/// Input to `create`: everything needed to store a brand-new blob.
///
/// There is no id field — the id is a server-generated UUID and scripts
/// never supply it.
#[derive(Debug, Clone)]
pub struct NewFile {
    /// File name; must be non-blank and at most `MAX_FILE_NAME_BYTES`.
    pub name: String,
    /// Media type; must be non-blank and at most
    /// `MAX_CONTENT_TYPE_BYTES`.
    pub content_type: String,
    /// Blob content; must be non-empty.
    pub data: Vec<u8>,
}
|
||||
|
||||
/// Input to `update`: the replacement content plus optional metadata
/// changes.
///
/// Leaving `name` or `content_type` as `None` keeps the value the file
/// already had.
#[derive(Debug, Clone)]
pub struct FileUpdate {
    /// Replacement content; must be non-empty.
    pub data: Vec<u8>,
    /// New file name, or `None` to keep the existing one.
    pub name: Option<String>,
    /// New media type, or `None` to keep the existing one.
    pub content_type: Option<String>,
}
|
||||
|
||||
/// File metadata as scripts and triggers see it. Serialized into
|
||||
/// `ServiceEvent.payload` (the blob bytes are NOT included — files are
|
||||
/// too big to ship through trigger payloads), and surfaced to Rhai by
|
||||
/// `head` / `list`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FileMeta {
|
||||
pub id: Uuid,
|
||||
pub collection: String,
|
||||
pub name: String,
|
||||
pub content_type: String,
|
||||
pub size: u64,
|
||||
/// Lowercase hex SHA-256 of the content.
|
||||
pub checksum: String,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// One page of file metadata from `FilesService::list`. `next_cursor`
|
||||
/// is `Some` when more pages exist, `None` when exhausted.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FilesListPage {
|
||||
pub files: Vec<FileMeta>,
|
||||
pub next_cursor: Option<String>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait FilesService: Send + Sync {
|
||||
/// Create a new blob; returns its server-generated id. Throws on a
|
||||
/// missing required field, an over-limit blob, or an invalid
|
||||
/// collection name.
|
||||
async fn create(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
new: NewFile,
|
||||
) -> Result<Uuid, FilesError>;
|
||||
|
||||
/// Metadata only — no body read. `None` if the file is missing.
|
||||
async fn head(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
id: &str,
|
||||
) -> Result<Option<FileMeta>, FilesError>;
|
||||
|
||||
/// Full content. `None` if missing. Verifies the stored checksum
|
||||
/// against the bytes on disk and returns `FilesError::Corrupted`
|
||||
/// when they diverge.
|
||||
async fn get(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
id: &str,
|
||||
) -> Result<Option<Vec<u8>>, FilesError>;
|
||||
|
||||
/// Replace content (and optionally metadata). Throws `NotFound`
|
||||
/// when the file doesn't exist.
|
||||
async fn update(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
id: &str,
|
||||
upd: FileUpdate,
|
||||
) -> Result<(), FilesError>;
|
||||
|
||||
/// Delete by id; returns whether the file was present.
|
||||
async fn delete(&self, cx: &SdkCallCx, collection: &str, id: &str) -> Result<bool, FilesError>;
|
||||
|
||||
/// Cursor-paginated metadata listing (same shape as KV's list).
|
||||
async fn list(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
cursor: Option<&str>,
|
||||
limit: u32,
|
||||
) -> Result<FilesListPage, FilesError>;
|
||||
}
|
||||
|
||||
/// Failure modes surfaced to the Rhai bridge. The bridge converts each
|
||||
/// to a Rhai runtime error string; the discriminants exist so internal
|
||||
/// callers (admin endpoints, tests) can react more precisely.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum FilesError {
|
||||
/// Empty collection name, or one containing a path separator / `..`
|
||||
/// / NUL — rejected at the SDK boundary per `docs/sdk-shape.md`.
|
||||
#[error("invalid collection name: {0}")]
|
||||
InvalidCollection(String),
|
||||
|
||||
/// A required field on `create` was missing or empty. The string
|
||||
/// names the field (`name` / `content_type` / `data`).
|
||||
#[error("missing required field: {0}")]
|
||||
MissingField(&'static str),
|
||||
|
||||
/// Blob exceeds the per-file size cap (default 100 MB,
|
||||
/// `PICLOUD_FILES_MAX_FILE_SIZE_BYTES`).
|
||||
#[error("file too large: {size} bytes exceeds limit of {limit} bytes")]
|
||||
TooLarge { size: usize, limit: usize },
|
||||
|
||||
/// Filename exceeds `MAX_FILE_NAME_BYTES`.
|
||||
#[error("file name too long: {0} bytes exceeds 255")]
|
||||
NameTooLong(usize),
|
||||
|
||||
/// Content-type exceeds `MAX_CONTENT_TYPE_BYTES`.
|
||||
#[error("content_type too long: {0} bytes exceeds 127")]
|
||||
ContentTypeTooLong(usize),
|
||||
|
||||
/// `update` on a non-existent file.
|
||||
#[error("file not found")]
|
||||
NotFound,
|
||||
|
||||
/// The bytes on disk no longer match the stored checksum — the
|
||||
/// filesystem corrupted or a backup was misconfigured. The operator
|
||||
/// decides what to do with the metadata-vs-bytes mismatch; the repo
|
||||
/// does NOT auto-delete.
|
||||
#[error("file content corrupted (checksum mismatch)")]
|
||||
Corrupted,
|
||||
|
||||
/// Caller principal lacked the required capability. Only raised when
|
||||
/// `cx.principal.is_some()` — scripts running with `principal: None`
|
||||
/// (public HTTP) operate under script-as-gate semantics and skip
|
||||
/// the capability check.
|
||||
#[error("forbidden")]
|
||||
Forbidden,
|
||||
|
||||
/// Anything else — Postgres unavailable, filesystem I/O error, etc.
|
||||
#[error("files backend error: {0}")]
|
||||
Backend(String),
|
||||
}
|
||||
|
||||
impl NewFile {
|
||||
/// Validate required fields + length caps at the SDK boundary.
|
||||
/// `data` must be non-empty (v1.1.5 treats an empty blob as a
|
||||
/// missing `data` field — see HANDBACK §7).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns the field-specific [`FilesError`] for the first failing
|
||||
/// check.
|
||||
pub fn validate(&self, max_size: usize) -> Result<(), FilesError> {
|
||||
if self.name.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("name"));
|
||||
}
|
||||
if self.content_type.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("content_type"));
|
||||
}
|
||||
if self.data.is_empty() {
|
||||
return Err(FilesError::MissingField("data"));
|
||||
}
|
||||
if self.name.len() > MAX_FILE_NAME_BYTES {
|
||||
return Err(FilesError::NameTooLong(self.name.len()));
|
||||
}
|
||||
if self.content_type.len() > MAX_CONTENT_TYPE_BYTES {
|
||||
return Err(FilesError::ContentTypeTooLong(self.content_type.len()));
|
||||
}
|
||||
if self.data.len() > max_size {
|
||||
return Err(FilesError::TooLarge {
|
||||
size: self.data.len(),
|
||||
limit: max_size,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl FileUpdate {
|
||||
/// Validate the replacement bytes + any supplied metadata.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns the field-specific [`FilesError`] for the first failing
|
||||
/// check.
|
||||
pub fn validate(&self, max_size: usize) -> Result<(), FilesError> {
|
||||
if self.data.is_empty() {
|
||||
return Err(FilesError::MissingField("data"));
|
||||
}
|
||||
if let Some(name) = &self.name {
|
||||
if name.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("name"));
|
||||
}
|
||||
if name.len() > MAX_FILE_NAME_BYTES {
|
||||
return Err(FilesError::NameTooLong(name.len()));
|
||||
}
|
||||
}
|
||||
if let Some(ct) = &self.content_type {
|
||||
if ct.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("content_type"));
|
||||
}
|
||||
if ct.len() > MAX_CONTENT_TYPE_BYTES {
|
||||
return Err(FilesError::ContentTypeTooLong(ct.len()));
|
||||
}
|
||||
}
|
||||
if self.data.len() > max_size {
|
||||
return Err(FilesError::TooLarge {
|
||||
size: self.data.len(),
|
||||
limit: max_size,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Reject a collection name that is empty or could escape the per-app
|
||||
/// files tree. UUID-shaped ids never produce traversal paths, but
|
||||
/// collection names come from scripts so they're validated defensively
|
||||
/// at both the SDK boundary and the repo.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`FilesError::InvalidCollection`] when the name is empty or
|
||||
/// contains `/`, `\`, `..`, or a NUL byte.
|
||||
pub fn validate_collection(collection: &str) -> Result<(), FilesError> {
|
||||
if collection.is_empty() {
|
||||
return Err(FilesError::InvalidCollection("must not be empty".into()));
|
||||
}
|
||||
if collection.contains('/')
|
||||
|| collection.contains('\\')
|
||||
|| collection.contains("..")
|
||||
|| collection.contains('\0')
|
||||
{
|
||||
return Err(FilesError::InvalidCollection(format!(
|
||||
"collection {collection:?} must not contain '/', '\\', '..', or NUL"
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Placeholder `FilesService` for the test harness.
///
/// It lets executor-core integration tests that never touch files build
/// a `Services` bundle without a filesystem or Postgres. Every call
/// returns `FilesError::Backend("...")`, so accidental use surfaces
/// clearly instead of silently succeeding.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopFilesService;
|
||||
|
||||
#[async_trait]
|
||||
impl FilesService for NoopFilesService {
|
||||
async fn create(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_new: NewFile,
|
||||
) -> Result<Uuid, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn head(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
) -> Result<Option<FileMeta>, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn get(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
) -> Result<Option<Vec<u8>>, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn update(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
_upd: FileUpdate,
|
||||
) -> Result<(), FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn delete(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
) -> Result<bool, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn list(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_cursor: Option<&str>,
|
||||
_limit: u32,
|
||||
) -> Result<FilesListPage, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user