feat(v1.1.5): files SDK + files:* triggers
Filesystem-backed blob storage as the fifth concrete trigger kind.
- `files::collection(c).{create,head,get,update,delete,list}` Rhai SDK
(blob in/out; metadata maps; missing-field throws naming the field).
- `FilesService` trait in picloud-shared; `FsFilesRepo` (atomic
write: temp→fsync→rename→fsync-dir→DB; single-pass SHA-256;
checksum-verified reads → Corrupted) + `FilesServiceImpl` in
manager-core. Metadata in Postgres (0018), bytes on disk under
PICLOUD_FILES_ROOT with 0o700 shard dirs.
- `files:*` trigger kind via the Layout-E pattern (0019: widen both
CHECKs + files_trigger_details), TriggerEvent::Files (metadata only,
no bytes), emit_files fan-out, dispatcher arm, admin endpoint
POST /triggers/files (reuses validate_trigger_target).
- AppFilesRead/AppFilesWrite capabilities → script:read/script:write
(seven-scope commitment held). AppPubsubPublish reserved for v1.1.6.
- Admin files API (list + delete) + dashboard Files view per app.
Cross-app isolation keyed on cx.app_id at every layer. ~45 new tests
(service in-memory, fs tempdir, bridge integration). No DB required
for the suite. publish_ephemeral and the orphan sweep stay deferred.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
339
crates/shared/src/files.rs
Normal file
339
crates/shared/src/files.rs
Normal file
@@ -0,0 +1,339 @@
|
||||
//! `FilesService` — the v1.1.5 filesystem-backed blob store contract.
|
||||
//!
|
||||
//! Lives in `picloud-shared` (not `executor-core`) so the Rhai bridge,
|
||||
//! the manager-core filesystem+Postgres impl, and any in-memory test
|
||||
//! impl can all depend on the same trait without dragging
|
||||
//! `executor-core` into a Postgres or filesystem dependency.
|
||||
//!
|
||||
//! Implementations MUST derive every storage `app_id` from `cx.app_id`
|
||||
//! — never from a script-passed argument. That is the cross-app
|
||||
//! isolation boundary; see `docs/sdk-shape.md`.
|
||||
//!
|
||||
//! `FilesService` is collection-scoped: scripts get a handle via
|
||||
//! `files::collection(name)` and call
|
||||
//! `create`/`head`/`get`/`update`/`delete`/`list` on it. The blob bytes
|
||||
//! never travel through Postgres or through trigger payloads — the row
|
||||
//! is metadata + a SHA-256 checksum; the bytes live on the filesystem.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::SdkCallCx;
|
||||
|
||||
/// POSIX-portable filename cap (255 bytes).
|
||||
pub const MAX_FILE_NAME_BYTES: usize = 255;
|
||||
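/// Content-type cap (127 bytes). RFC 6838 §4.2 limits each of the type and
/// subtype names to 127 characters; the same figure is reused here as the
/// ceiling for the whole `content_type` string.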
|
||||
pub const MAX_CONTENT_TYPE_BYTES: usize = 127;
|
||||
|
||||
/// Input to `create`: everything a script supplies for a brand-new blob.
///
/// The id is deliberately absent — it is server-generated (a UUID) and
/// scripts never supply it.
#[derive(Debug, Clone)]
pub struct NewFile {
    /// File name. `NewFile::validate` rejects a blank name and one longer
    /// than `MAX_FILE_NAME_BYTES`.
    pub name: String,
    /// Media type. `NewFile::validate` rejects a blank value and one longer
    /// than `MAX_CONTENT_TYPE_BYTES`.
    pub content_type: String,
    /// Blob content. `NewFile::validate` rejects an empty buffer and one
    /// larger than the caller-supplied size cap.
    pub data: Vec<u8>,
}
|
||||
|
||||
/// Input to `update`: the replacement bytes, plus metadata that may be
/// changed at the same time.
///
/// Leaving `name` / `content_type` as `None` keeps the prior value.
#[derive(Debug, Clone)]
pub struct FileUpdate {
    /// Replacement content. `FileUpdate::validate` rejects an empty buffer
    /// and one larger than the caller-supplied size cap.
    pub data: Vec<u8>,
    /// New file name, or `None` to keep the existing one.
    pub name: Option<String>,
    /// New media type, or `None` to keep the existing one.
    pub content_type: Option<String>,
}
|
||||
|
||||
/// File metadata as scripts and triggers see it. Serialized into
|
||||
/// `ServiceEvent.payload` (the blob bytes are NOT included — files are
|
||||
/// too big to ship through trigger payloads), and surfaced to Rhai by
|
||||
/// `head` / `list`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FileMeta {
|
||||
pub id: Uuid,
|
||||
pub collection: String,
|
||||
pub name: String,
|
||||
pub content_type: String,
|
||||
pub size: u64,
|
||||
/// Lowercase hex SHA-256 of the content.
|
||||
pub checksum: String,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// One page of file metadata from `FilesService::list`. `next_cursor`
|
||||
/// is `Some` when more pages exist, `None` when exhausted.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FilesListPage {
|
||||
pub files: Vec<FileMeta>,
|
||||
pub next_cursor: Option<String>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait FilesService: Send + Sync {
|
||||
/// Create a new blob; returns its server-generated id. Throws on a
|
||||
/// missing required field, an over-limit blob, or an invalid
|
||||
/// collection name.
|
||||
async fn create(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
new: NewFile,
|
||||
) -> Result<Uuid, FilesError>;
|
||||
|
||||
/// Metadata only — no body read. `None` if the file is missing.
|
||||
async fn head(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
id: &str,
|
||||
) -> Result<Option<FileMeta>, FilesError>;
|
||||
|
||||
/// Full content. `None` if missing. Verifies the stored checksum
|
||||
/// against the bytes on disk and returns `FilesError::Corrupted`
|
||||
/// when they diverge.
|
||||
async fn get(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
id: &str,
|
||||
) -> Result<Option<Vec<u8>>, FilesError>;
|
||||
|
||||
/// Replace content (and optionally metadata). Throws `NotFound`
|
||||
/// when the file doesn't exist.
|
||||
async fn update(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
id: &str,
|
||||
upd: FileUpdate,
|
||||
) -> Result<(), FilesError>;
|
||||
|
||||
/// Delete by id; returns whether the file was present.
|
||||
async fn delete(&self, cx: &SdkCallCx, collection: &str, id: &str) -> Result<bool, FilesError>;
|
||||
|
||||
/// Cursor-paginated metadata listing (same shape as KV's list).
|
||||
async fn list(
|
||||
&self,
|
||||
cx: &SdkCallCx,
|
||||
collection: &str,
|
||||
cursor: Option<&str>,
|
||||
limit: u32,
|
||||
) -> Result<FilesListPage, FilesError>;
|
||||
}
|
||||
|
||||
/// Failure modes surfaced to the Rhai bridge. The bridge converts each
|
||||
/// to a Rhai runtime error string; the discriminants exist so internal
|
||||
/// callers (admin endpoints, tests) can react more precisely.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum FilesError {
|
||||
/// Empty collection name, or one containing a path separator / `..`
|
||||
/// / NUL — rejected at the SDK boundary per `docs/sdk-shape.md`.
|
||||
#[error("invalid collection name: {0}")]
|
||||
InvalidCollection(String),
|
||||
|
||||
/// A required field on `create` was missing or empty. The string
|
||||
/// names the field (`name` / `content_type` / `data`).
|
||||
#[error("missing required field: {0}")]
|
||||
MissingField(&'static str),
|
||||
|
||||
/// Blob exceeds the per-file size cap (default 100 MB,
|
||||
/// `PICLOUD_FILES_MAX_FILE_SIZE_BYTES`).
|
||||
#[error("file too large: {size} bytes exceeds limit of {limit} bytes")]
|
||||
TooLarge { size: usize, limit: usize },
|
||||
|
||||
/// Filename exceeds `MAX_FILE_NAME_BYTES`.
|
||||
#[error("file name too long: {0} bytes exceeds 255")]
|
||||
NameTooLong(usize),
|
||||
|
||||
/// Content-type exceeds `MAX_CONTENT_TYPE_BYTES`.
|
||||
#[error("content_type too long: {0} bytes exceeds 127")]
|
||||
ContentTypeTooLong(usize),
|
||||
|
||||
/// `update` on a non-existent file.
|
||||
#[error("file not found")]
|
||||
NotFound,
|
||||
|
||||
/// The bytes on disk no longer match the stored checksum — the
|
||||
/// filesystem corrupted or a backup was misconfigured. The operator
|
||||
/// decides what to do with the metadata-vs-bytes mismatch; the repo
|
||||
/// does NOT auto-delete.
|
||||
#[error("file content corrupted (checksum mismatch)")]
|
||||
Corrupted,
|
||||
|
||||
/// Caller principal lacked the required capability. Only raised when
|
||||
/// `cx.principal.is_some()` — scripts running with `principal: None`
|
||||
/// (public HTTP) operate under script-as-gate semantics and skip
|
||||
/// the capability check.
|
||||
#[error("forbidden")]
|
||||
Forbidden,
|
||||
|
||||
/// Anything else — Postgres unavailable, filesystem I/O error, etc.
|
||||
#[error("files backend error: {0}")]
|
||||
Backend(String),
|
||||
}
|
||||
|
||||
impl NewFile {
|
||||
/// Validate required fields + length caps at the SDK boundary.
|
||||
/// `data` must be non-empty (v1.1.5 treats an empty blob as a
|
||||
/// missing `data` field — see HANDBACK §7).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns the field-specific [`FilesError`] for the first failing
|
||||
/// check.
|
||||
pub fn validate(&self, max_size: usize) -> Result<(), FilesError> {
|
||||
if self.name.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("name"));
|
||||
}
|
||||
if self.content_type.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("content_type"));
|
||||
}
|
||||
if self.data.is_empty() {
|
||||
return Err(FilesError::MissingField("data"));
|
||||
}
|
||||
if self.name.len() > MAX_FILE_NAME_BYTES {
|
||||
return Err(FilesError::NameTooLong(self.name.len()));
|
||||
}
|
||||
if self.content_type.len() > MAX_CONTENT_TYPE_BYTES {
|
||||
return Err(FilesError::ContentTypeTooLong(self.content_type.len()));
|
||||
}
|
||||
if self.data.len() > max_size {
|
||||
return Err(FilesError::TooLarge {
|
||||
size: self.data.len(),
|
||||
limit: max_size,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl FileUpdate {
|
||||
/// Validate the replacement bytes + any supplied metadata.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns the field-specific [`FilesError`] for the first failing
|
||||
/// check.
|
||||
pub fn validate(&self, max_size: usize) -> Result<(), FilesError> {
|
||||
if self.data.is_empty() {
|
||||
return Err(FilesError::MissingField("data"));
|
||||
}
|
||||
if let Some(name) = &self.name {
|
||||
if name.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("name"));
|
||||
}
|
||||
if name.len() > MAX_FILE_NAME_BYTES {
|
||||
return Err(FilesError::NameTooLong(name.len()));
|
||||
}
|
||||
}
|
||||
if let Some(ct) = &self.content_type {
|
||||
if ct.trim().is_empty() {
|
||||
return Err(FilesError::MissingField("content_type"));
|
||||
}
|
||||
if ct.len() > MAX_CONTENT_TYPE_BYTES {
|
||||
return Err(FilesError::ContentTypeTooLong(ct.len()));
|
||||
}
|
||||
}
|
||||
if self.data.len() > max_size {
|
||||
return Err(FilesError::TooLarge {
|
||||
size: self.data.len(),
|
||||
limit: max_size,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Reject a collection name that is empty or could escape the per-app
|
||||
/// files tree. UUID-shaped ids never produce traversal paths, but
|
||||
/// collection names come from scripts so they're validated defensively
|
||||
/// at both the SDK boundary and the repo.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`FilesError::InvalidCollection`] when the name is empty or
|
||||
/// contains `/`, `\`, `..`, or a NUL byte.
|
||||
pub fn validate_collection(collection: &str) -> Result<(), FilesError> {
|
||||
if collection.is_empty() {
|
||||
return Err(FilesError::InvalidCollection("must not be empty".into()));
|
||||
}
|
||||
if collection.contains('/')
|
||||
|| collection.contains('\\')
|
||||
|| collection.contains("..")
|
||||
|| collection.contains('\0')
|
||||
{
|
||||
return Err(FilesError::InvalidCollection(format!(
|
||||
"collection {collection:?} must not contain '/', '\\', '..', or NUL"
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stub used by the test harness so executor-core integration tests
|
||||
/// (which don't touch files) can construct a `Services` bundle without
|
||||
/// a filesystem or Postgres. Every call returns
|
||||
/// `FilesError::Backend("...")` so accidental use surfaces clearly.
|
||||
#[derive(Debug, Default, Clone, Copy)]
|
||||
pub struct NoopFilesService;
|
||||
|
||||
#[async_trait]
|
||||
impl FilesService for NoopFilesService {
|
||||
async fn create(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_new: NewFile,
|
||||
) -> Result<Uuid, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn head(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
) -> Result<Option<FileMeta>, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn get(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
) -> Result<Option<Vec<u8>>, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn update(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
_upd: FileUpdate,
|
||||
) -> Result<(), FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn delete(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_id: &str,
|
||||
) -> Result<bool, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
|
||||
async fn list(
|
||||
&self,
|
||||
_cx: &SdkCallCx,
|
||||
_collection: &str,
|
||||
_cursor: Option<&str>,
|
||||
_limit: u32,
|
||||
) -> Result<FilesListPage, FilesError> {
|
||||
Err(FilesError::Backend("files is not wired in".into()))
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,7 @@ pub mod error;
|
||||
pub mod events;
|
||||
pub mod exec_summary;
|
||||
pub mod execution_log;
|
||||
pub mod files;
|
||||
pub mod http;
|
||||
pub mod ids;
|
||||
pub mod inbox;
|
||||
@@ -36,6 +37,10 @@ pub use error::Error;
|
||||
pub use events::{EmitError, NoopEventEmitter, ServiceEvent, ServiceEventEmitter};
|
||||
pub use exec_summary::ExecResponseSummary;
|
||||
pub use execution_log::{ExecutionLog, ExecutionStatus};
|
||||
pub use files::{
|
||||
validate_collection as validate_files_collection, FileMeta, FileUpdate, FilesError,
|
||||
FilesListPage, FilesService, NewFile, NoopFilesService,
|
||||
};
|
||||
pub use http::{HttpError, HttpRequest, HttpResponse, HttpService, NoopHttpService};
|
||||
pub use ids::{AdminUserId, ApiKeyId, AppId, ExecutionId, RequestId, ScriptId, TriggerId};
|
||||
pub use inbox::{
|
||||
@@ -50,6 +55,8 @@ pub use sandbox::ScriptSandbox;
|
||||
pub use script::{Script, ScriptKind};
|
||||
pub use sdk_cx::SdkCallCx;
|
||||
pub use services::Services;
|
||||
pub use trigger_event::{DeadLetterEventDetail, DocsEventOp, KvEventOp, TriggerEvent};
|
||||
pub use trigger_event::{
|
||||
DeadLetterEventDetail, DocsEventOp, FilesEventOp, KvEventOp, TriggerEvent,
|
||||
};
|
||||
pub use validator::{ScriptValidator, ValidatedScript, ValidationError};
|
||||
pub use version::{API_VERSION, PRODUCT_VERSION, SDK_VERSION, WIRE_VERSION};
|
||||
|
||||
@@ -20,9 +20,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
DeadLetterService, DocsService, HttpService, KvService, ModuleSource, NoopDeadLetterService,
|
||||
NoopDocsService, NoopEventEmitter, NoopHttpService, NoopKvService, NoopModuleSource,
|
||||
ServiceEventEmitter,
|
||||
DeadLetterService, DocsService, FilesService, HttpService, KvService, ModuleSource,
|
||||
NoopDeadLetterService, NoopDocsService, NoopEventEmitter, NoopFilesService, NoopHttpService,
|
||||
NoopKvService, NoopModuleSource, ServiceEventEmitter,
|
||||
};
|
||||
|
||||
/// SDK service bundle. See module docs for the lifecycle and the v1.1.x
|
||||
@@ -60,6 +60,13 @@ pub struct Services {
|
||||
/// the picloud binary; `NoopHttpService` in tests that don't make
|
||||
/// network calls.
|
||||
pub http: Arc<dyn HttpService>,
|
||||
|
||||
/// Filesystem-backed blob storage (v1.1.5). Scripts get
|
||||
/// `files::collection(name).{create,head,get,update,delete,list}`.
|
||||
/// Backed by a Postgres-metadata + on-disk-bytes repo in the
|
||||
/// picloud binary; `NoopFilesService` in tests that don't touch
|
||||
/// files.
|
||||
pub files: Arc<dyn FilesService>,
|
||||
}
|
||||
|
||||
impl Services {
|
||||
@@ -74,6 +81,7 @@ impl Services {
|
||||
events: Arc<dyn ServiceEventEmitter>,
|
||||
modules: Arc<dyn ModuleSource>,
|
||||
http: Arc<dyn HttpService>,
|
||||
files: Arc<dyn FilesService>,
|
||||
) -> Self {
|
||||
Self {
|
||||
kv,
|
||||
@@ -82,6 +90,7 @@ impl Services {
|
||||
events,
|
||||
modules,
|
||||
http,
|
||||
files,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,6 +108,7 @@ impl Services {
|
||||
Arc::new(NoopEventEmitter),
|
||||
Arc::new(NoopModuleSource),
|
||||
Arc::new(NoopHttpService),
|
||||
Arc::new(NoopFilesService),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,6 +78,39 @@ impl DocsEventOp {
|
||||
}
|
||||
}
|
||||
|
||||
/// Operations a files trigger can fire on. v1.1.5. Stored as a
|
||||
/// lowercase string in `files_trigger_details.ops` (Postgres `text[]`).
|
||||
/// CRUD verbs (`create`) mirror `DocsEventOp`, distinct from KV's
|
||||
/// set/upsert flavour.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum FilesEventOp {
|
||||
Create,
|
||||
Update,
|
||||
Delete,
|
||||
}
|
||||
|
||||
impl FilesEventOp {
|
||||
#[must_use]
|
||||
pub const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Create => "create",
|
||||
Self::Update => "update",
|
||||
Self::Delete => "delete",
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn from_wire(s: &str) -> Option<Self> {
|
||||
match s {
|
||||
"create" => Some(Self::Create),
|
||||
"update" => Some(Self::Update),
|
||||
"delete" => Some(Self::Delete),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Discriminated description of a triggering event. Lifted from the
|
||||
/// outbox row's payload at dispatch time. Each variant carries the
|
||||
/// fields the corresponding `ctx.event` shape exposes to the script.
|
||||
@@ -123,6 +156,27 @@ pub enum TriggerEvent {
|
||||
fired_at: DateTime<Utc>,
|
||||
},
|
||||
|
||||
/// A files create / update / delete fired this handler. v1.1.5.
|
||||
/// Carries the affected file's **metadata only** — never the blob
|
||||
/// bytes (files are too big to ship through trigger payloads). A
|
||||
/// handler that wants the bytes calls
|
||||
/// `files::collection(c).get(id)` itself. `prev` is the prior
|
||||
/// metadata for update (and the deleted-row metadata for delete);
|
||||
/// absent on create. Surfaced to scripts as `ctx.event.files`.
|
||||
Files {
|
||||
op: FilesEventOp,
|
||||
collection: String,
|
||||
/// UUID as string — Rhai sees it as a string.
|
||||
id: String,
|
||||
name: String,
|
||||
content_type: String,
|
||||
size: u64,
|
||||
/// Lowercase hex SHA-256.
|
||||
checksum: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
prev: Option<serde_json::Value>,
|
||||
},
|
||||
|
||||
/// A dead-letter row fired this handler. The original event is
|
||||
/// nested verbatim plus the dead-letter metadata the design notes
|
||||
/// §4 require.
|
||||
@@ -148,6 +202,7 @@ impl TriggerEvent {
|
||||
Self::Kv { .. } => "kv",
|
||||
Self::Docs { .. } => "docs",
|
||||
Self::Cron { .. } => "cron",
|
||||
Self::Files { .. } => "files",
|
||||
Self::DeadLetter { .. } => "dead_letter",
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user