feat: end-to-end script CRUD + Rhai execution

Brings the MVP feature set online: upload a Rhai script, get an HTTP endpoint that runs it sandboxed in-process, list/update/delete it, and have invalid sources rejected at upload time. Verified live through Caddy with a full lifecycle (`create → list → get → execute → update → delete`) plus error paths (syntax error, duplicate name, deleted script). Layout — every concern lands behind the trait seam its layer owns, so cluster-mode in v1.3+ is a swap of two impls, not a rewrite: * shared::ScriptValidator — manager calls into validation without a hard dep on executor-core; executor-core implements the trait on `Engine`. Pinned in shared so neither crate has to know about the other. * executor-core::Engine — real Rhai engine: sandbox limits (max operations / string size / map size / call depth), disabled `print`, blocked `import` (DummyModuleResolver), `log::trace/info/warn/error` registered as a static module with shared log-capture buffer (no `log::debug` because `debug` is a Rhai reserved keyword — `log::trace` covers the same need). - `ctx` is pushed as a Scope constant exposing execution_id, script_id, script_name, request_id, invocation_type, request.{path,headers,body}. - Response convention: a Map with `statusCode` is the structured shape (`{statusCode, headers?, body}`); any other return value is a 200 with the value as the body. - Engine::execute is now synchronous (pure compute); the async wrapper + wall-clock timeout live in LocalExecutorClient, which uses `spawn_blocking` and applies a 300s hard ceiling regardless of per-script config. - 10 unit tests cover validate, exec, structured response, ctx exposure, log capture, op-budget enforcement, runtime errors, blocked imports, JSON round-tripping. * manager-core::repo — full sqlx CRUD over the `scripts` table, with proper unique-violation handling for duplicate names. Embedded migrations via `sqlx::migrate!` (one initial `0001_init.sql` for pgcrypto + scripts + execution_logs). 
* manager-core::api — `admin_router` mounts `/scripts` and `/scripts/{id}`. Create + Update validate source through the injected `ScriptValidator` before persistence. Returns proper 422/409/404 status codes via `ApiError::IntoResponse`. * orchestrator-core::api — `data_plane_router` mounts `/execute/{id}`: resolves the script through `ScriptResolver`, constructs the `ExecRequest` from headers+body, awaits `ExecutorClient::execute(..., timeout)`, translates the `ExecResponse` to an axum `Response` with header passthrough. Maps `ExecError` variants to 422/504/502/507. * picloud all-in-one — opens the pool, runs migrations, builds one engine, nests both routers under `/api/admin` and `/api`, enables structured JSON tracing and graceful shutdown on SIGTERM. Single `PostgresScriptRepository` Arc is shared by the admin router (writes) and the resolver (reads). Other changes: * Workspace axum bump 0.7 → 0.8 for the `{id}` path syntax matching the route definitions. * Workspace clippy: allow `needless_pass_by_value` and `boxed_local` to keep API ergonomics over pedantic noise. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 00:00:36 +02:00
parent 9efe678983
commit 4f044e7b81
19 changed files with 1272 additions and 76 deletions
--- a/crates/manager-core/Cargo.toml
+++ b/crates/manager-core/Cargo.toml
@@ -13,6 +13,7 @@ picloud-shared.workspace = true
 picloud-orchestrator-core.workspace = true

 async-trait.workspace = true
+axum.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 thiserror.workspace = true
--- a/crates/manager-core/migrations/0001_init.sql
+++ b/crates/manager-core/migrations/0001_init.sql
@@ -0,0 +1,43 @@
+-- pgcrypto provides gen_random_uuid(). hstore is not needed yet (v1.1+
+-- KV service); leave it for the migration that introduces that feature.
+CREATE EXTENSION IF NOT EXISTS pgcrypto;
+
+CREATE TABLE scripts (  -- one row per uploaded script
+    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),  -- gen_random_uuid() is provided by pgcrypto
+    name            TEXT NOT NULL,  -- uniqueness is enforced case-insensitively by scripts_name_uidx
+    description     TEXT,  -- optional; NULL when not provided
+    version         INTEGER NOT NULL DEFAULT 1,  -- starts at 1; the repository's UPDATE statement increments it
+    source          TEXT NOT NULL,  -- script source text
+
+    timeout_seconds INTEGER NOT NULL DEFAULT 30  CHECK (timeout_seconds  > 0 AND timeout_seconds  <= 300),  -- allowed range 1..300
+    memory_limit_mb INTEGER NOT NULL DEFAULT 256 CHECK (memory_limit_mb  > 0 AND memory_limit_mb  <= 2048),  -- allowed range 1..2048
+
+    created_at      TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at      TIMESTAMPTZ NOT NULL DEFAULT NOW()  -- not trigger-maintained; writers must set it explicitly
+);
+
+-- Names are user-facing; unique so the dashboard list and any future
+-- name-based routing have an obvious identifier to surface.
+CREATE UNIQUE INDEX scripts_name_uidx ON scripts (LOWER(name));
+
+CREATE TABLE execution_logs (  -- one row per recorded script execution
+    id               UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    script_id        UUID NOT NULL REFERENCES scripts(id) ON DELETE CASCADE,  -- rows are deleted together with their script
+    request_id       UUID NOT NULL,
+
+    request_path     TEXT,
+    request_headers  JSONB NOT NULL DEFAULT '{}'::jsonb,  -- defaults to an empty JSON object
+    request_body     JSONB,
+
+    response_code    INTEGER,
+    response_body    JSONB,
+
+    logs             JSONB NOT NULL DEFAULT '[]'::jsonb,  -- defaults to an empty JSON array
+    duration_ms      INTEGER NOT NULL DEFAULT 0,
+    status           TEXT   NOT NULL CHECK (status IN ('success','error','timeout','budget_exceeded')),  -- closed set of outcomes
+
+    created_at       TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX execution_logs_script_id_created_at_idx
+    ON execution_logs (script_id, created_at DESC);
--- a/crates/manager-core/src/api.rs
+++ b/crates/manager-core/src/api.rs
@@ -0,0 +1,197 @@
+//! Control-plane HTTP surface. Mounted by the `picloud` all-in-one
+//! binary under `/api/admin` and by the future split `picloud-manager`
+//! binary at its own root.
+
+use std::sync::Arc;
+
+use axum::{
+    extract::{Path, State},
+    http::StatusCode,
+    response::{IntoResponse, Response},
+    routing::get,
+    Json, Router,
+};
+use picloud_shared::{Script, ScriptId, ScriptValidator, ValidationError};
+use serde::Deserialize;
+
+use crate::repo::{NewScript, ScriptPatch, ScriptRepository, ScriptRepositoryError};
+
+/// Shared state handed to every control-plane handler.
+///
+/// The validator is held as a trait object so that upload-time
+/// validation does not tie the manager to the concrete executor-core
+/// types.
+pub struct AdminState<R> {
+    /// Script persistence backend used by all handlers.
+    pub repo: Arc<R>,
+    /// Source validator consulted before a script is persisted.
+    pub validator: Arc<dyn ScriptValidator>,
+}
+
+// Written by hand rather than derived: both fields are `Arc`s, so a
+// clone only bumps reference counts and must not require `R: Clone`.
+impl<R> Clone for AdminState<R> {
+    fn clone(&self) -> Self {
+        let repo = Arc::clone(&self.repo);
+        let validator = Arc::clone(&self.validator);
+        Self { repo, validator }
+    }
+}
+
+/// Assemble the control-plane router.
+///
+/// Routes are relative; the binary decides the mount point, typically
+/// `Router::new().nest("/api/admin", admin_router(state))`.
+///
+/// * `/scripts` — `GET` lists, `POST` creates.
+/// * `/scripts/{id}` — `GET` fetches, `PUT` updates, `DELETE` removes.
+pub fn admin_router<R: ScriptRepository + 'static>(state: AdminState<R>) -> Router {
+    let collection = get(list_scripts::<R>).post(create_script::<R>);
+    let item = get(get_script::<R>)
+        .put(update_script::<R>)
+        .delete(delete_script::<R>);
+
+    Router::new()
+        .route("/scripts", collection)
+        .route("/scripts/{id}", item)
+        .with_state(state)
+}
+
+// ----------------------------------------------------------------------------
+// DTOs
+// ----------------------------------------------------------------------------
+
+/// JSON body accepted by `POST /scripts`.
+#[derive(Debug, Deserialize)]
+pub struct CreateScriptRequest {
+    /// User-facing script name.
+    pub name: String,
+    /// Optional free-form description.
+    pub description: Option<String>,
+    /// Script source; validated before it is persisted.
+    pub source: String,
+    /// Per-script timeout in seconds; when omitted the repository
+    /// supplies its default.
+    pub timeout_seconds: Option<i32>,
+    /// Per-script memory limit in megabytes; when omitted the
+    /// repository supplies its default.
+    pub memory_limit_mb: Option<i32>,
+}
+
+/// JSON body accepted by `PUT /scripts/{id}`. Every field is optional;
+/// an omitted field leaves the stored value unchanged.
+#[derive(Debug, Deserialize)]
+pub struct UpdateScriptRequest {
+    /// Replacement name, if the script should be renamed.
+    pub name: Option<String>,
+    /// Tri-state description: an absent key (`None`) keeps the current
+    /// value, an explicit `"description": null` (`Some(None)`) clears
+    /// it, and a string (`Some(Some(_))`) replaces it.
+    #[allow(clippy::option_option)]
+    #[serde(default, deserialize_with = "deserialize_optional_optional")]
+    pub description: Option<Option<String>>,
+    /// Replacement source; validated before it is persisted.
+    pub source: Option<String>,
+    /// Replacement timeout in seconds.
+    pub timeout_seconds: Option<i32>,
+    /// Replacement memory limit in megabytes.
+    pub memory_limit_mb: Option<i32>,
+}
+
+/// Deserializes a field that was present in the payload into the outer
+/// `Some`, keeping `null` distinguishable as `Some(None)`.
+///
+/// Serde only calls this when the key exists; a missing key is covered
+/// by `#[serde(default)]` on the field and yields `None`.
+#[allow(clippy::option_option)]
+fn deserialize_optional_optional<'de, D>(d: D) -> Result<Option<Option<String>>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    let present: Option<String> = Deserialize::deserialize(d)?;
+    Ok(Some(present))
+}
+
+// ----------------------------------------------------------------------------
+// Handlers
+// ----------------------------------------------------------------------------
+
+/// `GET /scripts` — returns every stored script as a JSON array.
+async fn list_scripts<R: ScriptRepository>(
+    State(state): State<AdminState<R>>,
+) -> Result<Json<Vec<Script>>, ApiError> {
+    let scripts = state.repo.list().await?;
+    Ok(Json(scripts))
+}
+
+/// `GET /scripts/{id}` — returns one script, or `ApiError::NotFound`
+/// when the repository has no row for `id`.
+async fn get_script<R: ScriptRepository>(
+    State(state): State<AdminState<R>>,
+    Path(id): Path<ScriptId>,
+) -> Result<Json<Script>, ApiError> {
+    match state.repo.get(id).await? {
+        Some(script) => Ok(Json(script)),
+        None => Err(ApiError::NotFound(id)),
+    }
+}
+
+/// `POST /scripts` — validates the submitted source, stores the script
+/// and answers `201 Created` with the persisted row.
+async fn create_script<R: ScriptRepository>(
+    State(state): State<AdminState<R>>,
+    Json(input): Json<CreateScriptRequest>,
+) -> Result<(StatusCode, Json<Script>), ApiError> {
+    let CreateScriptRequest {
+        name,
+        description,
+        source,
+        timeout_seconds,
+        memory_limit_mb,
+    } = input;
+
+    // Reject invalid sources before anything reaches the repository.
+    state.validator.validate(&source)?;
+
+    let new_script = NewScript {
+        name,
+        description,
+        source,
+        timeout_seconds,
+        memory_limit_mb,
+    };
+    let created = state.repo.create(new_script).await?;
+    Ok((StatusCode::CREATED, Json(created)))
+}
+
+/// `PUT /scripts/{id}` — applies a partial update and returns the
+/// resulting row. A replacement source is validated first.
+async fn update_script<R: ScriptRepository>(
+    State(state): State<AdminState<R>>,
+    Path(id): Path<ScriptId>,
+    Json(input): Json<UpdateScriptRequest>,
+) -> Result<Json<Script>, ApiError> {
+    let UpdateScriptRequest {
+        name,
+        description,
+        source,
+        timeout_seconds,
+        memory_limit_mb,
+    } = input;
+
+    // Validation only applies when the request actually carries a source.
+    if let Some(new_source) = source.as_deref() {
+        state.validator.validate(new_source)?;
+    }
+
+    let patch = ScriptPatch {
+        name,
+        description,
+        source,
+        timeout_seconds,
+        memory_limit_mb,
+    };
+    let updated = state.repo.update(id, patch).await?;
+    Ok(Json(updated))
+}
+
+/// `DELETE /scripts/{id}` — removes the script and answers
+/// `204 No Content`; repository errors are converted into `ApiError`.
+async fn delete_script<R: ScriptRepository>(
+    State(state): State<AdminState<R>>,
+    Path(id): Path<ScriptId>,
+) -> Result<StatusCode, ApiError> {
+    match state.repo.delete(id).await {
+        Ok(()) => Ok(StatusCode::NO_CONTENT),
+        Err(err) => Err(err.into()),
+    }
+}
+
+// ----------------------------------------------------------------------------
+// Errors
+// ----------------------------------------------------------------------------
+
+/// Failures a control-plane handler can return; converted into an HTTP
+/// response by the `IntoResponse` impl.
+#[derive(Debug, thiserror::Error)]
+pub enum ApiError {
+    /// No script exists with the requested id.
+    #[error("script not found: {0}")]
+    NotFound(ScriptId),
+
+    /// The request clashes with existing state.
+    #[error("conflict: {0}")]
+    Conflict(String),
+
+    /// The submitted source was rejected by the validator.
+    #[error("invalid script: {0}")]
+    Invalid(#[from] ValidationError),
+
+    /// The repository reported an error.
+    #[error("repository error: {0}")]
+    Repo(#[from] ScriptRepositoryError),
+}
+
+impl IntoResponse for ApiError {
+    /// Renders the error as `{"error": "<message>"}` with the matching
+    /// status code.
+    ///
+    /// Database failures are logged and answered with a generic
+    /// "internal error" so driver details are not sent to the client.
+    fn into_response(self) -> Response {
+        let status = match &self {
+            Self::NotFound(_) | Self::Repo(ScriptRepositoryError::NotFound(_)) => {
+                StatusCode::NOT_FOUND
+            }
+            Self::Conflict(_) | Self::Repo(ScriptRepositoryError::Conflict(_)) => {
+                StatusCode::CONFLICT
+            }
+            Self::Invalid(_) => StatusCode::UNPROCESSABLE_ENTITY,
+            Self::Repo(ScriptRepositoryError::Db(e)) => {
+                tracing::error!(error = %e, "manager db error");
+                StatusCode::INTERNAL_SERVER_ERROR
+            }
+        };
+
+        // Only the database arm maps to 500, and it is the only one
+        // whose message is masked.
+        let message = if status == StatusCode::INTERNAL_SERVER_ERROR {
+            "internal error".to_string()
+        } else {
+            self.to_string()
+        };
+
+        let body = Json(serde_json::json!({ "error": message }));
+        (status, body).into_response()
+    }
+}
--- a/crates/manager-core/src/lib.rs
+++ b/crates/manager-core/src/lib.rs
@@ -4,7 +4,13 @@
 //! the same DB for now; once we add caching and per-node ingress, the
 //! manager will publish change events.

+pub mod api;
+pub mod migrations;
 pub mod repo;
 pub mod scheduler;

-pub use repo::{PostgresScriptRepository, ScriptRepository, ScriptRepositoryError};
+pub use api::{admin_router, AdminState};
+pub use repo::{
+    NewScript, PostgresScriptRepository, RepoResolver, ScriptPatch, ScriptRepository,
+    ScriptRepositoryError,
+};
--- a/crates/manager-core/src/migrations.rs
+++ b/crates/manager-core/src/migrations.rs
@@ -0,0 +1,9 @@
+//! Embedded SQL migrations. Runs against the manager's `PgPool` at
+//! startup. New migrations live in `crates/manager-core/migrations/`
+//! and follow the `NNNN_description.sql` convention.
+
+use sqlx::PgPool;
+
+/// Applies the migrations embedded from `./migrations` to `pool`.
+///
+/// # Errors
+///
+/// Returns the `MigrateError` reported by sqlx when a migration cannot
+/// be applied.
+pub async fn run(pool: &PgPool) -> Result<(), sqlx::migrate::MigrateError> {
+    let migrator = sqlx::migrate!("./migrations");
+    migrator.run(pool).await
+}
--- a/crates/manager-core/src/repo.rs
+++ b/crates/manager-core/src/repo.rs
@@ -10,6 +10,9 @@ pub enum ScriptRepositoryError {

    #[error("not found: {0}")]
    NotFound(ScriptId),
+
+    #[error("conflict: {0}")]
+    Conflict(String),
 }

 /// CRUD over the `scripts` table.
@@ -17,11 +20,36 @@ pub enum ScriptRepositoryError {
 pub trait ScriptRepository: Send + Sync {
    async fn get(&self, id: ScriptId) -> Result<Option<Script>, ScriptRepositoryError>;
    async fn list(&self) -> Result<Vec<Script>, ScriptRepositoryError>;
-    async fn create(&self, script: &Script) -> Result<(), ScriptRepositoryError>;
-    async fn update(&self, script: &Script) -> Result<(), ScriptRepositoryError>;
+    async fn create(&self, input: NewScript) -> Result<Script, ScriptRepositoryError>;
+    async fn update(
+        &self,
+        id: ScriptId,
+        patch: ScriptPatch,
+    ) -> Result<Script, ScriptRepositoryError>;
    async fn delete(&self, id: ScriptId) -> Result<(), ScriptRepositoryError>;
 }

+/// Input for [`ScriptRepository::create`].
+///
+/// Leaving a limit as `None` lets the INSERT fall back to its default
+/// (30 s / 256 MB); out-of-range values are rejected by the table's
+/// CHECK constraints.
+#[derive(Debug, Clone)]
+pub struct NewScript {
+    /// User-facing script name.
+    pub name: String,
+    /// Optional free-form description.
+    pub description: Option<String>,
+    /// Script source text.
+    pub source: String,
+    /// Timeout in seconds, or `None` for the default.
+    pub timeout_seconds: Option<i32>,
+    /// Memory limit in megabytes, or `None` for the default.
+    pub memory_limit_mb: Option<i32>,
+}
+
+/// Input for [`ScriptRepository::update`]. A `None` field keeps the
+/// stored value as it is.
+#[derive(Debug, Clone, Default)]
+pub struct ScriptPatch {
+    /// Replacement name.
+    pub name: Option<String>,
+    /// `None` keeps the description, `Some(None)` clears it and
+    /// `Some(Some(_))` replaces it.
+    pub description: Option<Option<String>>,
+    /// Replacement source text.
+    pub source: Option<String>,
+    /// Replacement timeout in seconds.
+    pub timeout_seconds: Option<i32>,
+    /// Replacement memory limit in megabytes.
+    pub memory_limit_mb: Option<i32>,
+}
+
 pub struct PostgresScriptRepository {
    pool: PgPool,
 }
@@ -31,37 +59,145 @@ impl PostgresScriptRepository {
    pub fn new(pool: PgPool) -> Self {
        Self { pool }
    }
+
+    /// Borrows the connection pool this repository runs its queries on.
+    #[must_use]
+    pub fn pool(&self) -> &PgPool {
+        &self.pool
+    }
 }

-// Real query bodies land alongside the first migration. Stubbing the trait
-// impl so the workspace compiles and the seam is visible.
 #[async_trait]
 impl ScriptRepository for PostgresScriptRepository {
-    async fn get(&self, _id: ScriptId) -> Result<Option<Script>, ScriptRepositoryError> {
-        let _ = &self.pool;
-        Ok(None)
+    /// Looks up a single script by primary key.
+    ///
+    /// Returns `Ok(None)` when no row has the given id.
+    async fn get(&self, id: ScriptId) -> Result<Option<Script>, ScriptRepositoryError> {
+        let sql = "SELECT id, name, description, version, source, \
+                          timeout_seconds, memory_limit_mb, created_at, updated_at \
+                   FROM scripts WHERE id = $1";
+        let maybe_row = sqlx::query_as::<_, ScriptRow>(sql)
+            .bind(id.into_inner())
+            .fetch_optional(&self.pool)
+            .await?;
+        Ok(maybe_row.map(Script::from))
     }

    async fn list(&self) -> Result<Vec<Script>, ScriptRepositoryError> {
-        Ok(Vec::new())
+        let rows = sqlx::query_as::<_, ScriptRow>(
+            "SELECT id, name, description, version, source, \
+                    timeout_seconds, memory_limit_mb, created_at, updated_at \
+             FROM scripts ORDER BY name",
+        )
+        .fetch_all(&self.pool)
+        .await?;
+        Ok(rows.into_iter().map(Into::into).collect())
    }

-    async fn create(&self, _script: &Script) -> Result<(), ScriptRepositoryError> {
-        Ok(())
+    /// Inserts a new script and returns the stored row.
+    ///
+    /// A missing `timeout_seconds` / `memory_limit_mb` falls back to 30
+    /// and 256 respectively. A unique-constraint violation is reported
+    /// as [`ScriptRepositoryError::Conflict`].
+    async fn create(&self, input: NewScript) -> Result<Script, ScriptRepositoryError> {
+        let NewScript {
+            name,
+            description,
+            source,
+            timeout_seconds,
+            memory_limit_mb,
+        } = input;
+
+        let inserted = sqlx::query_as::<_, ScriptRow>(
+            "INSERT INTO scripts (name, description, source, timeout_seconds, memory_limit_mb) \
+             VALUES ($1, $2, $3, COALESCE($4, 30), COALESCE($5, 256)) \
+             RETURNING id, name, description, version, source, \
+                       timeout_seconds, memory_limit_mb, created_at, updated_at",
+        )
+        .bind(&name)
+        .bind(description.as_deref())
+        .bind(&source)
+        .bind(timeout_seconds)
+        .bind(memory_limit_mb)
+        .fetch_one(&self.pool)
+        .await;
+
+        inserted.map(Script::from).map_err(|err| match err {
+            sqlx::Error::Database(db_err) if db_err.is_unique_violation() => {
+                ScriptRepositoryError::Conflict(format!(
+                    "a script named {:?} already exists",
+                    name
+                ))
+            }
+            other => ScriptRepositoryError::from(other),
+        })
     }

-    async fn update(&self, _script: &Script) -> Result<(), ScriptRepositoryError> {
-        Ok(())
+    /// Applies `patch` to the script identified by `id` and returns the
+    /// updated row.
+    ///
+    /// Every successful call increments `version` and refreshes
+    /// `updated_at`, even when the patch changes no column.
+    ///
+    /// # Errors
+    ///
+    /// * [`ScriptRepositoryError::NotFound`] when no row has this id.
+    /// * [`ScriptRepositoryError::Conflict`] when the new name collides
+    ///   with another script (unique index on `LOWER(name)`).
+    /// * Any other database failure is converted from `sqlx::Error`.
+    async fn update(
+        &self,
+        id: ScriptId,
+        patch: ScriptPatch,
+    ) -> Result<Script, ScriptRepositoryError> {
+        // COALESCE-based partial update: `NULL` parameters leave columns
+        // untouched. Description is double-Optioned so callers can
+        // explicitly set it to NULL (Some(None)) vs leave it alone (None).
+        let res = sqlx::query_as::<_, ScriptRow>(
+            "UPDATE scripts SET \
+                name = COALESCE($2, name), \
+                description = CASE WHEN $3::bool THEN $4 ELSE description END, \
+                source = COALESCE($5, source), \
+                timeout_seconds = COALESCE($6, timeout_seconds), \
+                memory_limit_mb = COALESCE($7, memory_limit_mb), \
+                version = version + 1, \
+                updated_at = NOW() \
+             WHERE id = $1 \
+             RETURNING id, name, description, version, source, \
+                       timeout_seconds, memory_limit_mb, created_at, updated_at",
+        )
+        .bind(id.into_inner())
+        .bind(patch.name.as_deref())
+        .bind(patch.description.is_some())
+        .bind(patch.description.as_ref().and_then(|d| d.as_deref()))
+        .bind(patch.source.as_deref())
+        .bind(patch.timeout_seconds)
+        .bind(patch.memory_limit_mb)
+        .fetch_optional(&self.pool)
+        .await;
+
+        match res {
+            Ok(Some(row)) => Ok(row.into()),
+            Ok(None) => Err(ScriptRepositoryError::NotFound(id)),
+            // A rename onto a name that is already taken violates the
+            // unique name index. Report it as a conflict, the same way
+            // `create` does, instead of a generic database error.
+            Err(sqlx::Error::Database(e)) if e.is_unique_violation() => {
+                Err(ScriptRepositoryError::Conflict(format!(
+                    "a script named {:?} already exists",
+                    patch.name.as_deref().unwrap_or_default()
+                )))
+            }
+            Err(e) => Err(e.into()),
+        }
     }

-    async fn delete(&self, _id: ScriptId) -> Result<(), ScriptRepositoryError> {
+    /// Deletes the script with the given id.
+    ///
+    /// Returns [`ScriptRepositoryError::NotFound`] when the statement
+    /// removed no row.
+    async fn delete(&self, id: ScriptId) -> Result<(), ScriptRepositoryError> {
+        let deleted = sqlx::query("DELETE FROM scripts WHERE id = $1")
+            .bind(id.into_inner())
+            .execute(&self.pool)
+            .await?
+            .rows_affected();
+        // Zero affected rows means the id did not exist.
+        if deleted == 0 {
+            return Err(ScriptRepositoryError::NotFound(id));
+        }
         Ok(())
     }
 }

+/// Private decoding target for rows of the `scripts` table; field
+/// names match the selected columns so `sqlx::FromRow` can be derived.
+/// Converted into the shared `Script` type via `From`.
+#[derive(sqlx::FromRow)]
+struct ScriptRow {
+    id: uuid::Uuid,
+    name: String,
+    description: Option<String>,
+    version: i32,
+    source: String,
+    // Stored as INTEGER; converted to `u32` when building a `Script`.
+    timeout_seconds: i32,
+    memory_limit_mb: i32,
+    created_at: chrono::DateTime<chrono::Utc>,
+    updated_at: chrono::DateTime<chrono::Utc>,
+}
+
+impl From<ScriptRow> for Script {
+    /// Converts a database row into the shared `Script` type.
+    ///
+    /// The limits are stored as signed integers; a value that does not
+    /// fit in `u32` falls back to 30 (timeout) or 256 (memory).
+    fn from(r: ScriptRow) -> Self {
+        let ScriptRow {
+            id,
+            name,
+            description,
+            version,
+            source,
+            timeout_seconds,
+            memory_limit_mb,
+            created_at,
+            updated_at,
+        } = r;
+
+        let timeout_seconds = u32::try_from(timeout_seconds).unwrap_or(30);
+        let memory_limit_mb = u32::try_from(memory_limit_mb).unwrap_or(256);
+
+        Self {
+            id: id.into(),
+            name,
+            description,
+            version,
+            source,
+            timeout_seconds,
+            memory_limit_mb,
+            created_at,
+            updated_at,
+        }
+    }
+}
+
 /// Adapts a `ScriptRepository` into the `ScriptResolver` trait the
-/// orchestrator depends on, so we don't pull the manager into the
-/// orchestrator's dependency graph.
+/// orchestrator depends on. Keeps orchestrator-core unaware of how
+/// scripts are stored.
 pub struct RepoResolver<R: ScriptRepository> {
    repo: R,
 }