From 878cbe94399a918c6d82d4044e34a66bbbbf19f6 Mon Sep 17 00:00:00 2001 From: MechaCat02 Date: Sat, 23 May 2026 22:21:25 +0200 Subject: [PATCH] test(manager-core): schema snapshot guardrail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boots a fresh Postgres via sqlx::test, applies every migration in order, dumps the resulting public schema (tables, columns with type + nullability + default, indexes, constraints, applied migration manifest), and compares against a checked-in golden text file. What this catches: * Someone edits a committed migration — schema diverges from the snapshot, test fails with a precise diff. * Someone adds a migration but forgets to update the snapshot — same divergence; test reminds them. * Two migrations drift apart in any other way — snapshot is the source of truth about the post-replay schema. Update workflow when adding a migration intentionally: BLESS=1 DATABASE_URL=postgres://... \ cargo test -p picloud-manager-core --test schema_snapshot \ -- --include-ignored Review the snapshot diff in the same PR. The header comment makes it clear the file is not for hand-editing. * Snapshot dump uses information_schema.columns + pg_indexes + pg_constraint with pg_get_constraintdef. Output is sorted on every dimension so cosmetic differences (insertion order, etc.) never cause spurious diffs. * #[ignore]'d by default for the same reason as the integration tests — needs DATABASE_URL pointing at a writable Postgres. * Initial expected_schema.txt blessed from the current migrations/ contents (3 tables, 9 indexes, 12 constraints). Wires up enforcement item (4) from docs/versioning.md. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/manager-core/tests/expected_schema.txt | 80 ++++++ crates/manager-core/tests/schema_snapshot.rs | 248 ++++++++++++++++++ 2 files changed, 328 insertions(+) create mode 100644 crates/manager-core/tests/expected_schema.txt create mode 100644 crates/manager-core/tests/schema_snapshot.rs diff --git a/crates/manager-core/tests/expected_schema.txt b/crates/manager-core/tests/expected_schema.txt new file mode 100644 index 0000000..3f316c1 --- /dev/null +++ b/crates/manager-core/tests/expected_schema.txt @@ -0,0 +1,80 @@ +# Schema snapshot — generated by schema_snapshot test. +# Do not edit by hand. Run with BLESS=1 to regenerate. + +## tables + +table: execution_logs + id: uuid NOT NULL default=gen_random_uuid() + script_id: uuid NOT NULL + request_id: uuid NOT NULL + request_path: text NULL + request_headers: jsonb NOT NULL default='{}'::jsonb + request_body: jsonb NULL + response_code: integer NULL + response_body: jsonb NULL + logs: jsonb NOT NULL default='[]'::jsonb + duration_ms: integer NOT NULL default=0 + status: text NOT NULL + created_at: timestamp with time zone NOT NULL default=now() + +table: routes + id: uuid NOT NULL default=gen_random_uuid() + script_id: uuid NOT NULL + host_kind: text NOT NULL + host: text NOT NULL default=''::text + host_param_name: text NULL + path_kind: text NOT NULL + path: text NOT NULL + method: text NULL + created_at: timestamp with time zone NOT NULL default=now() + +table: scripts + id: uuid NOT NULL default=gen_random_uuid() + name: text NOT NULL + description: text NULL + version: integer NOT NULL default=1 + source: text NOT NULL + timeout_seconds: integer NOT NULL default=30 + memory_limit_mb: integer NOT NULL default=256 + created_at: timestamp with time zone NOT NULL default=now() + updated_at: timestamp with time zone NOT NULL default=now() + sandbox: jsonb NOT NULL default='{}'::jsonb + +## indexes + +indexes on execution_logs: + execution_logs_pkey: 
public.execution_logs USING btree (id) + execution_logs_script_id_created_at_idx: public.execution_logs USING btree (script_id, created_at DESC) + +indexes on routes: + routes_lookup_idx: public.routes USING btree (host_kind, host) + routes_pkey: public.routes USING btree (id) + routes_script_id_idx: public.routes USING btree (script_id) + routes_unique_binding_idx: public.routes USING btree (host_kind, host, path_kind, path, COALESCE(method, ''::text)) + +indexes on scripts: + scripts_name_uidx: public.scripts USING btree (lower(name)) + scripts_pkey: public.scripts USING btree (id) + +## constraints + +constraints on execution_logs: + [CHECK] execution_logs_status_check: CHECK ((status = ANY (ARRAY['success'::text, 'error'::text, 'timeout'::text, 'budget_exceeded'::text]))) + [FOREIGN KEY] execution_logs_script_id_fkey: FOREIGN KEY (script_id) REFERENCES scripts(id) ON DELETE CASCADE + [PRIMARY KEY] execution_logs_pkey: PRIMARY KEY (id) + +constraints on routes: + [CHECK] routes_host_kind_check: CHECK ((host_kind = ANY (ARRAY['any'::text, 'strict'::text, 'wildcard'::text]))) + [CHECK] routes_path_kind_check: CHECK ((path_kind = ANY (ARRAY['exact'::text, 'prefix'::text, 'param'::text]))) + [FOREIGN KEY] routes_script_id_fkey: FOREIGN KEY (script_id) REFERENCES scripts(id) ON DELETE CASCADE + [PRIMARY KEY] routes_pkey: PRIMARY KEY (id) + +constraints on scripts: + [CHECK] scripts_memory_limit_mb_check: CHECK (((memory_limit_mb > 0) AND (memory_limit_mb <= 2048))) + [CHECK] scripts_timeout_seconds_check: CHECK (((timeout_seconds > 0) AND (timeout_seconds <= 300))) + [PRIMARY KEY] scripts_pkey: PRIMARY KEY (id) + +## applied migrations + 0001: init + 0002: sandbox + 0003: routes diff --git a/crates/manager-core/tests/schema_snapshot.rs b/crates/manager-core/tests/schema_snapshot.rs new file mode 100644 index 0000000..dacf5e5 --- /dev/null +++ b/crates/manager-core/tests/schema_snapshot.rs @@ -0,0 +1,248 @@ +//! Schema snapshot guardrail. +//! +//! 
Creates a fresh Postgres database (via `#[sqlx::test]`), applies every +//! migration in `migrations/` in order, then queries `information_schema`, +//! `pg_indexes`, and `pg_constraint` for a deterministic dump of every +//! table, column, index, and constraint the migrations produce. Compares +//! against the checked-in `expected_schema.txt` golden file. +//! +//! **What this catches:** +//! +//! * Someone edits a committed migration instead of adding a new +//! one — the resulting schema diverges from the golden. +//! * Someone adds a migration but forgets to update the golden — +//! same divergence; the test reminds them. +//! * Two migrations drift apart (the snapshot is the source of +//! truth about what the schema *should* be after replay). +//! +//! **How to update the golden after an intentional migration:** +//! +//! ```sh +//! BLESS=1 DATABASE_URL=postgres://... \ +//! cargo test -p picloud-manager-core --test schema_snapshot \ +//! -- --include-ignored +//! ``` +//! +//! Review the resulting diff in the same PR as the new migration. +//! +//! Like the orchestrator integration tests, this is `#[ignore]`'d by +//! default so plain `cargo test --workspace` stays green without +//! infrastructure. 
+
+use std::fmt::Write as _;
+use std::path::PathBuf;
+
+use sqlx::{PgPool, Row};
+
+const SCHEMA: &str = "public";
+
+const SNAPSHOT_PATH: &str = "tests/expected_schema.txt";
+
+/// Replays all migrations into the `#[sqlx::test]` database and compares
+/// the dump with the golden file; with `BLESS` set, rewrites it instead.
+#[ignore = "needs DATABASE_URL pointing at a running Postgres"]
+#[sqlx::test(migrations = "./migrations")]
+async fn schema_after_replay_matches_snapshot(pool: PgPool) {
+    let actual = dump_schema(&pool).await;
+
+    let snapshot_file = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(SNAPSHOT_PATH);
+
+    if std::env::var("BLESS").is_ok() {
+        std::fs::write(&snapshot_file, &actual).expect("failed to write snapshot");
+        eprintln!(
+            "BLESS=1 set: refreshed {} ({} bytes)",
+            snapshot_file.display(),
+            actual.len()
+        );
+        return;
+    }
+
+    let expected = std::fs::read_to_string(&snapshot_file).unwrap_or_else(|_| {
+        panic!(
+            "missing snapshot file at {}; run with BLESS=1 to create it",
+            snapshot_file.display()
+        )
+    });
+
+    if actual != expected {
+        eprintln!(
+            "schema snapshot mismatch.\n\
+             To re-bless after an intentional migration:\n\
+             \n \
+             BLESS=1 DATABASE_URL=postgres://... \\\n \
+             cargo test -p picloud-manager-core --test schema_snapshot \\\n \
+             -- --include-ignored\n\
+             \n--- expected ---\n{expected}\n--- actual ---\n{actual}\n"
+        );
+        panic!("schema diverged from {}", snapshot_file.display());
+    }
+}
+
+/// Build a deterministic representation of the `public` schema.
+/// Sorted by every dimension so cosmetic differences (insertion
+/// order, etc.) never cause spurious diffs.
+async fn dump_schema(pool: &PgPool) -> String {
+    let mut out = String::from(
+        "# Schema snapshot — generated by schema_snapshot test.\n\
+         # Do not edit by hand. Run with BLESS=1 to regenerate.\n\n",
+    );
+
+    dump_tables(pool, &mut out).await;
+    dump_indexes(pool, &mut out).await;
+    dump_constraints(pool, &mut out).await;
+    dump_migrations(pool, &mut out).await;
+
+    out
+}
+
+/// Appends the `## tables` section: each base table in `SCHEMA` with its
+/// columns in ordinal order (type, nullability, default). Tables whose
+/// name starts with `_sqlx` (sqlx bookkeeping) are left out.
+async fn dump_tables(pool: &PgPool, out: &mut String) {
+    let rows = sqlx::query(
+        "SELECT \
+            c.table_name, c.column_name, c.data_type, c.udt_name, \
+            c.is_nullable, c.column_default \
+         FROM information_schema.columns c \
+         JOIN information_schema.tables t \
+           ON t.table_schema = c.table_schema AND t.table_name = c.table_name \
+         WHERE c.table_schema = $1 AND t.table_type = 'BASE TABLE' \
+           AND c.table_name NOT LIKE '\\_sqlx%' \
+         ORDER BY c.table_name, c.ordinal_position",
+    )
+    .bind(SCHEMA)
+    .fetch_all(pool)
+    .await
+    .expect("failed to query information_schema.columns");
+
+    writeln!(out, "## tables").unwrap();
+    let mut current_table = String::new();
+    for row in rows {
+        let table: String = row.try_get("table_name").unwrap();
+        let column: String = row.try_get("column_name").unwrap();
+        let data_type: String = row.try_get("data_type").unwrap();
+        let udt: String = row.try_get("udt_name").unwrap();
+        let nullable: String = row.try_get("is_nullable").unwrap();
+        let default: Option<String> = row.try_get("column_default").unwrap();
+
+        if table != current_table {
+            writeln!(out, "\ntable: {table}").unwrap();
+            current_table = table;
+        }
+        // `data_type` names built-in types directly but collapses
+        // enum/extension types to "USER-DEFINED" and every array to
+        // "ARRAY"; `udt_name` carries the concrete type for those.
+        let kind = if data_type == "USER-DEFINED" || data_type == "ARRAY" {
+            udt
+        } else {
+            data_type
+        };
+        let null_str = if nullable == "YES" {
+            "NULL"
+        } else {
+            "NOT NULL"
+        };
+        let default_str = default.map(|d| format!(" default={d}")).unwrap_or_default();
+        writeln!(out, " {column}: {kind} {null_str}{default_str}").unwrap();
+    }
+    writeln!(out).unwrap();
+}
+
+/// Appends the `## indexes` section, grouped by table and sorted by index
+/// name. pg_indexes is the most portable source of truth for index DDL.
+async fn dump_indexes(pool: &PgPool, out: &mut String) {
+    let rows = sqlx::query(
+        "SELECT tablename, indexname, indexdef \
+         FROM pg_indexes \
+         WHERE schemaname = $1 \
+           AND tablename NOT LIKE '\\_sqlx%' \
+         ORDER BY tablename, indexname",
+    )
+    .bind(SCHEMA)
+    .fetch_all(pool)
+    .await
+    .expect("failed to query pg_indexes");
+
+    writeln!(out, "## indexes").unwrap();
+    let mut current_table = String::new();
+    for row in rows {
+        let table: String = row.try_get("tablename").unwrap();
+        let name: String = row.try_get("indexname").unwrap();
+        let def: String = row.try_get("indexdef").unwrap();
+        if table != current_table {
+            writeln!(out, "\nindexes on {table}:").unwrap();
+            current_table = table;
+        }
+        // Keep only the text after " ON ". NOTE(review): this also drops
+        // the `UNIQUE` keyword of `CREATE UNIQUE INDEX`, so uniqueness of
+        // an index without a backing constraint is not in the snapshot.
+        let summary = def
+            .split_once(" ON ")
+            .map_or(def.as_str(), |(_, after)| after);
+        writeln!(out, " {name}: {summary}").unwrap();
+    }
+    writeln!(out).unwrap();
+}
+
+/// Appends the `## constraints` section. CHECK / FK / UNIQUE / PRIMARY KEY
+/// definitions live across a few information_schema views; pg_constraint
+/// via pg_get_constraintdef gives the canonical text in one query.
+async fn dump_constraints(pool: &PgPool, out: &mut String) {
+    let rows = sqlx::query(
+        "SELECT \
+            cl.relname AS table_name, \
+            con.conname AS constraint_name, \
+            con.contype::text AS contype, \
+            pg_get_constraintdef(con.oid) AS definition \
+         FROM pg_constraint con \
+         JOIN pg_class cl ON cl.oid = con.conrelid \
+         JOIN pg_namespace ns ON ns.oid = cl.relnamespace \
+         WHERE ns.nspname = $1 \
+           AND cl.relname NOT LIKE '\\_sqlx%' \
+         ORDER BY cl.relname, con.contype, con.conname",
+    )
+    .bind(SCHEMA)
+    .fetch_all(pool)
+    .await
+    .expect("failed to query pg_constraint");
+
+    writeln!(out, "## constraints").unwrap();
+    let mut current_table = String::new();
+    for row in rows {
+        let table: String = row.try_get("table_name").unwrap();
+        let name: String = row.try_get("constraint_name").unwrap();
+        let kind_raw: String = row.try_get("contype").unwrap();
+        let def: String = row.try_get("definition").unwrap();
+        let kind = match kind_raw.as_str() {
+            "p" => "PRIMARY KEY",
+            "f" => "FOREIGN KEY",
+            "u" => "UNIQUE",
+            "c" => "CHECK",
+            other => other,
+        };
+        if table != current_table {
+            writeln!(out, "\nconstraints on {table}:").unwrap();
+            current_table = table;
+        }
+        writeln!(out, " [{kind}] {name}: {def}").unwrap();
+    }
+    writeln!(out).unwrap();
+}
+
+/// Appends the `## applied migrations` section: the version and
+/// description of every row in `_sqlx_migrations`, ordered by version. A
+/// schema that looks identical but was produced by a different migration
+/// sequence would still surface here.
+async fn dump_migrations(pool: &PgPool, out: &mut String) {
+    let rows = sqlx::query("SELECT version, description FROM _sqlx_migrations ORDER BY version")
+        .fetch_all(pool)
+        .await
+        .expect("failed to query _sqlx_migrations");
+
+    writeln!(out, "## applied migrations").unwrap();
+    for row in rows {
+        let version: i64 = row.try_get("version").unwrap();
+        let description: String = row.try_get("description").unwrap();
+        writeln!(out, " {version:04}: {description}").unwrap();
+    }
+}