Files
PiCloud/crates/manager-core/tests/schema_snapshot.rs
MechaCat02 878cbe9439 test(manager-core): schema snapshot guardrail
Boots a fresh Postgres via sqlx::test, applies every migration in
order, dumps the resulting public schema (tables, columns with type
+ nullability + default, indexes, constraints, applied migration
manifest), and compares against a checked-in golden text file.

What this catches:
  * Someone edits a committed migration — schema diverges from the
    snapshot, test fails with a precise diff.
  * Someone adds a migration but forgets to update the snapshot —
    same divergence; test reminds them.
  * Two migrations drift apart in any other way — snapshot is the
    source of truth about the post-replay schema.

Update workflow when adding a migration intentionally:

  BLESS=1 DATABASE_URL=postgres://... \
    cargo test -p picloud-manager-core --test schema_snapshot \
    -- --include-ignored

Review the snapshot diff in the same PR. The header comment makes
it clear the file is not for hand-editing.

  * Snapshot dump uses information_schema.columns + pg_indexes +
    pg_constraint with pg_get_constraintdef. Output is sorted on
    every dimension so cosmetic differences (insertion order,
    etc.) never cause spurious diffs.

  * #[ignore]'d by default for the same reason as the integration
    tests — needs DATABASE_URL pointing at a writable Postgres.

  * Initial expected_schema.txt blessed from the current
    migrations/ contents (3 tables, 9 indexes, 12 constraints).

Wires up enforcement item (4) from docs/versioning.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 22:21:25 +02:00

249 lines
8.8 KiB
Rust

//! Schema snapshot guardrail.
//!
//! Boots a fresh Postgres (via `#[sqlx::test]`), applies every
//! migration in `migrations/` in order, then queries
//! `information_schema`, the Postgres system catalogs (`pg_indexes`,
//! `pg_constraint`), and `_sqlx_migrations` for a deterministic dump of
//! every table, column, index, constraint, and applied migration.
//! Compares against the checked-in `expected_schema.txt` golden file.
//!
//! **What this catches:**
//!
//! * Someone edits a committed migration instead of adding a new
//! one — the resulting schema diverges from the golden.
//! * Someone adds a migration but forgets to update the golden —
//! same divergence; the test reminds them.
//! * Two migrations drift apart (the snapshot is the source of
//! truth about what the schema *should* be after replay).
//!
//! **How to update the golden after an intentional migration:**
//!
//! ```sh
//! BLESS=1 DATABASE_URL=postgres://... \
//! cargo test -p picloud-manager-core --test schema_snapshot \
//! -- --include-ignored
//! ```
//!
//! Review the resulting diff in the same PR as the new migration.
//!
//! Like the orchestrator integration tests, this is `#[ignore]`'d by
//! default so plain `cargo test --workspace` stays green without
//! infrastructure.
use std::fmt::Write as _;
use std::path::PathBuf;
use sqlx::{PgPool, Row};
const SCHEMA: &str = "public";
const SNAPSHOT_PATH: &str = "tests/expected_schema.txt";
/// Replays the migrations into the pool's database, dumps the resulting
/// schema, and compares the dump with the checked-in golden file.
///
/// When the `BLESS` environment variable is set to a non-empty value other
/// than `0`, the golden file is overwritten with the current dump and the
/// test returns without comparing anything.
///
/// # Panics
///
/// Panics when the golden file cannot be written or read, or when the dump
/// differs from it. On a mismatch both texts are printed to stderr and the
/// panic message names the first differing line.
#[ignore = "needs DATABASE_URL pointing at a running Postgres"]
#[sqlx::test(migrations = "./migrations")]
async fn schema_after_replay_matches_snapshot(pool: PgPool) {
    let actual = dump_schema(&pool).await;
    let snapshot_file = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(SNAPSHOT_PATH);

    // An empty value or "0" must not count as a bless request; otherwise a
    // stray `BLESS=` / `BLESS=0` in the environment would silently overwrite
    // the golden file and make the guardrail pass unconditionally.
    let bless = match std::env::var_os("BLESS") {
        Some(value) => !value.is_empty() && value != "0",
        None => false,
    };
    if bless {
        std::fs::write(&snapshot_file, &actual).expect("failed to write snapshot");
        eprintln!(
            "BLESS=1 set: refreshed {} ({} bytes)",
            snapshot_file.display(),
            actual.len()
        );
        return;
    }

    // Surface the underlying I/O error: "not found" is only one of the ways
    // this read can fail (permissions, invalid UTF-8, ...).
    let expected = std::fs::read_to_string(&snapshot_file).unwrap_or_else(|err| {
        panic!(
            "cannot read snapshot file at {} ({err}); run with BLESS=1 to create it",
            snapshot_file.display()
        )
    });

    if actual != expected {
        eprintln!(
            "schema snapshot mismatch.\n\
             To re-bless after an intentional migration:\n\
             \n \
             BLESS=1 DATABASE_URL=postgres://... \\\n \
             cargo test -p picloud-manager-core --test schema_snapshot \\\n \
             -- --include-ignored\n\
             \n--- expected ---\n{expected}\n--- actual ---\n{actual}\n"
        );
        let detail = match first_mismatch(&expected, &actual) {
            Some((line, exp, act)) => {
                format!("first difference at line {line}: expected {exp:?}, actual {act:?}")
            }
            // `str::lines` ignores `\r\n` vs `\n` and the final newline, so
            // equal line sequences mean only those bytes differ.
            None => "contents differ only in line endings or the final newline".to_string(),
        };
        panic!("schema diverged from {}: {detail}", snapshot_file.display());
    }
}

/// Returns the 1-based number and both sides of the first line at which
/// `expected` and `actual` differ, or `None` when their lines are identical.
/// A side is `None` when that text has run out of lines.
fn first_mismatch<'a>(
    expected: &'a str,
    actual: &'a str,
) -> Option<(usize, Option<&'a str>, Option<&'a str>)> {
    let mut expected_lines = expected.lines();
    let mut actual_lines = actual.lines();
    let mut line_no = 1;
    loop {
        match (expected_lines.next(), actual_lines.next()) {
            (None, None) => return None,
            (exp, act) if exp == act => line_no += 1,
            (exp, act) => return Some((line_no, exp, act)),
        }
    }
}
/// Render the `public` schema as deterministic text.
///
/// The result starts with a fixed "do not edit" header followed by the
/// tables, indexes, constraints, and applied-migrations sections, in that
/// order. Each section is produced by a query with an explicit `ORDER BY`.
async fn dump_schema(pool: &PgPool) -> String {
    // Two header lines plus one blank separator line.
    let mut snapshot = String::from(
        "# Schema snapshot — generated by schema_snapshot test.\n\
         # Do not edit by hand. Run with BLESS=1 to regenerate.\n\
         \n",
    );

    dump_tables(pool, &mut snapshot).await;
    dump_indexes(pool, &mut snapshot).await;
    dump_constraints(pool, &mut snapshot).await;
    dump_migrations(pool, &mut snapshot).await;

    snapshot
}
/// Append the `## tables` section: one `table:` group per base table in
/// `SCHEMA`, listing each column with its type, nullability, and default.
///
/// Tables whose name starts with `_sqlx` are skipped. Columns appear in
/// ordinal position within each table.
///
/// # Panics
///
/// Panics if the query fails or a column cannot be decoded.
async fn dump_tables(pool: &PgPool, out: &mut String) {
    const COLUMNS_SQL: &str = "SELECT \
        c.table_name, c.column_name, c.data_type, c.udt_name, \
        c.is_nullable, c.column_default \
        FROM information_schema.columns c \
        JOIN information_schema.tables t \
        ON t.table_schema = c.table_schema AND t.table_name = c.table_name \
        WHERE c.table_schema = $1 AND t.table_type = 'BASE TABLE' \
        AND c.table_name NOT LIKE '\\_sqlx%' \
        ORDER BY c.table_name, c.ordinal_position";

    let column_rows = sqlx::query(COLUMNS_SQL)
        .bind(SCHEMA)
        .fetch_all(pool)
        .await
        .expect("failed to query information_schema.columns");

    out.push_str("## tables\n");

    // Name of the table whose group header was written most recently.
    let mut last_table: Option<String> = None;
    for row in &column_rows {
        let table: String = row.try_get("table_name").unwrap();
        let column: String = row.try_get("column_name").unwrap();
        let data_type: String = row.try_get("data_type").unwrap();
        let udt: String = row.try_get("udt_name").unwrap();
        let nullable: String = row.try_get("is_nullable").unwrap();
        let default: Option<String> = row.try_get("column_default").unwrap();

        if last_table.as_deref() != Some(table.as_str()) {
            writeln!(out, "\ntable: {table}").unwrap();
            last_table = Some(table);
        }

        // information_schema reports non-built-in types (enums, extension
        // types, ...) as "USER-DEFINED"; the real name is then in udt_name.
        // NOTE(review): array columns are reported as "ARRAY", so their
        // element type is not captured here, and length/precision modifiers
        // are not selected either — confirm whether the snapshot should
        // include them.
        let kind = if data_type != "USER-DEFINED" {
            data_type
        } else {
            udt
        };
        let null_str = match nullable.as_str() {
            "YES" => "NULL",
            _ => "NOT NULL",
        };

        write!(out, " {column}: {kind} {null_str}").unwrap();
        if let Some(expr) = default {
            write!(out, " default={expr}").unwrap();
        }
        out.push('\n');
    }

    out.push('\n');
}
/// Append the `## indexes` section: one `indexes on <table>:` group per
/// table in `SCHEMA`, listing each index name with its abbreviated DDL.
///
/// Tables whose name starts with `_sqlx` are skipped. Rows are ordered by
/// table name, then index name.
///
/// # Panics
///
/// Panics if the query fails or a column cannot be decoded.
async fn dump_indexes(pool: &PgPool, out: &mut String) {
    const INDEXES_SQL: &str = "SELECT tablename, indexname, indexdef \
        FROM pg_indexes \
        WHERE schemaname = $1 \
        AND tablename NOT LIKE '\\_sqlx%' \
        ORDER BY tablename, indexname";

    let index_rows = sqlx::query(INDEXES_SQL)
        .bind(SCHEMA)
        .fetch_all(pool)
        .await
        .expect("failed to query pg_indexes");

    out.push_str("## indexes\n");

    // Name of the table whose group header was written most recently.
    let mut last_table: Option<String> = None;
    for row in &index_rows {
        let table: String = row.try_get("tablename").unwrap();
        let index_name: String = row.try_get("indexname").unwrap();
        let ddl: String = row.try_get("indexdef").unwrap();

        if last_table.as_deref() != Some(table.as_str()) {
            writeln!(out, "\nindexes on {table}:").unwrap();
            last_table = Some(table);
        }

        // Keep only what follows the first " ON " (target table, access
        // method, columns, predicate); fall back to the full DDL otherwise.
        // NOTE(review): this also drops a `UNIQUE` keyword from the
        // `CREATE [UNIQUE] INDEX ...` prefix, so a standalone unique index is
        // indistinguishable from a plain one in the snapshot — confirm
        // whether that is acceptable before relying on this section.
        let summary = match ddl.split_once(" ON ") {
            Some((_, target)) => target,
            None => ddl.as_str(),
        };
        writeln!(out, " {index_name}: {summary}").unwrap();
    }

    out.push('\n');
}
/// Append the `## constraints` section: one `constraints on <table>:` group
/// per table in `SCHEMA`, listing each constraint with a readable kind tag,
/// its name, and the text returned by `pg_get_constraintdef`.
///
/// Tables whose name starts with `_sqlx` are skipped. Rows are ordered by
/// table name, constraint type code, then constraint name.
///
/// # Panics
///
/// Panics if the query fails or a column cannot be decoded.
async fn dump_constraints(pool: &PgPool, out: &mut String) {
    // Single-letter `contype` codes that get a spelled-out tag; any other
    // code is written through unchanged.
    const KIND_LABELS: [(&str, &str); 4] = [
        ("p", "PRIMARY KEY"),
        ("f", "FOREIGN KEY"),
        ("u", "UNIQUE"),
        ("c", "CHECK"),
    ];
    // pg_constraint + pg_get_constraintdef yields every constraint kind and
    // its definition text from a single query.
    const CONSTRAINTS_SQL: &str = "SELECT \
        cl.relname AS table_name, \
        con.conname AS constraint_name, \
        con.contype::text AS contype, \
        pg_get_constraintdef(con.oid) AS definition \
        FROM pg_constraint con \
        JOIN pg_class cl ON cl.oid = con.conrelid \
        JOIN pg_namespace ns ON ns.oid = cl.relnamespace \
        WHERE ns.nspname = $1 \
        AND cl.relname NOT LIKE '\\_sqlx%' \
        ORDER BY cl.relname, con.contype, con.conname";

    let constraint_rows = sqlx::query(CONSTRAINTS_SQL)
        .bind(SCHEMA)
        .fetch_all(pool)
        .await
        .expect("failed to query pg_constraint");

    out.push_str("## constraints\n");

    // Name of the table whose group header was written most recently.
    let mut last_table: Option<String> = None;
    for row in &constraint_rows {
        let table: String = row.try_get("table_name").unwrap();
        let constraint: String = row.try_get("constraint_name").unwrap();
        let contype: String = row.try_get("contype").unwrap();
        let definition: String = row.try_get("definition").unwrap();

        let label = KIND_LABELS
            .iter()
            .find(|&&(code, _)| code == contype)
            .map_or(contype.as_str(), |&(_, label)| label);

        if last_table.as_deref() != Some(table.as_str()) {
            writeln!(out, "\nconstraints on {table}:").unwrap();
            last_table = Some(table);
        }
        writeln!(out, " [{label}] {constraint}: {definition}").unwrap();
    }

    out.push('\n');
}
/// Append the `## applied migrations` section: one line per row of
/// `_sqlx_migrations`, in ascending version order, showing the version
/// (zero-padded to at least four digits) and its description.
///
/// Listing the versions ties the snapshot to the migration set that produced
/// it, so a change in that set shows up even if the schema text is unchanged.
///
/// # Panics
///
/// Panics if the query fails or a column cannot be decoded.
async fn dump_migrations(pool: &PgPool, out: &mut String) {
    let applied = sqlx::query("SELECT version, description FROM _sqlx_migrations ORDER BY version")
        .fetch_all(pool)
        .await
        .expect("failed to query _sqlx_migrations");

    out.push_str("## applied migrations\n");
    for entry in &applied {
        let version: i64 = entry.try_get("version").unwrap();
        let description: String = entry.try_get("description").unwrap();
        writeln!(out, " {version:04}: {description}").unwrap();
    }
}