diff --git a/Cargo.lock b/Cargo.lock index 812c63f88ba..b14b43e4db2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8194,6 +8194,30 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "spacetimedb-dst" +version = "2.2.0" +dependencies = [ + "anyhow", + "clap 4.5.50", + "futures-util", + "spacetimedb-commitlog", + "spacetimedb-core", + "spacetimedb-datastore", + "spacetimedb-durability", + "spacetimedb-lib 2.2.0", + "spacetimedb-paths", + "spacetimedb-primitives 2.2.0", + "spacetimedb-runtime", + "spacetimedb-sats 2.2.0", + "spacetimedb-schema", + "spacetimedb-snapshot", + "spacetimedb-table", + "tempfile", + "tracing", + "tracing-subscriber", +] + [[package]] name = "spacetimedb-durability" version = "2.2.0" diff --git a/Cargo.toml b/Cargo.toml index f4f74204ea3..094ad0d6b01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "crates/commitlog", "crates/core", "crates/data-structures", + "crates/dst", "crates/datastore", "crates/durability", "crates/execution", diff --git a/crates/commitlog/src/lib.rs b/crates/commitlog/src/lib.rs index abc8729c978..7123b7e3bb6 100644 --- a/crates/commitlog/src/lib.rs +++ b/crates/commitlog/src/lib.rs @@ -156,7 +156,7 @@ impl Options { /// The canonical commitlog API over a repository backend `R`. /// /// The default backend is the on-disk filesystem repository -/// [`repo::Fs`], but tests may supply another [`Repo`] +/// [`repo::Fs`], but tests and simulators may supply another [`Repo`] /// implementation. /// /// Records in the log are of type `T`, which canonically is instantiated to @@ -203,7 +203,7 @@ where { /// Open the log in `repo` with [`Options`]. /// - /// This is useful for tests which provide a repository + /// This is useful for tests and simulators which provide a repository /// implementation other than [`repo::Fs`]. 
pub fn open_with_repo(repo: R, opts: Options) -> io::Result { let inner = commitlog::Generic::open(repo, opts)?; diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 3d79f7f1e28..76c5d2e365b 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -161,6 +161,8 @@ pub trait RepoWithoutLockFile: Repo {} impl RepoWithoutLockFile for &T {} +impl RepoWithoutLockFile for Fs {} + #[cfg(any(test, feature = "test"))] impl RepoWithoutLockFile for Memory {} diff --git a/crates/core/src/database_logger.rs b/crates/core/src/database_logger.rs index 0e202229dea..f194cb60a48 100644 --- a/crates/core/src/database_logger.rs +++ b/crates/core/src/database_logger.rs @@ -11,7 +11,7 @@ use std::path::Path; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, BufReader}; +use tokio::io::{AsyncRead, BufReader, ReadBuf}; use tokio::sync::{broadcast, mpsc, oneshot}; use tokio_stream::wrappers::errors::BroadcastStreamRecvError; use tokio_stream::wrappers::BroadcastStream; @@ -592,7 +592,7 @@ fn seek_to(file: &mut File, buf: &mut [u8], num_lines: u32) -> io::Result<()> { Ok(()) } -fn read_exact_at(file: &std::fs::File, buf: &mut [u8], offset: u64) -> io::Result<()> { +fn read_exact_at(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> { #[cfg(unix)] { use std::os::unix::fs::FileExt; @@ -641,7 +641,7 @@ impl MaybeFile { } impl AsyncRead for MaybeFile { - fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>) -> Poll> { + fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { match self.project() { MaybeFileProj::File { inner } => inner.poll_read(cx, buf), MaybeFileProj::Empty => Poll::Ready(Ok(())), diff --git a/crates/core/src/db/durability.rs b/crates/core/src/db/durability.rs index f749f72850a..07ec4d356c3 100644 --- a/crates/core/src/db/durability.rs +++ b/crates/core/src/db/durability.rs @@ -44,6 
+44,7 @@ pub(super) fn spawn_close(durability: Arc, runtime: &Handle, databas info!("{label} durability shut down at tx offset: {offset:?}"); } } + log::info!("closing spawn close"); }); } diff --git a/crates/core/src/db/persistence.rs b/crates/core/src/db/persistence.rs index ce3ef5d6841..c54a287bec2 100644 --- a/crates/core/src/db/persistence.rs +++ b/crates/core/src/db/persistence.rs @@ -4,10 +4,10 @@ use async_trait::async_trait; use spacetimedb_commitlog::SizeOnDisk; use spacetimedb_durability::{DurabilityExited, TxOffset}; use spacetimedb_paths::server::ServerDataDir; -use spacetimedb_snapshot::DynSnapshotRepo; +use spacetimedb_runtime::Handle; +use spacetimedb_snapshot::{DynSnapshotRepo, SnapshotStore}; use crate::{messages::control_db::Database, util::asyncify}; -use spacetimedb_runtime::Handle; use super::{ relational_db::{self, Txdata}, @@ -36,6 +36,8 @@ pub struct Persistence { /// Currently the expectation is that the reported size is the commitlog /// size only. pub disk_size: DiskSizeFn, + /// Optional snapshot store used during database restore. + pub snapshot_store: Option>, /// An optional [SnapshotWorker]. /// /// The current expectation is that snapshots are only enabled for @@ -63,9 +65,11 @@ impl Persistence { snapshots: Option, runtime: Handle, ) -> Self { + let snapshot_store = snapshots.as_ref().map(SnapshotWorker::snapshot_store); Self { durability: Arc::new(durability), disk_size: Arc::new(disk_size), + snapshot_store, snapshots, runtime, } @@ -76,6 +80,13 @@ impl Persistence { self.snapshots.as_ref().map(|worker| worker.snapshot_repo()) } + /// If snapshot restore is enabled, get the [SnapshotStore] to read from. + pub fn snapshot_store(&self) -> Option> { + self.snapshot_store + .clone() + .or_else(|| self.snapshots.as_ref().map(SnapshotWorker::snapshot_store)) + } + /// Get the [TxOffset] reported as durable by the [Durability] impl. 
/// /// Returns `Ok(None)` if no offset is durable yet, and `Err(DurabilityExited)` @@ -107,6 +118,7 @@ impl Persistence { |Self { durability, disk_size, + snapshot_store: _, snapshots, runtime, }| (Some(durability), Some(disk_size), snapshots, Some(runtime)), @@ -173,6 +185,7 @@ impl PersistenceProvider for LocalPersistenceProvider { Ok(Persistence { durability, disk_size, + snapshot_store: Some(snapshot_worker.snapshot_store()), snapshots: Some(snapshot_worker), runtime, }) diff --git a/crates/core/src/db/relational_db.rs b/crates/core/src/db/relational_db.rs index 57230e8866b..75efb0ad5ee 100644 --- a/crates/core/src/db/relational_db.rs +++ b/crates/core/src/db/relational_db.rs @@ -52,7 +52,7 @@ use spacetimedb_schema::schema::{ ColumnSchema, IndexSchema, RowLevelSecuritySchema, Schema, SequenceSchema, TableSchema, }; use spacetimedb_schema::table_name::TableName; -use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository}; +use spacetimedb_snapshot::{DynSnapshotRepo, ReconstructedSnapshot, SnapshotError, SnapshotRepository, SnapshotStore}; use spacetimedb_table::indexes::RowPointer; use spacetimedb_table::page_pool::PagePool; use spacetimedb_table::table::{RowRef, TableScanIter}; @@ -279,10 +279,10 @@ impl RelationalDB { let start_time = std::time::Instant::now(); - let snapshot_repo = persistence.as_ref().and_then(|p| p.snapshot_repo()); + let snapshot_store = persistence.as_ref().and_then(|p| p.snapshot_store()); let inner = Self::restore_from_snapshot_or_bootstrap( database_identity, - snapshot_repo.as_deref(), + snapshot_store.as_deref(), durable_tx_offset, min_commitlog_offset, page_pool, @@ -473,7 +473,7 @@ impl RelationalDB { fn restore_from_snapshot_or_bootstrap( database_identity: Identity, - snapshot_repo: Option<&DynSnapshotRepo>, + snapshot_store: Option<&dyn SnapshotStore>, durable_tx_offset: Option, min_commitlog_offset: TxOffset, page_pool: PagePool, @@ -481,14 +481,14 @@ impl RelationalDB { // Try to 
load the `ReconstructedSnapshot` at `snapshot_offset`. fn try_load_snapshot( database_identity: &Identity, - snapshot_repo: &DynSnapshotRepo, + snapshot_store: &dyn SnapshotStore, snapshot_offset: TxOffset, page_pool: &PagePool, ) -> Result> { log::info!("[{database_identity}] DATABASE: restoring snapshot of tx_offset {snapshot_offset}"); let start = std::time::Instant::now(); - let snapshot = snapshot_repo + let snapshot = snapshot_store .read_snapshot(snapshot_offset, page_pool) .map_err(Box::new)?; @@ -554,11 +554,11 @@ impl RelationalDB { } } - if let Some((snapshot_repo, durable_tx_offset)) = snapshot_repo.zip(durable_tx_offset) { + if let Some((snapshot_store, durable_tx_offset)) = snapshot_store.zip(durable_tx_offset) { // Mark any newer snapshots as invalid, as the history past // `durable_tx_offset` may have been reset and thus diverge from // any snapshots taken earlier. - snapshot_repo + snapshot_store .invalidate_newer_snapshots(durable_tx_offset) .map_err(|e| RestoreSnapshotError::Invalidate { offset: durable_tx_offset, @@ -569,7 +569,7 @@ impl RelationalDB { // range `(min_commitlog_offset + 1)..=durable_tx_offset`. let mut upper_bound = durable_tx_offset; loop { - let Some(snapshot_offset) = snapshot_repo + let Some(snapshot_offset) = snapshot_store .latest_snapshot_older_than(upper_bound) .map_err(Box::new)? else { @@ -579,7 +579,7 @@ impl RelationalDB { log::debug!("snapshot_offset={snapshot_offset} min_commitlog_offset={min_commitlog_offset}"); break; } - match try_load_snapshot(&database_identity, snapshot_repo, snapshot_offset, &page_pool) { + match try_load_snapshot(&database_identity, snapshot_store, snapshot_offset, &page_pool) { Ok(snapshot) if snapshot.database_identity != database_identity => { return Err(RestoreSnapshotError::IdentityMismatch { expected: database_identity, @@ -595,7 +595,7 @@ impl RelationalDB { // Newly created snapshots should not depend on it. 
if !is_transient_error(&e) { log::info!("invalidating bad snapshot at {snapshot_offset}"); - snapshot_repo.invalidate_snapshot(snapshot_offset).map_err(|e| { + snapshot_store.invalidate_snapshot(snapshot_offset).map_err(|e| { RestoreSnapshotError::Invalidate { offset: snapshot_offset, source: Box::new(e), @@ -1964,6 +1964,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, + snapshot_store: snapshots.as_ref().map(SnapshotWorker::snapshot_store), snapshots, runtime, }; @@ -2090,6 +2091,7 @@ pub mod tests_utils { let persistence = Persistence { durability: local.clone(), disk_size: disk_size_fn, + snapshot_store: snapshots.as_ref().map(SnapshotWorker::snapshot_store), snapshots, runtime, }; diff --git a/crates/core/src/db/snapshot.rs b/crates/core/src/db/snapshot.rs index 178bbda3d72..ac792ee0293 100644 --- a/crates/core/src/db/snapshot.rs +++ b/crates/core/src/db/snapshot.rs @@ -14,7 +14,7 @@ use prometheus::{Histogram, IntGauge}; use spacetimedb_datastore::locking_tx_datastore::{committed_state::CommittedState, datastore::Locking}; use spacetimedb_durability::TxOffset; use spacetimedb_lib::Identity; -use spacetimedb_snapshot::{CompressionStats, DynSnapshotRepo}; +use spacetimedb_snapshot::{BoxedPendingSnapshot, CompressionStats, DynSnapshotRepo, SnapshotRepo, SnapshotStore}; use tokio::sync::watch; use crate::worker_metrics::WORKER_METRICS; @@ -62,6 +62,7 @@ pub struct SnapshotWorker { snapshot_created: watch::Sender, request_snapshot: mpsc::UnboundedSender, snapshot_repository: Arc, + snapshot_store: Arc, } impl SnapshotWorker { @@ -70,20 +71,25 @@ impl SnapshotWorker { /// The handle is only partially initialized, as it is lacking the /// [SnapshotDatabaseState]. This allows control code to [Self::subscribe] /// to future snapshots before handing off the worker to the database. 
- pub fn new(snapshot_repository: Arc, compression: Compression, rt: Handle) -> Self { - let database = snapshot_repository.database_identity(); - let latest_snapshot = snapshot_repository.latest_snapshot().ok().flatten().unwrap_or(0); + pub fn new(snapshot_repo: Arc, compression: Compression, rt: Handle) -> Self + where + R: SnapshotRepo + 'static, + { + let snapshot_store: Arc = snapshot_repo.clone(); + let snapshot_repo: Arc = snapshot_repo; + let database = snapshot_repo.database_identity(); + let latest_snapshot = snapshot_repo.latest_snapshot().ok().flatten().unwrap_or(0); let (snapshot_created, _) = watch::channel(latest_snapshot); let (request_tx, request_rx) = mpsc::unbounded(); let actor = SnapshotWorkerActor { snapshot_requests: request_rx, - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), snapshot_created: snapshot_created.clone(), metrics: SnapshotMetrics::new(database), rt: rt.clone(), compression: compression.is_enabled().then(|| Compressor { - snapshot_repo: snapshot_repository.clone(), + snapshot_repo: snapshot_repo.clone(), metrics: CompressionMetrics::new(database), stats: <_>::default(), rt: rt.clone(), @@ -94,7 +100,8 @@ impl SnapshotWorker { Self { snapshot_created, request_snapshot: request_tx, - snapshot_repository, + snapshot_repository: snapshot_repo, + snapshot_store, } } @@ -113,6 +120,11 @@ impl SnapshotWorker { self.snapshot_repository.clone() } + /// Get the snapshot store this worker is operating on. + pub fn snapshot_store(&self) -> Arc { + self.snapshot_store.clone() + } + /// Request a snapshot to be taken. /// /// The snapshot will be taken at some point in the future. 
diff --git a/crates/core/src/subscription/module_subscription_actor.rs b/crates/core/src/subscription/module_subscription_actor.rs index 4c94df74ab8..742e2eddf83 100644 --- a/crates/core/src/subscription/module_subscription_actor.rs +++ b/crates/core/src/subscription/module_subscription_actor.rs @@ -2102,6 +2102,7 @@ mod tests { Some(Persistence { durability: durability.clone(), disk_size: Arc::new(|| Ok(<_>::default())), + snapshot_store: None, snapshots: None, runtime: spacetimedb_runtime::Handle::tokio(rt), }), diff --git a/crates/datastore/src/locking_tx_datastore/datastore.rs b/crates/datastore/src/locking_tx_datastore/datastore.rs index e9d67103b16..254f44c4e01 100644 --- a/crates/datastore/src/locking_tx_datastore/datastore.rs +++ b/crates/datastore/src/locking_tx_datastore/datastore.rs @@ -38,7 +38,7 @@ use spacetimedb_schema::{ reducer_name::ReducerName, schema::{ColumnSchema, IndexSchema, SequenceSchema, TableSchema}, }; -use spacetimedb_snapshot::{BoxedPendingSnapshot, DynSnapshotRepo, ReconstructedSnapshot}; +use spacetimedb_snapshot::{BoxedPendingSnapshot, DynSnapshotRepo, ReconstructedSnapshot, SnapshotStore}; use spacetimedb_table::{ indexes::RowPointer, page_pool::PagePool, @@ -259,6 +259,28 @@ impl Locking { Ok(Some((tx_offset, unflushed_snapshot))) } + pub fn take_snapshot_store_internal( + committed_state: &RwLock, + store: &dyn SnapshotStore, + ) -> Result> { + let mut committed_state = committed_state.write(); + let Some(tx_offset) = committed_state.next_tx_offset.checked_sub(1) else { + return Ok(None); + }; + + log::info!( + "Capturing snapshot of database {:?} at TX offset {}", + store.database_identity(), + tx_offset, + ); + + let (mut tables, blob_store) = committed_state.persistent_tables_and_blob_store(); + store + .capture_snapshot(&mut tables, blob_store, tx_offset) + .map(Some) + .map_err(Into::into) + } + /// Returns a list over all the currently connected clients, /// reading from the `st_clients` system table. 
pub fn connected_clients<'a>( @@ -2824,6 +2846,38 @@ pub(crate) mod tests { Ok(()) } + #[test] + fn test_try_begin_mut_tx_reports_writer_contention() -> ResultTest<()> { + let datastore = get_datastore()?; + let tx = begin_mut_tx(&datastore); + assert!(datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .is_none()); + let _ = datastore.rollback_mut_tx(tx); + + let tx = datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .expect("write lock should be available after rollback"); + let _ = datastore.rollback_mut_tx(tx); + Ok(()) + } + + #[test] + fn test_try_begin_mut_tx_reports_read_contention() -> ResultTest<()> { + let datastore = get_datastore()?; + let tx = begin_tx(&datastore); + assert!(datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .is_none()); + let _ = datastore.release_tx(tx); + + let tx = datastore + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .expect("write lock should be available after read release"); + let _ = datastore.rollback_mut_tx(tx); + Ok(()) + } + #[test] fn test_scheduled_table_insert_and_update() -> ResultTest<()> { // Build the minimal schema that is a valid scheduler table. 
diff --git a/crates/dst/Cargo.toml b/crates/dst/Cargo.toml new file mode 100644 index 00000000000..add6ccd36ad --- /dev/null +++ b/crates/dst/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "spacetimedb-dst" +version.workspace = true +edition.workspace = true +license-file = "LICENSE" +description = "Deterministic simulation testing utilities for SpacetimeDB crates" +rust-version.workspace = true + +[lints] +workspace = true + +[[bin]] +name = "spacetimedb-dst" +path = "src/main.rs" +bench = false + +[dependencies] +anyhow.workspace = true +clap.workspace = true +futures-util.workspace = true +spacetimedb-datastore = { workspace = true, features = ["test"] } +spacetimedb_core = { package = "spacetimedb-core", path = "../core", version = "=2.2.0", features = ["test"] } +spacetimedb-commitlog = { workspace = true, features = ["test"] } +spacetimedb_durability = { package = "spacetimedb-durability", path = "../durability", version = "=2.2.0", features = ["test"] } +spacetimedb-lib.workspace = true +spacetimedb-paths.workspace = true +spacetimedb-primitives.workspace = true +spacetimedb-runtime = { workspace = true, features = ["simulation"] } +spacetimedb-sats.workspace = true +spacetimedb-schema = { workspace = true, features = ["test"] } +spacetimedb-snapshot.workspace = true +spacetimedb-table.workspace = true +tempfile.workspace = true +tracing.workspace = true +tracing-subscriber.workspace = true diff --git a/crates/dst/README.md b/crates/dst/README.md new file mode 100644 index 00000000000..e9c756a5646 --- /dev/null +++ b/crates/dst/README.md @@ -0,0 +1,227 @@ +# `spacetimedb-dst` + +Deterministic simulation testing for SpacetimeDB components. + +DST is not a generic random fuzzer. It is a seed-replayable framework for +generating meaningful SpacetimeDB histories, executing them against real +implementation paths, and checking semantic properties while the run is still +in progress. 
+ +## First Principles + +- A failing run must be reproducible from target, scenario, seed, run budget, + and fault profile. Use `--max-interactions` for exact replay; `--duration` is + a wall-clock soak budget and may stop at a different step count on another + machine or runtime. +- Workloads describe legal but stressful user behavior. They should not depend + on target internals. +- Targets execute interactions against real SpacetimeDB code. +- Properties check externally observable behavior, preferably against a simple + model or a replayed durable history. +- Generation, execution, and property checking stay separate so failures are + diagnosable as workload bugs, target bugs, or weak assertions. +- Runs stream interactions instead of materializing a full plan by default. +- Fault injection is explicit, configurable, and summarized in the outcome. +- Shared probability and weighting logic belongs in `workload::strategy`, not + ad hoc scenario code. + +## Current Architecture + +The CLI selects a target, scenario, seed, budget, and fault profile. The shared +runner pulls one interaction at a time from a source, sends it to the target, +and asks the property runtime to observe the result. + +```text +CLI -> TargetDescriptor -> WorkloadSource -> TargetEngine -> Observation + \-> StreamingProperties -> Outcome +``` + +The core contracts are: + +- `WorkloadSource`: deterministic pull-based interaction stream. +- `TargetEngine`: target-specific execution and outcome collection. +- `StreamingProperties`: reusable property checks over observations and target + accessors. + +## Client Model + +DST workloads use shared logical client IDs rather than target-owned ad hoc +connection numbers. A `ClientId` is a stable actor in the generated history; a +`SessionId` is one live connection/session for that actor. A single client can +own multiple active sessions, which matters for reconnect, multi-tab, and future +replication traffic. 
Targets translate those IDs into their own handles: + +- `relational-db-commitlog` maps `SessionId` to direct write/read transaction + slots. +- future replication targets can map `SessionId` plus endpoint/node IDs to a + client connection routed through the simulated network. + +Concrete handles stay target-owned. Shared workloads should carry logical +identity and lifecycle intent, not `RelTx`, websocket handles, or target-specific +connection objects. + +## Workload Composition + +DST workloads use three building blocks: + +- **Source:** emits a deterministic stream of interactions. +- **Profile:** configures weights, schema shape, and generation policy. +- **Layer:** wraps a source and adds lifecycle, fault, or cross-cutting + interactions. + +`table_ops` is the base table-transaction workload. `commitlog_ops` composes it +and injects durability lifecycle operations such as sync, close/reopen, dynamic +table create/migrate/drop, and replay checks. + +Use this rule of thumb: + +- Add a new profile when the interaction language is unchanged and only weights + or schema shape differ. +- Add a new layer when you are adding lifecycle behavior around an existing + source. +- Add a new workload family only when the interaction vocabulary is genuinely + different. + +## Table Operation Semantics + +The table workload keeps the executable operation language small. Similar +cases converge into physical operations such as `InsertRows`, `DeleteRows`, and +`BeginTx`; the generated interaction also carries a case label for coverage and +debug output. + +Correctness does not come from that label. 
The property runtime asks its model +what the physical operation should do: + +- inserting fresh rows should mutate the table +- inserting an exact visible row should be an idempotent no-op +- inserting an existing primary id with a different payload should report a + unique-key error +- deleting visible rows should mutate the table +- deleting absent rows should report a missing-row error +- beginning or writing behind another writer should report a write conflict +- query operations (`PointLookup`, `PredicateCount`, `RangeScan`, `FullScan`) + should match the model-visible state + +The case label still matters for summaries. It lets a run report that it hit +`ExactDuplicateInsert` or `UniqueKeyConflictInsert`, without teaching the target +or properties to trust generator-provided expectations. + +## Current Targets + +- `relational-db-commitlog`: runs table and commitlog lifecycle interactions + against `RelationalDB`, local durability, dynamic schema operations, + close/reopen, and replay-from-history checks. + +## Properties + +Properties live in `src/properties.rs` and are selected by target. +Table-oriented properties use `TargetPropertyAccess` so the property runtime can +ask a target for rows, counts, lookups, and range scans without knowing target +storage internals. + +Current property families include: + +- insert/select and delete/select checks +- observed error vs model-predicted error matching +- model-predicted no-op checks +- point lookup, predicate count, range scan, and full scan vs the table oracle +- NoREC-style optimizer-vs-direct checks +- TLP-style true/false/null partition checks +- index range exclusion checks +- banking mirror-table invariants +- dynamic migration auto-increment checks +- durable replay state vs the oracle committed model + +## Fault Injection + +`relational-db-commitlog` can wrap the in-memory commitlog repo in +`BuggifiedRepo`. Fault decisions are deterministic from the run seed and +summarized in the final outcome. 
+ +Profiles: + +- `off`: no injected disk behavior. +- `light`: latency and occasional short I/O. +- `default`: stronger latency and short I/O pressure. +- `aggressive`: higher latency and short I/O rates. I/O error hooks exist but + are currently disabled in profile-driven runs because local durability does + not yet classify those errors as recoverable target outcomes. + +## Running + +Fast local run: + +```bash +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --seed 42 --max-interactions 200 +``` + +Scenario examples: + +```bash +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario banking --duration 5m +cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --scenario indexed-ranges --duration 5m +``` + +Run with commitlog faults: + +```bash +cargo run -p spacetimedb-dst -- run \ + --target relational-db-commitlog \ + --seed 42 \ + --max-interactions 400 \ + --commitlog-fault-profile default +``` + +Trace every interaction: + +```bash +RUST_LOG=trace cargo run -p spacetimedb-dst -- run --target relational-db-commitlog --duration 5m +``` + +## Run Budgets + +Prefer `--max-interactions` when reporting or replaying a failure. It is the +deterministic interaction budget, so target, scenario, seed, interaction count, +and fault profile are enough to rerun the same generated stream. + +Use `--duration` for local soaks. It is intentionally wall-clock based, so it +can stop after a different number of interactions if host speed, logging, or +runtime behavior changes. + +## Reading The Code + +Start here: + +- `src/core/mod.rs`: source, engine, property, and runner traits. +- `src/workload/table_ops`: table interaction language, generation model, and + scenarios. +- `src/workload/commitlog_ops`: lifecycle layer over table workloads. +- `src/sim/`: local executor and deterministic-decision shim. +- `src/properties.rs`: property catalog and oracle/model checks. 
+- `src/targets/relational_db_commitlog.rs`: target adapter for RelationalDB, + commitlog durability, fault injection, close/reopen, and replay. +- `src/targets/buggified_repo.rs`: deterministic disk-like fault layer. + +## Adding A New Target + +1. Add a target engine in `src/targets/<target>.rs`. +2. Reuse an existing workload family or add `src/workload/<family>/`. +3. Return observations that are rich enough for properties to validate behavior. +4. Plug target-specific properties through `PropertyRuntime`. +5. Add a `TargetDescriptor` in `src/targets/descriptor.rs`. +6. Register the target in CLI `TargetKind`. + +## Current Gaps + +- No structured trace/replay format yet. +- No shrinker yet; seed replay is the current reproduction mechanism. +- Sometimes-property reporting is still outcome-counter based, not a stable + property-event catalog. +- The local `sim` shim is not a real simulator yet. It owns executor setup and + deterministic fault decisions so future simulator work has one boundary. +- The current `RelationalDB` target drives open read snapshots to release before + starting writes, because beginning a write behind an open read snapshot can + block in this target shape. Interleaved read/write snapshot histories should + come back once the target models that lock behavior explicitly. +- Runtime-boundary work for scheduler, time, network, filesystem, and lower + randomness sources is still future work. diff --git a/crates/dst/proptest-regressions/datastore.txt b/crates/dst/proptest-regressions/datastore.txt new file mode 100644 index 00000000000..a76f311290a --- /dev/null +++ b/crates/dst/proptest-regressions/datastore.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc d9b364a151c583c83224b9ddcc17de730b057b77c5509c8433e8dc12514d2415 # shrinks to seed = 0 diff --git a/crates/dst/src/client.rs b/crates/dst/src/client.rs new file mode 100644 index 00000000000..84b215a7198 --- /dev/null +++ b/crates/dst/src/client.rs @@ -0,0 +1,70 @@ +//! Logical client and topology identifiers shared by DST workloads and targets. +//! +//! These IDs are part of the generated workload language. Targets translate +//! them into concrete handles such as direct database transaction slots, +//! `ClientConnection`s, websocket sessions, or simulated-node connections. + +use std::fmt; + +/// Stable logical client identity within one DST run. +/// +/// A `ClientId` is an actor/user identity, not a live network connection. One +/// client may own zero, one, or many [`SessionId`]s at the same time. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ClientId(u32); + +impl ClientId { + pub const ZERO: Self = Self(0); + + pub const fn new(raw: u32) -> Self { + Self(raw) + } +} + +impl fmt::Display for ClientId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "client{}", self.0) + } +} + +/// Logical live connection/session for a client. +/// +/// Current single-process targets use `SessionId` anywhere old DST code said +/// "connection": transaction slots, read snapshots, reducer-call handles, and +/// property observations. A target translates this logical session into its +/// concrete handle, such as a `RelTx` slot or `ClientConnection`. +/// +/// The `generation` field is the per-client session ordinal. Workloads can keep +/// several generations active concurrently to model one client with multiple +/// open connections, or allocate a later generation after a reconnect. 
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct SessionId { + pub client: ClientId, + pub generation: u32, +} + +impl SessionId { + pub const ZERO: Self = Self::new(ClientId::ZERO, 0); + + pub const fn new(client: ClientId, generation: u32) -> Self { + Self { client, generation } + } + + /// Compatibility helper for today's fixed-size session pools. + /// + /// A run with `N` connections starts as one logical client with `N` + /// sessions: `client0/session0`, `client0/session1`, ... + pub(crate) const fn from_index(index: usize) -> Self { + Self::new(ClientId::ZERO, index as u32) + } + + pub(crate) const fn as_index(self) -> usize { + self.generation as usize + } +} + +impl fmt::Display for SessionId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.session{}", self.client, self.generation) + } +} diff --git a/crates/dst/src/config.rs b/crates/dst/src/config.rs new file mode 100644 index 00000000000..5968c5abb96 --- /dev/null +++ b/crates/dst/src/config.rs @@ -0,0 +1,115 @@ +//! Shared run-budget configuration for DST targets. + +use std::{ + fmt, + time::{Duration, Instant}, +}; + +/// Coarse disk-fault profile for commitlog-backed DST targets. +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub enum CommitlogFaultProfile { + Off, + Light, + #[default] + Default, + Aggressive, +} + +impl fmt::Display for CommitlogFaultProfile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Off => f.write_str("off"), + Self::Light => f.write_str("light"), + Self::Default => f.write_str("default"), + Self::Aggressive => f.write_str("aggressive"), + } + } +} + +/// Common stop conditions for generated DST runs. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct RunConfig { + /// Hard cap on generated interactions. `None` means no interaction budget. 
+ /// + /// This is the preferred budget for exact seed replay: the same target, + /// scenario, seed, max-interactions value, and fault profile should produce + /// the same generated interaction stream. + pub max_interactions: Option, + /// Wall-clock duration budget in milliseconds. `None` means no time budget. + /// + /// Duration runs are useful as local soaks, but the exact stop step can vary + /// with host speed and runtime behavior. Use `max_interactions` when a + /// failure needs precise replay. + pub max_duration_ms: Option, + /// Disk-fault profile for commitlog-backed targets. + pub commitlog_fault_profile: CommitlogFaultProfile, +} + +impl Default for RunConfig { + fn default() -> Self { + Self { + max_interactions: None, + max_duration_ms: None, + commitlog_fault_profile: CommitlogFaultProfile::Default, + } + } +} + +impl RunConfig { + pub fn with_max_interactions(max_interactions: usize) -> Self { + Self { + max_interactions: Some(max_interactions), + max_duration_ms: None, + ..Default::default() + } + } + + pub fn with_duration_spec(duration: &str) -> anyhow::Result { + Ok(Self { + max_interactions: None, + max_duration_ms: Some(parse_duration_spec(duration)?.as_millis() as u64), + ..Default::default() + }) + } + + pub fn with_commitlog_fault_profile(mut self, profile: CommitlogFaultProfile) -> Self { + self.commitlog_fault_profile = profile; + self + } + + /// Return the wall-clock deadline for duration-budgeted runs. + /// + /// This intentionally uses `std::time::Instant`, not simulated time. DST + /// duration budgets are a harness stop condition rather than part of the + /// simulated system under test. 
+ pub fn deadline(&self) -> Option<Instant> { + self.max_duration_ms + .map(Duration::from_millis) + .map(|duration| Instant::now() + duration) + } + + pub fn max_interactions_or_default(&self, default: usize) -> usize { + self.max_interactions.unwrap_or(default) + } +} + +pub fn parse_duration_spec(spec: &str) -> anyhow::Result<Duration> { + let spec = spec.trim(); + if spec.is_empty() { + anyhow::bail!("duration spec cannot be empty"); + } + + let split_at = spec + .find(|ch: char| !ch.is_ascii_digit()) + .ok_or_else(|| anyhow::anyhow!("duration spec missing unit: {spec}"))?; + let (digits, unit) = spec.split_at(split_at); + let value: u64 = digits.parse()?; + + match unit { + "ms" => Ok(Duration::from_millis(value)), + "s" => Ok(Duration::from_secs(value)), + "m" => Ok(Duration::from_secs(value.saturating_mul(60))), + "h" => Ok(Duration::from_secs(value.saturating_mul(60 * 60))), + _ => anyhow::bail!("unsupported duration unit: {unit}"), + } +} diff --git a/crates/dst/src/core/mod.rs b/crates/dst/src/core/mod.rs new file mode 100644 index 00000000000..3920471971c --- /dev/null +++ b/crates/dst/src/core/mod.rs @@ -0,0 +1,267 @@ +//! Core abstractions for pluggable DST workloads, engines, and properties. + +use std::{ + any::Any, + fmt::Debug, + future::Future, + panic::{self, AssertUnwindSafe}, +}; + +use crate::config::RunConfig; + use futures_util::FutureExt; + +/// Pull-based deterministic interaction source. +pub trait WorkloadSource { + type Interaction; + + fn next_interaction(&mut self) -> Option<Self::Interaction>; + fn request_finish(&mut self); +} + +/// Target execution contract over a workload interaction stream. +pub trait TargetEngine<I> { + type Observation; + type Outcome; + type Error; + + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a I, + ) -> impl Future<Output = Result<Self::Observation, Self::Error>> + 'a; + fn finish(&mut self); + fn collect_outcome<'a>(&'a mut self) -> impl Future<Output = Result<Self::Outcome, Self::Error>> + 'a; +} + +/// Property runtime contract for the shared streaming runner.
+pub trait StreamingProperties +where + E: TargetEngine, +{ + fn observe(&mut self, engine: &E, interaction: &I, observation: &O) -> Result<(), String>; + fn finish(&mut self, engine: &E, outcome: &E::Outcome) -> Result<(), String>; +} + +/// Shared streaming runner with property orchestration. +pub async fn run_streaming( + mut source: S, + mut engine: E, + mut properties: P, + cfg: RunConfig, +) -> anyhow::Result +where + I: Clone + Debug, + S: WorkloadSource, + E: TargetEngine, + P: StreamingProperties, +{ + // Duration is a harness-level wall-clock stop condition. The reproducible + // budget for exact replay is `RunConfig::max_interactions`, which the + // source uses when it is constructed. + let deadline = cfg.deadline(); + let mut step = 0usize; + loop { + if deadline.is_some_and(|d| std::time::Instant::now() >= d) { + source.request_finish(); + } + let Some(interaction) = source.next_interaction() else { + break; + }; + let execution = guard_target("execute_interaction", step, Some(&interaction), || { + engine.execute_interaction(&interaction) + }) + .await + .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; + let observation = execution.map_err(|e| anyhow::anyhow!("interaction execution failed at step {step}: {e}"))?; + properties + .observe(&engine, &interaction, &observation) + .map_err(|e| anyhow::anyhow!("property violation at step {step}: {e}"))?; + step = step.saturating_add(1); + } + guard_target("finish", step, Option::<&I>::None, || async { + engine.finish(); + }) + .await + .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; + let outcome = guard_target("collect_outcome", step, Option::<&I>::None, || engine.collect_outcome()) + .await + .map_err(|e| anyhow::anyhow!("property violation while collecting outcome: {e}"))??; + properties + .finish(&engine, &outcome) + .map_err(|e| anyhow::anyhow!("property violation at finish: {e}"))?; + Ok(outcome) +} + +async fn guard_target( + phase: &'static str, + step: 
usize, + interaction: Option<&I>, + make_future: impl FnOnce() -> Fut, +) -> Result +where + I: Debug, + Fut: Future, +{ + let future = panic::catch_unwind(AssertUnwindSafe(make_future)) + .map_err(|payload| not_crash_error(phase, step, interaction, &payload))?; + AssertUnwindSafe(future) + .catch_unwind() + .await + .map_err(|payload| not_crash_error(phase, step, interaction, &payload)) +} + +fn not_crash_error( + phase: &'static str, + step: usize, + interaction: Option<&I>, + payload: &Box, +) -> String { + let payload = panic_payload_to_string(payload); + match interaction { + Some(interaction) => { + format!("[NotCrash] target panicked during {phase} at step {step}: interaction={interaction:?}, payload={payload}") + } + None => format!("[NotCrash] target panicked during {phase} after step {step}: payload={payload}"), + } +} + +fn panic_payload_to_string(payload: &Box) -> String { + if let Some(message) = payload.downcast_ref::<&'static str>() { + (*message).to_string() + } else if let Some(message) = payload.downcast_ref::() { + message.clone() + } else { + "".to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Clone, Debug)] + struct TestInteraction; + + struct SingleStepSource { + emitted: bool, + } + + impl SingleStepSource { + fn new() -> Self { + Self { emitted: false } + } + } + + impl WorkloadSource for SingleStepSource { + type Interaction = TestInteraction; + + fn next_interaction(&mut self) -> Option { + if self.emitted { + None + } else { + self.emitted = true; + Some(TestInteraction) + } + } + + fn request_finish(&mut self) {} + } + + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + enum PanicPhase { + Execute, + Finish, + CollectOutcome, + } + + struct PanicEngine { + phase: PanicPhase, + } + + impl PanicEngine { + fn new(phase: PanicPhase) -> Self { + Self { phase } + } + } + + impl TargetEngine for PanicEngine { + type Observation = (); + type Outcome = (); + type Error = String; + + fn execute_interaction<'a>( + &'a mut 
self, + _interaction: &'a TestInteraction, + ) -> impl Future> + 'a { + async move { + if self.phase == PanicPhase::Execute { + panic!("execute panic"); + } + Ok(()) + } + } + + fn finish(&mut self) { + if self.phase == PanicPhase::Finish { + panic!("finish panic"); + } + } + + fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a { + async move { + if self.phase == PanicPhase::CollectOutcome { + panic!("collect panic"); + } + Ok(()) + } + } + } + + struct NoopProperties; + + impl StreamingProperties for NoopProperties { + fn observe( + &mut self, + _engine: &PanicEngine, + _interaction: &TestInteraction, + _observation: &(), + ) -> Result<(), String> { + Ok(()) + } + + fn finish(&mut self, _engine: &PanicEngine, _outcome: &()) -> Result<(), String> { + Ok(()) + } + } + + #[test] + fn not_crash_catches_execute_panic() { + assert_not_crash_error(PanicPhase::Execute, "execute_interaction", "execute panic"); + } + + #[test] + fn not_crash_catches_finish_panic() { + assert_not_crash_error(PanicPhase::Finish, "finish", "finish panic"); + } + + #[test] + fn not_crash_catches_collect_outcome_panic() { + assert_not_crash_error(PanicPhase::CollectOutcome, "collect_outcome", "collect panic"); + } + + fn assert_not_crash_error(phase: PanicPhase, expected_phase: &str, expected_payload: &str) { + let mut runtime = crate::sim::Runtime::new(crate::seed::DstSeed(0)).expect("runtime"); + let err = runtime + .block_on(run_streaming( + SingleStepSource::new(), + PanicEngine::new(phase), + NoopProperties, + RunConfig::with_max_interactions(1), + )) + .unwrap_err() + .to_string(); + + assert!(err.contains("[NotCrash]")); + assert!(err.contains(expected_phase)); + assert!(err.contains(expected_payload)); + } +} diff --git a/crates/dst/src/lib.rs b/crates/dst/src/lib.rs new file mode 100644 index 00000000000..5463186a8b9 --- /dev/null +++ b/crates/dst/src/lib.rs @@ -0,0 +1,51 @@ +//! Deterministic simulation testing utilities for SpacetimeDB crates. +//! +//! 
Public surface is intentionally narrow and centered on the CLI: +//! +//! - [`client`] for logical client/session identifiers, +//! - [`config`] for run budgets, +//! - [`properties`] for reusable semantic checks, +//! - [`seed`] for deterministic seeds, +//! - [`workload`] for scenario identifiers, +//! - [`targets`] for the executable relational-db + commitlog adapter. +//! +//! ## DST principles +//! +//! 1. Every generated choice comes from [`seed::DstSeed`] or a simulator-provided +//! deterministic source. A failing run should be replayable from the printed +//! seed and CLI arguments. Use `--max-interactions` for exact replay; duration +//! budgets are wall-clock soak limits. +//! 2. Workloads describe legal but stressful user behavior. Targets may add +//! faults and lifecycle disruption, but the generator should not depend on +//! target internals. +//! 3. Oracles should check observable state, not merely absence of panics. When +//! possible, compare the target against a simple model or a replayed durable +//! history. +//! 4. Keep generation, execution, and property checking separate. This makes it +//! clear whether a failure came from an invalid workload, a target bug, or a +//! weak assertion. +//! 5. Prefer streaming state machines over precomputed traces. DST runs should +//! scale by budget and duration without materializing the whole workload. +//! 6. Fault injection must be explicit, configurable, and summarized in the run +//! output. Profiles should start with recoverable API-level behavior before +//! introducing crash or corruption semantics. +//! 7. Shared randomness, weighting, and sampling helpers belong in the +//! workload strategy module, not in ad hoc target or scenario code. + +/// Logical client/session identifiers shared by workloads and targets. +pub mod client; +/// Shared run-budget configuration for DST targets. +pub mod config; +/// Core traits/runners for pluggable workloads and targets. 
+pub mod core; +/// Reusable semantic properties and oracle-model checks. +pub(crate) mod properties; +mod schema; +/// Stable seed and RNG utilities used to make runs reproducible. +pub mod seed; +/// Local executor and deterministic-decision shim. +pub mod sim; +/// Concrete simulator targets. +pub mod targets; +/// Shared workload generators reused by multiple targets. +pub mod workload; diff --git a/crates/dst/src/main.rs b/crates/dst/src/main.rs new file mode 100644 index 00000000000..53e368adb92 --- /dev/null +++ b/crates/dst/src/main.rs @@ -0,0 +1,113 @@ +use std::time::{SystemTime, UNIX_EPOCH}; + +use clap::{Args, Parser, Subcommand}; +use spacetimedb_dst::{ + config::RunConfig, + seed::DstSeed, + targets::descriptor::{RelationalDbConcurrentDescriptor, TargetDescriptor}, +}; + +#[derive(Parser, Debug)] +#[command(name = "spacetimedb-dst")] +#[command(about = "Run deterministic simulation targets")] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand, Debug)] +enum Command { + Run(RunArgs), +} + +#[derive(Args, Debug)] +struct RunArgs { + #[arg(long, help = "Seed for generated choices. Defaults to wall-clock time.")] + seed: Option<u64>, + #[arg( + long, + help = "Wall-clock soak budget such as 500ms, 10s, 5m, or 1h. Use --max-interactions for exact replay." )] + duration: Option<String>, + #[arg(long, help = "Deterministic interaction budget.
Preferred for replayable failures.")] + max_interactions: Option, +} + +fn main() -> anyhow::Result<()> { + init_tracing(); + match Cli::parse().command { + Command::Run(args) => run_command(args), + } +} + +fn init_tracing() { + use tracing_subscriber::{fmt, EnvFilter}; + + let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); + let _ = fmt() + .with_env_filter(filter) + .with_target(false) + .with_thread_ids(false) + .with_thread_names(false) + .compact() + .try_init(); +} + +fn run_command(args: RunArgs) -> anyhow::Result<()> { + let seed = resolve_seed(args.seed); + let config = build_config(args.duration.as_deref(), args.max_interactions)?; + + run_prepared_target::(seed, (), config) +} + +fn run_prepared_target( + seed: DstSeed, + scenario: D::Scenario, + config: RunConfig, +) -> anyhow::Result<()> +where + D: 'static, + D::Scenario: Send + 'static, +{ + D::prepare(seed, &scenario, &config)?; + std::thread::spawn(move || { + let mut runtime = spacetimedb_dst::sim::Runtime::new(seed)?; + runtime.block_on(run_target::(seed, scenario, config)) + }) + .join() + .unwrap_or_else(|payload| std::panic::resume_unwind(payload)) +} + +fn resolve_seed(seed: Option) -> DstSeed { + seed.map(DstSeed).unwrap_or_else(|| { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time went backwards") + .as_nanos() as u64; + DstSeed(nanos) + }) +} + +fn build_config(duration: Option<&str>, max_interactions: Option) -> anyhow::Result { + Ok(match (duration, max_interactions) { + (Some(duration), Some(max_interactions)) => RunConfig { + max_interactions: Some(max_interactions), + max_duration_ms: Some(spacetimedb_dst::config::parse_duration_spec(duration)?.as_millis() as u64), + ..Default::default() + }, + (Some(duration), None) => RunConfig::with_duration_spec(duration)?, + (None, Some(max_interactions)) => RunConfig::with_max_interactions(max_interactions), + (None, None) => RunConfig::with_max_interactions(1_000), + }) +} + 
+#[allow(clippy::disallowed_macros)] +async fn run_target( + seed: DstSeed, + scenario: D::Scenario, + config: RunConfig, +) -> anyhow::Result<()> { + let line = D::run_streaming(seed, scenario, config).await?; + println!("{line}"); + Ok(()) +} diff --git a/crates/dst/src/properties.rs b/crates/dst/src/properties.rs new file mode 100644 index 00000000000..11d652fcaec --- /dev/null +++ b/crates/dst/src/properties.rs @@ -0,0 +1,239 @@ +//! Reusable property runtime shared by DST targets. +//! +//! This module is the boundary between target execution and semantic checking. +//! Targets emit observations and implement [`TargetPropertyAccess`]; property +//! rules compare those observations against either the target's externally +//! visible state, an oracle model, or durable replay state. +//! +//! ## Property Model +//! +//! A property is a named check over a run. It observes generated interactions, +//! target observations, target-visible state, oracle models, and final +//! outcomes. Failures should include a stable property name and enough context +//! to replay the seed or trace. +//! +//! The current catalog is intentionally small and falls into the same groups +//! used by the proposal: +//! +//! - Safety properties: `NotCrash`, `ErrorMatchesOracle`, +//! `NoMutationMatchesModel`, `DurableReplayMatchesModel`, +//! `SnapshotCaptureMaintainsPrefix`, `SnapshotRestoreWithinDurablePrefix`, +//! `BankingTablesMatch`, and `DynamicMigrationAutoInc`. +//! - Model/oracle properties: `PointLookupMatchesModel`, +//! `PredicateCountMatchesModel`, `RangeScanMatchesModel`, +//! `FullScanMatchesModel`, and the scenario-specific final table-state check. +//! - Differential and metamorphic properties: `InsertSelect`, `DeleteSelect`, +//! `SelectSelectOptimizer`, `WhereTrueFalseNull`, and `IndexRangeExcluded`. +//! - Coverage and progress properties are not first-class rules yet. For now, +//! targets expose operation and outcome counters. Those counters should become +//! 
selectable properties once long-running and replication targets need them. + +mod rules; +mod runtime; + +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + schema::{SchemaPlan, SimRow}, + workload::{ + commitlog_ops::{DurableReplaySummary, SnapshotObservation}, + table_ops::{TableErrorKind, TableWorkloadInteraction, TableWorkloadOutcome}, + }, +}; + +pub(crate) use runtime::PropertyRuntime; + +/// Target adapter for property evaluation. +pub(crate) trait TargetPropertyAccess { + fn schema_plan(&self) -> &SchemaPlan; + fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String>; + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String>; + fn collect_rows_for_table(&self, table: usize) -> Result, String>; + fn count_rows(&self, table: usize) -> Result; + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result; + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String>; +} + +/// Canonical property IDs that can be selected by targets. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum PropertyKind { + /// Safety: target execution must not panic. + /// + /// Enforced by the shared streaming runner. + NotCrash, + /// Metamorphic: an inserted row is immediately visible to the inserting session. + InsertSelect, + /// Metamorphic: a deleted row disappears from the deleting session's view. + DeleteSelect, + /// Differential: optimized predicate counts agree with direct row projection. + SelectSelectOptimizer, + /// Metamorphic: boolean partitions preserve total cardinality. + WhereTrueFalseNull, + /// Metamorphic: composite index range scans implement excluded upper bounds correctly. + IndexRangeExcluded, + /// Safety: banking scenario debit and credit shadow tables remain identical. 
+ BankingTablesMatch, + /// Safety: auto-increment IDs continue advancing after dynamic table migration. + DynamicMigrationAutoInc, + /// Safety: durable replay state equals the oracle committed model. + DurableReplayMatchesModel, + /// Safety: failed snapshot capture does not publish a newer usable snapshot. + SnapshotCaptureMaintainsPrefix, + /// Safety: restored snapshots are within the durable prefix. + SnapshotRestoreWithinDurablePrefix, + /// Safety: observed errors match the model-predicted error class. + ErrorMatchesOracle, + /// Safety: model-predicted no-op interactions do not mutate visible state. + NoMutationMatchesModel, + /// Model/oracle: point lookups match the oracle session-visible model. + PointLookupMatchesModel, + /// Model/oracle: predicate counts match the oracle session-visible model. + PredicateCountMatchesModel, + /// Model/oracle: range scans match the oracle session-visible model. + RangeScanMatchesModel, + /// Model/oracle: full scans match the oracle session-visible model. 
+ FullScanMatchesModel, +} + +#[derive(Clone, Debug)] +pub(crate) struct DynamicMigrationProbe { + pub slot: u32, + pub from_version: u32, + pub to_version: u32, + pub existing_rows: Vec, + pub inserted_row: SimRow, +} + +#[derive(Clone, Debug)] +pub(crate) enum TableMutation { + Inserted { + table: usize, + requested: SimRow, + returned: SimRow, + }, + Deleted { + table: usize, + row: SimRow, + }, +} + +#[derive(Clone, Debug)] +pub(crate) enum TableObservation { + Applied, + Mutated { + conn: SessionId, + mutations: Vec, + in_tx: bool, + }, + ObservedError(TableErrorKind), + PointLookup { + conn: SessionId, + table: usize, + id: u64, + actual: Option, + }, + PredicateCount { + conn: SessionId, + table: usize, + col: u16, + value: AlgebraicValue, + actual: usize, + }, + RangeScan { + conn: SessionId, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + actual: Vec, + }, + FullScan { + conn: SessionId, + table: usize, + actual: Vec, + }, + CommitOrRollback, +} + +#[derive(Clone, Debug)] +pub(crate) enum CommitlogObservation { + Table(TableObservation), + Applied, + Skipped, + DynamicMigrationProbe(DynamicMigrationProbe), + Snapshot(SnapshotObservation), + DurableReplay(DurableReplaySummary), +} + +struct PropertyContext<'a> { + access: &'a dyn TargetPropertyAccess, + models: &'a runtime::PropertyModels, +} + +#[derive(Clone, Debug)] +enum PropertyEvent<'a> { + TableInteractionApplied, + RowInserted { + conn: SessionId, + table: usize, + returned: &'a SimRow, + in_tx: bool, + }, + RowDeleted { + conn: SessionId, + table: usize, + row: &'a SimRow, + in_tx: bool, + }, + ObservedError { + observed: TableErrorKind, + predicted: TableErrorKind, + subject: Option<(SessionId, usize)>, + interaction: &'a TableWorkloadInteraction, + }, + NoMutation { + subject: Option<(SessionId, usize)>, + interaction: &'a TableWorkloadInteraction, + observation: &'a TableObservation, + }, + PointLookup { + conn: SessionId, + table: usize, + id: u64, + actual: &'a Option, + }, + 
PredicateCount { + conn: SessionId, + table: usize, + col: u16, + value: &'a AlgebraicValue, + actual: usize, + }, + RangeScan { + conn: SessionId, + table: usize, + cols: &'a [u16], + lower: &'a Bound, + upper: &'a Bound, + actual: &'a [SimRow], + }, + FullScan { + conn: SessionId, + table: usize, + actual: &'a [SimRow], + }, + CommitOrRollback, + DynamicMigrationProbe(&'a DynamicMigrationProbe), + SnapshotCapture(&'a SnapshotObservation), + DurableReplay(&'a DurableReplaySummary), + TableWorkloadFinished(&'a TableWorkloadOutcome), +} diff --git a/crates/dst/src/properties/rules.rs b/crates/dst/src/properties/rules.rs new file mode 100644 index 00000000000..cb3f5bfc5d9 --- /dev/null +++ b/crates/dst/src/properties/rules.rs @@ -0,0 +1,665 @@ +use std::ops::Bound; + +use spacetimedb_sats::{AlgebraicType, AlgebraicValue}; + +use crate::{ + client::SessionId, + schema::{SchemaPlan, SimRow}, + workload::{ + commitlog_ops::SnapshotCaptureStatus, + table_ops::{TableOperation, TableScenario}, + }, +}; + +use super::{PropertyContext, PropertyEvent, PropertyKind, TableMutation, TableObservation, TargetPropertyAccess}; + +pub(super) trait PropertyRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let _ = ctx; + let _ = event; + Ok(()) + } +} + +pub(super) fn rule_for_kind(kind: PropertyKind) -> Box { + match kind { + PropertyKind::NotCrash => Box::::default(), + PropertyKind::InsertSelect => Box::::default(), + PropertyKind::DeleteSelect => Box::::default(), + PropertyKind::SelectSelectOptimizer => Box::::default(), + PropertyKind::WhereTrueFalseNull => Box::::default(), + PropertyKind::IndexRangeExcluded => Box::::default(), + PropertyKind::BankingTablesMatch => Box::::default(), + PropertyKind::DynamicMigrationAutoInc => Box::::default(), + PropertyKind::DurableReplayMatchesModel => Box::::default(), + PropertyKind::SnapshotCaptureMaintainsPrefix => Box::::default(), + 
PropertyKind::SnapshotRestoreWithinDurablePrefix => Box::::default(), + PropertyKind::ErrorMatchesOracle => Box::::default(), + PropertyKind::NoMutationMatchesModel => Box::::default(), + PropertyKind::PointLookupMatchesModel => Box::::default(), + PropertyKind::PredicateCountMatchesModel => Box::::default(), + PropertyKind::RangeScanMatchesModel => Box::::default(), + PropertyKind::FullScanMatchesModel => Box::::default(), + } +} + +pub(super) fn oracle_table_state_rule(scenario: S, schema: SchemaPlan) -> Box +where + S: TableScenario + 'static, +{ + Box::new(OracleTableStateRule::new(scenario, schema)) +} + +#[derive(Default)] +struct NotCrashRule; + +impl PropertyRule for NotCrashRule {} + +struct OracleTableStateRule { + scenario: S, + schema: SchemaPlan, +} + +impl OracleTableStateRule { + fn new(scenario: S, schema: SchemaPlan) -> Self { + Self { scenario, schema } + } +} + +impl PropertyRule for OracleTableStateRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::TableWorkloadFinished(outcome) => { + let expected_rows = ctx.models.table().committed_rows(); + if outcome.final_rows != expected_rows { + return Err(format!( + "[OracleTableState] final table state mismatch: expected={expected_rows:?} actual={:?}", + outcome.final_rows + )); + } + self.scenario + .validate_outcome(&self.schema, outcome) + .map_err(|err| format!("[OracleTableState] scenario invariant failed: {err}")) + } + _ => Ok(()), + } + } +} + +#[derive(Default)] +struct InsertSelectRule; + +impl PropertyRule for InsertSelectRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowInserted { + conn, table, returned, .. 
+ } = event + else { + return Ok(()); + }; + let id = returned.id().ok_or_else(|| "row missing id column".to_string())?; + let found = ctx.access.lookup_in_connection(conn, table, id)?; + if found != Some(returned.clone()) { + return Err(format!( + "[PQS::InsertSelect] row not visible after insert on conn={conn}, table={table}, expected={returned:?}, actual={found:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct DeleteSelectRule; + +impl PropertyRule for DeleteSelectRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RowDeleted { conn, table, row, .. } = event else { + return Ok(()); + }; + let id = row.id().ok_or_else(|| "row missing id column".to_string())?; + if ctx.access.lookup_in_connection(conn, table, id)?.is_some() { + return Err(format!( + "[DeleteSelect] row still visible after delete on conn={conn}, table={table}, row={row:?}" + )); + } + Ok(()) + } +} + +fn post_write_check_tables(ctx: &PropertyContext<'_>, event: &PropertyEvent<'_>) -> Option> { + match event { + PropertyEvent::RowInserted { + table, in_tx: false, .. + } + | PropertyEvent::RowDeleted { + table, in_tx: false, .. 
+ } => Some(vec![*table]), + PropertyEvent::CommitOrRollback => Some((0..ctx.access.schema_plan().tables.len()).collect()), + _ => None, + } +} + +#[derive(Default)] +struct NoRecRule; + +impl PropertyRule for NoRecRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some((col_idx, col_ty)) = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool | AlgebraicType::U64)) + .map(|(idx, col)| (idx as u16, &col.ty)) + else { + continue; + }; + let scanned_rows = ctx.access.collect_rows_for_table(table)?; + if scanned_rows.is_empty() { + continue; + } + let predicate_value = match col_ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::U64 => scanned_rows[0].values[col_idx as usize].clone(), + _ => continue, + }; + let where_count = ctx.access.count_by_col_eq(table, col_idx, &predicate_value)?; + let projected_true_count = scanned_rows + .iter() + .filter(|row| row.values[col_idx as usize] == predicate_value) + .count(); + if where_count != projected_true_count { + return Err(format!( + "[NoREC::SelectSelectOptimizer] mismatch on table={table}, col={col_idx}: where_count={where_count}, projected_true={projected_true_count}" + )); + } + } + Ok(()) + } +} + +#[derive(Default)] +struct TlpRule; + +impl PropertyRule for TlpRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let Some(col_idx) = 
table_plan + .columns + .iter() + .enumerate() + .skip(1) + .find(|(_, col)| matches!(col.ty, AlgebraicType::Bool)) + .map(|(idx, _)| idx as u16) + else { + continue; + }; + let total = ctx.access.count_rows(table)?; + let true_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(true))?; + let false_count = ctx + .access + .count_by_col_eq(table, col_idx, &AlgebraicValue::Bool(false))?; + let partition_sum = true_count + false_count; + if partition_sum != total { + return Err(format!( + "[TLP::WhereTrueFalseNull|TLP::UNIONAllPreservesCardinality] partition mismatch on table={table}, col={col_idx}: true={true_count}, false={false_count}, total={total}" + )); + } + } + Ok(()) + } +} + +#[derive(Default)] +struct IndexRangeExcludedRule; + +impl PropertyRule for IndexRangeExcludedRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let Some(tables) = post_write_check_tables(ctx, &event) else { + return Ok(()); + }; + const MAX_ROWS_FOR_INDEX_SCAN_CHECK: usize = 512; + + for table in tables { + let table_plan = ctx + .access + .schema_plan() + .tables + .get(table) + .ok_or_else(|| format!("table {table} out of range"))?; + let rows = ctx.access.collect_rows_for_table(table)?; + if rows.len() < 2 || rows.len() > MAX_ROWS_FOR_INDEX_SCAN_CHECK { + continue; + } + + for cols in table_plan.extra_indexes.iter().filter(|cols| cols.len() > 1) { + if !cols.iter().all(|&col| { + matches!( + table_plan.columns[col as usize].ty, + AlgebraicType::U64 | AlgebraicType::Bool + ) + }) { + continue; + } + + let mut sorted_rows = rows.clone(); + sorted_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let lower_key = sorted_rows[0].project_key(cols).to_algebraic_value(); + let upper_key = sorted_rows[sorted_rows.len() - 1] + .project_key(cols) + .to_algebraic_value(); + let lower = Bound::Included(lower_key.clone()); + let upper = Bound::Excluded(upper_key.clone()); + + let mut 
expected_rows = sorted_rows + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + key >= lower_key && key < upper_key + }) + .collect::>(); + expected_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + let mut actual_rows = ctx.access.range_scan(table, cols, lower, upper)?; + actual_rows.sort_by(|lhs, rhs| compare_rows_by_cols(lhs, rhs, cols)); + + if actual_rows != expected_rows { + return Err(format!( + "[PQS::IndexRangeExcluded] range mismatch on table={table}, cols={cols:?}: expected={expected_rows:?}, actual={actual_rows:?}" + )); + } + } + } + + Ok(()) + } +} + +#[derive(Default)] +struct BankingMatchRule; + +impl PropertyRule for BankingMatchRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + match event { + PropertyEvent::RowInserted { in_tx: false, .. } + | PropertyEvent::RowDeleted { in_tx: false, .. } + | PropertyEvent::CommitOrRollback => check_banking_tables_match(ctx.access), + _ => Ok(()), + } + } +} + +#[derive(Default)] +struct DynamicMigrationAutoIncRule; + +impl PropertyRule for DynamicMigrationAutoIncRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DynamicMigrationProbe(probe) = event else { + return Ok(()); + }; + let max_existing_id = probe + .existing_rows + .iter() + .filter_map(sim_row_integer_id) + .max() + .unwrap_or(0); + let inserted_id = sim_row_integer_id(&probe.inserted_row).ok_or_else(|| { + format!( + "[DynamicMigrationAutoInc] probe row missing integer id for slot={}, from_version={}, to_version={}: {:?}", + probe.slot, probe.from_version, probe.to_version, probe.inserted_row + ) + })?; + if inserted_id <= max_existing_id { + return Err(format!( + "[DynamicMigrationAutoInc] non-advancing id for slot={}, from_version={}, to_version={}: inserted_id={}, max_existing_id={}", + probe.slot, probe.from_version, probe.to_version, inserted_id, 
max_existing_id + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct DurableReplayMatchesModelRule; + +impl PropertyRule for DurableReplayMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DurableReplay(replay) = event else { + return Ok(()); + }; + let expected_rows = ctx.models.table().committed_rows(); + if replay.base_rows != expected_rows { + return Err(format!( + "[DurableReplayMatchesModel] replayed durable state mismatch at durable_offset {:?}, restored_snapshot {:?}: expected={expected_rows:?} actual={:?}", + replay.durable_offset, replay.restored_snapshot_offset, replay.base_rows + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct SnapshotCaptureMaintainsPrefixRule; + +impl PropertyRule for SnapshotCaptureMaintainsPrefixRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::SnapshotCapture(snapshot) = event else { + return Ok(()); + }; + + match snapshot.status { + SnapshotCaptureStatus::Captured { offset } => { + if snapshot.latest_after != Some(offset) { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] captured offset {offset}, but latest snapshot is {:?}: {snapshot:?}", + snapshot.latest_after + )); + } + let durable = snapshot.durable_offset.ok_or_else(|| { + format!( + "[SnapshotCaptureMaintainsPrefix] captured snapshot {offset} without a durable offset: {snapshot:?}" + ) + })?; + if offset > durable { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] captured snapshot {offset} beyond durable offset {durable}: {snapshot:?}" + )); + } + } + SnapshotCaptureStatus::SkippedInjectedFault => { + if snapshot.latest_after > snapshot.latest_before { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] injected snapshot fault published newer snapshot: before={:?}, after={:?}", + snapshot.latest_before, snapshot.latest_after + )); + } + } + 
SnapshotCaptureStatus::SkippedOpenTransaction | SnapshotCaptureStatus::SkippedNoSnapshotCreated => { + if snapshot.latest_after != snapshot.latest_before { + return Err(format!( + "[SnapshotCaptureMaintainsPrefix] skipped snapshot changed latest snapshot: before={:?}, after={:?}, status={:?}", + snapshot.latest_before, snapshot.latest_after, snapshot.status + )); + } + } + } + Ok(()) + } +} + +#[derive(Default)] +struct SnapshotRestoreWithinDurablePrefixRule; + +impl PropertyRule for SnapshotRestoreWithinDurablePrefixRule { + fn observe(&mut self, _ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::DurableReplay(replay) = event else { + return Ok(()); + }; + let Some(snapshot_offset) = replay.restored_snapshot_offset else { + return Ok(()); + }; + let durable_offset = replay.durable_offset.ok_or_else(|| { + format!( + "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset} without durable offset: {replay:?}" + ) + })?; + if snapshot_offset > durable_offset { + return Err(format!( + "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset} beyond durable offset {durable_offset}: {replay:?}" + )); + } + if replay.latest_snapshot_offset == Some(snapshot_offset) { + return Ok(()); + } + if let Some(latest) = replay.latest_snapshot_offset + && latest <= durable_offset + && latest > snapshot_offset + { + return Err(format!( + "[SnapshotRestoreWithinDurablePrefix] restored snapshot {snapshot_offset}, but newer usable snapshot {latest} exists within durable offset {durable_offset}: {replay:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct ErrorMatchesOracleRule; + +impl PropertyRule for ErrorMatchesOracleRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, + } = event + else { + return Ok(()); + }; + if observed != predicted { + return 
Err(format!( + "[ErrorMatchesOracle] observed {observed:?}, but model predicted {predicted:?}: {interaction:?}", + )); + } + if let Some((conn, table)) = subject { + assert_visible_rows_match_model(ctx, conn, table, "[ErrorDoesNotMutate]", interaction)?; + } + Ok(()) + } +} + +#[derive(Default)] +struct NoMutationMatchesModelRule; + +impl PropertyRule for NoMutationMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::NoMutation { + interaction, + subject, + observation, + } = event + else { + return Ok(()); + }; + if let TableOperation::InsertRows { table, rows, .. } = &interaction.op + && let TableObservation::Mutated { mutations, .. } = observation + { + if mutations.len() != rows.len() { + return Err(format!( + "[NoMutationMatchesModel] insert no-op returned wrong mutation count: expected={}, actual={}; interaction={interaction:?}", + rows.len(), + mutations.len() + )); + } + for (row, mutation) in rows.iter().zip(mutations) { + let TableMutation::Inserted { + table: observed_table, + requested, + returned, + } = mutation + else { + return Err(format!( + "[NoMutationMatchesModel] insert no-op returned non-insert mutation: {mutation:?}; interaction={interaction:?}" + )); + }; + if observed_table != table || requested != row || returned != row { + return Err(format!( + "[NoMutationMatchesModel] no-op insert returned row mismatch: expected table={table}, row={row:?}; observed table={observed_table}, requested={requested:?}, returned={returned:?}; interaction={interaction:?}" + )); + } + } + } + + if let Some((conn, table)) = subject { + assert_visible_rows_match_model(ctx, conn, table, "[NoMutationMatchesModel]", interaction)?; + } + Ok(()) + } +} + +fn assert_visible_rows_match_model( + ctx: &PropertyContext<'_>, + conn: SessionId, + table: usize, + property: &str, + interaction: &crate::workload::table_ops::TableWorkloadInteraction, +) -> Result<(), String> { + let mut actual 
= ctx.access.collect_rows_in_connection(conn, table)?; + actual.sort_by_key(|row| row.id().unwrap_or_default()); + let expected = ctx.models.table().visible_rows(conn, table); + if actual != expected { + return Err(format!( + "{property} visible rows changed unexpectedly on conn={conn}, table={table}: expected={expected:?}, actual={actual:?}; interaction={interaction:?}" + )); + } + Ok(()) +} + +#[derive(Default)] +struct PointLookupMatchesModelRule; + +impl PropertyRule for PointLookupMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PointLookup { + conn, + table, + id, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().lookup_by_id(conn, table, id); + if *actual != expected { + return Err(format!( + "[Model::PointLookup] mismatch conn={conn}, table={table}, id={id}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct PredicateCountMatchesModelRule; + +impl PropertyRule for PredicateCountMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().predicate_count(conn, table, col, value); + if actual != expected { + return Err(format!( + "[Model::PredicateCount] mismatch conn={conn}, table={table}, col={col}, value={value:?}: expected={expected}, actual={actual}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct RangeScanMatchesModelRule; + +impl PropertyRule for RangeScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } = event + else { + return Ok(()); + }; + let expected = ctx.models.table().range_scan(conn, 
table, cols, lower, upper); + if actual != expected.as_slice() { + return Err(format!( + "[Model::RangeScan] mismatch conn={conn}, table={table}, cols={cols:?}, lower={lower:?}, upper={upper:?}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +#[derive(Default)] +struct FullScanMatchesModelRule; + +impl PropertyRule for FullScanMatchesModelRule { + fn observe(&mut self, ctx: &PropertyContext<'_>, event: PropertyEvent<'_>) -> Result<(), String> { + let PropertyEvent::FullScan { conn, table, actual } = event else { + return Ok(()); + }; + let expected = ctx.models.table().full_scan(conn, table); + if actual != expected.as_slice() { + return Err(format!( + "[Model::FullScan] mismatch conn={conn}, table={table}: expected={expected:?}, actual={actual:?}" + )); + } + Ok(()) + } +} + +fn check_banking_tables_match(access: &dyn TargetPropertyAccess) -> Result<(), String> { + let schema = access.schema_plan(); + let debit = schema.tables.iter().position(|table| table.name == "debit_accounts"); + let credit = schema.tables.iter().position(|table| table.name == "credit_accounts"); + let (Some(left), Some(right)) = (debit, credit) else { + return Ok(()); + }; + + let left_rows = access.collect_rows_for_table(left)?; + let right_rows = access.collect_rows_for_table(right)?; + if left_rows != right_rows { + return Err(format!( + "[Shadow::AllTableHaveExpectedContent] banking mismatch: debit={left_rows:?}, credit={right_rows:?}" + )); + } + Ok(()) +} + +fn compare_rows_by_cols(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} + +fn sim_row_integer_id(row: &SimRow) -> Option { + match row.values.first() { + Some(AlgebraicValue::I64(value)) => Some(*value as i128), + Some(AlgebraicValue::U64(value)) => Some(*value as i128), + _ => None, + } +} diff --git a/crates/dst/src/properties/runtime.rs 
b/crates/dst/src/properties/runtime.rs new file mode 100644 index 00000000000..c6f67c26e3f --- /dev/null +++ b/crates/dst/src/properties/runtime.rs @@ -0,0 +1,474 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + core::{StreamingProperties, TargetEngine}, + schema::{SchemaPlan, SimRow}, + workload::{ + commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DurableReplaySummary, SnapshotObservation}, + table_ops::{ + PredictedOutcome, TableErrorKind, TableOracle, TableScenario, TableWorkloadInteraction, + TableWorkloadOutcome, + }, + }, +}; + +use super::{ + rules::{oracle_table_state_rule, rule_for_kind, PropertyRule}, + CommitlogObservation, DynamicMigrationProbe, PropertyContext, PropertyEvent, PropertyKind, TableMutation, + TableObservation, TargetPropertyAccess, +}; + +#[derive(Clone, Debug)] +pub(super) struct PropertyModels { + table: TableModel, +} + +#[derive(Clone, Debug)] +pub(super) struct TableModel { + oracle: TableOracle, +} + +impl PropertyModels { + pub(super) fn new(table_count: usize, num_connections: usize) -> Self { + Self { + table: TableModel { + oracle: TableOracle::new(table_count, num_connections), + }, + } + } + + pub(super) fn table(&self) -> &TableModel { + &self.table + } + + fn predict(&self, interaction: &TableWorkloadInteraction) -> Result { + self.table.oracle.predict(&interaction.op) + } + + fn apply(&mut self, interaction: &TableWorkloadInteraction) { + self.table.oracle.apply(&interaction.op); + } +} + +impl TableModel { + pub(super) fn committed_rows(&self) -> Vec> { + self.oracle.clone().committed_rows() + } + + pub(super) fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { + self.oracle.lookup_by_id(conn, table, id) + } + + pub(super) fn predicate_count(&self, conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.oracle.predicate_count(conn, table, col, value) + } + + pub(super) fn range_scan( + &self, + conn: 
SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + self.oracle.range_scan(conn, table, cols, lower, upper) + } + + pub(super) fn full_scan(&self, conn: SessionId, table: usize) -> Vec { + let mut rows = self.oracle.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } + + pub(super) fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let mut rows = self.oracle.visible_rows(conn, table); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + rows + } +} + +/// Mutable runtime holding selected property implementations. +pub(crate) struct PropertyRuntime { + rules: Vec, + models: PropertyModels, +} + +impl PropertyRuntime { + pub fn with_kinds(kinds: &[PropertyKind]) -> Self { + let rules = kinds.iter().copied().map(rule_for_kind).map(RuleEntry::new).collect(); + Self { + rules, + models: PropertyModels::new(0, 0), + } + } + + pub fn for_table_workload(scenario: S, schema: SchemaPlan, num_connections: usize) -> Self + where + S: TableScenario + 'static, + { + let mut runtime = Self { + models: PropertyModels::new(schema.tables.len(), num_connections), + ..Self::default() + }; + runtime + .rules + .push(RuleEntry::new(oracle_table_state_rule(scenario, schema))); + runtime + } + + fn observe_event(&mut self, access: &dyn TargetPropertyAccess, event: PropertyEvent<'_>) -> Result<(), String> { + let ctx = PropertyContext { + access, + models: &self.models, + }; + for entry in &mut self.rules { + entry.rule.observe(&ctx, event.clone())?; + } + Ok(()) + } + + fn on_table_interaction( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + self.models.apply(interaction); + self.observe_event(access, PropertyEvent::TableInteractionApplied) + } + + fn on_mutations( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + mutations: &[TableMutation], + in_tx: bool, + ) -> Result<(), String> { + for 
mutation in mutations { + match mutation { + TableMutation::Inserted { + table, + requested: _, + returned, + } => self.observe_event( + access, + PropertyEvent::RowInserted { + conn, + table: *table, + returned, + in_tx, + }, + )?, + TableMutation::Deleted { table, row } => self.observe_event( + access, + PropertyEvent::RowDeleted { + conn, + table: *table, + row, + in_tx, + }, + )?, + } + } + Ok(()) + } + + fn on_observed_error( + &mut self, + access: &dyn TargetPropertyAccess, + observed: TableErrorKind, + predicted: TableErrorKind, + subject: Option<(SessionId, usize)>, + interaction: &TableWorkloadInteraction, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::ObservedError { + observed, + predicted, + subject, + interaction, + }, + ) + } + + fn on_no_mutation( + &mut self, + access: &dyn TargetPropertyAccess, + subject: Option<(SessionId, usize)>, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::NoMutation { + subject, + interaction, + observation, + }, + ) + } + + fn on_point_lookup( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + id: u64, + actual: &Option, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::PointLookup { + conn, + table, + id, + actual, + }, + ) + } + + fn on_predicate_count( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + col: u16, + value: &AlgebraicValue, + actual: usize, + ) -> Result<(), String> { + self.observe_event( + access, + PropertyEvent::PredicateCount { + conn, + table, + col, + value, + actual, + }, + ) + } + + #[allow(clippy::too_many_arguments)] + fn on_range_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + actual: &[SimRow], + ) -> Result<(), String> { + self.observe_event( + access, + 
PropertyEvent::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + }, + ) + } + + fn on_full_scan( + &mut self, + access: &dyn TargetPropertyAccess, + conn: SessionId, + table: usize, + actual: &[SimRow], + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::FullScan { conn, table, actual }) + } + + fn on_commit_or_rollback(&mut self, access: &dyn TargetPropertyAccess) -> Result<(), String> { + self.observe_event(access, PropertyEvent::CommitOrRollback) + } + + fn on_dynamic_migration_probe( + &mut self, + access: &dyn TargetPropertyAccess, + probe: &DynamicMigrationProbe, + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::DynamicMigrationProbe(probe)) + } + + fn on_snapshot_capture( + &mut self, + access: &dyn TargetPropertyAccess, + snapshot: &SnapshotObservation, + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::SnapshotCapture(snapshot)) + } + + fn on_durable_replay( + &mut self, + access: &dyn TargetPropertyAccess, + replay: &DurableReplaySummary, + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::DurableReplay(replay)) + } + + fn on_table_workload_finish( + &mut self, + access: &dyn TargetPropertyAccess, + outcome: &TableWorkloadOutcome, + ) -> Result<(), String> { + self.observe_event(access, PropertyEvent::TableWorkloadFinished(outcome)) + } + + fn observe_table_observation( + &mut self, + access: &dyn TargetPropertyAccess, + interaction: &TableWorkloadInteraction, + observation: &TableObservation, + ) -> Result<(), String> { + let prediction = self.models.predict(interaction)?; + match (&prediction, observed_error_kind(observation)) { + (PredictedOutcome::Error { kind, subject }, Some(observed)) => { + self.on_observed_error(access, observed, *kind, *subject, interaction)?; + return Ok(()); + } + (PredictedOutcome::Error { kind, .. 
}, None) => { + return Err(format!( + "[ErrorMatchesOracle] expected {kind:?}, observed successful result {observation:?} for {interaction:?}" + )); + } + (PredictedOutcome::Applied, Some(observed)) => { + return Err(format!( + "[ErrorMatchesOracle] expected success, observed {observed:?} for {interaction:?}" + )); + } + (PredictedOutcome::Applied, None) => self.on_table_interaction(access, interaction)?, + (PredictedOutcome::NoMutation { subject: _ }, Some(observed)) => { + return Err(format!( + "[NoMutationMatchesModel] expected no mutation, observed {observed:?} for {interaction:?}" + )); + } + (PredictedOutcome::NoMutation { subject }, None) => { + self.on_no_mutation(access, *subject, interaction, observation)?; + } + } + + match observation { + TableObservation::Applied => {} + TableObservation::Mutated { conn, mutations, in_tx } => { + self.on_mutations(access, *conn, mutations, *in_tx)? + } + TableObservation::ObservedError(_) => {} + TableObservation::PointLookup { + conn, + table, + id, + actual, + } => self.on_point_lookup(access, *conn, *table, *id, actual)?, + TableObservation::PredicateCount { + conn, + table, + col, + value, + actual, + } => self.on_predicate_count(access, *conn, *table, *col, value, *actual)?, + TableObservation::RangeScan { + conn, + table, + cols, + lower, + upper, + actual, + } => self.on_range_scan(access, *conn, *table, cols, lower, upper, actual)?, + TableObservation::FullScan { conn, table, actual } => self.on_full_scan(access, *conn, *table, actual)?, + TableObservation::CommitOrRollback => {} + } + + if matches!(observation, TableObservation::CommitOrRollback) { + self.on_commit_or_rollback(access)?; + } + Ok(()) + } +} + +impl StreamingProperties for PropertyRuntime +where + E: TargetEngine< + CommitlogInteraction, + Observation = CommitlogObservation, + Outcome = CommitlogWorkloadOutcome, + Error = String, + > + TargetPropertyAccess, +{ + fn observe( + &mut self, + engine: &E, + interaction: &CommitlogInteraction, + 
observation: &CommitlogObservation, + ) -> Result<(), String> { + match (interaction, observation) { + (CommitlogInteraction::Table(table_interaction), CommitlogObservation::Table(table_observation)) => { + self.observe_table_observation(engine, table_interaction, table_observation) + } + (_, CommitlogObservation::DynamicMigrationProbe(probe)) => self.on_dynamic_migration_probe(engine, probe), + (_, CommitlogObservation::DurableReplay(replay)) => self.on_durable_replay(engine, replay), + (_, CommitlogObservation::Applied | CommitlogObservation::Skipped) => Ok(()), + (other, observation) => Err(format!( + "observation {observation:?} does not match interaction {other:?}" + )), + } + } + + fn finish(&mut self, engine: &E, outcome: &CommitlogWorkloadOutcome) -> Result<(), String> { + self.on_durable_replay(engine, &outcome.replay)?; + self.on_table_workload_finish(engine, &outcome.table) + } +} + +struct RuleEntry { + rule: Box, +} + +impl RuleEntry { + fn new(rule: Box) -> Self { + Self { rule } + } +} + +impl Default for PropertyRuntime { + fn default() -> Self { + Self::with_kinds(&[ + PropertyKind::NotCrash, + PropertyKind::InsertSelect, + PropertyKind::DeleteSelect, + PropertyKind::SelectSelectOptimizer, + PropertyKind::WhereTrueFalseNull, + PropertyKind::IndexRangeExcluded, + PropertyKind::BankingTablesMatch, + PropertyKind::DynamicMigrationAutoInc, + PropertyKind::DurableReplayMatchesModel, + PropertyKind::SnapshotCaptureMaintainsPrefix, + PropertyKind::SnapshotRestoreWithinDurablePrefix, + PropertyKind::ErrorMatchesOracle, + PropertyKind::NoMutationMatchesModel, + PropertyKind::PointLookupMatchesModel, + PropertyKind::PredicateCountMatchesModel, + PropertyKind::RangeScanMatchesModel, + PropertyKind::FullScanMatchesModel, + ]) + } +} + +fn observed_error_kind(observation: &TableObservation) -> Option { + match observation { + TableObservation::ObservedError(kind) => Some(*kind), + TableObservation::Applied + | TableObservation::Mutated { .. 
} + | TableObservation::PointLookup { .. } + | TableObservation::PredicateCount { .. } + | TableObservation::RangeScan { .. } + | TableObservation::FullScan { .. } + | TableObservation::CommitOrRollback => None, + } +} diff --git a/crates/dst/src/schema.rs b/crates/dst/src/schema.rs new file mode 100644 index 00000000000..ebce6c3a34b --- /dev/null +++ b/crates/dst/src/schema.rs @@ -0,0 +1,196 @@ +//! Shared schema and row model used by DST targets. + +use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue}; + +use crate::seed::DstRng; + +/// Generated schema for one simulator case. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SchemaPlan { + /// User-visible tables installed before the workload starts. + pub tables: Vec, +} + +/// Table definition used by simulators. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct TablePlan { + /// Stable logical table name used in generated interactions and assertions. + pub name: String, + /// Ordered column definitions. Column 0 is treated as the primary id column. + pub columns: Vec, + /// Additional indexed column sets beyond the implicit primary id index. + /// + /// A value like `[1]` means a single-column secondary index on column 1. + /// A value like `[0, 1]` means a composite btree index over columns 0 and 1. + pub extra_indexes: Vec>, +} + +/// Column definition used by simulators. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct ColumnPlan { + /// Column name installed into the target schema. + pub name: String, + /// Algebraic type for generated values in this column. + pub ty: AlgebraicType, +} + +/// Serializable row representation used by generated interactions. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SimRow { + /// Column values in schema order. 
+ pub values: Vec, +} + +pub fn generate_supported_type(rng: &mut DstRng) -> AlgebraicType { + match rng.index(12) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::I8, + 2 => AlgebraicType::U8, + 3 => AlgebraicType::I16, + 4 => AlgebraicType::U16, + 5 => AlgebraicType::I32, + 6 => AlgebraicType::U32, + 7 => AlgebraicType::I64, + 8 => AlgebraicType::U64, + 9 => AlgebraicType::I128, + 10 => AlgebraicType::U128, + _ => AlgebraicType::String, + } +} + +pub fn generate_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + if rng.index(5) == 0 { + return edge_value_for_type(rng, ty, idx); + } + + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + AlgebraicType::I8 => AlgebraicValue::I8(((rng.next_u64() % 64) as i8) - 32), + AlgebraicType::U8 => AlgebraicValue::U8((rng.next_u64() % u8::MAX as u64) as u8), + AlgebraicType::I16 => AlgebraicValue::I16(((rng.next_u64() % 2048) as i16) - 1024), + AlgebraicType::U16 => AlgebraicValue::U16((rng.next_u64() % u16::MAX as u64) as u16), + AlgebraicType::I32 => AlgebraicValue::I32(((rng.next_u64() % 200_000) as i32) - 100_000), + AlgebraicType::U32 => AlgebraicValue::U32((rng.next_u64() % 1_000_000) as u32), + AlgebraicType::I64 => AlgebraicValue::I64(((rng.next_u64() % 2_000_000) as i64) - 1_000_000), + AlgebraicType::U64 => AlgebraicValue::U64((rng.next_u64() % 1000) + idx as u64), + AlgebraicType::I128 => { + let v = ((rng.next_u64() % 2_000_000) as i128) - 1_000_000; + AlgebraicValue::I128(v.into()) + } + AlgebraicType::U128 => { + let v = (rng.next_u64() % 2_000_000) as u128; + AlgebraicValue::U128(v.into()) + } + AlgebraicType::String => AlgebraicValue::String(format!("v{}_{}", idx, rng.next_u64() % 10_000).into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +pub fn default_value_for_type(ty: &AlgebraicType) -> AlgebraicValue { + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(false), + AlgebraicType::I8 => 
AlgebraicValue::I8(0), + AlgebraicType::U8 => AlgebraicValue::U8(0), + AlgebraicType::I16 => AlgebraicValue::I16(0), + AlgebraicType::U16 => AlgebraicValue::U16(0), + AlgebraicType::I32 => AlgebraicValue::I32(0), + AlgebraicType::U32 => AlgebraicValue::U32(0), + AlgebraicType::I64 => AlgebraicValue::I64(0), + AlgebraicType::U64 => AlgebraicValue::U64(0), + AlgebraicType::I128 => AlgebraicValue::I128(0.into()), + AlgebraicType::U128 => AlgebraicValue::U128(0.into()), + AlgebraicType::String => AlgebraicValue::String("".into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +pub fn distinct_value_for_type(ty: &AlgebraicType, current: &AlgebraicValue) -> AlgebraicValue { + let default = default_value_for_type(ty); + if &default != current { + return default; + } + + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(true), + AlgebraicType::I8 => AlgebraicValue::I8(1), + AlgebraicType::U8 => AlgebraicValue::U8(1), + AlgebraicType::I16 => AlgebraicValue::I16(1), + AlgebraicType::U16 => AlgebraicValue::U16(1), + AlgebraicType::I32 => AlgebraicValue::I32(1), + AlgebraicType::U32 => AlgebraicValue::U32(1), + AlgebraicType::I64 => AlgebraicValue::I64(1), + AlgebraicType::U64 => AlgebraicValue::U64(1), + AlgebraicType::I128 => AlgebraicValue::I128(1.into()), + AlgebraicType::U128 => AlgebraicValue::U128(1.into()), + AlgebraicType::String => AlgebraicValue::String("dst_unique_conflict".into()), + other => panic!("unsupported generated column type: {other:?}"), + } +} + +fn edge_value_for_type(rng: &mut DstRng, ty: &AlgebraicType, idx: usize) -> AlgebraicValue { + match ty { + AlgebraicType::Bool => AlgebraicValue::Bool(rng.index(2) == 0), + AlgebraicType::I8 => [i8::MIN, -1, 0, 1, i8::MAX][rng.index(5)].into(), + AlgebraicType::U8 => [0, 1, u8::MAX][rng.index(3)].into(), + AlgebraicType::I16 => [i16::MIN, -1, 0, 1, i16::MAX][rng.index(5)].into(), + AlgebraicType::U16 => [0, 1, u16::MAX][rng.index(3)].into(), + AlgebraicType::I32 => 
[i32::MIN, -1, 0, 1, i32::MAX][rng.index(5)].into(), + AlgebraicType::U32 => [0, 1, u32::MAX][rng.index(3)].into(), + AlgebraicType::I64 => [i64::MIN, -1, 0, 1, i64::MAX][rng.index(5)].into(), + AlgebraicType::U64 => [0, 1, u64::MAX.saturating_sub(idx as u64)][rng.index(3)].into(), + AlgebraicType::I128 => { + let value = [i128::MIN, -1, 0, 1, i128::MAX][rng.index(5)]; + AlgebraicValue::I128(value.into()) + } + AlgebraicType::U128 => { + let value = [0, 1, u128::MAX][rng.index(3)]; + AlgebraicValue::U128(value.into()) + } + AlgebraicType::String => match rng.index(5) { + 0 => AlgebraicValue::String("".into()), + 1 => AlgebraicValue::String("same".into()), + 2 => AlgebraicValue::String("x".repeat(512).into()), + 3 => AlgebraicValue::String(format!("edge_{}", char::from_u32(0x2603).expect("valid char")).into()), + _ => AlgebraicValue::String(format!("v{idx}_edge").into()), + }, + other => panic!("unsupported generated column type: {other:?}"), + } +} + +impl SimRow { + pub fn to_product_value(&self) -> ProductValue { + ProductValue::from_iter(self.values.iter().cloned()) + } + + pub fn to_bsatn(&self) -> anyhow::Result> { + Ok(spacetimedb_sats::bsatn::to_vec(&self.to_product_value())?) 
+ } + + pub fn from_product_value(value: ProductValue) -> Self { + SimRow { + values: value.elements.to_vec(), + } + } + + pub fn project_key(&self, cols: &[u16]) -> Self { + let values = cols + .iter() + .map(|&col| self.values[col as usize].clone()) + .collect::>(); + SimRow { values } + } + + pub fn to_algebraic_value(&self) -> AlgebraicValue { + match self.values.as_slice() { + [value] => value.clone(), + _ => ProductValue::from_iter(self.values.iter().cloned()).into(), + } + } + + pub fn id(&self) -> Option { + match self.values.first() { + Some(AlgebraicValue::U64(value)) => Some(*value), + _ => None, + } + } +} diff --git a/crates/dst/src/seed.rs b/crates/dst/src/seed.rs new file mode 100644 index 00000000000..669bb125dd3 --- /dev/null +++ b/crates/dst/src/seed.rs @@ -0,0 +1,52 @@ +//! Stable seed and RNG utilities used across DST runs. +//! +//! The important property here is repeatability, not statistical quality. +//! `DstSeed::fork` is used to derive independent substreams without requiring +//! callers to manually coordinate RNG state. + +/// Top-level seed value for a deterministic run. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +pub struct DstSeed(pub u64); + +impl DstSeed { + pub(crate) fn fork(self, discriminator: u64) -> Self { + // derive independent seed using same mixing primitive + Self(splitmix64(self.0 ^ discriminator.wrapping_mul(GAMMA))) + } + + pub(crate) fn rng(self) -> DstRng { + DstRng { + state: splitmix64(self.0), + } + } +} + +/// Small deterministic RNG for simulator code. 
+#[derive(Clone, Debug)] +pub(crate) struct DstRng { + state: u64, +} + +impl DstRng { + pub(crate) fn next_u64(&mut self) -> u64 { + // advance state, then reuse splitmix64 mixing + self.state = self.state.wrapping_add(GAMMA); + splitmix64(self.state) + } + + pub(crate) fn index(&mut self, len: usize) -> usize { + assert!(len > 0, "len must be non-zero"); + (self.next_u64() as usize) % len + } +} + +// constants reused everywhere +const GAMMA: u64 = 0x9e37_79b9_7f4a_7c15; + +/// Reference: https://rosettacode.org/wiki/Pseudo-random_numbers/Splitmix64 +fn splitmix64(mut x: u64) -> u64 { + x = x.wrapping_add(GAMMA); + x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + x ^ (x >> 31) +} diff --git a/crates/dst/src/sim/commitlog.rs b/crates/dst/src/sim/commitlog.rs new file mode 100644 index 00000000000..a3891571732 --- /dev/null +++ b/crates/dst/src/sim/commitlog.rs @@ -0,0 +1,294 @@ +//! Commitlog storage fault-injection support for DST targets. + +use std::{ + fmt, + io::{self, BufRead, Read, Seek, Write}, +}; + +use spacetimedb_commitlog::{ + repo::{ + CompressOnce, CompressionStats, Repo, RepoWithoutLockFile, SegmentLen, SegmentReader, TxOffset, TxOffsetIndex, TxOffsetIndexMut, + }, + segment::{FileLike, Header}, +}; + +use crate::{ + seed::DstSeed, + sim::storage_faults::{ + is_injected_fault_text, ShortIoKind, StorageFaultConfig, StorageFaultController, StorageFaultDomain, + StorageFaultKind, StorageFaultSummary, + }, +}; + +pub(crate) type CommitlogFaultConfig = StorageFaultConfig; +pub(crate) type CommitlogFaultSummary = StorageFaultSummary; + +/// Returns true if `text` contains an error created by this fault layer. +pub(crate) fn is_injected_disk_error_text(text: &str) -> bool { + is_injected_fault_text(StorageFaultDomain::Disk, text) +} + +/// DST-only repo wrapper that makes the in-memory commitlog backend behave less like RAM. 
+/// +/// Faults stay within normal file API semantics: calls may take deterministic simulated time, +/// reads/writes may complete partially, and configured calls may return transient I/O errors. +/// The wrapper deliberately avoids corruption or crash-style partial persistence; those need a +/// stronger durability model before we enable them. +#[derive(Clone)] +pub(crate) struct FaultableRepo { + inner: R, + faults: StorageFaultController, +} + +impl FaultableRepo { + pub(crate) fn new(inner: R, config: CommitlogFaultConfig, seed: DstSeed) -> Self { + Self { + inner, + faults: StorageFaultController::new(config, StorageFaultDomain::Disk, seed), + } + } + + pub(crate) fn enable_faults(&self) { + self.faults.enable(); + } + + pub(crate) fn fault_summary(&self) -> CommitlogFaultSummary { + self.faults.summary() + } + + pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T { + self.faults.with_suspended(f) + } +} + +impl fmt::Display for FaultableRepo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}+faultable({})", self.inner, self.faults.summary().profile) + } +} + +impl Repo for FaultableRepo { + type SegmentWriter = FaultableSegment; + type SegmentReader = FaultableReader; + + fn create_segment(&self, offset: u64, header: Header) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; + self.inner + .create_segment(offset, header) + .map(|inner| FaultableSegment::new(inner, self.faults.clone())) + } + + fn open_segment_reader(&self, offset: u64) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; + self.inner + .open_segment_reader(offset) + .map(|inner| FaultableReader::new(inner, self.faults.clone())) + } + + fn open_segment_writer(&self, offset: u64) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Open)?; + self.inner + .open_segment_writer(offset) + .map(|inner| 
FaultableSegment::new(inner, self.faults.clone())) + } + + fn segment_file_path(&self, offset: u64) -> Option { + self.inner.segment_file_path(offset) + } + + fn remove_segment(&self, offset: u64) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.remove_segment(offset) + } + + fn compress_segment_with(&self, offset: u64, f: impl CompressOnce) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.compress_segment_with(offset, f) + } + + fn existing_offsets(&self) -> io::Result> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.existing_offsets() + } + + fn create_offset_index(&self, offset: TxOffset, cap: u64) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.create_offset_index(offset, cap) + } + + fn remove_offset_index(&self, offset: TxOffset) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.remove_offset_index(offset) + } + + fn get_offset_index(&self, offset: TxOffset) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.get_offset_index(offset) + } +} + +impl RepoWithoutLockFile for FaultableRepo {} + +pub(crate) struct FaultableSegment { + inner: S, + faults: StorageFaultController, +} + +impl FaultableSegment { + fn new(inner: S, faults: StorageFaultController) -> Self { + Self { inner, faults } + } +} + +impl Read for FaultableSegment { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + self.inner.read(&mut buf[..len]) + } +} + +impl Write for FaultableSegment { + fn write(&mut self, buf: &[u8]) -> io::Result { + 
self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Write)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Write); + self.inner.write(&buf[..len]) + } + + fn flush(&mut self) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Flush)?; + self.inner.flush() + } +} + +impl Seek for FaultableSegment { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.faults.maybe_latency(); + self.inner.seek(pos) + } +} + +impl SegmentLen for FaultableSegment { + fn segment_len(&mut self) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.segment_len() + } +} + +impl FileLike for FaultableSegment { + fn fsync(&mut self) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Fsync)?; + self.inner.fsync() + } + + fn ftruncate(&mut self, tx_offset: u64, size: u64) -> io::Result<()> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.ftruncate(tx_offset, size) + } +} + +pub(crate) struct FaultableReader { + inner: S, + faults: StorageFaultController, +} + +impl FaultableReader { + fn new(inner: S, faults: StorageFaultController) -> Self { + Self { inner, faults } + } +} + +impl Read for FaultableReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + self.inner.read(&mut buf[..len]) + } +} + +impl BufRead for FaultableReader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Read)?; + let buf = self.inner.fill_buf()?; + let len = self.faults.maybe_short_len(buf.len(), ShortIoKind::Read); + Ok(&buf[..len]) + } + + fn consume(&mut self, amount: usize) { + self.inner.consume(amount); + } +} + +impl Seek for 
FaultableReader { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.faults.maybe_latency(); + self.inner.seek(pos) + } +} + +impl SegmentLen for FaultableReader { + fn segment_len(&mut self) -> io::Result { + self.faults.maybe_latency(); + self.faults.maybe_error(StorageFaultKind::Metadata)?; + self.inner.segment_len() + } +} + +impl SegmentReader for FaultableReader { + fn sealed(&self) -> bool { + self.inner.sealed() + } +} + +#[cfg(test)] +mod tests { + use std::io::{BufRead, Cursor}; + + use crate::config::CommitlogFaultProfile; + + use super::*; + + fn always_short_read_config() -> CommitlogFaultConfig { + CommitlogFaultConfig { + profile: CommitlogFaultProfile::Default, + enabled: true, + latency_prob: 0.0, + long_latency_prob: 0.0, + short_io_prob: 1.0, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + } + } + + #[test] + fn buf_read_path_applies_short_read_faults() { + let faults = StorageFaultController::new(always_short_read_config(), StorageFaultDomain::Disk, DstSeed(55)); + faults.enable(); + let mut reader = FaultableReader::new(Cursor::new(vec![1, 2, 3, 4]), faults.clone()); + + assert_eq!(reader.fill_buf().unwrap(), &[1, 2]); + assert_eq!(faults.summary().short_read, 1); + } +} diff --git a/crates/dst/src/sim/mod.rs b/crates/dst/src/sim/mod.rs new file mode 100644 index 00000000000..6b78fc542b0 --- /dev/null +++ b/crates/dst/src/sim/mod.rs @@ -0,0 +1,112 @@ +//! Local simulation shim for the DST crate. +//! +//! This module is deliberately small, but its executor shape follows madsim's: +//! futures are scheduled as runnable tasks and the ready queue is sampled by a +//! deterministic RNG instead of being driven by a package-level async runtime. 
+ +pub(crate) mod commitlog; +pub(crate) mod snapshot; +pub(crate) mod storage_faults; +pub mod time; + +use std::{cell::RefCell, future::Future, time::Duration}; + +pub use spacetimedb_runtime::sim::{yield_now, Handle, JoinHandle, Node, NodeBuilder, NodeId, Rng}; + +use crate::seed::DstSeed; + +thread_local! { + static CURRENT_HANDLE: RefCell> = const { RefCell::new(None) }; +} + +struct CurrentHandleGuard { + previous: Option, +} + +fn enter_current_handle(handle: Handle) -> CurrentHandleGuard { + let previous = CURRENT_HANDLE.with(|slot| slot.replace(Some(handle))); + CurrentHandleGuard { previous } +} + +impl Drop for CurrentHandleGuard { + fn drop(&mut self) { + CURRENT_HANDLE.with(|slot| { + let _ = slot.replace(self.previous.take()); + }); + } +} + +pub(crate) fn current_handle() -> Option { + CURRENT_HANDLE.with(|slot| slot.borrow().clone()) +} + +/// DST-facing wrapper that keeps the top-level seed type local to this crate. +pub struct Runtime { + inner: spacetimedb_runtime::sim::Runtime, +} + +impl Runtime { + pub fn new(seed: DstSeed) -> anyhow::Result { + Ok(Self { + inner: spacetimedb_runtime::sim::Runtime::new(seed.0), + }) + } + + pub fn block_on(&mut self, future: F) -> F::Output { + let _guard = enter_current_handle(self.inner.handle()); + spacetimedb_runtime::sim_std::block_on(&mut self.inner, future) + } + + pub fn elapsed(&self) -> Duration { + self.inner.elapsed() + } + + pub fn handle(&self) -> Handle { + self.inner.handle() + } + + pub fn create_node(&self) -> NodeBuilder { + self.inner.create_node() + } + + pub fn pause(&self, node: NodeId) { + self.inner.pause(node); + } + + pub fn resume(&self, node: NodeId) { + self.inner.resume(node); + } + + pub fn spawn_on(&self, node: NodeId, future: F) -> JoinHandle + where + F: Future + Send + 'static, + F::Output: Send + 'static, + { + self.inner.spawn_on(node, future) + } + + pub fn check_determinism(seed: DstSeed, make_future: fn() -> F) -> F::Output + where + F: Future + 'static, + F::Output: 
Send + 'static, + { + spacetimedb_runtime::sim_std::check_determinism(seed.0, make_future) + } + + pub fn check_determinism_with(seed: DstSeed, make_future: M) -> F::Output + where + M: Fn() -> F + Clone + Send + 'static, + F: Future + 'static, + F::Output: Send + 'static, + { + spacetimedb_runtime::sim_std::check_determinism(seed.0, make_future) + } +} + +pub(crate) fn advance_time(duration: Duration) { + time::advance(duration); +} + +pub(crate) fn decision_source(seed: DstSeed) -> Rng { + Rng::new(seed.0) +} diff --git a/crates/dst/src/sim/snapshot.rs b/crates/dst/src/sim/snapshot.rs new file mode 100644 index 00000000000..e3773b07e36 --- /dev/null +++ b/crates/dst/src/sim/snapshot.rs @@ -0,0 +1,267 @@ +//! In-memory snapshot storage with deterministic fault injection. +//! +//! This is intentionally a semantic snapshot seam, not a filesystem facade. It +//! keeps DST snapshot bytes inside controlled memory storage, while still using +//! the same snapshot capture/restore shape as production. + +use std::{ops::Range, sync::Arc}; + +use spacetimedb_durability::TxOffset; +use spacetimedb_lib::Identity; +use spacetimedb_snapshot::{ + BoxedPendingSnapshot, CompressionStats, MemorySnapshotRepository, PendingSnapshot, ReconstructedSnapshot, + SnapshotError, SnapshotRepo, SnapshotStore, +}; +use spacetimedb_table::{blob_store::BlobStore, page_pool::PagePool, table::Table}; + +use crate::{ + seed::DstSeed, + sim::storage_faults::{ + is_injected_fault_text, StorageFaultConfig, StorageFaultController, StorageFaultDomain, StorageFaultKind, + StorageFaultSummary, + }, +}; + +pub(crate) type SnapshotFaultConfig = StorageFaultConfig; + +/// Returns true if `text` contains an error created by this snapshot fault layer. 
+pub(crate) fn is_injected_snapshot_error_text(text: &str) -> bool { + is_injected_fault_text(StorageFaultDomain::Snapshot, text) +} + +pub(crate) struct SnapshotRestoreRepo { + pub(crate) store: Option>, + pub(crate) restored_snapshot_offset: Option, + pub(crate) latest_snapshot_offset: Option, +} + +/// In-memory snapshot repository wrapped with deterministic operation-level faults. +/// +/// The bytes/pages are written and read by `spacetimedb-snapshot`; this wrapper +/// only decides whether a DST operation reaches that repository. That keeps +/// restore semantics aligned with production without requiring the +/// Tokio-backed `SnapshotWorker` or the host filesystem inside the simulator. +/// +/// This is the intended boundary for the current DST target. It exercises +/// capture/restore behavior, retry classification, and replay correctness. It +/// does not model torn snapshot pages or byte-level corruption. +#[derive(Clone)] +pub(crate) struct BuggifiedSnapshotRepo { + repo: Arc, + faults: StorageFaultController, +} + +impl BuggifiedSnapshotRepo { + pub(crate) fn new(config: SnapshotFaultConfig, seed: DstSeed) -> anyhow::Result { + Ok(Self { + repo: Arc::new(MemorySnapshotRepository::new(Identity::ZERO, 0)), + faults: StorageFaultController::new(config, StorageFaultDomain::Snapshot, seed), + }) + } + + pub(crate) fn enable_faults(&self) { + self.faults.enable(); + } + + pub(crate) fn fault_summary(&self) -> StorageFaultSummary { + self.faults.summary() + } + + pub(crate) fn with_faults_suspended(&self, f: impl FnOnce() -> T) -> T { + self.faults.with_suspended(f) + } + + pub(crate) fn latest_snapshot_unfaulted(&self) -> Result, String> { + self.with_faults_suspended(|| { + self.repo + .latest_snapshot() + .map_err(|err| format!("snapshot metadata read failed: {err}")) + }) + } + + pub(crate) fn repo_for_restore(&self, durable_offset: Option) -> Result { + let latest_snapshot_offset = self.latest_snapshot_unfaulted()?; + self.faults.maybe_latency(); + 
self.inject(StorageFaultKind::Metadata)?; + let Some(durable_offset) = durable_offset else { + return Ok(SnapshotRestoreRepo { + store: None, + restored_snapshot_offset: None, + latest_snapshot_offset, + }); + }; + let restored_snapshot_offset = self + .repo + .latest_snapshot_older_than(durable_offset) + .map_err(|err| format!("snapshot metadata before restore failed: {err}"))?; + if restored_snapshot_offset.is_none() { + return Ok(SnapshotRestoreRepo { + store: None, + restored_snapshot_offset, + latest_snapshot_offset, + }); + } + + self.inject(StorageFaultKind::Open)?; + self.inject(StorageFaultKind::Read)?; + Ok(SnapshotRestoreRepo { + store: Some(self.repo.clone()), + restored_snapshot_offset, + latest_snapshot_offset, + }) + } + + fn inject(&self, kind: StorageFaultKind) -> Result<(), String> { + self.faults.maybe_error(kind).map_err(|err| err.to_string()) + } +} + +impl SnapshotStore for BuggifiedSnapshotRepo { + fn database_identity(&self) -> Identity { + self.repo.database_identity() + } + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Open) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Write) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Fsync) + .map_err(SnapshotError::Io)?; + self.repo.capture_snapshot(tables, blobs, tx_offset) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Open) + .map_err(SnapshotError::Io)?; + self.faults + .maybe_error(StorageFaultKind::Read) + .map_err(SnapshotError::Io)?; + self.repo.read_snapshot(tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, 
SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.latest_snapshot_older_than(upper_bound) + } + + fn latest_snapshot(&self) -> Result, SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.latest_snapshot() + } + + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.invalidate_newer_snapshots(upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + self.faults.maybe_latency(); + self.faults + .maybe_error(StorageFaultKind::Metadata) + .map_err(SnapshotError::Io)?; + self.repo.invalidate_snapshot(tx_offset) + } +} + +struct BuggifiedPendingSnapshot { + tx_offset: TxOffset, +} + +impl PendingSnapshot for BuggifiedPendingSnapshot { + fn sync_all(self: Box) -> Result { + Ok(self.tx_offset) + } +} + +impl SnapshotRepo for BuggifiedSnapshotRepo { + type Pending = BoxedPendingSnapshot; + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.capture_snapshot(tables, blobs, tx_offset)?; + Ok(Box::new(BuggifiedPendingSnapshot { tx_offset })) + } + + fn compress_snapshots(&self, _stats: &mut CompressionStats, _range: Range) -> Result<(), SnapshotError> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::{config::CommitlogFaultProfile, seed::DstSeed}; + + use super::*; + + fn no_faults() -> SnapshotFaultConfig { + SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Off) + } + + fn always_metadata_error() -> SnapshotFaultConfig { + SnapshotFaultConfig { + enabled: true, + metadata_error_prob: 1.0, + ..SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Default) + } + } + + 
#[test] + fn repo_without_snapshots_is_not_used_for_restore() { + let repo = BuggifiedSnapshotRepo::new(no_faults(), DstSeed(41)).unwrap(); + + assert!(repo.repo_for_restore(Some(0)).unwrap().store.is_none()); + } + + #[test] + fn injected_metadata_error_is_counted_and_recognizable() { + let repo = BuggifiedSnapshotRepo::new(always_metadata_error(), DstSeed(42)).unwrap(); + repo.enable_faults(); + + let err = match repo.repo_for_restore(Some(0)) { + Ok(_) => panic!("expected injected snapshot metadata error"), + Err(err) => err, + }; + + assert!(is_injected_snapshot_error_text(&err)); + assert_eq!(repo.fault_summary().metadata_error, 1); + } + + #[test] + fn suspended_faults_allow_restore_probe() { + let repo = BuggifiedSnapshotRepo::new(always_metadata_error(), DstSeed(43)).unwrap(); + repo.enable_faults(); + + let restore = repo.with_faults_suspended(|| repo.repo_for_restore(Some(0))); + + assert!(restore.unwrap().store.is_none()); + assert_eq!(repo.fault_summary().metadata_error, 0); + } +} diff --git a/crates/dst/src/sim/storage_faults.rs b/crates/dst/src/sim/storage_faults.rs new file mode 100644 index 00000000000..65c89dbafe3 --- /dev/null +++ b/crates/dst/src/sim/storage_faults.rs @@ -0,0 +1,320 @@ +//! Shared storage fault-injection primitives for DST simulation helpers. + +use std::{ + io, + sync::{ + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, + Arc, + }, + time::Duration, +}; + +use crate::{config::CommitlogFaultProfile, seed::DstSeed, sim}; + +const INJECTED_ERROR_PREFIX: &str = "dst injected "; + +pub(crate) fn is_injected_fault_text(domain: StorageFaultDomain, text: &str) -> bool { + text.contains(&format!("{INJECTED_ERROR_PREFIX}{} ", domain.label())) +} + +/// API-level storage fault profile for DST-only storage wrappers. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct StorageFaultConfig { + pub(crate) profile: CommitlogFaultProfile, + pub(crate) enabled: bool, + pub(crate) latency_prob: f64, + pub(crate) long_latency_prob: f64, + pub(crate) short_io_prob: f64, + pub(crate) read_error_prob: f64, + pub(crate) write_error_prob: f64, + pub(crate) flush_error_prob: f64, + pub(crate) fsync_error_prob: f64, + pub(crate) open_error_prob: f64, + pub(crate) metadata_error_prob: f64, + pub(crate) max_short_io_divisor: usize, +} + +impl StorageFaultConfig { + pub(crate) fn for_profile(profile: CommitlogFaultProfile) -> Self { + match profile { + CommitlogFaultProfile::Off => Self { + profile, + enabled: false, + latency_prob: 0.0, + long_latency_prob: 0.0, + short_io_prob: 0.0, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Light => Self { + profile, + enabled: true, + latency_prob: 0.20, + long_latency_prob: 0.04, + short_io_prob: 0.03, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Default => Self { + profile, + enabled: true, + latency_prob: 0.35, + long_latency_prob: 0.08, + short_io_prob: 0.08, + read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 2, + }, + CommitlogFaultProfile::Aggressive => Self { + profile, + enabled: true, + latency_prob: 0.65, + long_latency_prob: 0.18, + short_io_prob: 0.20, + // Current profile-driven runs stay with latency and short I/O. + // Error hooks are available for targeted tests once targets can + // classify transient storage failures instead of treating them + // as harness errors. 
+ read_error_prob: 0.0, + write_error_prob: 0.0, + flush_error_prob: 0.0, + fsync_error_prob: 0.0, + open_error_prob: 0.0, + metadata_error_prob: 0.0, + max_short_io_divisor: 4, + }, + } + } + + pub(crate) fn enabled(&self) -> bool { + self.enabled + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub(crate) struct StorageFaultSummary { + pub(crate) profile: CommitlogFaultProfile, + pub(crate) latency: usize, + pub(crate) short_read: usize, + pub(crate) short_write: usize, + pub(crate) read_error: usize, + pub(crate) write_error: usize, + pub(crate) flush_error: usize, + pub(crate) fsync_error: usize, + pub(crate) open_error: usize, + pub(crate) metadata_error: usize, +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum StorageFaultDomain { + Disk, + Snapshot, +} + +impl StorageFaultDomain { + fn label(self) -> &'static str { + match self { + Self::Disk => "disk", + Self::Snapshot => "snapshot", + } + } +} + +#[derive(Clone)] +pub(crate) struct StorageFaultController { + config: StorageFaultConfig, + domain: StorageFaultDomain, + counters: Arc, + decisions: Arc, + time: Option, + armed: Arc, + suspended: Arc, +} + +impl StorageFaultController { + pub(crate) fn new(config: StorageFaultConfig, domain: StorageFaultDomain, seed: DstSeed) -> Self { + Self { + config, + domain, + counters: Arc::default(), + decisions: Arc::new(sim::decision_source(seed)), + time: sim::time::try_current_handle(), + armed: Arc::new(AtomicBool::new(false)), + suspended: Arc::default(), + } + } + + pub(crate) fn enable(&self) { + self.armed.store(true, Ordering::Relaxed); + } + + pub(crate) fn with_suspended(&self, f: impl FnOnce() -> T) -> T { + self.suspended.fetch_add(1, Ordering::Relaxed); + let _guard = SuspendFaultsGuard { + suspended: self.suspended.clone(), + }; + f() + } + + pub(crate) fn maybe_latency(&self) { + if self.sample(self.config.latency_prob) { + self.counters.latency.fetch_add(1, Ordering::Relaxed); + let latency = if self.sample(self.config.long_latency_prob) { 
+ Duration::from_millis(25) + } else { + Duration::from_millis(1) + }; + if let Some(time) = &self.time { + time.advance(latency); + } else { + sim::advance_time(latency); + } + } + } + + pub(crate) fn maybe_error(&self, kind: StorageFaultKind) -> io::Result<()> { + if self.sample(kind.probability(&self.config)) { + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + return Err(io::Error::other(kind.message(self.domain))); + } + Ok(()) + } + + pub(crate) fn maybe_short_len(&self, len: usize, kind: ShortIoKind) -> usize { + if len <= 1 { + return len; + } + if !self.sample(self.config.short_io_prob) { + return len; + } + + kind.counter(&self.counters).fetch_add(1, Ordering::Relaxed); + let divisor = self.config.max_short_io_divisor.max(2); + (len / divisor).max(1) + } + + pub(crate) fn summary(&self) -> StorageFaultSummary { + StorageFaultSummary { + profile: self.config.profile, + latency: self.counters.latency.load(Ordering::Relaxed) as usize, + short_read: self.counters.short_read.load(Ordering::Relaxed) as usize, + short_write: self.counters.short_write.load(Ordering::Relaxed) as usize, + read_error: self.counters.read_error.load(Ordering::Relaxed) as usize, + write_error: self.counters.write_error.load(Ordering::Relaxed) as usize, + flush_error: self.counters.flush_error.load(Ordering::Relaxed) as usize, + fsync_error: self.counters.fsync_error.load(Ordering::Relaxed) as usize, + open_error: self.counters.open_error.load(Ordering::Relaxed) as usize, + metadata_error: self.counters.metadata_error.load(Ordering::Relaxed) as usize, + } + } + + fn active(&self) -> bool { + self.config.enabled() && self.armed.load(Ordering::Relaxed) && self.suspended.load(Ordering::Relaxed) == 0 + } + + fn sample(&self, probability: f64) -> bool { + if !self.active() || probability <= 0.0 { + return false; + } + + self.decisions.sample_probability(probability) + } +} + +struct SuspendFaultsGuard { + suspended: Arc, +} + +impl Drop for SuspendFaultsGuard { + fn drop(&mut 
self) { + self.suspended.fetch_sub(1, Ordering::Relaxed); + } +} + +#[derive(Debug, Default)] +struct FaultCounters { + latency: AtomicU64, + short_read: AtomicU64, + short_write: AtomicU64, + read_error: AtomicU64, + write_error: AtomicU64, + flush_error: AtomicU64, + fsync_error: AtomicU64, + open_error: AtomicU64, + metadata_error: AtomicU64, +} + +#[derive(Clone, Copy)] +pub(crate) enum ShortIoKind { + Read, + Write, +} + +impl ShortIoKind { + fn counter(self, counters: &FaultCounters) -> &AtomicU64 { + match self { + Self::Read => &counters.short_read, + Self::Write => &counters.short_write, + } + } +} + +#[derive(Clone, Copy)] +pub(crate) enum StorageFaultKind { + Read, + Write, + Flush, + Fsync, + Open, + Metadata, +} + +impl StorageFaultKind { + fn probability(self, config: &StorageFaultConfig) -> f64 { + match self { + Self::Read => config.read_error_prob, + Self::Write => config.write_error_prob, + Self::Flush => config.flush_error_prob, + Self::Fsync => config.fsync_error_prob, + Self::Open => config.open_error_prob, + Self::Metadata => config.metadata_error_prob, + } + } + + fn counter(self, counters: &FaultCounters) -> &AtomicU64 { + match self { + Self::Read => &counters.read_error, + Self::Write => &counters.write_error, + Self::Flush => &counters.flush_error, + Self::Fsync => &counters.fsync_error, + Self::Open => &counters.open_error, + Self::Metadata => &counters.metadata_error, + } + } + + fn message(self, domain: StorageFaultDomain) -> String { + let action = match self { + Self::Read => "read", + Self::Write => "write", + Self::Flush => "flush", + Self::Fsync => "fsync", + Self::Open => "open", + Self::Metadata => "metadata", + }; + format!("{INJECTED_ERROR_PREFIX}{} {action} error", domain.label()) + } +} diff --git a/crates/dst/src/sim/time.rs b/crates/dst/src/sim/time.rs new file mode 100644 index 00000000000..f55d6378658 --- /dev/null +++ b/crates/dst/src/sim/time.rs @@ -0,0 +1,123 @@ +//! Virtual time for the local DST simulator. 
+ +use std::time::Duration; + +pub use spacetimedb_runtime::sim::time::TimeoutElapsed; +pub use spacetimedb_runtime::sim::Handle as TimeHandle; + +fn current_handle() -> TimeHandle { + super::current_handle().expect("sim::time used outside Runtime::block_on") +} + +pub fn try_current_handle() -> Option { + super::current_handle() +} + +pub fn now() -> Duration { + current_handle().now() +} + +pub async fn sleep(duration: Duration) { + current_handle().sleep(duration).await +} + +pub async fn timeout(duration: Duration, future: impl core::future::Future) -> Result { + current_handle().timeout(duration, future).await +} + +pub fn advance(duration: Duration) { + current_handle().advance(duration); +} + +#[cfg(test)] +mod tests { + use std::{ + sync::{Arc, Mutex}, + time::Duration, + }; + + use crate::{seed::DstSeed, sim}; + + #[test] + fn sleep_fast_forwards_virtual_time() { + let mut runtime = sim::Runtime::new(DstSeed(101)).unwrap(); + + runtime.block_on(async { + assert_eq!(super::now(), Duration::ZERO); + super::sleep(Duration::from_millis(5)).await; + assert_eq!(super::now(), Duration::from_millis(5)); + }); + } + + #[test] + fn shorter_timer_wakes_first() { + let mut runtime = sim::Runtime::new(DstSeed(102)).unwrap(); + let handle = runtime.handle(); + let order = Arc::new(Mutex::new(Vec::new())); + + runtime.block_on({ + let order = Arc::clone(&order); + async move { + let slow_order = Arc::clone(&order); + let slow = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(10)).await; + slow_order.lock().expect("order poisoned").push(10); + }); + + let fast_order = Arc::clone(&order); + let fast = handle.spawn_on(sim::NodeId::MAIN, async move { + super::sleep(Duration::from_millis(3)).await; + fast_order.lock().expect("order poisoned").push(3); + }); + + fast.await.expect("fast timer task should complete"); + slow.await.expect("slow timer task should complete"); + } + }); + + assert_eq!(*order.lock().expect("order poisoned"), 
vec![3, 10]); + assert_eq!(runtime.elapsed(), Duration::from_millis(10)); + } + + #[test] + fn explicit_advance_moves_virtual_time() { + let mut runtime = sim::Runtime::new(DstSeed(103)).unwrap(); + + runtime.block_on(async { + super::advance(Duration::from_millis(7)); + assert_eq!(super::now(), Duration::from_millis(7)); + }); + } + + #[test] + fn timeout_returns_future_output_before_deadline() { + let mut runtime = sim::Runtime::new(DstSeed(104)).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(10), async { + super::sleep(Duration::from_millis(3)).await; + 9 + }) + .await + }); + + assert_eq!(output, Ok(9)); + assert_eq!(runtime.elapsed(), Duration::from_millis(3)); + } + + #[test] + fn timeout_expires_at_virtual_deadline() { + let mut runtime = sim::Runtime::new(DstSeed(105)).unwrap(); + + let output = runtime.block_on(async { + super::timeout(Duration::from_millis(4), async { + super::sleep(Duration::from_millis(20)).await; + 9 + }) + .await + }); + + assert_eq!(output.unwrap_err().duration(), Duration::from_millis(4)); + assert_eq!(runtime.elapsed(), Duration::from_millis(4)); + } +} diff --git a/crates/dst/src/targets/descriptor.rs b/crates/dst/src/targets/descriptor.rs new file mode 100644 index 00000000000..5a387625536 --- /dev/null +++ b/crates/dst/src/targets/descriptor.rs @@ -0,0 +1,61 @@ +//! Target descriptor layer used by the CLI. + +use std::{future::Future, pin::Pin}; + +use crate::{config::RunConfig, seed::DstSeed}; + +/// Descriptor contract: CLI talks to this, not per-target ad hoc handlers. 
+pub trait TargetDescriptor { + const NAME: &'static str; + type Scenario; + + fn prepare(_seed: DstSeed, _scenario: &Self::Scenario, _config: &RunConfig) -> anyhow::Result<()> { + Ok(()) + } + + fn run_streaming(seed: DstSeed, scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture; +} + +pub type TargetRunFuture = Pin>>>; + +pub struct RelationalDbConcurrentDescriptor; + +impl TargetDescriptor for RelationalDbConcurrentDescriptor { + const NAME: &'static str = "relational_db_concurrent"; + type Scenario = (); + + fn run_streaming(seed: DstSeed, _scenario: Self::Scenario, config: RunConfig) -> TargetRunFuture { + Box::pin(async move { + let outcome = crate::targets::relational_db_concurrent::run_generated_with_config(seed, config).await?; + Ok(format_relational_db_concurrent_outcome(Self::NAME, seed, &outcome)) + }) + } +} + +fn format_relational_db_concurrent_outcome( + target: &str, + seed: DstSeed, + outcome: &crate::targets::relational_db_concurrent::RelationalDbConcurrentOutcome, +) -> String { + format!( + concat!( + "ok target={} seed={} rounds={}\n", + "\n", + "clients={} events={} reads={}\n", + "transactions: committed={} write_conflicts={} writer_conflicts={} reader_conflicts={}\n", + "rows: final={} expected={}" + ), + target, + seed.0, + outcome.rounds, + outcome.clients, + outcome.events, + outcome.reads, + outcome.committed, + outcome.write_conflicts, + outcome.writer_conflicts, + outcome.reader_conflicts, + outcome.final_rows.len(), + outcome.expected_rows.len(), + ) +} diff --git a/crates/dst/src/targets/mod.rs b/crates/dst/src/targets/mod.rs new file mode 100644 index 00000000000..fba30fb371f --- /dev/null +++ b/crates/dst/src/targets/mod.rs @@ -0,0 +1,5 @@ +//! Concrete simulation targets. 
+
+pub mod descriptor;
+pub mod relational_db_commitlog;
+pub mod relational_db_concurrent;
diff --git a/crates/dst/src/targets/relational_db_commitlog.rs b/crates/dst/src/targets/relational_db_commitlog.rs
new file mode 100644
index 00000000000..2c58b32a287
--- /dev/null
+++ b/crates/dst/src/targets/relational_db_commitlog.rs
@@ -0,0 +1,1780 @@
+//! RelationalDB DST target with mocked commitlog file chaos and replay checks.
+
+use std::{cell::Cell, collections::BTreeMap, io, num::NonZeroU64, ops::Bound, sync::Arc, time::Duration};
+
+use spacetimedb_commitlog::repo::{Memory as MemoryCommitlogRepo, SizeOnDisk};
+use spacetimedb_core::{
+    db::{
+        relational_db::{MutTx as RelMutTx, Persistence, RelationalDB, SnapshotWorker, Tx as RelTx},
+        snapshot,
+    },
+    error::{DBError, DatastoreError, IndexError},
+    messages::control_db::HostType,
+};
+use spacetimedb_datastore::{
+    execution_context::Workload,
+    traits::{IsolationLevel, Program},
+};
+use spacetimedb_durability::{Durability, EmptyHistory, Local};
+use spacetimedb_lib::{
+    db::auth::{StAccess, StTableType},
+    Identity,
+};
+use spacetimedb_primitives::{SequenceId, TableId};
+use spacetimedb_runtime::Handle;
+use spacetimedb_sats::{AlgebraicType, AlgebraicValue, ProductValue};
+use spacetimedb_schema::{
+    def::BTreeAlgorithm,
+    schema::{ColumnSchema, ConstraintSchema, IndexSchema, SequenceSchema, TableSchema},
+    table_name::TableName,
+};
+use spacetimedb_table::page_pool::PagePool;
+use tracing::{debug, info, trace};
+
+use crate::{
+    client::SessionId,
+    config::RunConfig,
+    core::{self, TargetEngine},
+    properties::{
+        CommitlogObservation, DynamicMigrationProbe, PropertyRuntime, TableMutation, TableObservation,
+        TargetPropertyAccess,
+    },
+    schema::{SchemaPlan, SimRow},
+    seed::DstSeed,
+    sim::{
+        self,
+        commitlog::{is_injected_disk_error_text, CommitlogFaultConfig, CommitlogFaultSummary, FaultableRepo},
+        snapshot::{is_injected_snapshot_error_text, BuggifiedSnapshotRepo, SnapshotFaultConfig},
+    },
+    workload::{
+        commitlog_ops::{CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary},
+        commitlog_ops::{InteractionSummary, RuntimeSummary, SchemaSummary, TableOperationSummary, TransactionSummary},
+        table_ops::{
+            ConnectionWriteState, TableErrorKind, TableInteractionCase, TableOperation, TableScenario, TableScenarioId,
+            TableWorkloadInteraction, TableWorkloadOutcome,
+        },
+    },
+};
+
+// NOTE(review): several generic argument lists below were lost in extraction
+// (e.g. "PropertyRuntime", "anyhow::Result {", "Cell,", "Arc,", "Option,").
+// The hedged notes mark each such spot — restore against the original file.
+pub type RelationalDbCommitlogOutcome = CommitlogWorkloadOutcome;
+type RelationalDbCommitlogSource = crate::workload::commitlog_ops::CommitlogWorkloadSource;
+type RelationalDbCommitlogProperties = PropertyRuntime;
+
+// Upper bound on waiting for the durability layer to reach a requested tx
+// offset before a step is declared hung.
+const DURABILITY_WAIT_TIMEOUT: Duration = Duration::from_secs(30);
+
+/// Entry point: builds the workload source, engine and property runtime for
+/// `(seed, scenario, config)`, streams the run to completion, and logs a
+/// one-line completion summary.
+// NOTE(review): return type appears as bare `anyhow::Result` — the success
+// type (presumably `RelationalDbCommitlogOutcome`, given `Ok(outcome)`) was
+// lost in extraction; confirm.
+pub async fn run_generated_with_config_and_scenario(
+    seed: DstSeed,
+    scenario: TableScenarioId,
+    config: RunConfig,
+) -> anyhow::Result {
+    let (source, engine, properties) = build(seed, scenario, &config)?;
+    let outcome = core::run_streaming(source, engine, properties, config).await?;
+    info!(
+        applied_steps = outcome.applied_steps,
+        durable_commit_count = outcome.durable_commit_count,
+        replay_table_count = outcome.replay_table_count,
+        "relational_db_commitlog complete"
+    );
+    Ok(outcome)
+}
+
+// Convenience wrapper: derives both fault configs from the run config's
+// `commitlog_fault_profile` and delegates to `build_with_fault_configs`.
+fn build(
+    seed: DstSeed,
+    scenario: TableScenarioId,
+    config: &RunConfig,
+) -> anyhow::Result<(
+    RelationalDbCommitlogSource,
+    RelationalDbEngine,
+    RelationalDbCommitlogProperties,
+)> {
+    build_with_fault_configs(
+        seed,
+        scenario,
+        config,
+        CommitlogFaultConfig::for_profile(config.commitlog_fault_profile),
+        SnapshotFaultConfig::for_profile(config.commitlog_fault_profile),
+    )
+}
+
+// Assembles the three run components. Connection count (1..=3) and the
+// table schema are derived from independent forks of the seed (121 / 122)
+// so each stream is deterministic per seed.
+fn build_with_fault_configs(
+    seed: DstSeed,
+    scenario: TableScenarioId,
+    config: &RunConfig,
+    commitlog_fault_config: CommitlogFaultConfig,
+    snapshot_fault_config: SnapshotFaultConfig,
+) -> anyhow::Result<(
+    RelationalDbCommitlogSource,
+    RelationalDbEngine,
+    RelationalDbCommitlogProperties,
+)> {
+    let mut connection_rng = seed.fork(121).rng();
+    let num_connections = connection_rng.index(3) + 1;
+    let mut schema_rng = seed.fork(122).rng();
+    let schema = scenario.generate_schema(&mut schema_rng);
+    let generator = crate::workload::commitlog_ops::CommitlogWorkloadSource::new(
+        seed,
+        scenario,
+        schema.clone(),
+        num_connections,
+        config.max_interactions_or_default(usize::MAX),
+    );
+    let engine = RelationalDbEngine::new_with_fault_configs(
+        seed,
+        &schema,
+        num_connections,
+        commitlog_fault_config,
+        snapshot_fault_config,
+    )?;
+    let properties = PropertyRuntime::for_table_workload(scenario, schema.clone(), num_connections);
+    Ok((generator, engine, properties))
+}
+
+// Per-slot state of a dynamically created table: its name, migration
+// version, and current TableId (which changes across migrations/reopens).
+#[derive(Clone, Debug)]
+struct DynamicTableState {
+    name: String,
+    version: u32,
+    table_id: TableId,
+}
+
+// Aggregated counters collected over one run; folded into the outcome
+// summaries via `transaction_summary` / `runtime_summary`.
+#[derive(Default)]
+struct RunStats {
+    interactions: InteractionSummary,
+    table_ops: TableOperationSummary,
+    transactions: TransactionStats,
+    runtime: RuntimeStats,
+}
+
+#[derive(Default)]
+struct TransactionStats {
+    explicit_begin: usize,
+    explicit_commit: usize,
+    explicit_rollback: usize,
+    auto_commit: usize,
+    // Cell so read-only paths (`record_read_tx` takes &self) can bump it.
+    // NOTE(review): generic argument lost in extraction — presumably
+    // `Cell<usize>` given the `.get()`/`.set(...saturating_add(1))` usage.
+    read_tx: Cell,
+}
+
+#[derive(Default)]
+struct RuntimeStats {
+    durability_actors_started: usize,
+}
+
+impl RunStats {
+    // Tally one interaction of each kind as it is *requested* (before
+    // execution); skips/applies are tallied in `record_interaction_result`.
+    fn record_interaction_requested(&mut self, interaction: &CommitlogInteraction) {
+        match interaction {
+            CommitlogInteraction::Table(_) => self.interactions.table += 1,
+            CommitlogInteraction::CreateDynamicTable { .. } => self.interactions.create_dynamic_table += 1,
+            CommitlogInteraction::DropDynamicTable { .. } => self.interactions.drop_dynamic_table += 1,
+            CommitlogInteraction::MigrateDynamicTable { .. } => self.interactions.migrate_dynamic_table += 1,
+            CommitlogInteraction::CloseReopen => self.interactions.close_reopen_requested += 1,
+        }
+    }
+
+    // Tally the observed outcome: any Skipped bumps the global skip count;
+    // close/reopen additionally splits into skipped vs applied (an applied
+    // close/reopen may surface as either Applied or DurableReplay).
+    fn record_interaction_result(&mut self, interaction: &CommitlogInteraction, observation: &CommitlogObservation) {
+        if matches!(observation, CommitlogObservation::Skipped) {
+            self.interactions.skipped += 1;
+        }
+        if matches!(interaction, CommitlogInteraction::CloseReopen) {
+            match observation {
+                CommitlogObservation::Skipped => self.interactions.close_reopen_skipped += 1,
+                CommitlogObservation::Applied | CommitlogObservation::DurableReplay(_) => {
+                    self.interactions.close_reopen_applied += 1
+                }
+                _ => {}
+            }
+        }
+    }
+
+    // One counter per table-interaction case; exhaustive so adding a case
+    // to `TableInteractionCase` forces a compile error here.
+    fn record_table_operation(&mut self, case: TableInteractionCase) {
+        match case {
+            TableInteractionCase::BeginTx => self.table_ops.begin_tx += 1,
+            TableInteractionCase::CommitTx => self.table_ops.commit_tx += 1,
+            TableInteractionCase::RollbackTx => self.table_ops.rollback_tx += 1,
+            TableInteractionCase::BeginReadTx => self.table_ops.begin_read_tx += 1,
+            TableInteractionCase::ReleaseReadTx => self.table_ops.release_read_tx += 1,
+            TableInteractionCase::BeginTxConflict => self.table_ops.begin_tx_conflict += 1,
+            TableInteractionCase::WriteConflictInsert => self.table_ops.write_conflict_insert += 1,
+            TableInteractionCase::Insert => self.table_ops.insert += 1,
+            TableInteractionCase::Delete => self.table_ops.delete += 1,
+            TableInteractionCase::ExactDuplicateInsert => self.table_ops.exact_duplicate_insert += 1,
+            TableInteractionCase::UniqueKeyConflictInsert => self.table_ops.unique_key_conflict_insert += 1,
+            TableInteractionCase::DeleteMissing => self.table_ops.delete_missing += 1,
+            TableInteractionCase::BatchInsert => self.table_ops.batch_insert += 1,
+            TableInteractionCase::BatchDelete => self.table_ops.batch_delete += 1,
+            TableInteractionCase::Reinsert => self.table_ops.reinsert += 1,
+            TableInteractionCase::AddColumn => self.table_ops.add_column += 1,
+            TableInteractionCase::AddIndex => self.table_ops.add_index += 1,
+            TableInteractionCase::PointLookup => self.table_ops.point_lookup += 1,
+            TableInteractionCase::PredicateCount => self.table_ops.predicate_count += 1,
+            TableInteractionCase::RangeScan => self.table_ops.range_scan += 1,
+            TableInteractionCase::FullScan => self.table_ops.full_scan += 1,
+        }
+    }
+
+    // &self on purpose: read paths count read transactions through the Cell.
+    // Saturating add so the counter can never wrap.
+    fn record_read_tx(&self) {
+        self.transactions
+            .read_tx
+            .set(self.transactions.read_tx.get().saturating_add(1));
+    }
+
+    // Snapshot of the transaction counters, with the caller-supplied
+    // durable-commit count folded in.
+    fn transaction_summary(&self, durable_commit_count: usize) -> TransactionSummary {
+        TransactionSummary {
+            explicit_begin: self.transactions.explicit_begin,
+            explicit_commit: self.transactions.explicit_commit,
+            explicit_rollback: self.transactions.explicit_rollback,
+            auto_commit: self.transactions.auto_commit,
+            read_tx: self.transactions.read_tx.get(),
+            durable_commit_count,
+        }
+    }
+
+    // Snapshot of runtime-task bookkeeping. Durability actors are the only
+    // tasks this engine knowingly schedules, hence the duplicated field.
+    fn runtime_summary(&self) -> RuntimeSummary {
+        RuntimeSummary {
+            known_runtime_tasks_scheduled: self.runtime.durability_actors_started,
+            durability_actors_started: self.runtime.durability_actors_started,
+            runtime_alive_tasks: runtime_alive_tasks(),
+        }
+    }
+}
+
+// Bundle returned by `reopen_from_history`: the fresh durability handle and
+// DB plus the snapshot offsets observed during restore.
+// NOTE(review): generic arguments lost in extraction on `durability: Arc,`
+// and the two `Option,` fields — likely `Arc<InMemoryCommitlogDurability>`
+// and `Option<TxOffset>`-style types; confirm against the original.
+struct ReopenedRelationalDb {
+    durability: Arc,
+    db: RelationalDB,
+    restored_snapshot_offset: Option,
+    latest_snapshot_offset: Option,
+    snapshot_worker: SnapshotWorker,
+}
+
+/// Engine executing mixed table+lifecycle interactions while recording mocked durable history.
+struct RelationalDbEngine { + db: Option, + execution: ConnectionWriteState, + read_tx_by_connection: Vec>, + base_schema: SchemaPlan, + base_table_ids: Vec, + dynamic_tables: BTreeMap, + step: usize, + last_requested_durable_offset: Option, + last_observed_durable_offset: Option, + last_restored_snapshot_offset: Option, + latest_snapshot_offset: Option, + durability: Arc, + durability_opts: spacetimedb_durability::local::Options, + commitlog_repo: StressCommitlogRepo, + snapshot_repo: StressSnapshotRepo, + snapshot_worker: SnapshotWorker, + stats: RunStats, +} + +impl RelationalDbEngine { + fn new_with_fault_configs( + seed: DstSeed, + schema: &SchemaPlan, + num_connections: usize, + commitlog_fault_config: CommitlogFaultConfig, + snapshot_fault_config: SnapshotFaultConfig, + ) -> anyhow::Result { + let bootstrap = bootstrap_relational_db(seed.fork(700), commitlog_fault_config, snapshot_fault_config)?; + let mut this = Self { + db: Some(bootstrap.db), + execution: ConnectionWriteState::new(num_connections), + read_tx_by_connection: (0..num_connections).map(|_| None).collect(), + base_schema: schema.clone(), + base_table_ids: Vec::with_capacity(schema.tables.len()), + dynamic_tables: BTreeMap::new(), + step: 0, + last_requested_durable_offset: None, + last_observed_durable_offset: None, + last_restored_snapshot_offset: None, + latest_snapshot_offset: None, + durability: bootstrap.durability, + durability_opts: bootstrap.durability_opts, + commitlog_repo: bootstrap.commitlog_repo, + snapshot_repo: bootstrap.snapshot_repo, + snapshot_worker: bootstrap.snapshot_worker, + stats: RunStats { + runtime: RuntimeStats::default(), + ..Default::default() + }, + }; + this.install_base_schema().map_err(anyhow::Error::msg)?; + this.refresh_observed_durable_offset(true).map_err(anyhow::Error::msg)?; + this.commitlog_repo.enable_faults(); + this.snapshot_repo.enable_faults(); + Ok(this) + } + + fn install_base_schema(&mut self) -> Result<(), String> { + let mut tx = self + .db()? 
+ .begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + for table in &self.base_schema.tables { + let columns = table + .columns + .iter() + .enumerate() + .map(|(idx, col)| ColumnSchema::for_test(idx as u16, &col.name, col.ty.clone())) + .collect::>(); + let mut indexes = vec![IndexSchema::for_test( + format!("{}_id_idx", table.name), + BTreeAlgorithm::from(0), + )]; + for cols in &table.extra_indexes { + let cols_name = cols.iter().map(|col| format!("c{col}")).collect::>().join("_"); + indexes.push(IndexSchema::for_test( + format!("{}_{}_idx", table.name, cols_name), + BTreeAlgorithm::from(cols.iter().copied().collect::()), + )); + } + let constraints = vec![ConstraintSchema::unique_for_test( + format!("{}_id_unique", table.name), + 0, + )]; + let table_id = self + .db()? + .create_table( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(&table.name), + None, + columns, + indexes, + constraints, + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ), + ) + .map_err(|err| format!("create table '{}' failed: {err}", table.name))?; + self.base_table_ids.push(table_id); + } + let committed = self + .db()? 
+ .commit_tx(tx) + .map_err(|err| format!("install base schema commit failed: {err}"))?; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + Ok(()) + } + + async fn execute(&mut self, interaction: &CommitlogInteraction) -> Result { + self.step = self.step.saturating_add(1); + self.stats.record_interaction_requested(interaction); + let observation = match interaction { + CommitlogInteraction::Table(op) => self.execute_table_op(op).map(CommitlogObservation::Table), + CommitlogInteraction::CreateDynamicTable { conn, slot } => self.create_dynamic_table(*conn, *slot), + CommitlogInteraction::DropDynamicTable { conn, slot } => self.drop_dynamic_table(*conn, *slot), + CommitlogInteraction::MigrateDynamicTable { conn, slot } => self.migrate_dynamic_table(*conn, *slot), + CommitlogInteraction::CloseReopen => self.close_and_reopen().await, + }?; + if !matches!(interaction, CommitlogInteraction::CloseReopen) { + self.wait_for_requested_durability(false).await?; + } + self.stats.record_interaction_result(interaction, &observation); + Ok(observation) + } + + async fn close_and_reopen(&mut self) -> Result { + if self.execution.active_writer.is_some() + || self.execution.tx_by_connection.iter().any(|tx| tx.is_some()) + || self.read_tx_by_connection.iter().any(|tx| tx.is_some()) + { + trace!("skip close/reopen while transaction is open"); + return Ok(CommitlogObservation::Skipped); + } + + self.wait_for_requested_durability(true).await?; + // Explicitly drop the current RelationalDB instance before attempting + // to open a new durability+DB pair on the same replica directory. 
+ let old_db = self + .db + .take() + .ok_or_else(|| "close/reopen failed: relational db not initialized".to_string())?; + old_db.shutdown().await; + drop(old_db); + info!("starting in-memory durability"); + + let reopened = self.reopen_from_history_with_fault_retry("close/reopen")?; + + self.durability = reopened.durability; + self.db = Some(reopened.db); + self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; + self.latest_snapshot_offset = reopened.latest_snapshot_offset; + self.snapshot_worker = reopened.snapshot_worker; + self.rebuild_table_handles_after_reopen()?; + self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); + let replay = self.durable_replay_summary()?; + debug!( + base_tables = self.base_table_ids.len(), + dynamic_tables = self.dynamic_tables.len(), + "reopened relational db from durable history" + ); + Ok(CommitlogObservation::DurableReplay(replay)) + } + + fn reopen_from_history_with_fault_retry(&self, context: &'static str) -> Result { + match self.reopen_from_history() { + Ok(reopened) => Ok(reopened), + Err(err) if is_injected_disk_error_text(&err) || is_injected_snapshot_error_text(&err) => { + trace!(error = %err, "retrying {context} with injected storage faults suspended"); + self.commitlog_repo + .with_faults_suspended(|| self.snapshot_repo.with_faults_suspended(|| self.reopen_from_history())) + } + Err(err) => Err(err), + } + } + + fn reopen_from_history(&self) -> Result { + let runtime = Handle::tokio_current(); + let durability = Arc::new( + InMemoryCommitlogDurability::open_with_repo( + self.commitlog_repo.clone(), + runtime.clone(), + self.durability_opts, + ) + .map_err(|err| format!("reopen in-memory durability failed: {err}"))?, + ); + let durable_offset = durability.durable_tx_offset().last_seen(); + let snapshot_restore = self.snapshot_repo.repo_for_restore(durable_offset)?; + let snapshot_worker = SnapshotWorker::new( + Arc::new(self.snapshot_repo.clone()), + 
snapshot::Compression::Disabled, + runtime.clone(), + ); + let persistence = Persistence { + durability: durability.clone(), + disk_size: Arc::new(in_memory_size_on_disk), + snapshot_store: snapshot_restore.store.clone(), + snapshots: Some(snapshot_worker.clone()), + runtime, + }; + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + durability.as_history(), + Some(persistence), + None, + PagePool::new_for_test(), + ) + .map_err(|err| format!("close/reopen failed: {err}"))?; + if !connected_clients.is_empty() { + return Err(format!( + "unexpected connected clients after reopen: {connected_clients:?}" + )); + } + Ok(ReopenedRelationalDb { + durability, + db, + restored_snapshot_offset: snapshot_restore.restored_snapshot_offset, + latest_snapshot_offset: snapshot_restore.latest_snapshot_offset, + snapshot_worker, + }) + } + + fn rebuild_table_handles_after_reopen(&mut self) -> Result<(), String> { + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); + let schemas = db + .get_all_tables(&tx) + .map_err(|err| format!("list tables after reopen failed: {err}"))?; + let _ = db.release_tx(tx); + + let mut by_name = BTreeMap::new(); + for schema in schemas { + by_name.insert(schema.table_name.to_string(), schema.table_id); + } + + self.base_table_ids.clear(); + for table in &self.base_schema.tables { + let table_id = by_name + .get(&table.name) + .copied() + .ok_or_else(|| format!("base table '{}' missing after reopen", table.name))?; + self.base_table_ids.push(table_id); + } + + self.dynamic_tables.retain(|_slot, state| { + if let Some(table_id) = by_name.get(&state.name).copied() { + state.table_id = table_id; + true + } else { + false + } + }); + + Ok(()) + } + + fn execute_table_op(&mut self, interaction: &TableWorkloadInteraction) -> Result { + let observation = self.execute_table_op_inner(&interaction.op)?; + self.stats.record_table_operation(interaction.case); + Ok(observation) + } + + 
fn execute_table_op_inner(&mut self, op: &TableOperation) -> Result { + trace!(step = self.step, ?op, "table interaction"); + match op { + TableOperation::BeginTx { conn } => self.begin_write_tx(*conn), + TableOperation::BeginReadTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + if self.execution.tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open write transaction")); + } + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } + let tx = self.db()?.begin_tx(Workload::ForTests); + self.read_tx_by_connection[conn.as_index()] = Some(tx); + self.stats.record_read_tx(); + Ok(TableObservation::Applied) + } + TableOperation::ReleaseReadTx { conn } => { + self.execution.ensure_known_connection(*conn)?; + let tx = self.read_tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} has no read transaction to release"))?; + let _ = self.db()?.release_tx(tx); + Ok(TableObservation::Applied) + } + TableOperation::CommitTx { conn } => { + self.execution.ensure_writer_owner(*conn, "commit")?; + let tx = self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to commit"))?; + let committed = self + .db()? 
+ .commit_tx(tx) + .map_err(|err| format!("commit interaction failed: {err}"))?; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + self.execution.active_writer = None; + self.stats.transactions.explicit_commit += 1; + Ok(TableObservation::CommitOrRollback) + } + TableOperation::RollbackTx { conn } => { + self.execution.ensure_writer_owner(*conn, "rollback")?; + let tx = self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} has no transaction to rollback"))?; + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + self.stats.transactions.explicit_rollback += 1; + Ok(TableObservation::CommitOrRollback) + } + TableOperation::InsertRows { conn, table, rows } => self.execute_insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.execute_delete_rows(*conn, *table, rows), + TableOperation::AddColumn { + conn, + table, + column, + default, + } => { + let table_id = self.with_mut_tx(*conn, |engine, tx| { + let table_id = engine.table_id_for_index(*table)?; + let column_idx = engine.base_schema.tables[*table].columns.len() as u16; + let mut columns = engine.base_schema.tables[*table] + .columns + .iter() + .enumerate() + .map(|(idx, existing)| ColumnSchema::for_test(idx as u16, &existing.name, existing.ty.clone())) + .collect::>(); + columns.push(ColumnSchema::for_test(column_idx, &column.name, column.ty.clone())); + let new_table_id = engine + .db()? 
+ .add_columns_to_table(tx, table_id, columns, vec![default.clone()]) + .map_err(|err| format!("add column failed: {err}"))?; + Ok(new_table_id) + })?; + self.base_table_ids[*table] = table_id; + self.base_schema.tables[*table].columns.push(column.clone()); + self.refresh_observed_durable_offset(false)?; + Ok(TableObservation::Applied) + } + TableOperation::AddIndex { conn, table, cols } => { + self.with_mut_tx(*conn, |engine, tx| { + let table_id = engine.table_id_for_index(*table)?; + let mut schema = IndexSchema::for_test( + format!( + "{}_dst_added_{}_idx", + engine.base_schema.tables[*table].name, + engine.base_schema.tables[*table].extra_indexes.len() + ), + BTreeAlgorithm::from(cols.iter().copied().collect::()), + ); + schema.table_id = table_id; + engine + .db()? + .create_index(tx, schema, false) + .map_err(|err| format!("add index failed: {err}"))?; + Ok(()) + })?; + if !self.base_schema.tables[*table].extra_indexes.contains(cols) { + self.base_schema.tables[*table].extra_indexes.push(cols.clone()); + } + self.refresh_observed_durable_offset(false)?; + Ok(TableObservation::Applied) + } + TableOperation::PointLookup { conn, table, id } => { + let actual = self.lookup_base_row(*conn, *table, *id)?; + Ok(TableObservation::PointLookup { + conn: *conn, + table: *table, + id: *id, + actual, + }) + } + TableOperation::PredicateCount { + conn, + table, + col, + value, + } => { + let actual = self.count_by_col_eq_in_connection(*conn, *table, *col, value)?; + Ok(TableObservation::PredicateCount { + conn: *conn, + table: *table, + col: *col, + value: value.clone(), + actual, + }) + } + TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + } => { + let actual = self.range_scan_in_connection(*conn, *table, cols, lower.clone(), upper.clone())?; + Ok(TableObservation::RangeScan { + conn: *conn, + table: *table, + cols: cols.clone(), + lower: lower.clone(), + upper: upper.clone(), + actual, + }) + } + TableOperation::FullScan { conn, table } => { + let 
actual = self.collect_rows_in_connection(*conn, *table)?; + Ok(TableObservation::FullScan { + conn: *conn, + table: *table, + actual, + }) + } + } + } + + fn begin_write_tx(&mut self, conn: SessionId) -> Result { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open read transaction")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} already has open transaction")); + } + match self + .db()? + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + { + Some(tx) => { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + let _ = self.db()?.rollback_mut_tx(tx); + return Err(format!( + "connection {conn} unexpectedly acquired write lock while conflicting transaction was open" + )); + } + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + self.execution.active_writer = Some(conn); + self.stats.transactions.explicit_begin += 1; + Ok(TableObservation::Applied) + } + None => { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) + } else { + Err(format!( + "connection {conn} failed to begin write transaction without an open conflicting lock" + )) + } + } + } + } + + fn execute_insert_rows( + &mut self, + conn: SessionId, + table: usize, + rows: &[SimRow], + ) -> Result { + let in_tx = self.is_in_write_tx(conn); + let outcome = self.with_mut_tx_observed(conn, |engine, tx| { + let mut mutations = Vec::with_capacity(rows.len()); + for row in rows { + match engine.try_insert_base_row(tx, table, row)? 
{ + Ok(returned) => mutations.push(TableMutation::Inserted { + table, + requested: row.clone(), + returned, + }), + Err(err) if is_unique_constraint_violation(&err) => { + return Ok(Err(TableErrorKind::UniqueConstraintViolation)); + } + Err(err) => return Err(format!("insert failed: {err}")), + } + } + Ok(Ok(mutations)) + }); + self.mutation_observation(conn, in_tx, outcome) + } + + fn execute_delete_rows( + &mut self, + conn: SessionId, + table: usize, + rows: &[SimRow], + ) -> Result { + let in_tx = self.is_in_write_tx(conn); + let outcome = self.with_mut_tx_observed(conn, |engine, tx| { + let mut mutations = Vec::with_capacity(rows.len()); + for row in rows { + match engine.delete_base_row_count(tx, table, row)? { + 0 => return Ok(Err(TableErrorKind::MissingRow)), + 1 => mutations.push(TableMutation::Deleted { + table, + row: row.clone(), + }), + deleted => { + return Err(format!("delete for row={row:?} affected {deleted} rows")); + } + } + } + Ok(Ok(mutations)) + }); + self.mutation_observation(conn, in_tx, outcome) + } + + fn mutation_observation( + &mut self, + conn: SessionId, + in_tx: bool, + outcome: Result, TableErrorKind>, String>, + ) -> Result { + match outcome { + Ok(Ok(mutations)) => { + self.refresh_if_auto_commit(in_tx)?; + Ok(TableObservation::Mutated { conn, mutations, in_tx }) + } + Ok(Err(kind)) => Ok(TableObservation::ObservedError(kind)), + Err(err) if is_write_conflict_error(&err) => { + Ok(TableObservation::ObservedError(TableErrorKind::WriteConflict)) + } + Err(err) => Err(err), + } + } + + fn with_mut_tx_observed( + &mut self, + conn: SessionId, + mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, String>, + ) -> Result, String> { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} cannot write while read transaction is open")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + let mut tx = 
self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; + let result = f(self, &mut tx); + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + return result; + } + + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + return Ok(Err(TableErrorKind::WriteConflict)); + } + + let mut tx = self + .db()? + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .ok_or_else(|| format!("connection {conn} failed to acquire write transaction"))?; + self.execution.active_writer = Some(conn); + let value = match f(self, &mut tx) { + Ok(Ok(value)) => value, + Ok(Err(kind)) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Ok(Err(kind)); + } + Err(err) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Err(err); + } + }; + let committed = match self.db()?.commit_tx(tx) { + Ok(committed) => committed, + Err(err) => { + self.execution.active_writer = None; + return Err(format!("auto-commit write failed: {err}")); + } + }; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + self.execution.active_writer = None; + self.stats.transactions.auto_commit += 1; + Ok(Ok(value)) + } + + fn with_mut_tx( + &mut self, + conn: SessionId, + mut f: impl FnMut(&mut Self, &mut RelMutTx) -> Result, + ) -> Result { + self.execution.ensure_known_connection(conn)?; + if self.read_tx_by_connection[conn.as_index()].is_some() { + return Err(format!("connection {conn} cannot write while read transaction is open")); + } + if self.execution.tx_by_connection[conn.as_index()].is_some() { + let mut tx = self.execution.tx_by_connection[conn.as_index()] + .take() + .ok_or_else(|| format!("connection {conn} missing transaction handle"))?; + let result = f(self, &mut tx); + self.execution.tx_by_connection[conn.as_index()] = Some(tx); + return result; + } + + if 
self.execution.active_writer.is_some() || self.any_open_read_tx() { + return Err(format!( + "connection {conn} cannot auto-commit write while a conflicting lock is open" + )); + } + + let mut tx = self + .db()? + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + .ok_or_else(|| format!("connection {conn} failed to acquire write transaction"))?; + self.execution.active_writer = Some(conn); + let value = match f(self, &mut tx) { + Ok(value) => value, + Err(err) => { + let _ = self.db()?.rollback_mut_tx(tx); + self.execution.active_writer = None; + return Err(err); + } + }; + let committed = match self.db()?.commit_tx(tx) { + Ok(committed) => committed, + Err(err) => { + self.execution.active_writer = None; + return Err(format!("auto-commit write failed: {err}")); + } + }; + self.record_committed_offset(committed.as_ref().map(|(tx_offset, ..)| *tx_offset)); + self.execution.active_writer = None; + self.stats.transactions.auto_commit += 1; + Ok(value) + } + + fn try_insert_base_row( + &self, + tx: &mut RelMutTx, + table: usize, + row: &SimRow, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + self.try_insert_row(tx, table_id, row) + } + + fn try_insert_row( + &self, + tx: &mut RelMutTx, + table_id: TableId, + row: &SimRow, + ) -> Result, String> { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + Ok(match self.db()?.insert(tx, table_id, &bsatn) { + Ok((_, row_ref, _)) => Ok(SimRow::from_product_value(row_ref.to_product_value())), + Err(err) => Err(err), + }) + } + + fn insert_row( + &self, + tx: &mut RelMutTx, + table_id: TableId, + row: &SimRow, + context: impl Into, + ) -> Result { + let context = context.into(); + self.try_insert_row(tx, table_id, row)? 
+ .map_err(|err| format!("{context}: {err}")) + } + + fn delete_base_row_count(&self, tx: &mut RelMutTx, table: usize, row: &SimRow) -> Result { + let table_id = self.table_id_for_index(table)?; + Ok(self.db()?.delete_by_rel(tx, table_id, [row.to_product_value()])) + } + + fn create_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + trace!( + step = self.step, + slot, + "skip create dynamic table while transaction is open" + ); + return Ok(CommitlogObservation::Skipped); + } + let conn = self.normalize_conn(conn); + debug!(step = self.step, conn = %conn, slot, "create dynamic table"); + self.with_mut_tx(conn, |engine, tx| { + if engine.dynamic_tables.contains_key(&slot) { + return Ok(()); + } + let name = dynamic_table_name(slot); + let schema = dynamic_schema(&name, 0); + let table_id = engine + .db()? + .create_table(tx, schema) + .map_err(|err| format!("create dynamic table slot={slot} failed: {err}"))?; + let seed_row = SimRow { + values: vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)], + }; + engine.insert_row( + tx, + table_id, + &seed_row, + format!("seed dynamic table auto-inc insert failed for slot={slot}"), + )?; + engine.dynamic_tables.insert( + slot, + DynamicTableState { + name, + version: 0, + table_id, + }, + ); + Ok(()) + })?; + self.refresh_observed_durable_offset(false)?; + Ok(CommitlogObservation::Applied) + } + + fn drop_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + trace!( + step = self.step, + slot, + "skip drop dynamic table while transaction is open" + ); + return Ok(CommitlogObservation::Skipped); + } + let conn = self.normalize_conn(conn); + debug!(step = self.step, conn = %conn, slot, "drop dynamic table"); + self.with_mut_tx(conn, |engine, tx| { + let Some(state) = engine.dynamic_tables.remove(&slot) else { + return Ok(()); + }; + if let 
Err(err) = engine.db()?.drop_table(tx, state.table_id) { + let msg = err.to_string(); + if !msg.contains("not found") { + return Err(format!("drop dynamic table slot={slot} failed: {err}")); + } + } + Ok(()) + })?; + self.refresh_observed_durable_offset(false)?; + Ok(CommitlogObservation::Applied) + } + + fn migrate_dynamic_table(&mut self, conn: SessionId, slot: u32) -> Result { + if self.execution.active_writer.is_some() || self.any_open_read_tx() { + trace!( + step = self.step, + slot, + "skip migrate dynamic table while transaction is open" + ); + return Ok(CommitlogObservation::Skipped); + } + let conn = self.normalize_conn(conn); + debug!(step = self.step, conn = %conn, slot, "migrate dynamic table"); + let probe = self.with_mut_tx(conn, |engine, tx| { + let Some(state) = engine.dynamic_tables.get(&slot).cloned() else { + return Ok(None); + }; + let to_version = state.version.saturating_add(1); + let new_table_id = engine + .db()? + .add_columns_to_table( + tx, + state.table_id, + dynamic_column_schemas(to_version), + vec![AlgebraicValue::Bool(false)], + ) + .map_err(|err| format!("migrate add_columns_to_table failed for slot={slot}: {err}"))?; + let existing_rows = engine + .db()? + .iter_mut(tx, new_table_id) + .map_err(|err| format!("migrate scan table failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + + let probe_row = dynamic_probe_row(slot, to_version); + let inserted = engine.insert_row( + tx, + new_table_id, + &probe_row, + format!("migrate auto-inc probe failed for slot={slot}"), + )?; + engine.dynamic_tables.insert( + slot, + DynamicTableState { + name: state.name, + version: to_version, + table_id: new_table_id, + }, + ); + Ok(Some(DynamicMigrationProbe { + slot, + from_version: state.version, + to_version, + existing_rows, + inserted_row: inserted, + })) + })?; + self.refresh_observed_durable_offset(false)?; + Ok(probe + .map(CommitlogObservation::DynamicMigrationProbe) + .unwrap_or(CommitlogObservation::Skipped)) + } + + fn normalize_conn(&self, conn: SessionId) -> SessionId { + self.execution.active_writer.unwrap_or(conn) + } + + fn any_open_read_tx(&self) -> bool { + self.read_tx_by_connection.iter().any(Option::is_some) + } + + fn refresh_observed_durable_offset(&mut self, forced: bool) -> Result<(), String> { + let durable_offset = self.durability.durable_tx_offset().last_seen(); + if forced || durable_offset != self.last_observed_durable_offset { + self.last_observed_durable_offset = durable_offset; + } + Ok(()) + } + + async fn wait_for_requested_durability(&mut self, forced: bool) -> Result<(), String> { + if let Some(target_offset) = self.last_requested_durable_offset { + let current = self.durability.durable_tx_offset().last_seen(); + if current.is_none_or(|offset| offset < target_offset) { + let mut durable_offset = self.durability.durable_tx_offset(); + sim::time::timeout(DURABILITY_WAIT_TIMEOUT, durable_offset.wait_for(target_offset)) + .await + .map_err(|err| { + format!( + "durability wait for tx offset {target_offset} timed out after {:?}", + err.duration() + ) + })? 
+ .map_err(|err| format!("durability wait for tx offset {target_offset} failed: {err}"))?; + } + } else if forced { + sim::yield_now().await; + } + self.refresh_observed_durable_offset(forced) + } + + fn record_committed_offset(&mut self, offset: Option) { + if let Some(offset) = offset { + self.last_requested_durable_offset = Some(offset); + } + } + + fn is_in_write_tx(&self, conn: SessionId) -> bool { + self.execution + .tx_by_connection + .get(conn.as_index()) + .is_some_and(Option::is_some) + } + + fn refresh_if_auto_commit(&mut self, in_tx: bool) -> Result<(), String> { + if !in_tx { + self.refresh_observed_durable_offset(false)?; + } + Ok(()) + } + + fn table_id_for_index(&self, table: usize) -> Result { + self.base_table_ids + .get(table) + .copied() + .ok_or_else(|| format!("table {table} out of range")) + } + + fn with_fresh_read_tx(&self, f: impl FnOnce(&RelationalDB, &RelTx) -> Result) -> Result { + let db = self.db()?; + let tx = db.begin_tx(Workload::ForTests); + self.stats.record_read_tx(); + let result = f(db, &tx); + let _ = db.release_tx(tx); + result + } + + fn collect_rows_in_fresh_tx(&self, table_id: TableId, context: &'static str) -> Result, String> { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter(tx, table_id) + .map_err(|err| format!("{context}: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>()) + }) + } + + fn count_rows_in_fresh_tx(&self, table_id: TableId, context: &'static str) -> Result { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter(tx, table_id) + .map_err(|err| format!("{context}: {err}"))? + .count()) + }) + } + + fn count_by_col_eq_in_fresh_tx( + &self, + table_id: TableId, + col: u16, + value: &AlgebraicValue, + context: &'static str, + ) -> Result { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("{context}: {err}"))? 
+ .count()) + }) + } + + fn range_scan_in_fresh_tx( + &self, + table_id: TableId, + cols: spacetimedb_primitives::ColList, + bounds: (Bound, Bound), + context: &'static str, + ) -> Result, String> { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_range(tx, table_id, cols, bounds) + .map_err(|err| format!("{context}: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>()) + }) + } + + fn lookup_base_row(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq_mut(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("in-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("read-tx lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + } else { + self.with_fresh_read_tx(|db, tx| { + Ok(db + .iter_by_col_eq(tx, table_id, 0u16, &AlgebraicValue::U64(id)) + .map_err(|err| format!("lookup failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .next()) + }) + } + } + + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter_mut(tx, table_id) + .map_err(|err| format!("in-tx scan failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + let mut rows = self + .db()? + .iter(tx, table_id) + .map_err(|err| format!("read-tx scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } else { + self.collect_rows_by_id(table_id) + } + } + + fn count_by_col_eq_in_connection( + &self, + conn: SessionId, + table: usize, + col: u16, + value: &AlgebraicValue, + ) -> Result { + let table_id = self.table_id_for_index(table)?; + if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq_mut(tx, table_id, col, value) + .map_err(|err| format!("in-tx predicate query failed: {err}"))? + .count()) + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + Ok(self + .db()? + .iter_by_col_eq(tx, table_id, col, value) + .map_err(|err| format!("read-tx predicate query failed: {err}"))? + .count()) + } else { + self.count_by_col_eq_in_fresh_tx(table_id, col, value, "predicate query failed") + } + } + + fn range_scan_in_connection( + &self, + conn: SessionId, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let col_list = cols.iter().copied().collect::(); + let mut rows = if let Some(Some(tx)) = self.execution.tx_by_connection.get(conn.as_index()) { + self.db()? + .iter_by_col_range_mut(tx, table_id, col_list, (lower, upper)) + .map_err(|err| format!("in-tx range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>() + } else if let Some(Some(tx)) = self.read_tx_by_connection.get(conn.as_index()) { + self.db()? 
+ .iter_by_col_range(tx, table_id, col_list, (lower, upper)) + .map_err(|err| format!("read-tx range scan failed: {err}"))? + .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>() + } else { + self.range_scan_in_fresh_tx(table_id, col_list, (lower, upper), "range scan failed")? + }; + rows.sort_by(|lhs, rhs| compare_rows_for_range(lhs, rhs, cols)); + Ok(rows) + } + + fn count_rows_for_property(&self, table: usize) -> Result { + let table_id = self.table_id_for_index(table)?; + self.count_rows_in_fresh_tx(table_id, "scan failed") + } + + fn count_by_col_eq_for_property(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + let table_id = self.table_id_for_index(table)?; + self.count_by_col_eq_in_fresh_tx(table_id, col, value, "predicate query failed") + } + + fn range_scan_for_property( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + let cols = cols.iter().copied().collect::(); + self.range_scan_in_fresh_tx(table_id, cols, (lower, upper), "range scan failed") + } + + fn collect_rows_by_id(&self, table_id: TableId) -> Result, String> { + let mut rows = self.collect_rows_in_fresh_tx(table_id, "scan failed")?; + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) + } + + fn durable_replay_summary(&self) -> Result { + Ok(DurableReplaySummary { + durable_offset: self.last_observed_durable_offset, + restored_snapshot_offset: self.last_restored_snapshot_offset, + latest_snapshot_offset: self.latest_snapshot_offset, + base_rows: self.collect_base_rows()?, + dynamic_table_count: self.dynamic_tables.len(), + }) + } + + async fn reopen_for_final_replay_check(&mut self) -> Result { + let old_db = self + .db + .take() + .ok_or_else(|| "final replay check failed: relational db not initialized".to_string())?; + old_db.shutdown().await; + drop(old_db); + + let reopened = self.reopen_from_history_with_fault_retry("final 
replay check")?; + self.durability = reopened.durability; + self.db = Some(reopened.db); + self.last_restored_snapshot_offset = reopened.restored_snapshot_offset; + self.latest_snapshot_offset = reopened.latest_snapshot_offset; + self.rebuild_table_handles_after_reopen()?; + self.last_observed_durable_offset = self.durability.durable_tx_offset().last_seen(); + self.durable_replay_summary() + } + + async fn collect_outcome(&mut self) -> Result { + self.wait_for_requested_durability(true).await?; + let table = self.collect_table_outcome()?; + let replay = self.reopen_for_final_replay_check().await?; + let durable_commit_count = self + .last_observed_durable_offset + .map(|offset| (offset as usize).saturating_add(1)) + .unwrap_or(0); + let replay_table_count = replay.base_rows.len() + replay.dynamic_table_count; + debug!(durable_commits = durable_commit_count, "replayed durable prefix"); + Ok(RelationalDbCommitlogOutcome { + applied_steps: self.step, + durable_commit_count, + replay_table_count, + schema: schema_summary(&self.base_schema), + interactions: self.stats.interactions.clone(), + table_ops: self.stats.table_ops.clone(), + transactions: self.stats.transaction_summary(durable_commit_count), + runtime: self.stats.runtime_summary(), + disk_faults: disk_fault_summary(self.commitlog_repo.fault_summary()), + snapshot_faults: disk_fault_summary(self.snapshot_repo.fault_summary()), + replay, + table, + }) + } + + fn collect_base_rows(&self) -> Result>, String> { + self.base_table_ids + .iter() + .map(|&table_id| self.collect_rows_by_id(table_id)) + .collect() + } + + fn collect_table_outcome(&self) -> Result { + let mut final_rows = Vec::with_capacity(self.base_table_ids.len()); + let mut final_row_counts = Vec::with_capacity(self.base_table_ids.len()); + + for &table_id in &self.base_table_ids { + let rows = self.collect_rows_by_id(table_id)?; + final_row_counts.push(rows.len() as u64); + final_rows.push(rows); + } + + Ok(TableWorkloadOutcome { + final_row_counts, + 
final_rows, + }) + } + + fn finish(&mut self) { + for tx in &mut self.execution.tx_by_connection { + if let Some(tx) = tx.take() + && let Some(db) = &self.db + { + let _ = db.rollback_mut_tx(tx); + } + } + for tx in &mut self.read_tx_by_connection { + if let Some(tx) = tx.take() + && let Some(db) = &self.db + { + let _ = db.release_tx(tx); + } + } + self.execution.active_writer = None; + } + + fn db(&self) -> Result<&RelationalDB, String> { + self.db + .as_ref() + .ok_or_else(|| "relational db is unavailable during close/reopen".to_string()) + } +} + +impl TargetPropertyAccess for RelationalDbEngine { + fn schema_plan(&self) -> &SchemaPlan { + &self.base_schema + } + + fn lookup_in_connection(&self, conn: SessionId, table: usize, id: u64) -> Result, String> { + Self::lookup_base_row(self, conn, table, id) + } + + fn collect_rows_in_connection(&self, conn: SessionId, table: usize) -> Result, String> { + Self::collect_rows_in_connection(self, conn, table) + } + + fn collect_rows_for_table(&self, table: usize) -> Result, String> { + let table_id = self.table_id_for_index(table)?; + Self::collect_rows_by_id(self, table_id) + } + + fn count_rows(&self, table: usize) -> Result { + Self::count_rows_for_property(self, table) + } + + fn count_by_col_eq(&self, table: usize, col: u16, value: &AlgebraicValue) -> Result { + Self::count_by_col_eq_for_property(self, table, col, value) + } + + fn range_scan( + &self, + table: usize, + cols: &[u16], + lower: Bound, + upper: Bound, + ) -> Result, String> { + Self::range_scan_for_property(self, table, cols, lower, upper) + } +} + +impl TargetEngine for RelationalDbEngine { + type Observation = CommitlogObservation; + type Outcome = RelationalDbCommitlogOutcome; + type Error = String; + + #[allow(clippy::manual_async_fn)] + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a CommitlogInteraction, + ) -> impl std::future::Future> + 'a { + async move { self.execute(interaction).await } + } + + fn finish(&mut self) { + 
Self::finish(self); + } + + #[allow(clippy::manual_async_fn)] + fn collect_outcome<'a>(&'a mut self) -> impl std::future::Future> + 'a { + async move { + RelationalDbEngine::collect_outcome(self) + .await + .map_err(anyhow::Error::msg) + } + } +} + +type StressCommitlogRepo = FaultableRepo; +type StressSnapshotRepo = BuggifiedSnapshotRepo; +type InMemoryCommitlogDurability = Local; + +struct RelationalDbBootstrap { + db: RelationalDB, + commitlog_repo: StressCommitlogRepo, + snapshot_repo: StressSnapshotRepo, + snapshot_worker: SnapshotWorker, + durability: Arc, + durability_opts: spacetimedb_durability::local::Options, +} + +fn bootstrap_relational_db( + seed: DstSeed, + commitlog_fault_config: CommitlogFaultConfig, + snapshot_fault_config: SnapshotFaultConfig, +) -> anyhow::Result { + let runtime = Handle::tokio_current(); + let commitlog_repo = FaultableRepo::new( + MemoryCommitlogRepo::new(8 * 1024 * 1024), + commitlog_fault_config, + seed.fork(702), + ); + let snapshot_repo = BuggifiedSnapshotRepo::new(snapshot_fault_config, seed.fork(703))?; + let durability_opts = commitlog_stress_options(seed.fork(701)); + let durability = Arc::new( + InMemoryCommitlogDurability::open_with_repo(commitlog_repo.clone(), runtime.clone(), durability_opts) + .map_err(|err| anyhow::anyhow!("open in-memory durability failed: {err}"))?, + ); + let snapshot_worker = SnapshotWorker::new( + Arc::new(snapshot_repo.clone()), + snapshot::Compression::Disabled, + runtime.clone(), + ); + let persistence = Persistence { + durability: durability.clone(), + disk_size: Arc::new(in_memory_size_on_disk), + snapshot_store: Some(snapshot_worker.snapshot_store()), + snapshots: Some(snapshot_worker.clone()), + runtime, + }; + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + EmptyHistory::new(), + Some(persistence), + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + db.with_auto_commit(Workload::Internal, |tx| { + 
db.set_initialized(tx, Program::empty(HostType::Wasm.into())) + })?; + Ok(RelationalDbBootstrap { + db, + commitlog_repo, + snapshot_repo, + snapshot_worker, + durability, + durability_opts, + }) +} + +fn commitlog_stress_options(seed: DstSeed) -> spacetimedb_durability::local::Options { + let mut opts = spacetimedb_durability::local::Options::default(); + opts.commitlog.max_segment_size = 2 * 1024; + opts.commitlog.offset_index_interval_bytes = NonZeroU64::new(256).expect("256 > 0"); + opts.commitlog.offset_index_require_segment_fsync = seed.0.is_multiple_of(2); + opts.commitlog.write_buffer_size = 512; + opts +} + +fn runtime_alive_tasks() -> Option { + // The shim only exposes Tokio-compatible handles today. Keep this explicit + // until the target owns a simulator/runtime that can report live task state. + None +} + +fn schema_summary(schema: &SchemaPlan) -> SchemaSummary { + let initial_tables = schema.tables.len(); + let initial_columns = schema.tables.iter().map(|table| table.columns.len()).sum(); + let max_columns_per_table = schema + .tables + .iter() + .map(|table| table.columns.len()) + .max() + .unwrap_or_default(); + let extra_indexes = schema + .tables + .iter() + .map(|table| table.extra_indexes.len()) + .sum::(); + SchemaSummary { + initial_tables, + initial_columns, + max_columns_per_table, + initial_indexes: initial_tables + extra_indexes, + extra_indexes, + } +} + +fn disk_fault_summary(summary: CommitlogFaultSummary) -> DiskFaultSummary { + DiskFaultSummary { + profile: summary.profile, + latency: summary.latency, + short_read: summary.short_read, + short_write: summary.short_write, + read_error: summary.read_error, + write_error: summary.write_error, + flush_error: summary.flush_error, + fsync_error: summary.fsync_error, + open_error: summary.open_error, + metadata_error: summary.metadata_error, + } +} + +fn in_memory_size_on_disk() -> io::Result { + Ok(SizeOnDisk::default()) +} + +fn is_unique_constraint_violation(err: &DBError) -> bool { + 
matches!( + err, + DBError::Datastore(DatastoreError::Index(IndexError::UniqueConstraintViolation(_))) + ) +} + +fn is_write_conflict_error(err: &str) -> bool { + err.contains("owns lock") +} + +fn compare_rows_for_range(lhs: &SimRow, rhs: &SimRow, cols: &[u16]) -> std::cmp::Ordering { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) +} + +fn dynamic_table_name(slot: u32) -> String { + format!("dst_dynamic_slot_{slot}") +} + +fn dynamic_column_schemas(version: u32) -> Vec { + let mut columns = vec![ + ColumnSchema::for_test(0, "id", AlgebraicType::I64), + ColumnSchema::for_test(1, "value", AlgebraicType::U64), + ]; + for v in 1..=version { + columns.push(ColumnSchema::for_test( + (v + 1) as u16, + format!("migrated_v{v}"), + AlgebraicType::Bool, + )); + } + columns +} + +fn dynamic_probe_row(slot: u32, version: u32) -> SimRow { + let mut values = vec![AlgebraicValue::I64(0), AlgebraicValue::U64(slot as u64)]; + for _ in 1..=version { + values.push(AlgebraicValue::Bool(false)); + } + SimRow { values } +} + +fn dynamic_schema(name: &str, version: u32) -> TableSchema { + let columns = dynamic_column_schemas(version); + let indexes = vec![IndexSchema::for_test(format!("{name}_id_idx"), BTreeAlgorithm::from(0))]; + let constraints = vec![ConstraintSchema::unique_for_test(format!("{name}_id_unique"), 0)]; + let sequences = vec![SequenceSchema { + sequence_id: SequenceId::SENTINEL, + sequence_name: format!("{name}_id_seq").into(), + table_id: TableId::SENTINEL, + col_pos: 0.into(), + increment: 1, + start: 1, + min_value: 1, + max_value: i128::MAX, + }]; + TableSchema::new( + TableId::SENTINEL, + TableName::for_test(name), + None, + columns, + indexes, + constraints, + sequences, + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ) +} + +#[cfg(test)] +mod tests { + use crate::config::CommitlogFaultProfile; + + use super::*; + + fn 
run_seed_12_with_snapshot_fault( + configure: impl FnOnce(&mut SnapshotFaultConfig), + ) -> RelationalDbCommitlogOutcome { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(100).with_commitlog_fault_profile(CommitlogFaultProfile::Off); + let mut snapshot_fault_config = SnapshotFaultConfig::for_profile(CommitlogFaultProfile::Off); + snapshot_fault_config.enabled = true; + configure(&mut snapshot_fault_config); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + runtime + .block_on(async move { + let (source, engine, properties) = build_with_fault_configs( + seed, + TableScenarioId::RandomCrud, + &config, + CommitlogFaultConfig::for_profile(CommitlogFaultProfile::Off), + snapshot_fault_config, + )?; + core::run_streaming(source, engine, properties, config).await + }) + .unwrap() + } + + #[test] + fn seed_12_exercises_snapshot_capture_and_restore() { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(100).with_commitlog_fault_profile(CommitlogFaultProfile::Off); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + let outcome = runtime + .block_on(run_generated_with_config_and_scenario( + seed, + TableScenarioId::RandomCrud, + config, + )) + .unwrap(); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 2); + assert_eq!(outcome.interactions.close_reopen_applied, 1); + assert!(outcome.replay.durable_offset.is_some()); + assert!(outcome.replay.restored_snapshot_offset.is_some()); + assert!(outcome.replay.restored_snapshot_offset <= outcome.replay.durable_offset); + } + + #[test] + fn targeted_snapshot_open_faults_are_skipped_and_replay_matches_model() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.open_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 0); + assert_eq!(outcome.interactions.snapshot_skipped, 2); + assert!(outcome.snapshot_faults.open_error > 
0); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_metadata_faults_are_retryable_on_reopen() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.metadata_error_prob = 1.0); + + assert_eq!(outcome.interactions.close_reopen_applied, 1); + assert!(outcome.snapshot_faults.metadata_error > 0); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_read_faults_are_retryable_on_reopen() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.read_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_created, 2); + assert!(outcome.snapshot_faults.read_error > 0); + assert!(outcome.replay.restored_snapshot_offset.is_some()); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_write_faults_do_not_publish_new_snapshots() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.write_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 0); + assert_eq!(outcome.interactions.snapshot_skipped, 2); + assert!(outcome.snapshot_faults.write_error > 0); + assert!(outcome.replay.restored_snapshot_offset.is_none()); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } + + #[test] + fn targeted_snapshot_fsync_faults_do_not_publish_new_snapshots() { + let outcome = run_seed_12_with_snapshot_fault(|config| config.fsync_error_prob = 1.0); + + assert_eq!(outcome.interactions.snapshot_requested, 2); + assert_eq!(outcome.interactions.snapshot_created, 0); + assert_eq!(outcome.interactions.snapshot_skipped, 2); + assert!(outcome.snapshot_faults.fsync_error > 0); + assert!(outcome.replay.restored_snapshot_offset.is_none()); + assert_eq!(outcome.table.final_rows, outcome.replay.base_rows); + } +} diff --git a/crates/dst/src/targets/relational_db_concurrent.rs 
b/crates/dst/src/targets/relational_db_concurrent.rs new file mode 100644 index 00000000000..f0299470779 --- /dev/null +++ b/crates/dst/src/targets/relational_db_concurrent.rs @@ -0,0 +1,1045 @@ +//! Concurrent RelationalDB API target. +//! +//! The target models concurrency at RelationalDB lock boundaries. A generated +//! round may hold one or more read transactions, or one write transaction, and +//! then probe whether another client can acquire the write lock. Once a client +//! owns a `Tx` or `MutTx`, that section is synchronous: no simulator yield or +//! async boundary is allowed until the transaction is released, committed, or +//! rolled back. + +use std::{collections::BTreeMap, fmt}; + +use spacetimedb_core::{ + db::relational_db::{MutTx as RelMutTx, RelationalDB, Tx as RelTx}, + error::DBError, + messages::control_db::HostType, +}; +use spacetimedb_datastore::{execution_context::Workload, traits::IsolationLevel}; +use spacetimedb_durability::EmptyHistory; +use spacetimedb_lib::{ + db::auth::{StAccess, StTableType}, + Identity, +}; +use spacetimedb_primitives::TableId; +use spacetimedb_sats::AlgebraicValue; +use spacetimedb_schema::{ + def::BTreeAlgorithm, + schema::{ColumnSchema, ConstraintSchema, IndexSchema, TableSchema}, + table_name::TableName, +}; +use spacetimedb_table::page_pool::PagePool; +use tracing::info; + +use crate::{ + client::SessionId, + config::RunConfig, + core::{self, StreamingProperties, TargetEngine, WorkloadSource}, + schema::SimRow, + seed::{DstRng, DstSeed}, +}; + +pub async fn run_generated_with_config( + seed: DstSeed, + config: RunConfig, +) -> anyhow::Result { + let source = ConcurrentWorkloadSource::new(seed, config.max_interactions_or_default(usize::MAX)); + let engine = ConcurrentRelationalDbEngine::new()?; + let outcome = core::run_streaming(source, engine, ConcurrentProperties, config).await?; + info!( + rounds = outcome.rounds, + committed = outcome.committed, + conflicts = outcome.write_conflicts, + 
"relational_db_concurrent complete" + ); + Ok(outcome) +} + +#[derive(Clone, Debug)] +struct RoundPlan { + id: u64, + kind: RoundKind, + shared: SimRow, + extra: SimRow, +} + +#[derive(Clone, Copy, Debug)] +enum RoundKind { + WriterBlocksWriter, + ReadersBlockWriter, + MultiReaderSnapshot, + MixedReadWrite, +} + +struct ConcurrentWorkloadSource { + rng: DstRng, + emitted: usize, + target: usize, + next_id: u64, +} + +impl ConcurrentWorkloadSource { + fn new(seed: DstSeed, target: usize) -> Self { + Self { + rng: seed.fork(910).rng(), + emitted: 0, + target, + next_id: seed.fork(911).0.max(1), + } + } + + fn make_row(&mut self) -> SimRow { + let id = self.next_id; + self.next_id = self.next_id.wrapping_add(1).max(1); + SimRow { + values: vec![ + AlgebraicValue::U64(id), + AlgebraicValue::U64(self.rng.next_u64() % 1_000), + ], + } + } + + fn make_round(&mut self, id: u64) -> RoundPlan { + RoundPlan { + id, + kind: match id % 4 { + 0 => RoundKind::WriterBlocksWriter, + 1 => RoundKind::ReadersBlockWriter, + 2 => RoundKind::MultiReaderSnapshot, + _ => RoundKind::MixedReadWrite, + }, + shared: self.make_row(), + extra: self.make_row(), + } + } +} + +impl WorkloadSource for ConcurrentWorkloadSource { + type Interaction = RoundPlan; + + fn next_interaction(&mut self) -> Option { + if self.emitted >= self.target { + return None; + } + let round = self.make_round(self.emitted as u64); + self.emitted += 1; + Some(round) + } + + fn request_finish(&mut self) { + self.target = self.emitted; + } +} + +struct ConcurrentRelationalDbEngine { + db: RelationalDB, + table_id: TableId, + events: Vec, +} + +impl ConcurrentRelationalDbEngine { + fn new() -> anyhow::Result { + let (db, connected_clients) = RelationalDB::open( + Identity::ZERO, + Identity::ZERO, + EmptyHistory::new(), + None, + None, + PagePool::new_for_test(), + )?; + assert_eq!(connected_clients.len(), 0); + db.with_auto_commit(Workload::Internal, |tx| { + db.set_initialized(tx, 
spacetimedb_datastore::traits::Program::empty(HostType::Wasm.into())) + })?; + + let table_id = install_concurrent_schema(&db)?; + Ok(Self { + db, + table_id, + events: Vec::new(), + }) + } + + fn execute_round(&mut self, round: &RoundPlan) -> Result { + let mut machine = RoundMachine::new(&self.db, self.table_id, round.id, 4); + let events = machine.run(round)?; + self.events.extend(events.clone()); + Ok(RoundObservation { + round: round.id, + events, + }) + } + + fn collect_rows(&self) -> Result, String> { + let tx = self.db.begin_tx(Workload::ForTests); + let result = collect_rows_in_tx(&self.db, self.table_id, &tx, "collect rows"); + let _ = self.db.release_tx(tx); + result + } +} + +impl TargetEngine for ConcurrentRelationalDbEngine { + type Observation = RoundObservation; + type Outcome = RelationalDbConcurrentOutcome; + type Error = String; + + fn execute_interaction<'a>( + &'a mut self, + interaction: &'a RoundPlan, + ) -> impl Future> + 'a { + async move { self.execute_round(interaction) } + } + + fn finish(&mut self) {} + + fn collect_outcome<'a>(&'a mut self) -> impl Future> + 'a { + async move { + let final_rows = self.collect_rows().map_err(anyhow::Error::msg)?; + let expected_rows = expected_rows_from_events(&self.events); + let summary = ConcurrentSummary::from_events(&self.events); + Ok(RelationalDbConcurrentOutcome { + rounds: summary.rounds, + clients: summary.clients, + events: summary.events, + reads: summary.reads, + committed: summary.committed, + write_conflicts: summary.write_conflicts, + writer_conflicts: summary.writer_conflicts, + reader_conflicts: summary.reader_conflicts, + final_rows, + expected_rows, + }) + } + } +} + +struct RoundMachine<'a> { + db: &'a RelationalDB, + table_id: TableId, + round: u64, + clients: Vec, + events: Vec, +} + +impl<'a> RoundMachine<'a> { + fn new(db: &'a RelationalDB, table_id: TableId, round: u64, clients: usize) -> Self { + Self { + db, + table_id, + round, + clients: (0..clients).map(|_| 
ClientState::Idle).collect(), + events: Vec::new(), + } + } + + fn run(&mut self, round: &RoundPlan) -> Result, String> { + let result = match round.kind { + RoundKind::WriterBlocksWriter => self.writer_blocks_writer(round), + RoundKind::ReadersBlockWriter => self.readers_block_writer(round), + RoundKind::MultiReaderSnapshot => self.multi_reader_snapshot(round), + RoundKind::MixedReadWrite => self.mixed_read_write(round), + }; + let cleanup = self.cleanup(); + result.and(cleanup)?; + Ok(std::mem::take(&mut self.events)) + } + + fn writer_blocks_writer(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_write(client(0))?; + self.insert(client(0), round.shared.clone())?; + self.expect_write_conflict(client(1), ConflictReason::WriterHeld)?; + self.commit(client(0))?; + + self.begin_write(client(1))?; + self.insert(client(1), round.extra.clone())?; + self.commit(client(1)) + } + + fn readers_block_writer(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_read(client(0))?; + self.begin_read(client(1))?; + self.full_scan(client(0))?; + self.full_scan(client(1))?; + self.expect_write_conflict(client(2), ConflictReason::ReadersHeld)?; + self.release_read(client(0))?; + self.release_read(client(1))?; + + self.begin_write(client(2))?; + self.insert(client(2), round.shared.clone())?; + self.commit(client(2)) + } + + fn multi_reader_snapshot(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_read(client(0))?; + self.begin_read(client(1))?; + let snapshot_0 = self.full_scan(client(0))?; + let snapshot_1 = self.full_scan(client(1))?; + if snapshot_0 != snapshot_1 { + return Err(format!( + "[ConcurrentRelationalDb] round={} readers observed different snapshots: left={snapshot_0:?} right={snapshot_1:?}", + self.round + )); + } + self.release_read(client(0))?; + self.release_read(client(1))?; + + self.begin_write(client(2))?; + self.insert(client(2), round.shared.clone())?; + self.commit(client(2))?; + + self.begin_read(client(3))?; + 
self.point_lookup(client(3), round.shared.id().ok_or("generated row missing id")?)?; + self.release_read(client(3)) + } + + fn mixed_read_write(&mut self, round: &RoundPlan) -> Result<(), String> { + self.begin_write(client(0))?; + self.insert(client(0), round.shared.clone())?; + self.commit(client(0))?; + + self.begin_read(client(1))?; + self.point_lookup(client(1), round.shared.id().ok_or("generated row missing id")?)?; + self.release_read(client(1))?; + + self.begin_write(client(2))?; + self.delete(client(2), round.shared.clone())?; + self.rollback(client(2)); + + self.begin_write(client(3))?; + self.insert(client(3), round.extra.clone())?; + self.commit(client(3)) + } + + fn begin_read(&mut self, client: SessionId) -> Result<(), String> { + if self.any_writer() { + return Err(format!( + "[ConcurrentRelationalDb] round={} client={} would block beginning read while writer is held", + self.round, client + )); + } + self.expect_idle(client, "begin_read")?; + self.record_action(client, "begin_read"); + let tx = self.db.begin_tx(Workload::ForTests); + self.replace(client, ClientState::Reading { tx }); + Ok(()) + } + + fn release_read(&mut self, client: SessionId) -> Result<(), String> { + self.record_action(client, "release_read"); + match self.take(client)? { + ClientState::Reading { tx } => { + let _ = self.db.release_tx(tx); + self.replace(client, ClientState::Idle); + Ok(()) + } + state => { + self.replace(client, state); + Err(self.invalid_state(client, "release_read")) + } + } + } + + fn begin_write(&mut self, client: SessionId) -> Result<(), String> { + if self.try_begin_write(client)? { + Ok(()) + } else { + Err(format!( + "[ConcurrentRelationalDb] round={} client={} expected write lock to be available", + self.round, client + )) + } + } + + fn expect_write_conflict(&mut self, client: SessionId, reason: ConflictReason) -> Result<(), String> { + if self.try_begin_write(client)? 
{ + self.rollback(client); + return Err(format!( + "[ConcurrentRelationalDb] round={} client={} unexpectedly acquired write lock", + self.round, client + )); + } + match self.events.last() { + Some(RoundEvent::WriteConflict { reason: observed, .. }) if *observed == reason => Ok(()), + Some(event) => Err(format!( + "[ConcurrentRelationalDb] round={} expected conflict reason {reason:?}, observed {event}", + self.round + )), + None => Err(format!( + "[ConcurrentRelationalDb] round={} expected write conflict event", + self.round + )), + } + } + + fn try_begin_write(&mut self, client: SessionId) -> Result { + self.expect_idle(client, "try_begin_write")?; + self.record_action(client, "try_begin_write"); + match self + .db + .try_begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests) + { + Some(tx) => { + self.replace( + client, + ClientState::Writing { + tx, + pending: Vec::new(), + }, + ); + self.events.push(RoundEvent::WriteLockAcquired { + round: self.round, + client, + }); + Ok(true) + } + None => { + self.events.push(RoundEvent::WriteConflict { + round: self.round, + client, + reason: self.conflict_reason(), + }); + Ok(false) + } + } + } + + fn insert(&mut self, client: SessionId, row: SimRow) -> Result<(), String> { + self.record_action(client, "insert"); + let table_id = self.table_id; + let db = self.db; + self.with_writer(client, |tx, pending| { + let bsatn = row.to_bsatn().map_err(|err| err.to_string())?; + match db.insert(tx, table_id, &bsatn) { + Ok((_, row_ref, _)) => { + pending.push(ConcurrentMutation::Inserted(SimRow::from_product_value( + row_ref.to_product_value(), + ))); + Ok(()) + } + Err(err) if is_unique_constraint_violation(&err) => Ok(()), + Err(err) => Err(format!("insert failed: {err}")), + } + }) + } + + fn delete(&mut self, client: SessionId, row: SimRow) -> Result<(), String> { + self.record_action(client, "delete"); + let table_id = self.table_id; + let db = self.db; + self.with_writer(client, |tx, pending| { + match 
db.delete_by_rel(tx, table_id, [row.to_product_value()]) { + 0 => Ok(()), + 1 => { + pending.push(ConcurrentMutation::Deleted(row)); + Ok(()) + } + deleted => Err(format!("delete affected {deleted} rows")), + } + }) + } + + fn commit(&mut self, client: SessionId) -> Result<(), String> { + self.record_action(client, "commit"); + match self.take(client)? { + ClientState::Writing { tx, mut pending } => { + let committed = self + .db + .commit_tx(tx) + .map_err(|err| format!("commit failed: {err}"))? + .ok_or_else(|| "commit returned no tx data".to_string())?; + self.events.push(RoundEvent::Committed { + round: self.round, + client, + tx_offset: committed.0, + mutations: std::mem::take(&mut pending), + }); + self.replace(client, ClientState::Idle); + Ok(()) + } + state => { + self.replace(client, state); + Err(self.invalid_state(client, "commit")) + } + } + } + + fn rollback(&mut self, client: SessionId) { + self.record_action(client, "rollback"); + match self.take(client) { + Ok(ClientState::Writing { tx, .. 
}) => { + let _ = self.db.rollback_mut_tx(tx); + self.events.push(RoundEvent::RolledBack { + round: self.round, + client, + }); + self.replace(client, ClientState::Idle); + } + Ok(state) => self.replace(client, state), + Err(_) => {} + } + } + + fn full_scan(&mut self, client: SessionId) -> Result { + self.record_action(client, "full_scan"); + let summary = self.with_reader(client, |tx| scan_summary_in_tx(self.db, self.table_id, tx, "full scan"))?; + self.events.push(RoundEvent::Read { + round: self.round, + client, + kind: ReadKind::FullScan, + summary, + }); + Ok(summary) + } + + fn point_lookup(&mut self, client: SessionId, id: u64) -> Result { + self.record_action(client, "point_lookup"); + let summary = self.with_reader(client, |tx| point_lookup_summary_in_tx(self.db, self.table_id, tx, id))?; + self.events.push(RoundEvent::Read { + round: self.round, + client, + kind: ReadKind::PointLookup { id }, + summary, + }); + Ok(summary) + } + + fn with_writer( + &mut self, + client: SessionId, + f: impl FnOnce(&mut RelMutTx, &mut Vec) -> Result, + ) -> Result { + match self.state_mut(client)? { + ClientState::Writing { tx, pending } => f(tx, pending), + _ => Err(self.invalid_state(client, "write operation")), + } + } + + fn with_reader(&self, client: SessionId, f: impl FnOnce(&RelTx) -> Result) -> Result { + match self.state(client)? { + ClientState::Reading { tx } => f(tx), + _ => Err(self.invalid_state(client, "read operation")), + } + } + + fn cleanup(&mut self) -> Result<(), String> { + let mut leaked = None; + for index in 0..self.clients.len() { + let client = SessionId::from_index(index); + match self.take(client)? 
{ + ClientState::Idle => self.replace(client, ClientState::Idle), + ClientState::Reading { tx } => { + let _ = self.db.release_tx(tx); + self.replace(client, ClientState::Idle); + leaked.get_or_insert_with(|| { + format!( + "[ConcurrentRelationalDb] round={} client={} leaked read transaction", + self.round, client + ) + }); + } + ClientState::Writing { tx, .. } => { + let _ = self.db.rollback_mut_tx(tx); + self.replace(client, ClientState::Idle); + leaked.get_or_insert_with(|| { + format!( + "[ConcurrentRelationalDb] round={} client={} leaked write transaction", + self.round, client + ) + }); + } + } + } + match leaked { + Some(err) => Err(err), + None => Ok(()), + } + } + + fn conflict_reason(&self) -> ConflictReason { + if self.any_writer() { + ConflictReason::WriterHeld + } else if self.any_reader() { + ConflictReason::ReadersHeld + } else { + ConflictReason::Unknown + } + } + + fn any_reader(&self) -> bool { + self.clients.iter().any(ClientState::is_reading) + } + + fn any_writer(&self) -> bool { + self.clients.iter().any(ClientState::is_writing) + } + + fn expect_idle(&self, client: SessionId, action: &'static str) -> Result<(), String> { + if self.state(client)?.is_idle() { + Ok(()) + } else { + Err(self.invalid_state(client, action)) + } + } + + fn record_action(&mut self, client: SessionId, name: &'static str) { + self.events.push(RoundEvent::Action { + round: self.round, + client, + name, + }); + } + + fn state(&self, client: SessionId) -> Result<&ClientState, String> { + self.clients + .get(client.as_index()) + .ok_or_else(|| format!("[ConcurrentRelationalDb] unknown client {client}")) + } + + fn state_mut(&mut self, client: SessionId) -> Result<&mut ClientState, String> { + self.clients + .get_mut(client.as_index()) + .ok_or_else(|| format!("[ConcurrentRelationalDb] unknown client {client}")) + } + + fn take(&mut self, client: SessionId) -> Result { + let state = self.state_mut(client)?; + Ok(std::mem::replace(state, ClientState::Idle)) + } + + fn 
replace(&mut self, client: SessionId, state: ClientState) { + self.clients[client.as_index()] = state; + } + + fn invalid_state(&self, client: SessionId, action: &str) -> String { + format!( + "[ConcurrentRelationalDb] round={} client={} cannot {action} from {}", + self.round, + client, + self.state(client).map(ClientState::name).unwrap_or("unknown") + ) + } +} + +enum ClientState { + Idle, + Reading { + tx: RelTx, + }, + Writing { + tx: RelMutTx, + pending: Vec, + }, +} + +impl ClientState { + fn name(&self) -> &'static str { + match self { + Self::Idle => "idle", + Self::Reading { .. } => "reading", + Self::Writing { .. } => "writing", + } + } + + fn is_idle(&self) -> bool { + matches!(self, Self::Idle) + } + + fn is_reading(&self) -> bool { + matches!(self, Self::Reading { .. }) + } + + fn is_writing(&self) -> bool { + matches!(self, Self::Writing { .. }) + } +} + +#[derive(Clone, Debug)] +struct RoundObservation { + round: u64, + events: Vec, +} + +#[derive(Clone, Debug)] +pub struct RelationalDbConcurrentOutcome { + pub rounds: usize, + pub clients: usize, + pub events: usize, + pub reads: usize, + pub committed: usize, + pub write_conflicts: usize, + pub writer_conflicts: usize, + pub reader_conflicts: usize, + pub final_rows: Vec, + pub expected_rows: Vec, +} + +#[derive(Clone, Debug)] +enum RoundEvent { + Action { + round: u64, + client: SessionId, + name: &'static str, + }, + WriteLockAcquired { + round: u64, + client: SessionId, + }, + WriteConflict { + round: u64, + client: SessionId, + reason: ConflictReason, + }, + Committed { + round: u64, + client: SessionId, + tx_offset: u64, + mutations: Vec, + }, + RolledBack { + round: u64, + client: SessionId, + }, + Read { + round: u64, + client: SessionId, + kind: ReadKind, + summary: ReadSummary, + }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ConflictReason { + WriterHeld, + ReadersHeld, + Unknown, +} + +#[derive(Clone, Debug)] +enum ReadKind { + FullScan, + PointLookup { id: u64 }, +} + 
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +struct ReadSummary { + row_count: usize, + checksum: u64, +} + +impl ReadSummary { + fn add_row(&mut self, row: &SimRow, label: &'static str) -> Result<(), String> { + self.row_count += 1; + self.checksum = self.checksum.wrapping_add(concurrent_row_checksum(row, label)?); + Ok(()) + } +} + +#[derive(Clone, Debug)] +enum ConcurrentMutation { + Inserted(SimRow), + Deleted(SimRow), +} + +#[derive(Default)] +struct ConcurrentSummary { + rounds: usize, + clients: usize, + events: usize, + reads: usize, + committed: usize, + write_conflicts: usize, + writer_conflicts: usize, + reader_conflicts: usize, +} + +impl ConcurrentSummary { + fn from_events(events: &[RoundEvent]) -> Self { + let mut summary = Self::default(); + let mut max_round = None; + let mut max_client = None; + + for event in events { + summary.events += 1; + let (round, client) = event.position(); + max_round = Some(max_round.unwrap_or(round).max(round)); + max_client = Some(max_client.unwrap_or(client.as_index()).max(client.as_index())); + + match event { + RoundEvent::WriteConflict { reason, .. } => { + summary.write_conflicts += 1; + match reason { + ConflictReason::WriterHeld => summary.writer_conflicts += 1, + ConflictReason::ReadersHeld => summary.reader_conflicts += 1, + ConflictReason::Unknown => {} + } + } + RoundEvent::Committed { .. } => summary.committed += 1, + RoundEvent::Read { .. } => summary.reads += 1, + RoundEvent::Action { .. } | RoundEvent::WriteLockAcquired { .. } | RoundEvent::RolledBack { .. } => {} + } + } + + summary.rounds = max_round.map(|round| round as usize + 1).unwrap_or_default(); + summary.clients = max_client.map(|client| client + 1).unwrap_or_default(); + summary + } +} + +impl RoundEvent { + fn position(&self) -> (u64, SessionId) { + match self { + Self::Action { round, client, .. } + | Self::WriteLockAcquired { round, client } + | Self::WriteConflict { round, client, .. } + | Self::Committed { round, client, .. 
} + | Self::RolledBack { round, client } + | Self::Read { round, client, .. } => (*round, *client), + } + } +} + +struct ConcurrentProperties; + +impl StreamingProperties for ConcurrentProperties { + fn observe( + &mut self, + _engine: &ConcurrentRelationalDbEngine, + _interaction: &RoundPlan, + observation: &RoundObservation, + ) -> Result<(), String> { + if observation.events.is_empty() { + return Err(format!( + "[ConcurrentRelationalDb] round={} produced no events", + observation.round + )); + } + + for event in &observation.events { + if let RoundEvent::Read { + kind: ReadKind::PointLookup { id }, + summary, + .. + } = event + { + if summary.row_count > 1 { + return Err(format!( + "[ConcurrentRelationalDb] round={} invalid point lookup id={id}: {summary:?}", + observation.round + )); + } + } + } + Ok(()) + } + + fn finish( + &mut self, + _engine: &ConcurrentRelationalDbEngine, + outcome: &RelationalDbConcurrentOutcome, + ) -> Result<(), String> { + if outcome.final_rows != outcome.expected_rows { + return Err(format!( + "[ConcurrentRelationalDb] final rows differ from commit-offset oracle: expected={:?} actual={:?}", + outcome.expected_rows, outcome.final_rows + )); + } + if outcome.writer_conflicts == 0 { + return Err("[ConcurrentRelationalDb] no writer-held lock contention was observed".to_string()); + } + if outcome.reader_conflicts == 0 { + return Err("[ConcurrentRelationalDb] no reader-held lock contention was observed".to_string()); + } + if outcome.reads == 0 { + return Err("[ConcurrentRelationalDb] no read sections were observed".to_string()); + } + Ok(()) + } +} + +fn collect_rows_in_tx( + db: &RelationalDB, + table_id: TableId, + tx: &RelTx, + label: &'static str, +) -> Result, String> { + let mut rows = db + .iter(tx, table_id) + .map_err(|err| format!("{label} failed: {err}"))? 
+ .map(|row_ref| SimRow::from_product_value(row_ref.to_product_value())) + .collect::>(); + rows.sort_by_key(|row| row.id().unwrap_or_default()); + Ok(rows) +} + +fn scan_summary_in_tx( + db: &RelationalDB, + table_id: TableId, + tx: &RelTx, + label: &'static str, +) -> Result { + let mut summary = ReadSummary::default(); + for row_ref in db.iter(tx, table_id).map_err(|err| format!("{label} failed: {err}"))? { + let row = SimRow::from_product_value(row_ref.to_product_value()); + summary.add_row(&row, label)?; + } + Ok(summary) +} + +fn point_lookup_summary_in_tx( + db: &RelationalDB, + table_id: TableId, + tx: &RelTx, + id: u64, +) -> Result { + let value = AlgebraicValue::U64(id); + let mut summary = ReadSummary::default(); + for row_ref in db + .iter_by_col_eq(tx, table_id, 0u16, &value) + .map_err(|err| format!("point lookup failed: {err}"))? + { + let row = SimRow::from_product_value(row_ref.to_product_value()); + if row.id() != Some(id) { + return Err(format!( + "[ConcurrentRelationalDb] point lookup id={id} returned different row: {row:?}" + )); + } + summary.add_row(&row, "point lookup")?; + } + Ok(summary) +} + +fn concurrent_row_checksum(row: &SimRow, label: &'static str) -> Result { + let id = row + .id() + .ok_or_else(|| format!("[ConcurrentRelationalDb] {label} row missing u64 id: {row:?}"))?; + let value = match row.values.get(1) { + Some(AlgebraicValue::U64(value)) => *value, + other => { + return Err(format!( + "[ConcurrentRelationalDb] {label} row has invalid value column: {other:?} in {row:?}" + )); + } + }; + + Ok(mix64(id) + .wrapping_add(mix64(value ^ 0xa076_1d64_78bd_642f)) + .wrapping_add(mix64(row.values.len() as u64))) +} + +fn mix64(mut value: u64) -> u64 { + value = (value ^ (value >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9); + value = (value ^ (value >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb); + value ^ (value >> 31) +} + +fn expected_rows_from_events(events: &[RoundEvent]) -> Vec { + let mut commits = events + .iter() + 
.filter_map(|event| match event { + RoundEvent::Committed { + tx_offset, mutations, .. + } => Some((*tx_offset, mutations)), + _ => None, + }) + .collect::>(); + commits.sort_by_key(|(tx_offset, _)| *tx_offset); + + let mut rows = BTreeMap::::new(); + for (_tx_offset, mutations) in commits { + for mutation in mutations { + match mutation { + ConcurrentMutation::Inserted(row) => { + if let Some(id) = row.id() { + rows.insert(id, row.clone()); + } + } + ConcurrentMutation::Deleted(row) => { + if let Some(id) = row.id() { + rows.remove(&id); + } + } + } + } + } + rows.into_values().collect() +} + +fn install_concurrent_schema(db: &RelationalDB) -> anyhow::Result { + let mut tx = db.begin_mut_tx(IsolationLevel::Serializable, Workload::ForTests); + let table_id = db.create_table( + &mut tx, + TableSchema::new( + TableId::SENTINEL, + TableName::for_test("concurrent_rows"), + None, + vec![ + ColumnSchema::for_test(0, "id", spacetimedb_sats::AlgebraicType::U64), + ColumnSchema::for_test(1, "value", spacetimedb_sats::AlgebraicType::U64), + ], + vec![IndexSchema::for_test("concurrent_rows_id_idx", BTreeAlgorithm::from(0))], + vec![ConstraintSchema::unique_for_test("concurrent_rows_id_unique", 0)], + vec![], + StTableType::User, + StAccess::Public, + None, + Some(0.into()), + false, + None, + ), + )?; + let _ = db.commit_tx(tx)?; + Ok(table_id) +} + +fn client(index: usize) -> SessionId { + SessionId::from_index(index) +} + +fn is_unique_constraint_violation(err: &DBError) -> bool { + err.to_string().contains("Unique") || err.to_string().contains("unique") +} + +impl fmt::Display for RoundEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Action { name, .. 
} => write!(f, "action({name})"), + event => write!(f, "{event:?}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sim; + + #[test] + fn seed_12_exercises_lock_state_machine() { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(100); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + let outcome = runtime.block_on(run_generated_with_config(seed, config)).unwrap(); + + assert_eq!(outcome.rounds, 100); + assert!(outcome.committed > 0); + assert!(outcome.writer_conflicts > 0); + assert!(outcome.reader_conflicts > 0); + assert!(outcome.reads > 0); + assert_eq!(outcome.final_rows, outcome.expected_rows); + } + + #[test] + fn first_four_rounds_cover_core_lock_cases() { + let seed = DstSeed(12); + let config = RunConfig::with_max_interactions(4); + let mut runtime = sim::Runtime::new(seed).unwrap(); + + let outcome = runtime.block_on(run_generated_with_config(seed, config)).unwrap(); + + assert_eq!(outcome.rounds, 4); + assert_eq!(outcome.writer_conflicts, 1); + assert_eq!(outcome.reader_conflicts, 1); + assert!(outcome.reads >= 4); + assert_eq!(outcome.final_rows, outcome.expected_rows); + } +} diff --git a/crates/dst/src/workload/commitlog_ops/generation.rs b/crates/dst/src/workload/commitlog_ops/generation.rs new file mode 100644 index 00000000000..4e6a173c6a8 --- /dev/null +++ b/crates/dst/src/workload/commitlog_ops/generation.rs @@ -0,0 +1,279 @@ +//! Commitlog workload source: table workload plus lifecycle and durability pressure. + +use std::collections::{BTreeSet, VecDeque}; + +use crate::{ + core::WorkloadSource, + schema::SchemaPlan, + seed::{DstRng, DstSeed}, + workload::strategy::{Index, Percent, Strategy}, + workload::{ + commitlog_ops::CommitlogInteraction, + table_ops::{strategies::ConnectionChoice, TableScenario, TableWorkloadSource}, + }, +}; + +/// Generation profile for commitlog-specific interactions layered around table ops. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct CommitlogWorkloadProfile { + pub(crate) close_reopen_pct: usize, + pub(crate) snapshot_pct: usize, + pub(crate) create_dynamic_table_pct: usize, + pub(crate) migrate_after_create_pct: usize, + pub(crate) migrate_dynamic_table_pct: usize, + pub(crate) drop_dynamic_table_pct: usize, +} + +impl Default for CommitlogWorkloadProfile { + fn default() -> Self { + Self { + close_reopen_pct: 1, + snapshot_pct: 2, + create_dynamic_table_pct: 1, + migrate_after_create_pct: 55, + migrate_dynamic_table_pct: 6, + drop_dynamic_table_pct: 5, + } + } +} + +/// Streaming source for commitlog-oriented targets. +/// +/// This composes a base table workload with commitlog lifecycle interactions +/// instead of defining an unrelated workload language. +pub(crate) struct CommitlogWorkloadSource { + base: TableWorkloadSource, + profile: CommitlogWorkloadProfile, + rng: DstRng, + num_connections: usize, + next_slot: u32, + alive_slots: BTreeSet, + pending: VecDeque, +} + +impl CommitlogWorkloadSource { + pub fn new( + seed: DstSeed, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + ) -> Self { + Self::with_profile( + seed, + scenario, + schema, + num_connections, + target_interactions, + CommitlogWorkloadProfile::default(), + ) + } + + pub fn with_profile( + seed: DstSeed, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + profile: CommitlogWorkloadProfile, + ) -> Self { + Self { + base: TableWorkloadSource::new(seed.fork(123), scenario, schema, num_connections, target_interactions), + profile, + rng: seed.fork(124).rng(), + num_connections, + next_slot: 0, + alive_slots: BTreeSet::new(), + pending: VecDeque::new(), + } + } + + pub fn request_finish(&mut self) { + self.base.request_finish(); + } + + fn fill_pending(&mut self) -> bool { + let Some(base_op) = self.base.next() else { + return false; + }; + 
self.pending.push_back(CommitlogInteraction::Table(base_op)); + + if self.base.has_open_read_tx() || self.base.has_open_write_tx() { + return true; + } + + if Percent::new(self.profile.close_reopen_pct).sample(&mut self.rng) { + self.pending.push_back(CommitlogInteraction::CloseReopen); + } + + if Percent::new(self.profile.create_dynamic_table_pct).sample(&mut self.rng) { + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); + let slot = self.next_slot; + self.next_slot = self.next_slot.saturating_add(1); + self.alive_slots.insert(slot); + self.pending + .push_back(CommitlogInteraction::CreateDynamicTable { conn, slot }); + // Frequently follow a create with migration to stress add-column + + // copy + subsequent auto-inc allocation paths. + if Percent::new(self.profile.migrate_after_create_pct).sample(&mut self.rng) { + self.pending + .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); + } + return true; + } + + if !self.alive_slots.is_empty() && Percent::new(self.profile.migrate_dynamic_table_pct).sample(&mut self.rng) { + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); + let idx = Index::new(self.alive_slots.len()).sample(&mut self.rng); + let slot = *self + .alive_slots + .iter() + .nth(idx) + .expect("slot index within alive set bounds"); + self.pending + .push_back(CommitlogInteraction::MigrateDynamicTable { conn, slot }); + } + + if !self.alive_slots.is_empty() && Percent::new(self.profile.drop_dynamic_table_pct).sample(&mut self.rng) { + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); + let idx = Index::new(self.alive_slots.len()).sample(&mut self.rng); + let slot = *self + .alive_slots + .iter() + .nth(idx) + .expect("slot index within alive set bounds"); + self.alive_slots.remove(&slot); + self.pending + .push_back(CommitlogInteraction::DropDynamicTable { conn, slot }); + } + + true 
+ } +} + +impl CommitlogWorkloadSource { + pub fn pull_next_interaction(&mut self) -> Option { + loop { + if let Some(next) = self.pending.pop_front() { + return Some(next); + } + if !self.fill_pending() { + return None; + } + } + } +} + +impl WorkloadSource for CommitlogWorkloadSource { + type Interaction = CommitlogInteraction; + + fn next_interaction(&mut self) -> Option { + self.pull_next_interaction() + } + + fn request_finish(&mut self) { + Self::request_finish(self); + } +} + +impl Iterator for CommitlogWorkloadSource { + type Item = CommitlogInteraction; + + fn next(&mut self) -> Option { + self.pull_next_interaction() + } +} + +#[cfg(test)] +mod tests { + use spacetimedb_sats::AlgebraicType; + + use crate::{ + client::SessionId, + schema::{ColumnPlan, SchemaPlan, TablePlan}, + seed::{DstRng, DstSeed}, + workload::{ + commitlog_ops::CommitlogInteraction, + table_ops::{ScenarioPlanner, TableOperation, TableScenario, TableWorkloadInteraction}, + }, + }; + + use super::{CommitlogWorkloadProfile, CommitlogWorkloadSource}; + + #[derive(Clone)] + struct BeginThenCommitScenario; + + impl TableScenario for BeginThenCommitScenario { + fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { + SchemaPlan { + tables: vec![TablePlan { + name: "test_table".to_string(), + columns: vec![ColumnPlan { + name: "id".to_string(), + ty: AlgebraicType::U64, + }], + extra_indexes: vec![], + }], + } + } + + fn validate_outcome( + &self, + _schema: &SchemaPlan, + _outcome: &crate::workload::table_ops::TableWorkloadOutcome, + ) -> anyhow::Result<()> { + Ok(()) + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + if planner.active_writer() == Some(conn) { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } else { + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + } + } + } + + #[test] + fn lifecycle_interactions_wait_for_open_write_tx_to_close() { + 
let scenario = BeginThenCommitScenario; + let mut rng = DstSeed(1).rng(); + let schema = scenario.generate_schema(&mut rng); + let profile = CommitlogWorkloadProfile { + close_reopen_pct: 100, + snapshot_pct: 100, + create_dynamic_table_pct: 100, + migrate_after_create_pct: 100, + migrate_dynamic_table_pct: 100, + drop_dynamic_table_pct: 100, + }; + let mut source = CommitlogWorkloadSource::with_profile(DstSeed(10), scenario, schema, 1, 2, profile); + + assert!(matches!( + source.next(), + Some(CommitlogInteraction::Table(TableWorkloadInteraction { + op: TableOperation::BeginTx { .. }, + .. + })) + )); + assert!(matches!( + source.next(), + Some(CommitlogInteraction::Table(TableWorkloadInteraction { + op: TableOperation::CommitTx { .. }, + .. + })) + )); + assert!(matches!(source.next(), Some(CommitlogInteraction::CloseReopen))); + } +} diff --git a/crates/dst/src/workload/commitlog_ops/mod.rs b/crates/dst/src/workload/commitlog_ops/mod.rs new file mode 100644 index 00000000000..62d0f99a82a --- /dev/null +++ b/crates/dst/src/workload/commitlog_ops/mod.rs @@ -0,0 +1,11 @@ +//! Commitlog-oriented workload that composes `table_ops` with lifecycle/chaos. + +mod generation; +mod types; + +pub(crate) use generation::CommitlogWorkloadSource; +pub use types::{ + CommitlogInteraction, CommitlogWorkloadOutcome, DiskFaultSummary, DurableReplaySummary, InteractionSummary, + RuntimeSummary, SchemaSummary, SnapshotCaptureStatus, SnapshotObservation, TableOperationSummary, + TransactionSummary, +}; diff --git a/crates/dst/src/workload/commitlog_ops/types.rs b/crates/dst/src/workload/commitlog_ops/types.rs new file mode 100644 index 00000000000..62711866eb4 --- /dev/null +++ b/crates/dst/src/workload/commitlog_ops/types.rs @@ -0,0 +1,169 @@ +//! Serializable interaction model for relational-db + commitlog DST. 
+ +use crate::{ + client::SessionId, + config::CommitlogFaultProfile, + schema::SimRow, + workload::table_ops::{TableWorkloadInteraction, TableWorkloadOutcome}, +}; + +/// One interaction in the commitlog-oriented mixed workload. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum CommitlogInteraction { + /// Reused base workload interaction from `table_ops`. + Table(TableWorkloadInteraction), + /// Create a dynamic user table for a logical slot. + CreateDynamicTable { conn: SessionId, slot: u32 }, + /// Drop a previously created dynamic user table. + DropDynamicTable { conn: SessionId, slot: u32 }, + /// Migrate dynamic table schema for a slot. + MigrateDynamicTable { conn: SessionId, slot: u32 }, + /// Close and restart the database from durable history. + CloseReopen, +} + +/// Successful run summary for commitlog target. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct CommitlogWorkloadOutcome { + pub applied_steps: usize, + pub durable_commit_count: usize, + pub replay_table_count: usize, + pub schema: SchemaSummary, + pub interactions: InteractionSummary, + pub table_ops: TableOperationSummary, + pub transactions: TransactionSummary, + pub runtime: RuntimeSummary, + pub disk_faults: DiskFaultSummary, + pub snapshot_faults: DiskFaultSummary, + pub replay: DurableReplaySummary, + pub table: TableWorkloadOutcome, +} + +/// State observed after opening a fresh database from durable commitlog history. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DurableReplaySummary { + pub durable_offset: Option, + pub restored_snapshot_offset: Option, + pub latest_snapshot_offset: Option, + pub base_rows: Vec>, + pub dynamic_table_count: usize, +} + +/// Snapshot capture status observed by a target. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum SnapshotCaptureStatus { + Captured { offset: u64 }, + SkippedOpenTransaction, + SkippedNoSnapshotCreated, + SkippedInjectedFault, +} + +/// Snapshot capture facts exposed to properties. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SnapshotObservation { + pub durable_offset: Option, + pub latest_before: Option, + pub latest_after: Option, + pub status: SnapshotCaptureStatus, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct SchemaSummary { + pub initial_tables: usize, + pub initial_columns: usize, + pub max_columns_per_table: usize, + pub initial_indexes: usize, + pub extra_indexes: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct InteractionSummary { + pub table: usize, + pub create_dynamic_table: usize, + pub drop_dynamic_table: usize, + pub migrate_dynamic_table: usize, + pub close_reopen_requested: usize, + pub close_reopen_applied: usize, + pub close_reopen_skipped: usize, + pub snapshot_requested: usize, + pub snapshot_created: usize, + pub snapshot_skipped: usize, + pub skipped: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct TableOperationSummary { + /// Explicit write transaction starts. + pub begin_tx: usize, + /// Explicit write transaction commits. + pub commit_tx: usize, + /// Explicit write transaction rollbacks. + pub rollback_tx: usize, + /// Long read snapshot starts. + pub begin_read_tx: usize, + /// Long read snapshot releases. + pub release_read_tx: usize, + /// Expected failures when a second writer tries to begin. + pub begin_tx_conflict: usize, + /// Expected failures when a second writer tries to write. + pub write_conflict_insert: usize, + /// Fresh single-row inserts. + pub insert: usize, + /// Single-row deletes. + pub delete: usize, + /// Exact full-row reinserts that should be idempotent no-ops. + pub exact_duplicate_insert: usize, + /// Same primary id with different payload; should violate the unique key. + pub unique_key_conflict_insert: usize, + /// Deletes of absent rows that should report no mutation. + pub delete_missing: usize, + /// Multi-row inserts. + pub batch_insert: usize, + /// Multi-row deletes. 
+ pub batch_delete: usize, + /// Delete followed by inserting the same row. + pub reinsert: usize, + /// Add-column schema changes against live base tables. + pub add_column: usize, + /// Add-index schema changes against live base tables. + pub add_index: usize, + /// Primary-id lookup oracle checks. + pub point_lookup: usize, + /// Column equality count oracle checks. + pub predicate_count: usize, + /// Indexed range scan oracle checks. + pub range_scan: usize, + /// Full scan oracle checks. + pub full_scan: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct TransactionSummary { + pub explicit_begin: usize, + pub explicit_commit: usize, + pub explicit_rollback: usize, + pub auto_commit: usize, + pub read_tx: usize, + pub durable_commit_count: usize, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct RuntimeSummary { + pub known_runtime_tasks_scheduled: usize, + pub durability_actors_started: usize, + pub runtime_alive_tasks: Option, +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct DiskFaultSummary { + pub profile: CommitlogFaultProfile, + pub latency: usize, + pub short_read: usize, + pub short_write: usize, + pub read_error: usize, + pub write_error: usize, + pub flush_error: usize, + pub fsync_error: usize, + pub open_error: usize, + pub metadata_error: usize, +} diff --git a/crates/dst/src/workload/mod.rs b/crates/dst/src/workload/mod.rs new file mode 100644 index 00000000000..52482e737f1 --- /dev/null +++ b/crates/dst/src/workload/mod.rs @@ -0,0 +1,5 @@ +//! Shared workload generators reused by multiple DST targets. + +pub mod commitlog_ops; +pub(crate) mod strategy; +pub mod table_ops; diff --git a/crates/dst/src/workload/strategy.rs b/crates/dst/src/workload/strategy.rs new file mode 100644 index 00000000000..94108eced8c --- /dev/null +++ b/crates/dst/src/workload/strategy.rs @@ -0,0 +1,112 @@ +//! Small proptest-inspired strategy primitives for deterministic DST generation. +//! +//! 
This is intentionally minimal: we keep DST's streaming execution model and +//! use strategies only for typed, composable input generation. + +use crate::seed::DstRng; + +/// Typed strategy that can sample values from the shared deterministic RNG. +pub(crate) trait Strategy: Sized { + fn sample(&self, rng: &mut DstRng) -> T; +} + +/// Picks a value in `[0, upper)`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Index { + upper: usize, +} + +impl Index { + pub(crate) fn new(upper: usize) -> Self { + assert!(upper > 0, "index upper bound must be non-zero"); + Self { upper } + } +} + +impl Strategy for Index { + fn sample(&self, rng: &mut DstRng) -> usize { + rng.index(self.upper) + } +} + +/// Bernoulli-style strategy from an integer percentage in `[0, 100]`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Percent { + percent: usize, +} + +impl Percent { + pub(crate) fn new(percent: usize) -> Self { + assert!(percent <= 100, "percent must be in 0..=100, got {percent}"); + Self { percent } + } +} + +impl Strategy for Percent { + fn sample(&self, rng: &mut DstRng) -> bool { + Index::new(100).sample(rng) < self.percent + } +} + +/// Weighted discrete choice over cloneable values. 
+#[derive(Clone, Debug)] +pub(crate) struct Weighted { + options: Vec<(usize, T)>, + total_weight: usize, +} + +impl Weighted { + pub(crate) fn new(options: Vec<(usize, T)>) -> Self { + let total_weight = options.iter().map(|(weight, _)| *weight).sum(); + assert!(total_weight > 0, "weighted strategy requires positive total weight"); + Self { options, total_weight } + } +} + +impl Strategy for Weighted { + fn sample(&self, rng: &mut DstRng) -> T { + let mut pick = Index::new(self.total_weight).sample(rng); + for (weight, value) in &self.options { + if pick < *weight { + return value.clone(); + } + pick -= *weight; + } + self.options + .last() + .map(|(_, value)| value.clone()) + .expect("weighted strategy has at least one option") + } +} + +#[cfg(test)] +mod tests { + use crate::seed::DstSeed; + + use super::{Index, Percent, Strategy, Weighted}; + + #[test] + fn weighted_is_deterministic_for_seed() { + let strategy = Weighted::new(vec![(1, 10usize), (2, 20usize), (3, 30usize)]); + let mut rng_a = DstSeed(7).rng(); + let mut rng_b = DstSeed(7).rng(); + let a = (0..16).map(|_| strategy.sample(&mut rng_a)).collect::>(); + let b = (0..16).map(|_| strategy.sample(&mut rng_b)).collect::>(); + assert_eq!(a, b); + } + + #[test] + fn index_strategy_respects_bounds() { + let mut rng = DstSeed(123).rng(); + for _ in 0..64 { + let idx = Index::new(5).sample(&mut rng); + assert!(idx < 5); + } + } + + #[test] + #[should_panic(expected = "percent must be in 0..=100")] + fn percent_rejects_out_of_range_values() { + let _ = Percent::new(101); + } +} diff --git a/crates/dst/src/workload/table_ops/generation.rs b/crates/dst/src/workload/table_ops/generation.rs new file mode 100644 index 00000000000..dec276060b2 --- /dev/null +++ b/crates/dst/src/workload/table_ops/generation.rs @@ -0,0 +1,307 @@ +use std::collections::VecDeque; + +use crate::{ + client::SessionId, + core::WorkloadSource, + schema::{ColumnPlan, SchemaPlan, TablePlan}, + seed::{DstRng, DstSeed}, + 
workload::strategy::{Index, Percent, Strategy}, +}; + +use super::{ + model::GenerationModel, + strategies::{ConnectionChoice, TableChoice, TxControlAction, TxControlChoice}, + TableScenario, TableWorkloadInteraction, +}; + +/// Streaming planner for table-oriented workloads. +/// +/// The stream keeps only generator state plus a small pending queue, so long +/// duration runs do not need to materialize the full interaction list in +/// memory up front. +#[derive(Clone, Debug)] +pub struct TableWorkloadSource { + // Deterministic source for all planner choices. + rng: DstRng, + // Scenario-specific workload policy layered on top of the shared model. + scenario: S, + // Generator-side model used to decide what interactions are legal. + model: GenerationModel, + num_connections: usize, + // Soft budget for scenario-generated interactions. Finish mode may emit a + // few extra commit/follow-up interactions to close open transactions. + target_interactions: usize, + emitted: usize, + // When the budget is exhausted, we walk connections in order and commit any + // still-open transaction so the stream ends in a clean state. + finalize_conn: usize, + // Scenario code can enqueue a burst of interactions at once: for example a + // mutation followed by one or more property checks. + pending: VecDeque, + finished: bool, +} + +/// Narrow helper passed to scenario code so scenario-specific planning can +/// inspect the current model and enqueue interactions without owning the whole +/// stream state machine. 
+pub struct ScenarioPlanner<'a> { + rng: &'a mut DstRng, + model: &'a mut GenerationModel, + pending: &'a mut VecDeque, +} + +impl<'a> ScenarioPlanner<'a> { + pub fn choose_index(&mut self, len: usize) -> usize { + Index::new(len).sample(self.rng) + } + + pub fn choose_table(&mut self) -> usize { + TableChoice { + table_count: self.model.schema.tables.len(), + } + .sample(self.rng) + } + + pub fn roll_percent(&mut self, percent: usize) -> bool { + Percent::new(percent).sample(self.rng) + } + + pub fn active_writer(&self) -> Option { + self.model.active_writer() + } + + pub fn has_read_tx(&self, conn: SessionId) -> bool { + self.model.has_read_tx(conn) + } + + pub fn any_read_tx(&self) -> bool { + self.model.any_read_tx() + } + + pub fn begin_read_tx(&mut self, conn: SessionId) { + self.model.begin_read_tx(conn); + } + + pub fn release_read_tx(&mut self, conn: SessionId) { + self.model.release_read_tx(conn); + } + + pub fn begin_tx(&mut self, conn: SessionId) { + self.model.begin_tx(conn); + } + + pub fn commit_tx(&mut self, conn: SessionId) { + self.model.commit(conn); + } + + pub fn rollback_tx(&mut self, conn: SessionId) { + self.model.rollback(conn); + } + + /// Tries to emit one transaction control interaction for `conn`. + /// + /// The shared generator owns transaction lifecycle so scenario code can + /// focus on domain operations like inserts, deletes, and range checks. 
+ pub fn maybe_control_tx( + &mut self, + conn: SessionId, + begin_pct: usize, + commit_pct: usize, + rollback_pct: usize, + ) -> bool { + match (TxControlChoice { + begin_pct, + commit_pct, + rollback_pct, + }) + .sample(self.rng) + { + TxControlAction::Begin + if !self.model.connections[conn.as_index()].in_tx && !self.model.has_read_tx(conn) => + { + if self.model.active_writer().is_none() && !self.model.any_read_tx() { + self.model.begin_tx(conn); + self.pending.push_back(TableWorkloadInteraction::begin_tx(conn)); + } else { + self.pending + .push_back(TableWorkloadInteraction::begin_tx_conflict(conn)); + } + true + } + TxControlAction::Commit if self.model.connections[conn.as_index()].in_tx => { + self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); + true + } + TxControlAction::Rollback if self.model.connections[conn.as_index()].in_tx => { + self.model.rollback(conn); + self.pending.push_back(TableWorkloadInteraction::rollback_tx(conn)); + true + } + _ => false, + } + } + + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + self.model.visible_rows(conn, table) + } + + pub fn table_plan(&self, table: usize) -> &TablePlan { + &self.model.schema.tables[table] + } + + pub fn make_row(&mut self, table: usize) -> crate::schema::SimRow { + self.model.make_row(self.rng, table) + } + + pub fn insert(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { + self.model.insert(conn, table, row); + } + + pub fn batch_insert(&mut self, conn: SessionId, table: usize, rows: &[crate::schema::SimRow]) { + self.model.batch_insert(conn, table, rows); + } + + pub fn delete(&mut self, conn: SessionId, table: usize, row: crate::schema::SimRow) { + self.model.delete(conn, table, row); + } + + pub fn batch_delete(&mut self, conn: SessionId, table: usize, rows: &[crate::schema::SimRow]) { + self.model.batch_delete(conn, table, rows); + } + + pub fn add_column(&mut self, table: usize, column: ColumnPlan, default: 
spacetimedb_sats::AlgebraicValue) { + self.model.add_column(table, column, default); + } + + pub fn add_index(&mut self, table: usize, cols: Vec) { + self.model.add_index(table, cols); + } + + pub fn absent_row(&mut self, conn: SessionId, table: usize) -> crate::schema::SimRow { + self.model.absent_row(self.rng, conn, table) + } + + pub fn unique_key_conflict_row( + &mut self, + table: usize, + source: &crate::schema::SimRow, + ) -> Option { + self.model.unique_key_conflict_row(self.rng, table, source) + } + + pub fn push_interaction(&mut self, interaction: TableWorkloadInteraction) { + self.pending.push_back(interaction); + } +} + +impl TableWorkloadSource { + pub fn new( + seed: DstSeed, + scenario: S, + schema: SchemaPlan, + num_connections: usize, + target_interactions: usize, + ) -> Self { + Self { + rng: seed.fork(17).rng(), + scenario, + model: GenerationModel::new(&schema, num_connections, seed), + num_connections, + target_interactions, + emitted: 0, + finalize_conn: 0, + pending: VecDeque::new(), + finished: false, + } + } + + pub fn request_finish(&mut self) { + self.target_interactions = self.emitted; + } + + pub fn has_open_read_tx(&self) -> bool { + self.model.any_read_tx() + } + + pub fn has_open_write_tx(&self) -> bool { + self.model.active_writer().is_some() + } + + fn fill_pending(&mut self) { + if self.emitted >= self.target_interactions { + // Once the workload budget is spent, stop asking the scenario for + // more work and only flush any open transaction state. 
+ while self.finalize_conn < self.num_connections { + let conn = SessionId::from_index(self.finalize_conn); + self.finalize_conn += 1; + if self.model.connections[conn.as_index()].in_tx { + self.model.commit(conn); + self.pending.push_back(TableWorkloadInteraction::commit_tx(conn)); + return; + } + if self.model.has_read_tx(conn) { + self.model.release_read_tx(conn); + self.pending.push_back(TableWorkloadInteraction::release_read_tx(conn)); + return; + } + } + self.finished = true; + return; + } + + // Transactions stay open across interactions, but each API call is a + // separate synchronous step. Always choose a connection uniformly so + // later steps can naturally observe lock contention instead of the + // planner steering around open readers or writers. + let conn = ConnectionChoice { + connection_count: self.num_connections, + } + .sample(&mut self.rng); + let mut planner = ScenarioPlanner { + rng: &mut self.rng, + model: &mut self.model, + pending: &mut self.pending, + }; + self.scenario.fill_pending(&mut planner, conn); + } +} + +impl TableWorkloadSource { + pub fn pull_next_interaction(&mut self) -> Option { + loop { + // Scenario planning fills `pending` in bursts, but the iterator + // surface stays one interaction at a time. 
+ if let Some(interaction) = self.pending.pop_front() { + self.emitted += 1; + return Some(interaction); + } + + if self.finished { + return None; + } + + self.fill_pending(); + } + } +} + +impl WorkloadSource for TableWorkloadSource { + type Interaction = TableWorkloadInteraction; + + fn next_interaction(&mut self) -> Option { + self.pull_next_interaction() + } + + fn request_finish(&mut self) { + Self::request_finish(self); + } +} + +impl Iterator for TableWorkloadSource { + type Item = TableWorkloadInteraction; + + fn next(&mut self) -> Option { + self.pull_next_interaction() + } +} diff --git a/crates/dst/src/workload/table_ops/mod.rs b/crates/dst/src/workload/table_ops/mod.rs new file mode 100644 index 00000000000..f75470bf56a --- /dev/null +++ b/crates/dst/src/workload/table_ops/mod.rs @@ -0,0 +1,15 @@ +//! Shared transactional table workload used by table-oriented targets. + +mod generation; +mod model; +mod scenarios; +pub(crate) mod strategies; +mod types; + +#[cfg(test)] +pub(crate) use generation::ScenarioPlanner; +pub(crate) use generation::TableWorkloadSource; +pub(crate) use model::{PredictedOutcome, TableOracle}; +pub use scenarios::TableScenarioId; +pub(crate) use types::{ConnectionWriteState, TableScenario}; +pub use types::{TableErrorKind, TableInteractionCase, TableOperation, TableWorkloadInteraction, TableWorkloadOutcome}; diff --git a/crates/dst/src/workload/table_ops/model.rs b/crates/dst/src/workload/table_ops/model.rs new file mode 100644 index 00000000000..0b498c3ef13 --- /dev/null +++ b/crates/dst/src/workload/table_ops/model.rs @@ -0,0 +1,709 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + schema::{distinct_value_for_type, generate_value_for_type, ColumnPlan, SchemaPlan, SimRow}, + seed::{DstRng, DstSeed}, +}; + +use super::{TableErrorKind, TableOperation}; + +/// Generator-side model of committed rows plus per-connection pending writes. 
+/// +/// This model is used only while producing interactions. It lets the planner +/// pick valid deletes, synthesize visibility checks, and enforce the +/// single-writer discipline before the real target executes anything. +#[derive(Clone, Debug)] +pub(crate) struct GenerationModel { + pub(crate) schema: SchemaPlan, + pub(crate) connections: Vec, + committed: Vec>, + next_ids: Vec, + active_writer: Option, +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct PendingConnection { + pub(crate) in_tx: bool, + read_snapshot: Option>>, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, +} + +impl GenerationModel { + pub(crate) fn new(schema: &SchemaPlan, num_connections: usize, seed: DstSeed) -> Self { + Self { + schema: schema.clone(), + connections: vec![PendingConnection::default(); num_connections], + committed: vec![Vec::new(); schema.tables.len()], + next_ids: (0..schema.tables.len()) + .map(|idx| seed.fork(idx as u64 + 100).0) + .collect(), + active_writer: None, + } + } + + pub(crate) fn make_row(&mut self, rng: &mut DstRng, table: usize) -> SimRow { + let table_plan = &self.schema.tables[table]; + let id = self.next_ids[table]; + self.next_ids[table] = self.next_ids[table].wrapping_add(1).max(1); + let mut values = vec![AlgebraicValue::U64(id)]; + for (idx, col) in table_plan.columns.iter().enumerate().skip(1) { + values.push(generate_value_for_type(rng, &col.ty, idx)); + } + SimRow { values } + } + + pub(crate) fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let conn_idx = conn.as_index(); + if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { + return snapshot[table].clone(); + } + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn_idx]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in &pending.staged_inserts { + if *pending_table == table 
{ + rows.push(row.clone()); + } + } + rows + } + + pub(crate) fn absent_row(&mut self, rng: &mut DstRng, conn: SessionId, table: usize) -> SimRow { + let mut row = self.make_row(rng, table); + while self.visible_rows(conn, table).iter().any(|candidate| candidate == &row) { + row = self.make_row(rng, table); + } + row + } + + pub(crate) fn unique_key_conflict_row(&self, rng: &mut DstRng, table: usize, source: &SimRow) -> Option { + let table_plan = &self.schema.tables[table]; + let value_count = source.values.len().min(table_plan.columns.len()); + if value_count <= 1 { + return None; + } + + let col_idx = 1 + rng.index(value_count - 1); + let mut row = source.clone(); + row.values[col_idx] = distinct_value_for_type(&table_plan.columns[col_idx].ty, &row.values[col_idx]); + Some(row) + } + + pub(crate) fn active_writer(&self) -> Option { + self.active_writer + } + + pub(crate) fn has_read_tx(&self, conn: SessionId) -> bool { + self.connections[conn.as_index()].read_snapshot.is_some() + } + + pub(crate) fn any_read_tx(&self) -> bool { + self.connections + .iter() + .any(|connection| connection.read_snapshot.is_some()) + } + + pub(crate) fn begin_read_tx(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; + assert!(!pending.in_tx, "connection already has write transaction"); + assert!( + pending.read_snapshot.is_none(), + "connection already has read transaction" + ); + pending.read_snapshot = Some(self.committed.clone()); + } + + pub(crate) fn release_read_tx(&mut self, conn: SessionId) { + assert!( + self.connections[conn.as_index()].read_snapshot.take().is_some(), + "connection has no read transaction" + ); + } + + pub(crate) fn begin_tx(&mut self, conn: SessionId) { + assert!(self.active_writer.is_none(), "single writer already active"); + let pending = &mut self.connections[conn.as_index()]; + assert!(!pending.in_tx, "connection already in transaction"); + assert!( + pending.read_snapshot.is_none(), + "connection already has read 
transaction" + ); + pending.in_tx = true; + self.active_writer = Some(conn); + } + + pub(crate) fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { + let pending = &mut self.connections[conn.as_index()]; + if pending.in_tx { + pending.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row); + } + } + + pub(crate) fn batch_insert(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + self.insert(conn, table, row.clone()); + } + } + + pub(crate) fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { + let pending = &mut self.connections[conn.as_index()]; + if pending.in_tx { + pending + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + pending.staged_deletes.push((table, row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } + + pub(crate) fn batch_delete(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + self.delete(conn, table, row.clone()); + } + } + + pub(crate) fn commit(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; + let inserts = std::mem::take(&mut pending.staged_inserts); + let deletes = std::mem::take(&mut pending.staged_deletes); + pending.in_tx = false; + self.active_writer = None; + + for (table, row) in &deletes { + self.committed[*table].retain(|candidate| candidate != row); + } + for (table, row) in &inserts { + self.committed[*table].push(row.clone()); + } + } + + pub(crate) fn rollback(&mut self, conn: SessionId) { + let pending = &mut self.connections[conn.as_index()]; + pending.staged_inserts.clear(); + pending.staged_deletes.clear(); + pending.in_tx = false; + self.active_writer = None; + } + + pub(crate) fn add_column(&mut self, table: usize, column: ColumnPlan, default: AlgebraicValue) { + self.schema.tables[table].columns.push(column); + for row in &mut self.committed[table] { + 
row.values.push(default.clone()); + } + for connection in &mut self.connections { + for (pending_table, row) in connection + .staged_inserts + .iter_mut() + .chain(connection.staged_deletes.iter_mut()) + { + if *pending_table == table { + row.values.push(default.clone()); + } + } + if let Some(snapshot) = &mut connection.read_snapshot { + for row in &mut snapshot[table] { + row.values.push(default.clone()); + } + } + } + } + + pub(crate) fn add_index(&mut self, table: usize, cols: Vec) { + let indexes = &mut self.schema.tables[table].extra_indexes; + if !indexes.contains(&cols) { + indexes.push(cols); + } + } +} + +/// Replay model used as the oracle for table workload properties. +/// +/// Target property runtimes apply every table interaction here in parallel with +/// real target execution, then compare the collected target outcome against this +/// model at the end of the run. +#[derive(Clone, Debug)] +pub struct TableOracle { + committed: Vec>, + connections: Vec, + active_writer: Option, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum PredictedOutcome { + Applied, + NoMutation { + subject: Option<(SessionId, usize)>, + }, + Error { + kind: TableErrorKind, + subject: Option<(SessionId, usize)>, + }, +} + +#[derive(Clone, Debug, Default)] +struct ExpectedConnection { + in_tx: bool, + read_snapshot: Option>>, + staged_inserts: Vec<(usize, SimRow)>, + staged_deletes: Vec<(usize, SimRow)>, +} + +impl TableOracle { + pub fn new(table_count: usize, connection_count: usize) -> Self { + Self { + committed: vec![Vec::new(); table_count], + connections: vec![ExpectedConnection::default(); connection_count], + active_writer: None, + } + } + + pub fn predict(&self, op: &TableOperation) -> Result { + match op { + TableOperation::BeginTx { conn } => { + self.ensure_connection(*conn)?; + if self.connections[conn.as_index()].read_snapshot.is_some() { + return Err(format!("connection {conn} cannot begin write tx with open read tx")); + } + if 
self.connections[conn.as_index()].in_tx { + return Err(format!("connection {conn} already has open write tx")); + } + if self.active_writer.is_some() + || self + .connections + .iter() + .any(|connection| connection.read_snapshot.is_some()) + { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + }); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::BeginReadTx { conn } => { + self.ensure_connection(*conn)?; + let state = &self.connections[conn.as_index()]; + if state.in_tx || state.read_snapshot.is_some() { + return Err(format!("connection {conn} cannot begin read tx in current state")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::ReleaseReadTx { conn } => { + self.ensure_connection(*conn)?; + if self.connections[conn.as_index()].read_snapshot.is_none() { + return Err(format!("connection {conn} has no read tx to release")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::CommitTx { conn } | TableOperation::RollbackTx { conn } => { + self.ensure_connection(*conn)?; + if self.active_writer != Some(*conn) || !self.connections[conn.as_index()].in_tx { + return Err(format!("connection {conn} does not own an open write tx")); + } + Ok(PredictedOutcome::Applied) + } + TableOperation::InsertRows { conn, table, rows } => self.predict_insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.predict_delete_rows(*conn, *table, rows), + TableOperation::AddColumn { .. } | TableOperation::AddIndex { .. } => Ok(PredictedOutcome::Applied), + TableOperation::PointLookup { .. } + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. 
} => Ok(PredictedOutcome::NoMutation { subject: None }), + } + } + + pub fn apply(&mut self, op: &TableOperation) { + match op { + TableOperation::BeginTx { conn } => { + assert!( + self.active_writer.is_none(), + "multiple concurrent writers in table oracle" + ); + self.connections[conn.as_index()].in_tx = true; + self.active_writer = Some(*conn); + } + TableOperation::BeginReadTx { conn } => { + let state = &mut self.connections[conn.as_index()]; + assert!(!state.in_tx, "read tx started while write tx is open"); + assert!(state.read_snapshot.is_none(), "nested read tx in table oracle"); + state.read_snapshot = Some(self.committed.clone()); + } + TableOperation::ReleaseReadTx { conn } => { + assert!( + self.connections[conn.as_index()].read_snapshot.take().is_some(), + "release read tx without open read tx" + ); + } + TableOperation::CommitTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "commit by non-owner in table oracle"); + let state = &mut self.connections[conn.as_index()]; + for (table, row) in state.staged_deletes.drain(..) { + self.committed[table].retain(|candidate| *candidate != row); + } + for (table, row) in state.staged_inserts.drain(..) { + self.committed[table].push(row); + } + state.in_tx = false; + self.active_writer = None; + } + TableOperation::RollbackTx { conn } => { + assert_eq!(self.active_writer, Some(*conn), "rollback by non-owner in table oracle"); + let state = &mut self.connections[conn.as_index()]; + state.staged_inserts.clear(); + state.staged_deletes.clear(); + state.in_tx = false; + self.active_writer = None; + } + TableOperation::InsertRows { conn, table, rows } => self.insert_rows(*conn, *table, rows), + TableOperation::DeleteRows { conn, table, rows } => self.delete_rows(*conn, *table, rows), + TableOperation::AddColumn { + table, + column: _, + default, + .. + } => { + self.add_column(*table, default.clone()); + } + TableOperation::AddIndex { .. } => {} + TableOperation::PointLookup { .. 
} + | TableOperation::PredicateCount { .. } + | TableOperation::RangeScan { .. } + | TableOperation::FullScan { .. } => {} + } + } + + fn predict_insert_rows(&self, conn: SessionId, table: usize, rows: &[SimRow]) -> Result { + if let Some(outcome) = self.predict_write_access(conn, table)? { + return Ok(outcome); + } + + let mut visible = self.visible_rows(conn, table); + let mut mutates = false; + for row in rows { + let Some(id) = row.id() else { + return Err(format!("insert row for table {table} is missing primary id: {row:?}")); + }; + match visible.iter().find(|candidate| candidate.id() == Some(id)) { + Some(existing) if existing == row => {} + Some(_) => { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::UniqueConstraintViolation, + subject: Some((conn, table)), + }); + } + None => { + mutates = true; + visible.push(row.clone()); + } + } + } + + if mutates { + Ok(PredictedOutcome::Applied) + } else { + Ok(PredictedOutcome::NoMutation { + subject: Some((conn, table)), + }) + } + } + + fn predict_delete_rows(&self, conn: SessionId, table: usize, rows: &[SimRow]) -> Result { + if let Some(outcome) = self.predict_write_access(conn, table)? 
{ + return Ok(outcome); + } + + let mut visible = self.visible_rows(conn, table); + for row in rows { + let Some(idx) = visible.iter().position(|candidate| candidate == row) else { + return Ok(PredictedOutcome::Error { + kind: TableErrorKind::MissingRow, + subject: Some((conn, table)), + }); + }; + visible.remove(idx); + } + + Ok(PredictedOutcome::Applied) + } + + fn predict_write_access(&self, conn: SessionId, table: usize) -> Result, String> { + self.ensure_connection(conn)?; + self.ensure_table(table)?; + if self.connections[conn.as_index()].read_snapshot.is_some() { + return Err(format!("connection {conn} cannot write while read tx is open")); + } + if let Some(owner) = self.active_writer + && owner != conn + { + return Ok(Some(PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + })); + } + Ok(None) + } + + fn ensure_connection(&self, conn: SessionId) -> Result<(), String> { + self.connections + .get(conn.as_index()) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + fn ensure_table(&self, table: usize) -> Result<(), String> { + self.committed + .get(table) + .map(|_| ()) + .ok_or_else(|| format!("table {table} out of range")) + } + + pub fn visible_rows(&self, conn: SessionId, table: usize) -> Vec { + let conn_idx = conn.as_index(); + if let Some(snapshot) = &self.connections[conn_idx].read_snapshot { + return snapshot[table].clone(); + } + let mut rows = self.committed[table].clone(); + let pending = &self.connections[conn_idx]; + for (pending_table, row) in &pending.staged_deletes { + if *pending_table == table { + rows.retain(|candidate| candidate != row); + } + } + for (pending_table, row) in &pending.staged_inserts { + if *pending_table == table { + rows.push(row.clone()); + } + } + rows + } + + pub fn lookup_by_id(&self, conn: SessionId, table: usize, id: u64) -> Option { + self.visible_rows(conn, table) + .into_iter() + .find(|row| row.id() == Some(id)) + } + + pub fn predicate_count(&self, 
conn: SessionId, table: usize, col: u16, value: &AlgebraicValue) -> usize { + self.visible_rows(conn, table) + .into_iter() + .filter(|row| row.values.get(col as usize) == Some(value)) + .count() + } + + pub fn range_scan( + &self, + conn: SessionId, + table: usize, + cols: &[u16], + lower: &Bound, + upper: &Bound, + ) -> Vec { + let mut rows = self + .visible_rows(conn, table) + .into_iter() + .filter(|row| { + let key = row.project_key(cols).to_algebraic_value(); + bound_contains_lower(lower, &key) && bound_contains_upper(upper, &key) + }) + .collect::>(); + rows.sort_by(|lhs, rhs| { + lhs.project_key(cols) + .to_algebraic_value() + .cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + rows + } + + pub fn committed_rows(mut self) -> Vec> { + for table_rows in &mut self.committed { + table_rows.sort_by_key(|row| row.id().unwrap_or_default()); + } + self.committed + } + + fn insert(&mut self, conn: SessionId, table: usize, row: SimRow) { + let state = &mut self.connections[conn.as_index()]; + if state.in_tx { + state.staged_inserts.push((table, row)); + } else { + self.committed[table].push(row); + } + } + + fn insert_rows(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + if self + .visible_rows(conn, table) + .into_iter() + .any(|candidate| candidate == *row) + { + continue; + } + self.insert(conn, table, row.clone()); + } + } + + fn delete(&mut self, conn: SessionId, table: usize, row: SimRow) { + let state = &mut self.connections[conn.as_index()]; + if state.in_tx { + state + .staged_inserts + .retain(|(pending_table, candidate)| !(*pending_table == table && *candidate == row)); + state.staged_deletes.push((table, row)); + } else { + self.committed[table].retain(|candidate| *candidate != row); + } + } + + fn delete_rows(&mut self, conn: SessionId, table: usize, rows: &[SimRow]) { + for row in rows { + self.delete(conn, table, row.clone()); + } + } + + fn add_column(&mut self, 
table: usize, default: AlgebraicValue) { + for row in &mut self.committed[table] { + row.values.push(default.clone()); + } + for connection in &mut self.connections { + for (pending_table, row) in connection + .staged_inserts + .iter_mut() + .chain(connection.staged_deletes.iter_mut()) + { + if *pending_table == table { + row.values.push(default.clone()); + } + } + if let Some(snapshot) = &mut connection.read_snapshot { + for row in &mut snapshot[table] { + row.values.push(default.clone()); + } + } + } + } +} + +fn bound_contains_lower(bound: &Bound, key: &AlgebraicValue) -> bool { + match bound { + Bound::Included(value) => key >= value, + Bound::Excluded(value) => key > value, + Bound::Unbounded => true, + } +} + +fn bound_contains_upper(bound: &Bound, key: &AlgebraicValue) -> bool { + match bound { + Bound::Included(value) => key <= value, + Bound::Excluded(value) => key < value, + Bound::Unbounded => true, + } +} + +#[cfg(test)] +mod tests { + use spacetimedb_sats::AlgebraicValue; + + use crate::{client::SessionId, schema::SimRow}; + + use super::{PredictedOutcome, TableErrorKind, TableOperation, TableOracle}; + + fn row(id: u64) -> SimRow { + SimRow { + values: vec![AlgebraicValue::U64(id)], + } + } + + #[test] + fn write_conflict_prediction_does_not_request_blocking_visibility_check() { + let owner = SessionId::from_index(0); + let contender = SessionId::from_index(1); + let mut oracle = TableOracle::new(1, 2); + oracle.apply(&TableOperation::BeginTx { conn: owner }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn: contender, + table: 0, + rows: vec![row(1)], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::Error { + kind: TableErrorKind::WriteConflict, + subject: None, + } + ); + } + + #[test] + fn exact_duplicate_insert_is_predicted_as_no_mutation() { + let conn = SessionId::from_index(0); + let mut oracle = TableOracle::new(1, 1); + oracle.apply(&TableOperation::InsertRows { + conn, + table: 0, + rows: 
vec![row(1)], + }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![row(1)], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::NoMutation { + subject: Some((conn, 0)), + } + ); + } + + #[test] + fn same_id_different_row_is_predicted_as_unique_constraint_violation() { + let conn = SessionId::from_index(0); + let mut oracle = TableOracle::new(1, 1); + oracle.apply(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![SimRow { + values: vec![AlgebraicValue::U64(1), AlgebraicValue::U64(10)], + }], + }); + + let prediction = oracle + .predict(&TableOperation::InsertRows { + conn, + table: 0, + rows: vec![SimRow { + values: vec![AlgebraicValue::U64(1), AlgebraicValue::U64(11)], + }], + }) + .unwrap(); + + assert_eq!( + prediction, + PredictedOutcome::Error { + kind: TableErrorKind::UniqueConstraintViolation, + subject: Some((conn, 0)), + } + ); + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/banking.rs b/crates/dst/src/workload/table_ops/scenarios/banking.rs new file mode 100644 index 00000000000..534f8ca504c --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/banking.rs @@ -0,0 +1,108 @@ +use spacetimedb_sats::AlgebraicType; + +use crate::{ + client::SessionId, + schema::{ColumnPlan, SchemaPlan, TablePlan}, +}; + +use super::super::{generation::ScenarioPlanner, TableWorkloadInteraction, TableWorkloadOutcome}; + +pub fn generate_schema() -> SchemaPlan { + SchemaPlan { + tables: vec![ + TablePlan { + name: "debit_accounts".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }, + ColumnPlan { + name: "balance".into(), + ty: AlgebraicType::U64, + }, + ], + extra_indexes: vec![vec![1]], + }, + TablePlan { + name: "credit_accounts".into(), + columns: vec![ + ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }, + ColumnPlan { + name: "balance".into(), + ty: AlgebraicType::U64, + }, + ], + extra_indexes: vec![vec![1]], + 
}, + ], + } +} + +pub fn validate_outcome(schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + let debit_idx = schema + .tables + .iter() + .position(|table| table.name == "debit_accounts") + .ok_or_else(|| anyhow::anyhow!("missing debit_accounts table"))?; + let credit_idx = schema + .tables + .iter() + .position(|table| table.name == "credit_accounts") + .ok_or_else(|| anyhow::anyhow!("missing credit_accounts table"))?; + + let debit_rows = outcome + .final_rows + .get(debit_idx) + .ok_or_else(|| anyhow::anyhow!("missing debit_accounts rows"))?; + let credit_rows = outcome + .final_rows + .get(credit_idx) + .ok_or_else(|| anyhow::anyhow!("missing credit_accounts rows"))?; + + if debit_rows != credit_rows { + anyhow::bail!("banking tables diverged: debit={debit_rows:?} credit={credit_rows:?}"); + } + Ok(()) +} + +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + if planner.maybe_control_tx(conn, 25, 20, 10) { + return; + } + + let debit_rows = planner.visible_rows(conn, 0); + let choose_insert = debit_rows.is_empty() || planner.roll_percent(65); + let wrap_pair_in_tx = planner.active_writer().is_none(); + if wrap_pair_in_tx { + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + } + if choose_insert { + let row = planner.make_row(0); + let mirror = row.clone(); + planner.insert(conn, 0, row.clone()); + planner.insert(conn, 1, mirror.clone()); + planner.push_interaction(TableWorkloadInteraction::insert(conn, 0, row.clone())); + planner.push_interaction(TableWorkloadInteraction::insert(conn, 1, mirror.clone())); + if wrap_pair_in_tx { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } + return; + } + + let row = debit_rows[planner.choose_index(debit_rows.len())].clone(); + let mirror = row.clone(); + planner.delete(conn, 0, row.clone()); + planner.delete(conn, 1, mirror.clone()); + 
planner.push_interaction(TableWorkloadInteraction::delete(conn, 0, row.clone())); + planner.push_interaction(TableWorkloadInteraction::delete(conn, 1, mirror.clone())); + if wrap_pair_in_tx { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/mod.rs b/crates/dst/src/workload/table_ops/scenarios/mod.rs new file mode 100644 index 00000000000..ac024a87655 --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/mod.rs @@ -0,0 +1,91 @@ +mod banking; +mod random_crud; + +use crate::{client::SessionId, schema::SchemaPlan, seed::DstRng}; + +use super::{generation::ScenarioPlanner, TableScenario, TableWorkloadOutcome}; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct RandomCrudScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct IndexedRangesScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct BankingScenario; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum TableScenarioId { + #[default] + RandomCrud, + IndexedRanges, + Banking, +} + +impl TableScenario for RandomCrudScenario { + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + random_crud::generate_schema(rng) + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + random_crud::validate_outcome(schema, outcome) + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + random_crud::fill_pending(planner, conn); + } +} + +impl TableScenario for BankingScenario { + fn generate_schema(&self, _rng: &mut DstRng) -> SchemaPlan { + banking::generate_schema() + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + banking::validate_outcome(schema, outcome) + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + 
banking::fill_pending(planner, conn); + } +} + +impl TableScenario for IndexedRangesScenario { + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + random_crud::generate_indexed_ranges_schema(rng) + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + random_crud::validate_outcome(schema, outcome) + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + random_crud::fill_pending_indexed_ranges(planner, conn); + } +} + +impl TableScenario for TableScenarioId { + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan { + match self { + Self::RandomCrud => RandomCrudScenario.generate_schema(rng), + Self::IndexedRanges => IndexedRangesScenario.generate_schema(rng), + Self::Banking => BankingScenario.generate_schema(rng), + } + } + + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + match self { + Self::RandomCrud => RandomCrudScenario.validate_outcome(schema, outcome), + Self::IndexedRanges => IndexedRangesScenario.validate_outcome(schema, outcome), + Self::Banking => BankingScenario.validate_outcome(schema, outcome), + } + } + + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + match self { + Self::RandomCrud => RandomCrudScenario.fill_pending(planner, conn), + Self::IndexedRanges => IndexedRangesScenario.fill_pending(planner, conn), + Self::Banking => BankingScenario.fill_pending(planner, conn), + } + } +} diff --git a/crates/dst/src/workload/table_ops/scenarios/random_crud.rs b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs new file mode 100644 index 00000000000..49c96f150a9 --- /dev/null +++ b/crates/dst/src/workload/table_ops/scenarios/random_crud.rs @@ -0,0 +1,489 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicType; + +use crate::{ + client::SessionId, + schema::{default_value_for_type, generate_supported_type, ColumnPlan, SchemaPlan, SimRow, TablePlan}, 
+ seed::DstRng, + workload::strategy::{Index, Percent, Strategy}, +}; + +use super::super::{generation::ScenarioPlanner, TableInteractionCase, TableWorkloadInteraction, TableWorkloadOutcome}; + +#[derive(Clone, Copy)] +struct TableWorkloadProfile { + min_tables: usize, + table_count_choices: usize, + min_extra_cols: usize, + extra_col_choices: usize, + preferred_range_cols: usize, + prefer_range_compatible_pct: usize, + prefer_u64_pct: usize, + single_index_pct: usize, + composite2_index_pct: usize, + composite3_index_pct: usize, + insert_pct: usize, + begin_tx_pct: usize, + commit_tx_pct: usize, + rollback_tx_pct: usize, + begin_read_tx_pct: usize, + release_read_tx_pct: usize, + empty_tx_pct: usize, + exact_duplicate_insert_pct: usize, + unique_key_conflict_insert_pct: usize, + add_column_pct: usize, + add_index_pct: usize, +} + +const RANDOM_CRUD_PROFILE: TableWorkloadProfile = TableWorkloadProfile { + min_tables: 2, + table_count_choices: 3, + min_extra_cols: 1, + extra_col_choices: 4, + preferred_range_cols: 2, + prefer_range_compatible_pct: 65, + prefer_u64_pct: 75, + single_index_pct: 70, + composite2_index_pct: 65, + composite3_index_pct: 30, + insert_pct: 65, + begin_tx_pct: 20, + commit_tx_pct: 15, + rollback_tx_pct: 10, + begin_read_tx_pct: 4, + release_read_tx_pct: 35, + empty_tx_pct: 2, + exact_duplicate_insert_pct: 4, + unique_key_conflict_insert_pct: 4, + add_column_pct: 1, + add_index_pct: 2, +}; + +const INDEXED_RANGES_PROFILE: TableWorkloadProfile = TableWorkloadProfile { + min_tables: 2, + table_count_choices: 2, + min_extra_cols: 3, + extra_col_choices: 3, + preferred_range_cols: 3, + prefer_range_compatible_pct: 90, + prefer_u64_pct: 90, + single_index_pct: 100, + composite2_index_pct: 100, + composite3_index_pct: 75, + insert_pct: 55, + begin_tx_pct: 20, + commit_tx_pct: 15, + rollback_tx_pct: 8, + begin_read_tx_pct: 6, + release_read_tx_pct: 30, + empty_tx_pct: 2, + exact_duplicate_insert_pct: 3, + unique_key_conflict_insert_pct: 4, + 
add_column_pct: 2, + add_index_pct: 4, +}; + +pub fn generate_schema(rng: &mut DstRng) -> SchemaPlan { + generate_schema_with_profile(rng, RANDOM_CRUD_PROFILE) +} + +pub fn generate_indexed_ranges_schema(rng: &mut DstRng) -> SchemaPlan { + generate_schema_with_profile(rng, INDEXED_RANGES_PROFILE) +} + +fn generate_schema_with_profile(rng: &mut DstRng, profile: TableWorkloadProfile) -> SchemaPlan { + let table_count = profile.min_tables + Index::new(profile.table_count_choices).sample(rng); + let mut tables = Vec::with_capacity(table_count); + + for table_idx in 0..table_count { + let extra_cols = profile.min_extra_cols + Index::new(profile.extra_col_choices).sample(rng); + let mut columns = vec![ColumnPlan { + name: "id".into(), + ty: AlgebraicType::U64, + }]; + for col_idx in 0..extra_cols { + let ty = if col_idx < profile.preferred_range_cols + && Percent::new(profile.prefer_range_compatible_pct).sample(rng) + { + if Percent::new(profile.prefer_u64_pct).sample(rng) { + AlgebraicType::U64 + } else { + AlgebraicType::Bool + } + } else { + generate_supported_type(rng) + }; + columns.push(ColumnPlan { + name: format!("c{table_idx}_{col_idx}"), + ty, + }); + } + let mut extra_indexes = Vec::new(); + let non_primary_range_cols = columns + .iter() + .enumerate() + .skip(1) + .filter(|(_, col)| is_range_compatible(&col.ty)) + .map(|(idx, _)| idx as u16) + .collect::>(); + if let Some(&col) = non_primary_range_cols.first() + && Percent::new(profile.single_index_pct).sample(rng) + { + extra_indexes.push(vec![col]); + } + if non_primary_range_cols.len() >= 2 && Percent::new(profile.composite2_index_pct).sample(rng) { + extra_indexes.push(non_primary_range_cols[..2].to_vec()); + } + if non_primary_range_cols.len() >= 3 && Percent::new(profile.composite3_index_pct).sample(rng) { + extra_indexes.push(non_primary_range_cols[..3].to_vec()); + } + extra_indexes.sort(); + extra_indexes.dedup(); + tables.push(TablePlan { + name: format!("dst_table_{table_idx}_{}", rng.next_u64() % 
10_000), + columns, + extra_indexes, + }); + } + + SchemaPlan { tables } +} + +pub fn validate_outcome(_schema: &SchemaPlan, _outcome: &TableWorkloadOutcome) -> anyhow::Result<()> { + Ok(()) +} + +pub fn fill_pending(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + fill_pending_with_profile(planner, conn, RANDOM_CRUD_PROFILE); +} + +pub fn fill_pending_indexed_ranges(planner: &mut ScenarioPlanner<'_>, conn: SessionId) { + fill_pending_with_profile(planner, conn, INDEXED_RANGES_PROFILE); +} + +fn fill_pending_with_profile(planner: &mut ScenarioPlanner<'_>, conn: SessionId, profile: TableWorkloadProfile) { + if planner.has_read_tx(conn) { + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if planner.roll_percent(profile.release_read_tx_pct) { + planner.release_read_tx(conn); + planner.push_interaction(TableWorkloadInteraction::release_read_tx(conn)); + } else if !emit_query(planner, conn, table, &visible_rows) { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + } + return; + } + + if planner.active_writer().is_none() { + if planner.roll_percent(profile.empty_tx_pct) { + let rollback = planner.roll_percent(50); + planner.begin_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_tx(conn)); + if rollback { + planner.rollback_tx(conn); + planner.push_interaction(TableWorkloadInteraction::rollback_tx(conn)); + } else { + planner.commit_tx(conn); + planner.push_interaction(TableWorkloadInteraction::commit_tx(conn)); + } + return; + } + + if planner.roll_percent(profile.begin_read_tx_pct) { + planner.begin_read_tx(conn); + planner.push_interaction(TableWorkloadInteraction::begin_read_tx(conn)); + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if !emit_query(planner, conn, table, &visible_rows) { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + } + return; + } + } + + if planner.maybe_control_tx( + 
conn, + profile.begin_tx_pct, + profile.commit_tx_pct, + profile.rollback_tx_pct, + ) { + return; + } + + let table = planner.choose_table(); + let visible_rows = planner.visible_rows(conn, table); + if planner.active_writer().is_none() + && !planner.any_read_tx() + && !visible_rows.is_empty() + && planner.roll_percent(profile.add_column_pct) + && emit_add_column(planner, conn, table) + { + return; + } + if planner.active_writer().is_none() + && !planner.any_read_tx() + && visible_rows.len() >= 2 + && planner.roll_percent(profile.add_index_pct) + && emit_add_index(planner, conn, table, &visible_rows) + { + return; + } + if emit_query(planner, conn, table, &visible_rows) { + return; + } + if planner.roll_percent(5) { + let row = planner.absent_row(conn, table); + planner.push_interaction(TableWorkloadInteraction::delete_missing(conn, table, row)); + return; + } + let choose_insert = visible_rows.is_empty() || planner.roll_percent(profile.insert_pct); + if choose_insert { + if planner.roll_percent(10) { + let count = 2 + planner.choose_index(3); + let rows = (0..count).map(|_| planner.make_row(table)).collect::>(); + planner.batch_insert(conn, table, &rows); + planner.push_interaction(TableWorkloadInteraction::batch_insert(conn, table, rows)); + return; + } + let row = planner.make_row(table); + planner.insert(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::insert(conn, table, row)); + return; + } + + if planner.roll_percent(profile.exact_duplicate_insert_pct) { + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.push_interaction(TableWorkloadInteraction::exact_duplicate_insert(conn, table, row)); + return; + } + if planner.roll_percent(profile.unique_key_conflict_insert_pct) + && emit_unique_key_conflict_insert(planner, conn, table, &visible_rows) + { + return; + } + + if visible_rows.len() >= 2 && planner.roll_percent(10) { + let count = 2 + planner.choose_index(visible_rows.len().min(3) - 1); + let 
mut candidates = visible_rows.clone(); + let mut rows = Vec::with_capacity(count); + for _ in 0..count { + let idx = planner.choose_index(candidates.len()); + rows.push(candidates.remove(idx)); + } + planner.batch_delete(conn, table, &rows); + planner.push_interaction(TableWorkloadInteraction::batch_delete(conn, table, rows)); + return; + } + if planner.roll_percent(6) { + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.delete(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::delete_with_case( + conn, + table, + row.clone(), + TableInteractionCase::Reinsert, + )); + planner.insert(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::insert(conn, table, row)); + return; + } + + let row = visible_rows[planner.choose_index(visible_rows.len())].clone(); + planner.delete(conn, table, row.clone()); + planner.push_interaction(TableWorkloadInteraction::delete(conn, table, row)); +} + +fn emit_add_column(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usize) -> bool { + const MAX_COLUMNS_PER_TABLE: usize = 12; + let column_idx = planner.table_plan(table).columns.len(); + if column_idx >= MAX_COLUMNS_PER_TABLE { + return false; + } + let ty = match planner.choose_index(4) { + 0 => AlgebraicType::Bool, + 1 => AlgebraicType::U64, + 2 => AlgebraicType::String, + _ => generate_supported_type_for_churn(planner), + }; + let column = ColumnPlan { + name: format!("dst_added_{table}_{column_idx}"), + ty, + }; + let default = default_value_for_type(&column.ty); + planner.add_column(table, column.clone(), default.clone()); + planner.push_interaction(TableWorkloadInteraction::add_column(conn, table, column, default)); + true +} + +fn emit_add_index(planner: &mut ScenarioPlanner<'_>, conn: SessionId, table: usize, visible_rows: &[SimRow]) -> bool { + let candidates = candidate_new_indexes(planner, table); + if candidates.is_empty() { + return false; + } + let cols = 
candidates[planner.choose_index(candidates.len())].clone(); + planner.add_index(table, cols.clone()); + planner.push_interaction(TableWorkloadInteraction::add_index(conn, table, cols.clone())); + if let Some((lower, upper)) = inclusive_bounds_for_rows(visible_rows, &cols) { + planner.push_interaction(TableWorkloadInteraction::range_scan( + conn, + table, + cols, + Bound::Included(lower), + Bound::Included(upper), + )); + } + true +} + +fn emit_unique_key_conflict_insert( + planner: &mut ScenarioPlanner<'_>, + conn: SessionId, + table: usize, + visible_rows: &[SimRow], +) -> bool { + let source = visible_rows[planner.choose_index(visible_rows.len())].clone(); + let Some(row) = planner.unique_key_conflict_row(table, &source) else { + return false; + }; + planner.push_interaction(TableWorkloadInteraction::unique_key_conflict_insert(conn, table, row)); + true +} + +fn generate_supported_type_for_churn(planner: &mut ScenarioPlanner<'_>) -> AlgebraicType { + match planner.choose_index(6) { + 0 => AlgebraicType::I64, + 1 => AlgebraicType::U32, + 2 => AlgebraicType::I32, + 3 => AlgebraicType::U8, + 4 => AlgebraicType::I128, + _ => AlgebraicType::U128, + } +} + +fn candidate_new_indexes(planner: &ScenarioPlanner<'_>, table: usize) -> Vec> { + let table_plan = planner.table_plan(table); + let cols = table_plan + .columns + .iter() + .enumerate() + .skip(1) + .filter(|(_, column)| is_range_compatible(&column.ty)) + .map(|(idx, _)| idx as u16) + .collect::>(); + let mut candidates = Vec::new(); + for width in 1..=cols.len().min(3) { + let candidate = cols[..width].to_vec(); + if !table_plan.extra_indexes.contains(&candidate) { + candidates.push(candidate); + } + } + candidates +} + +fn inclusive_bounds_for_rows( + rows: &[SimRow], + cols: &[u16], +) -> Option<(spacetimedb_sats::AlgebraicValue, spacetimedb_sats::AlgebraicValue)> { + let mut sorted = rows.to_vec(); + sorted.sort_by(|lhs, rhs| { + lhs.project_key(cols) + .to_algebraic_value() + 
.cmp(&rhs.project_key(cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + let lower = sorted.first()?.project_key(cols).to_algebraic_value(); + let upper = sorted.last()?.project_key(cols).to_algebraic_value(); + Some((lower, upper)) +} + +fn emit_query( + planner: &mut ScenarioPlanner<'_>, + conn: SessionId, + table: usize, + visible_rows: &[crate::schema::SimRow], +) -> bool { + if !planner.roll_percent(25) { + return false; + } + if visible_rows.is_empty() { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + return true; + } + + match planner.choose_index(4) { + 0 => { + let row = &visible_rows[planner.choose_index(visible_rows.len())]; + if let Some(id) = row.id() { + planner.push_interaction(TableWorkloadInteraction::point_lookup(conn, table, id)); + true + } else { + false + } + } + 1 => { + let col = choose_predicate_col(planner, table); + let row = &visible_rows[planner.choose_index(visible_rows.len())]; + if let Some(value) = row.values.get(col as usize).cloned() { + planner.push_interaction(TableWorkloadInteraction::predicate_count(conn, table, col, value)); + true + } else { + false + } + } + 2 => { + let extra_indexes = planner.table_plan(table).extra_indexes.clone(); + let Some(cols) = extra_indexes + .into_iter() + .find(|cols| range_cols_supported(planner, table, cols)) + else { + planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + return true; + }; + let mut rows = visible_rows.to_vec(); + rows.sort_by(|lhs, rhs| { + lhs.project_key(&cols) + .to_algebraic_value() + .cmp(&rhs.project_key(&cols).to_algebraic_value()) + .then_with(|| lhs.values.cmp(&rhs.values)) + }); + let lower = rows[0].project_key(&cols).to_algebraic_value(); + let upper = rows[rows.len() - 1].project_key(&cols).to_algebraic_value(); + planner.push_interaction(TableWorkloadInteraction::range_scan( + conn, + table, + cols, + Bound::Included(lower), + Bound::Included(upper), + )); + true + } + _ => { 
+ planner.push_interaction(TableWorkloadInteraction::full_scan(conn, table)); + true + } + } +} + +fn choose_predicate_col(planner: &mut ScenarioPlanner<'_>, table: usize) -> u16 { + let column_count = planner.table_plan(table).columns.len(); + if column_count <= 1 { + 0 + } else { + 1 + planner.choose_index(column_count - 1) as u16 + } +} + +fn range_cols_supported(planner: &ScenarioPlanner<'_>, table: usize, cols: &[u16]) -> bool { + cols.iter().all(|col| { + planner + .table_plan(table) + .columns + .get(*col as usize) + .is_some_and(|column| is_range_compatible(&column.ty)) + }) +} + +fn is_range_compatible(ty: &AlgebraicType) -> bool { + matches!(ty, AlgebraicType::U64 | AlgebraicType::Bool) +} diff --git a/crates/dst/src/workload/table_ops/strategies.rs b/crates/dst/src/workload/table_ops/strategies.rs new file mode 100644 index 00000000000..13d04d2054c --- /dev/null +++ b/crates/dst/src/workload/table_ops/strategies.rs @@ -0,0 +1,66 @@ +//! Typed strategies specific to table-style workload generation. + +use crate::{ + client::SessionId, + seed::DstRng, + workload::strategy::{Index, Strategy, Weighted}, +}; + +/// Choose one logical session uniformly from the current fixed-size session pool. +#[derive(Clone, Copy, Debug)] +pub(crate) struct ConnectionChoice { + pub(crate) connection_count: usize, +} + +impl Strategy for ConnectionChoice { + fn sample(&self, rng: &mut DstRng) -> SessionId { + SessionId::from_index(Index::new(self.connection_count).sample(rng)) + } +} + +/// Choose one table uniformly. +#[derive(Clone, Copy, Debug)] +pub(crate) struct TableChoice { + pub(crate) table_count: usize, +} + +impl Strategy for TableChoice { + fn sample(&self, rng: &mut DstRng) -> usize { + Index::new(self.table_count).sample(rng) + } +} + +/// Weighted transaction control action. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum TxControlAction { + Begin, + Commit, + Rollback, + None, +} + +/// Strategy for begin/commit/rollback control flow. 
+#[derive(Clone, Copy, Debug)] +pub(crate) struct TxControlChoice { + pub(crate) begin_pct: usize, + pub(crate) commit_pct: usize, + pub(crate) rollback_pct: usize, +} + +impl Strategy for TxControlChoice { + fn sample(&self, rng: &mut DstRng) -> TxControlAction { + let begin = self.begin_pct.min(100); + let commit = self.commit_pct.min(100); + let rollback = self.rollback_pct.min(100); + let reserved = begin.saturating_add(commit).saturating_add(rollback).min(100); + let none = 100usize.saturating_sub(reserved); + + Weighted::new(vec![ + (begin, TxControlAction::Begin), + (commit, TxControlAction::Commit), + (rollback, TxControlAction::Rollback), + (none, TxControlAction::None), + ]) + .sample(rng) + } +} diff --git a/crates/dst/src/workload/table_ops/types.rs b/crates/dst/src/workload/table_ops/types.rs new file mode 100644 index 00000000000..96947a509bc --- /dev/null +++ b/crates/dst/src/workload/table_ops/types.rs @@ -0,0 +1,311 @@ +use std::ops::Bound; + +use spacetimedb_sats::AlgebraicValue; + +use crate::{ + client::SessionId, + schema::{ColumnPlan, SchemaPlan, SimRow}, + seed::DstRng, +}; + +use super::generation::ScenarioPlanner; + +/// Scenario hook for shared table-oriented workloads. +/// +/// A scenario supplies the initial schema, scenario-specific commit-time +/// properties, and any final invariant over the collected outcome. +pub(crate) trait TableScenario: Clone { + fn generate_schema(&self, rng: &mut DstRng) -> SchemaPlan; + fn validate_outcome(&self, schema: &SchemaPlan, outcome: &TableWorkloadOutcome) -> anyhow::Result<()>; + fn fill_pending(&self, planner: &mut ScenarioPlanner<'_>, conn: SessionId); +} + +/// One generated workload step. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PlannedInteraction { + pub op: TableOperation, + /// Generator-side coverage/debug label. + /// + /// Correctness must not depend on this field. 
Properties predict expected + /// behavior from the model and `op`; this label only preserves intent in + /// summaries and failure reports. + pub case: TableInteractionCase, +} + +pub type TableWorkloadInteraction = PlannedInteraction; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum TableOperation { + /// Start an explicit write transaction on a connection. + BeginTx { conn: SessionId }, + /// Commit the connection's explicit write transaction. + CommitTx { conn: SessionId }, + /// Roll back the connection's explicit write transaction. + RollbackTx { conn: SessionId }, + /// Hold a read snapshot open while later reads observe stable state. + BeginReadTx { conn: SessionId }, + /// Release a previously opened read snapshot. + ReleaseReadTx { conn: SessionId }, + /// Insert one or more rows. + InsertRows { + conn: SessionId, + table: usize, + rows: Vec, + }, + /// Delete one or more rows. + DeleteRows { + conn: SessionId, + table: usize, + rows: Vec, + }, + /// Add a column to an existing table with a default for live rows. + AddColumn { + conn: SessionId, + table: usize, + column: ColumnPlan, + default: AlgebraicValue, + }, + /// Add a non-primary index after data exists. + AddIndex { + conn: SessionId, + table: usize, + cols: Vec, + }, + /// Query a row by primary id and compare against the model. + PointLookup { conn: SessionId, table: usize, id: u64 }, + /// Count rows by equality on one column and compare against the model. + PredicateCount { + conn: SessionId, + table: usize, + col: u16, + value: AlgebraicValue, + }, + /// Scan an indexed range and compare against model filtering. + RangeScan { + conn: SessionId, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + }, + /// Scan all visible rows and compare against the model. 
+ FullScan { conn: SessionId, table: usize }, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum TableErrorKind { + UniqueConstraintViolation, + MissingRow, + WriteConflict, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum TableInteractionCase { + BeginTx, + CommitTx, + RollbackTx, + BeginReadTx, + ReleaseReadTx, + BeginTxConflict, + WriteConflictInsert, + Insert, + Delete, + ExactDuplicateInsert, + UniqueKeyConflictInsert, + DeleteMissing, + BatchInsert, + BatchDelete, + Reinsert, + AddColumn, + AddIndex, + PointLookup, + PredicateCount, + RangeScan, + FullScan, +} + +impl PlannedInteraction { + pub fn new(op: TableOperation, case: TableInteractionCase) -> Self { + Self { op, case } + } + + pub fn begin_tx(conn: SessionId) -> Self { + Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTx) + } + + pub fn commit_tx(conn: SessionId) -> Self { + Self::new(TableOperation::CommitTx { conn }, TableInteractionCase::CommitTx) + } + + pub fn rollback_tx(conn: SessionId) -> Self { + Self::new(TableOperation::RollbackTx { conn }, TableInteractionCase::RollbackTx) + } + + pub fn begin_read_tx(conn: SessionId) -> Self { + Self::new(TableOperation::BeginReadTx { conn }, TableInteractionCase::BeginReadTx) + } + + pub fn release_read_tx(conn: SessionId) -> Self { + Self::new( + TableOperation::ReleaseReadTx { conn }, + TableInteractionCase::ReleaseReadTx, + ) + } + + pub fn begin_tx_conflict(conn: SessionId) -> Self { + Self::new(TableOperation::BeginTx { conn }, TableInteractionCase::BeginTxConflict) + } + + pub fn write_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_rows(conn, table, vec![row], TableInteractionCase::WriteConflictInsert) + } + + pub fn insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_with_case(conn, table, row, TableInteractionCase::Insert) + } + + pub fn insert_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { + 
Self::insert_rows(conn, table, vec![row], case) + } + + pub fn delete(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::delete_with_case(conn, table, row, TableInteractionCase::Delete) + } + + pub fn delete_with_case(conn: SessionId, table: usize, row: SimRow, case: TableInteractionCase) -> Self { + Self::delete_rows(conn, table, vec![row], case) + } + + pub fn exact_duplicate_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_with_case(conn, table, row, TableInteractionCase::ExactDuplicateInsert) + } + + pub fn unique_key_conflict_insert(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::insert_with_case(conn, table, row, TableInteractionCase::UniqueKeyConflictInsert) + } + + pub fn delete_missing(conn: SessionId, table: usize, row: SimRow) -> Self { + Self::delete_with_case(conn, table, row, TableInteractionCase::DeleteMissing) + } + + pub fn batch_insert(conn: SessionId, table: usize, rows: Vec) -> Self { + Self::insert_rows(conn, table, rows, TableInteractionCase::BatchInsert) + } + + pub fn batch_delete(conn: SessionId, table: usize, rows: Vec) -> Self { + Self::delete_rows(conn, table, rows, TableInteractionCase::BatchDelete) + } + + fn insert_rows(conn: SessionId, table: usize, rows: Vec, case: TableInteractionCase) -> Self { + Self::new(TableOperation::InsertRows { conn, table, rows }, case) + } + + fn delete_rows(conn: SessionId, table: usize, rows: Vec, case: TableInteractionCase) -> Self { + Self::new(TableOperation::DeleteRows { conn, table, rows }, case) + } + + pub fn add_column(conn: SessionId, table: usize, column: ColumnPlan, default: AlgebraicValue) -> Self { + Self::new( + TableOperation::AddColumn { + conn, + table, + column, + default, + }, + TableInteractionCase::AddColumn, + ) + } + + pub fn add_index(conn: SessionId, table: usize, cols: Vec) -> Self { + Self::new( + TableOperation::AddIndex { conn, table, cols }, + TableInteractionCase::AddIndex, + ) + } + + pub fn point_lookup(conn: SessionId, 
table: usize, id: u64) -> Self { + Self::new( + TableOperation::PointLookup { conn, table, id }, + TableInteractionCase::PointLookup, + ) + } + + pub fn predicate_count(conn: SessionId, table: usize, col: u16, value: AlgebraicValue) -> Self { + Self::new( + TableOperation::PredicateCount { + conn, + table, + col, + value, + }, + TableInteractionCase::PredicateCount, + ) + } + + pub fn range_scan( + conn: SessionId, + table: usize, + cols: Vec, + lower: Bound, + upper: Bound, + ) -> Self { + Self::new( + TableOperation::RangeScan { + conn, + table, + cols, + lower, + upper, + }, + TableInteractionCase::RangeScan, + ) + } + + pub fn full_scan(conn: SessionId, table: usize) -> Self { + Self::new(TableOperation::FullScan { conn, table }, TableInteractionCase::FullScan) + } +} + +/// Final state gathered from a table-workload engine after execution ends. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct TableWorkloadOutcome { + /// Row count for each table in schema order. + pub final_row_counts: Vec, + /// Full committed rows for each table in schema order. + pub final_rows: Vec>, +} + +/// Per-session write transaction bookkeeping shared by locking targets. +pub(crate) struct ConnectionWriteState { + /// Open mutable transaction handle for each simulated session. + pub tx_by_connection: Vec>, + /// Session that currently owns the single-writer lock, if any. 
+ pub active_writer: Option, +} + +impl ConnectionWriteState { + pub fn new(connection_count: usize) -> Self { + Self { + tx_by_connection: (0..connection_count).map(|_| None).collect(), + active_writer: None, + } + } + + pub fn ensure_known_connection(&self, conn: SessionId) -> Result<(), String> { + self.tx_by_connection + .get(conn.as_index()) + .map(|_| ()) + .ok_or_else(|| format!("connection {conn} out of range")) + } + + pub fn ensure_writer_owner(&self, conn: SessionId, action: &str) -> Result<(), String> { + self.ensure_known_connection(conn)?; + match self.active_writer { + Some(owner) if owner == conn => Ok(()), + Some(owner) => Err(format!( + "connection {conn} cannot {action} while connection {owner} owns lock" + )), + None => Err(format!("connection {conn} has no transaction to {action}")), + } + } +} diff --git a/crates/durability/src/imp/local.rs b/crates/durability/src/imp/local.rs index e3eca56e5d9..3447e4fbf9a 100644 --- a/crates/durability/src/imp/local.rs +++ b/crates/durability/src/imp/local.rs @@ -199,8 +199,8 @@ where impl Local where - T: Send + Sync + 'static, - R: Repo + Send + Sync + 'static, + T: Encode + Send + Sync + 'static, + R: RepoWithoutLockFile + Send + Sync + 'static, { /// Inspect how many transactions added via [`Self::append_tx`] are pending /// to be applied to the underlying [`Commitlog`]. 
diff --git a/crates/io/LICENSE b/crates/io/LICENSE new file mode 120000 index 00000000000..8540cf8a991 --- /dev/null +++ b/crates/io/LICENSE @@ -0,0 +1 @@ +../../licenses/BSL.txt \ No newline at end of file diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 6af30dc0f26..55ae62f074b 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -48,6 +48,7 @@ use std::fs::{self, File}; use std::io; use std::ops::{Range, RangeBounds}; use std::path::Path; +use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use std::time::{Duration, Instant}; use std::{ collections::BTreeMap, @@ -1369,20 +1370,23 @@ impl SnapshotRepository { } } -/// Snapshot storage backend. -pub trait SnapshotRepo: Send + Sync { - type Pending: PendingSnapshot; - +/// Snapshot storage backend that can capture, read, list, and invalidate snapshots. +/// +/// Production uses the filesystem-backed [`SnapshotRepository`]. DST can use +/// [`MemorySnapshotRepository`] to keep snapshot storage inside the simulator +/// boundary instead of depending on temporary directories or host filesystem +/// behavior. +pub trait SnapshotStore: Send + Sync { /// Return the database identity associated with this snapshot backend. fn database_identity(&self) -> Identity; - /// Start creating a snapshot at `tx_offset` from the provided tables and blob store. - fn create_snapshot<'db>( + /// Capture and finalize a snapshot at `tx_offset`. + fn capture_snapshot<'db>( &self, tables: &mut dyn Iterator, blobs: &'db dyn BlobStore, tx_offset: TxOffset, - ) -> Result; + ) -> Result; /// Reconstruct the snapshot at `tx_offset` using the supplied page pool. fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result; @@ -1395,6 +1399,25 @@ pub trait SnapshotRepo: Send + Sync { self.latest_snapshot_older_than(TxOffset::MAX) } + /// Invalidate every snapshot newer than `upper_bound`. 
+ fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; + + /// Invalidate the snapshot at `tx_offset`. + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; +} + +/// Filesystem-style snapshot backend with a pending snapshot phase and optional compression. +pub trait SnapshotRepo: SnapshotStore { + type Pending: PendingSnapshot; + + /// Start creating a snapshot at `tx_offset` from the provided tables and blob store. + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result; + /// Attempt to compress all snapshots that fall into `range`, and record /// the outcome in `stats`. /// @@ -1403,30 +1426,21 @@ pub trait SnapshotRepo: Send + Sync { /// /// See [CompressionStats] for how to interpret the results. fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError>; - - /// Invalidate every snapshot newer than `upper_bound`. - fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError>; - - /// Invalidate the snapshot at `tx_offset`. 
- fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError>; } -impl SnapshotRepo for SnapshotRepository { - type Pending = BoxedPendingSnapshot; - +impl SnapshotStore for SnapshotRepository { fn database_identity(&self) -> Identity { SnapshotRepository::database_identity(self) } - fn create_snapshot<'db>( + fn capture_snapshot<'db>( &self, tables: &mut dyn Iterator, blobs: &'db dyn BlobStore, tx_offset: TxOffset, - ) -> Result { - Ok(Box::new(SnapshotRepository::create_snapshot( - self, tables, blobs, tx_offset, - )?)) + ) -> Result { + self.create_snapshot(tables, blobs, tx_offset)?.sync_all()?; + Ok(tx_offset) } fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result { @@ -1441,17 +1455,316 @@ impl SnapshotRepo for SnapshotRepository { SnapshotRepository::latest_snapshot(self) } + fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + } + + fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + SnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + +impl SnapshotRepo for SnapshotRepository { + type Pending = BoxedPendingSnapshot; + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + Ok(Box::new(SnapshotRepository::create_snapshot( + self, tables, blobs, tx_offset, + )?)) + } + fn compress_snapshots(&self, stats: &mut CompressionStats, range: Range) -> Result<(), SnapshotError> { SnapshotRepository::compress_snapshots(self, stats, range) } +} + +/// In-memory snapshot repository for deterministic tests. +/// +/// This stores snapshot object bytes in process memory and reconstructs through +/// the same [`ReconstructedSnapshot`] shape as the filesystem repository. 
It is +/// not durable and intentionally does not model the on-disk two-phase flush +/// protocol; it is a simulator/test backend for semantic snapshot capture and +/// restore. +pub struct MemorySnapshotRepository { + database_identity: Identity, + replica_id: u64, + snapshots: RwLock>, +} + +impl MemorySnapshotRepository { + pub fn new(database_identity: Identity, replica_id: u64) -> Self { + Self { + database_identity, + replica_id, + snapshots: RwLock::new(BTreeMap::new()), + } + } + + pub fn database_identity(&self) -> Identity { + self.database_identity + } + + pub fn capture_snapshot<'db>( + &self, + tables: impl Iterator, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result { + self.invalidate_newer_snapshots(tx_offset.saturating_sub(1))?; + let snapshot = MemorySnapshot::capture(self.database_identity, self.replica_id, tables, blobs, tx_offset)?; + self.write_snapshots()?.insert(tx_offset, snapshot); + Ok(tx_offset) + } + + pub fn read_snapshot( + &self, + tx_offset: TxOffset, + page_pool: &PagePool, + ) -> Result { + let snapshot = self + .read_snapshots()? + .get(&tx_offset) + .cloned() + .ok_or_else(|| memory_snapshot_not_found(tx_offset))?; + snapshot.reconstruct(page_pool) + } + + pub fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result, SnapshotError> { + Ok(self + .read_snapshots()? 
+ .range(..=upper_bound) + .next_back() + .map(|(&tx_offset, _)| tx_offset)) + } + + pub fn latest_snapshot(&self) -> Result<Option<TxOffset>, SnapshotError> { + self.latest_snapshot_older_than(TxOffset::MAX) + } + + pub fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { + self.write_snapshots()?.retain(|tx_offset, _| *tx_offset <= upper_bound); + Ok(()) + } + + pub fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { + self.write_snapshots()?.remove(&tx_offset); + Ok(()) + } + + fn read_snapshots(&self) -> Result<RwLockReadGuard<'_, BTreeMap<TxOffset, MemorySnapshot>>, SnapshotError> { + self.snapshots.read().map_err(|_| memory_snapshot_lock_poisoned()) + } + + fn write_snapshots(&self) -> Result<RwLockWriteGuard<'_, BTreeMap<TxOffset, MemorySnapshot>>, SnapshotError> { + self.snapshots.write().map_err(|_| memory_snapshot_lock_poisoned()) + } +} + +impl SnapshotStore for MemorySnapshotRepository { + fn database_identity(&self) -> Identity { + MemorySnapshotRepository::database_identity(self) + } + + fn capture_snapshot<'db>( + &self, + tables: &mut dyn Iterator<Item = &'db Table>, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result<TxOffset, SnapshotError> { + MemorySnapshotRepository::capture_snapshot(self, tables, blobs, tx_offset) + } + + fn read_snapshot(&self, tx_offset: TxOffset, page_pool: &PagePool) -> Result<ReconstructedSnapshot, SnapshotError> { + MemorySnapshotRepository::read_snapshot(self, tx_offset, page_pool) + } + + fn latest_snapshot_older_than(&self, upper_bound: TxOffset) -> Result<Option<TxOffset>, SnapshotError> { + MemorySnapshotRepository::latest_snapshot_older_than(self, upper_bound) + } + + fn latest_snapshot(&self) -> Result<Option<TxOffset>, SnapshotError> { + MemorySnapshotRepository::latest_snapshot(self) + } fn invalidate_newer_snapshots(&self, upper_bound: TxOffset) -> Result<(), SnapshotError> { - SnapshotRepository::invalidate_newer_snapshots(self, upper_bound) + MemorySnapshotRepository::invalidate_newer_snapshots(self, upper_bound) } fn invalidate_snapshot(&self, tx_offset: TxOffset) -> Result<(), SnapshotError> { - 
MemorySnapshotRepository::invalidate_snapshot(self, tx_offset) + } +} + +struct MemoryPendingSnapshot { + tx_offset: TxOffset, +} + +impl PendingSnapshot for MemoryPendingSnapshot { + fn sync_all(self: Box<Self>) -> Result<TxOffset, SnapshotError> { + Ok(self.tx_offset) + } +} + +impl SnapshotRepo for MemorySnapshotRepository { + type Pending = BoxedPendingSnapshot; + + fn create_snapshot<'db>( + &self, + tables: &mut dyn Iterator<Item = &'db Table>, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result<Self::Pending, SnapshotError> { + self.capture_snapshot(tables, blobs, tx_offset)?; + Ok(Box::new(MemoryPendingSnapshot { tx_offset })) + } + + fn compress_snapshots(&self, _stats: &mut CompressionStats, _range: Range<TxOffset>) -> Result<(), SnapshotError> { + Ok(()) + } +} + +#[derive(Clone)] +struct MemorySnapshot { + database_identity: Identity, + replica_id: u64, + tx_offset: TxOffset, + module_abi_version: [u16; 2], + blobs: Vec<MemoryBlob>, + tables: BTreeMap<TableId, Vec<MemoryPage>>, +} + +impl MemorySnapshot { + fn capture<'db>( + database_identity: Identity, + replica_id: u64, + tables: impl Iterator<Item = &'db Table>, + blobs: &'db dyn BlobStore, + tx_offset: TxOffset, + ) -> Result<Self, SnapshotError> { + let blobs = blobs + .iter_blobs() + .map(|(hash, uses, bytes)| MemoryBlob { + hash: *hash, + uses: uses as u32, + bytes: bytes.into(), + }) + .collect(); + + let tables = tables + .map(|table| { + let pages = table + .iter_pages_with_hashes() + .map(|(hash, page)| { + let bytes = bsatn::to_vec(page).map_err(|cause| SnapshotError::Serialize { + ty: ObjectType::Page(hash), + cause, + })?; + Ok(MemoryPage { hash, bytes }) + }) + .collect::<Result<Vec<_>, SnapshotError>>()?; + Ok((table.schema.table_id, pages)) + }) + .collect::<Result<BTreeMap<_, _>, SnapshotError>>()?; + + Ok(Self { + database_identity, + replica_id, + tx_offset, + module_abi_version: CURRENT_MODULE_ABI_VERSION, + blobs, + tables, + }) } + + fn reconstruct(self, page_pool: &PagePool) -> Result<ReconstructedSnapshot, SnapshotError> { + let source_repo = memory_snapshot_path(self.tx_offset); + let mut blob_store = HashMapBlobStore::default(); + for MemoryBlob { hash, uses, bytes } in self.blobs { + let computed 
= BlobHash::hash_from_bytes(&bytes); + if hash != computed { + return Err(SnapshotError::HashMismatch { + ty: ObjectType::Blob(hash), + expected: hash.data, + computed: computed.data, + source_repo: source_repo.clone(), + }); + } + blob_store.insert_with_uses(&hash, uses as usize, bytes); + } + + let tables = + self.tables + .into_iter() + .map(|(table_id, pages)| { + let pages = pages + .into_iter() + .map(|MemoryPage { hash, bytes }| { + let page = page_pool.take_deserialize_from(&bytes).map_err(|cause| { + SnapshotError::Deserialize { + ty: ObjectType::Page(hash), + source_repo: source_repo.clone(), + cause, + } + })?; + let computed = page.content_hash(); + if hash != computed { + return Err(SnapshotError::HashMismatch { + ty: ObjectType::Page(hash), + expected: *hash.as_bytes(), + computed: *computed.as_bytes(), + source_repo: source_repo.clone(), + }); + } + Ok(page) + }) + .collect::<Result<Vec<_>, SnapshotError>>()?; + Ok((table_id, pages)) + }) + .collect::<Result<BTreeMap<_, _>, SnapshotError>>()?; + + Ok(ReconstructedSnapshot { + database_identity: self.database_identity, + replica_id: self.replica_id, + tx_offset: self.tx_offset, + module_abi_version: self.module_abi_version, + blob_store, + tables, + compress_type: CompressType::None, + }) + } +} + +#[derive(Clone)] +struct MemoryBlob { + hash: BlobHash, + uses: u32, + bytes: Box<[u8]>, +} + +#[derive(Clone)] +struct MemoryPage { + hash: blake3::Hash, + bytes: Vec<u8>, +} + +fn memory_snapshot_lock_poisoned() -> SnapshotError { + SnapshotError::Io(io::Error::other("memory snapshot repository lock poisoned")) +} + +fn memory_snapshot_not_found(tx_offset: TxOffset) -> SnapshotError { + SnapshotError::Io(io::Error::new( + io::ErrorKind::NotFound, + format!("memory snapshot {tx_offset} not found"), + )) +} + +fn memory_snapshot_path(tx_offset: TxOffset) -> PathBuf { + PathBuf::from(format!("<in-memory snapshot {tx_offset}>")) } pub struct ReconstructedSnapshot { diff --git a/crates/snapshot/tests/remote.rs b/crates/snapshot/tests/remote.rs index 1c6c51fe8e7..e7133191ffa 100644 
--- a/crates/snapshot/tests/remote.rs +++ b/crates/snapshot/tests/remote.rs @@ -23,6 +23,7 @@ use spacetimedb_lib::{ }; use spacetimedb_paths::{server::SnapshotsPath, FromPathUnchecked}; use spacetimedb_primitives::TableId; +use spacetimedb_runtime::Handle; use spacetimedb_sats::{product, raw_identifier::RawIdentifier}; use spacetimedb_schema::{ def::ModuleDef, @@ -230,11 +231,13 @@ async fn create_snapshot(repo: Arc) -> anyhow::Result::default())), - snapshots: Some(SnapshotWorker::new(repo, snapshot::Compression::Disabled, rt.clone())), + snapshot_store: Some(snapshot_worker.snapshot_store()), + snapshots: Some(snapshot_worker), runtime: rt, }; let db = TestDB::open_db(EmptyHistory::new(), Some(persistence), None, 0)?; diff --git a/crates/standalone/src/subcommands/start.rs b/crates/standalone/src/subcommands/start.rs index 50f6db19257..bc8241938d2 100644 --- a/crates/standalone/src/subcommands/start.rs +++ b/crates/standalone/src/subcommands/start.rs @@ -1,12 +1,18 @@ +#[cfg(not(simulation))] use netstat2::{get_sockets_info, AddressFamilyFlags, ProtocolFlags, ProtocolSocketInfo, TcpState}; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::identity::IdentityRoutes; +#[cfg(not(simulation))] use spacetimedb_pg::pg_server; +#[cfg(not(simulation))] use std::io::{self, Write}; +#[cfg(not(simulation))] use std::net::IpAddr; use std::sync::Arc; use crate::{StandaloneEnv, StandaloneOptions}; use anyhow::Context; +#[cfg(not(simulation))] use axum::extract::DefaultBodyLimit; use clap::ArgAction::SetTrue; use clap::{Arg, ArgMatches}; @@ -15,11 +21,14 @@ use spacetimedb::db::{self, Storage}; use spacetimedb::startup::{self, TracingOptions}; use spacetimedb::util::jobs::JobCores; use spacetimedb::worker_metrics; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::database::DatabaseRoutes; +#[cfg(not(simulation))] use spacetimedb_client_api::routes::router; use spacetimedb_client_api::routes::subscribe::WebSocketOptions; use 
spacetimedb_paths::cli::{PrivKeyPath, PubKeyPath}; use spacetimedb_paths::server::{ConfigToml, ServerDataDir}; +#[cfg(not(simulation))] use tokio::net::TcpListener; pub fn cli() -> clap::Command { @@ -111,6 +120,7 @@ impl ConfigFile { pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { let listen_addr = args.get_one::("listen_addr").unwrap(); let pg_port = args.get_one::("pg_port"); + #[cfg(not(simulation))] let non_interactive = args.get_flag("non_interactive"); let cert_dir = args.get_one::("jwt_key_dir"); let certs = Option::zip( @@ -198,13 +208,26 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { ); worker_metrics::spawn_page_pool_stats(listen_addr.clone(), ctx.page_pool().clone()); worker_metrics::spawn_bsatn_rlb_pool_stats(listen_addr.clone(), ctx.bsatn_rlb_pool().clone()); + #[cfg(simulation)] + { + let _ = (pg_port, ctx, listen_addr); + anyhow::bail!("standalone start server mode is not supported under simulation"); + } + + #[cfg(not(simulation))] let mut db_routes = DatabaseRoutes::default(); - db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); - db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); - db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + #[cfg(not(simulation))] + { + db_routes.root_post = db_routes.root_post.layer(DefaultBodyLimit::disable()); + db_routes.db_put = db_routes.db_put.layer(DefaultBodyLimit::disable()); + db_routes.pre_publish = db_routes.pre_publish.layer(DefaultBodyLimit::disable()); + } + #[cfg(not(simulation))] let extra = axum::Router::new().nest("/health", spacetimedb_client_api::routes::health::router()); + #[cfg(not(simulation))] let service = router(&ctx, db_routes, IdentityRoutes::default(), extra).with_state(ctx.clone()); + #[cfg(not(simulation))] // Check if the requested port is available on both IPv4 and IPv6. 
// If not, offer to find an available port by incrementing (unless non-interactive). let listen_addr = if let Some((host, port_str)) = listen_addr.rsplit_once(':') { @@ -250,40 +273,44 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { listen_addr.to_string() }; - let tcp = TcpListener::bind(&listen_addr).await.context(format!( - "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" - ))?; - socket2::SockRef::from(&tcp).set_nodelay(true)?; - log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); - - if let Some(pg_port) = pg_port { - let server_addr = listen_addr.split(':').next().unwrap(); - let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( - "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + #[cfg(not(simulation))] + { + let tcp = TcpListener::bind(&listen_addr).await.context(format!( + "failed to bind the SpacetimeDB server to '{listen_addr}', please check that the address is valid and not already in use" ))?; - - let notify = Arc::new(tokio::sync::Notify::new()); - let shutdown_notify = notify.clone(); - tokio::select! 
{ - _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, - _ = axum::serve(tcp, service).with_graceful_shutdown(async move { - shutdown_notify.notified().await; - }) => {}, - _ = tokio::signal::ctrl_c() => { - println!("Shutting down servers..."); - notify.notify_waiters(); // Notify all tasks + socket2::SockRef::from(&tcp).set_nodelay(true)?; + log::info!("Starting SpacetimeDB listening on {}", tcp.local_addr()?); + + if let Some(pg_port) = pg_port { + let server_addr = listen_addr.split(':').next().unwrap(); + let tcp_pg = TcpListener::bind(format!("{server_addr}:{pg_port}")).await.context(format!( + "failed to bind the SpacetimeDB PostgreSQL wire protocol server to {server_addr}:{pg_port}, please check that the port is valid and not already in use" + ))?; + + let notify = Arc::new(tokio::sync::Notify::new()); + let shutdown_notify = notify.clone(); + tokio::select! { + _ = pg_server::start_pg(notify.clone(), ctx, tcp_pg) => {}, + _ = axum::serve(tcp, service).with_graceful_shutdown(async move { + shutdown_notify.notified().await; + }) => {}, + _ = tokio::signal::ctrl_c() => { + println!("Shutting down servers..."); + notify.notify_waiters(); // Notify all tasks + } } + } else { + log::warn!("PostgreSQL wire protocol server disabled"); + axum::serve(tcp, service) + .with_graceful_shutdown(async { + tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); + log::info!("Shutting down server..."); + }) + .await?; } - } else { - log::warn!("PostgreSQL wire protocol server disabled"); - axum::serve(tcp, service) - .with_graceful_shutdown(async { - tokio::signal::ctrl_c().await.expect("failed to install Ctrl+C handler"); - log::info!("Shutting down server..."); - }) - .await?; } + #[cfg(not(simulation))] Ok(()) } @@ -302,6 +329,7 @@ pub async fn exec(args: &ArgMatches, db_cores: JobCores) -> anyhow::Result<()> { /// Note: There is a small race condition between this check and the actual bind - /// another process could grab the port in between. 
This is unlikely in practice /// and the actual bind will fail with a clear error if it happens. +#[cfg(not(simulation))] pub fn is_port_available(host: &str, port: u16) -> bool { let requested = match parse_host(host) { Some(r) => r, @@ -336,11 +364,13 @@ pub fn is_port_available(host: &str, port: u16) -> bool { } #[derive(Debug, Clone, Copy)] +#[cfg(not(simulation))] enum RequestedHost { Localhost, Ip(IpAddr), } +#[cfg(not(simulation))] fn parse_host(host: &str) -> Option { let host = host.trim(); @@ -354,6 +384,7 @@ fn parse_host(host: &str) -> Option { host.parse::().ok().map(RequestedHost::Ip) } +#[cfg(not(simulation))] fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { match requested { RequestedHost::Localhost => match listener_addr { @@ -424,6 +455,7 @@ fn conflicts(requested: RequestedHost, listener_addr: IpAddr) -> bool { /// Find an available port starting from the requested port. /// Returns the first port that is available on both IPv4 and IPv6. +#[cfg(not(simulation))] fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Option { for offset in 0..max_attempts { let port = requested_port.saturating_add(offset); @@ -438,6 +470,7 @@ fn find_available_port(host: &str, requested_port: u16, max_attempts: u16) -> Op } /// Prompt the user with a yes/no question. Returns true if they answer yes. 
+#[cfg(not(simulation))] fn prompt_yes_no(question: &str) -> bool { print!("{} [y/N] ", question); io::stdout().flush().ok(); diff --git a/tools/ci/README.md b/tools/ci/README.md index 9b71b406fef..23bcfc6c23b 100644 --- a/tools/ci/README.md +++ b/tools/ci/README.md @@ -239,6 +239,17 @@ Usage: docs - `--help`: Print help +### `io-boundary` + +**Usage:** +```bash +Usage: io-boundary +``` + +**Options:** + +- `--help`: Print help + ### `help` **Usage:** diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs index 2454ea3349f..99a9a2b86c3 100644 --- a/tools/ci/src/main.rs +++ b/tools/ci/src/main.rs @@ -161,6 +161,8 @@ enum CiCmd { VersionUpgradeCheck, /// Builds the docs site. Docs, + /// Checks that runtime is not used as a Tokio-shaped IO facade. + IoBoundary, } fn run_all_clap_subcommands(skips: &[String]) -> Result<()> { @@ -189,6 +191,99 @@ fn tracked_rs_files_under(path: &str) -> Result> { .collect()) } +fn check_io_boundary() -> Result<()> { + ensure_repo_root()?; + + let mut violations = Vec::new(); + for root in ["crates/runtime", "crates/datastore", "crates/core", "crates/commitlog"] { + for path in tracked_rs_files_under(root)? 
{ + check_file_for_runtime_io_facade(&path, &mut violations)?; + } + } + + if violations.is_empty() { + return Ok(()); + } + + for violation in &violations { + eprintln!("{violation}"); + } + bail!( + "spacetimedb_runtime must not be used as a Tokio-shaped io/fs/net facade; use Tokio directly in normal-only code and semantic seams for simulation code" + ); +} + +fn check_file_for_runtime_io_facade(path: &Path, violations: &mut Vec) -> Result<()> { + let contents = fs::read_to_string(path)?; + let mut in_runtime_use_tree = false; + + for (line_idx, line) in contents.lines().enumerate() { + let line_no = line_idx + 1; + let code = line.split("//").next().unwrap_or(line); + + for module in ["io", "fs", "net", "blocking_fs"] { + if code.contains(&format!("spacetimedb_runtime::{module}")) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade usage", + path.display() + )); + } + if path == Path::new("crates/runtime/src/lib.rs") && code.contains(&format!("pub mod {module}")) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade export", + path.display() + )); + } + } + + if in_runtime_use_tree { + for module in ["io", "fs", "net", "blocking_fs"] { + if use_tree_mentions_token(code, module) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade import", + path.display() + )); + } + } + if code.contains("};") { + in_runtime_use_tree = false; + } + continue; + } + + if code.contains("use spacetimedb_runtime::{") { + for module in ["io", "fs", "net", "blocking_fs"] { + if use_tree_mentions_token(code, module) { + violations.push(format!( + "{}:{line_no}: spacetimedb_runtime::{module} facade import", + path.display() + )); + } + } + if !code.contains("};") { + in_runtime_use_tree = true; + } + } + } + + Ok(()) +} + +fn use_tree_mentions_token(code: &str, forbidden: &str) -> bool { + let mut token = String::new(); + for ch in code.chars() { + if ch == '_' || ch.is_ascii_alphanumeric() { + 
token.push(ch); + continue; + } + if token == forbidden { + return true; + } + token.clear(); + } + token == forbidden +} + fn run_publish_checks() -> Result<()> { cmd!("bash", "-lc", "test -d venv || python3 -m venv venv").run()?; cmd!("venv/bin/pip3", "install", "argparse", "toml").run()?; @@ -352,6 +447,7 @@ fn main() -> Result<()> { Some(CiCmd::Lint) => { ensure_repo_root()?; + check_io_boundary()?; // `cargo fmt --all` only checks files that Cargo discovers through workspace/package targets. // However, we also keep Rust sources in a locations that are tracked but not part of our workspace, // so this approach properly catches all the files, where `cargo fmt` does not. @@ -540,6 +636,10 @@ fn main() -> Result<()> { run_docs_build()?; } + Some(CiCmd::IoBoundary) => { + check_io_boundary()?; + } + None => run_all_clap_subcommands(&cli.skip)?, }