diff --git a/Cargo.lock b/Cargo.lock index f3878680c1..88e882908e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1921,6 +1921,21 @@ dependencies = [ "winit", ] +[[package]] +name = "graph-storage" +version = "0.0.0" +dependencies = [ + "blake3", + "core-types", + "graph-craft", + "graphene-resource", + "rmp-serde", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "thiserror 2.0.18", +] + [[package]] name = "graphene-application-io" version = "0.1.0" @@ -4690,6 +4705,25 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmp" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "rmp-serde" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" +dependencies = [ + "rmp", + "serde", +] + [[package]] name = "ron" version = "0.12.0" diff --git a/Cargo.toml b/Cargo.toml index 0297028792..444fdffa9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "desktop/platform/mac", "desktop/platform/win", "document/container", + "document/graph-storage", "editor", "frontend/wrapper", "libraries/dyn-any", @@ -87,6 +88,7 @@ repeat-nodes = { path = "node-graph/nodes/repeat" } math-nodes = { path = "node-graph/nodes/math" } path-bool-nodes = { path = "node-graph/nodes/path-bool" } graph-craft = { path = "node-graph/graph-craft" } +graph-storage = { path = "document/graph-storage" } raster-nodes = { path = "node-graph/nodes/raster" } graphene-std = { path = "node-graph/nodes/gstd" } interpreted-executor = { path = "node-graph/interpreted-executor" } @@ -167,6 +169,7 @@ color = "0.3" # Linebender ecosystem (END) rand = { version = "0.9", default-features = false, features = ["std_rng"] } rand_chacha = "0.9" +rmp-serde = "1.3" glam = { version = "0.32.1", default-features = false, 
features = [ "nostd-libm", "scalar-math", diff --git a/document/graph-storage/Cargo.toml b/document/graph-storage/Cargo.toml new file mode 100644 index 0000000000..53d86b4b6f --- /dev/null +++ b/document/graph-storage/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "graph-storage" +description = "Provides a delta based graph representation used in the Graphite file format" +edition.workspace = true +version.workspace = true +license.workspace = true +authors.workspace = true + +[features] +conversion = ["dep:graph-craft"] +default = ["conversion"] + +[dependencies] +graph-craft = { workspace = true, optional = true } +graphene-resource = { workspace = true } +core-types = { workspace = true } + +thiserror = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +blake3 = { workspace = true } +rustc-hash = { workspace = true } +rmp-serde = { workspace = true } + +[dev-dependencies] +graph-craft = { workspace = true, features = ["loading"] } diff --git a/document/graph-storage/src/attributes.rs b/document/graph-storage/src/attributes.rs new file mode 100644 index 0000000000..c7d8d7bccc --- /dev/null +++ b/document/graph-storage/src/attributes.rs @@ -0,0 +1,194 @@ +use crate::TimeStamp; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +/// Attribute keys. Glob-import (`use crate::attr::*`) at conversion sites. +/// +/// `ui::*` keys are namespaced per CRDT design so each value gets its own LWW timestamp. Per-input +/// keys live on `Node.inputs_attributes[i]`; per-network keys live on `Network.attributes`. 
+pub mod attr { + pub const CALL_ARGUMENT: &str = "call_argument"; + pub const CONTEXT_FEATURES: &str = "context_features"; + pub const IMPORT_TYPE: &str = "import_type"; + pub const VISIBLE: &str = "visible"; + pub const SKIP_DEDUPLICATION: &str = "skip_deduplication"; + pub const REFLECTION_METADATA: &str = "reflection_metadata"; + pub const ORIGINAL_NODE_ID: &str = "original_node_id"; + pub const EXPORTED_NODES_TS: &str = "library::exported_nodes_ts"; + /// Whole-map LWW of a network's `scope_injections` (`key -> (storage NodeId, Type)`), stored as a + /// serialized blob so its shape can evolve (e.g. dropping the `Type`) without a model change. The + /// node references use stable storage IDs, resolved back to runtime-local IDs on conversion. + pub const SCOPE_INJECTIONS: &str = "compute::scope_injections"; + + pub const UI_POSITION: &str = "ui::position"; + pub const UI_IS_LAYER: &str = "ui::is_layer"; + pub const UI_DISPLAY_NAME: &str = "ui::display_name"; + pub const UI_LOCKED: &str = "ui::locked"; + pub const UI_PINNED: &str = "ui::pinned"; + + pub const UI_INPUT_NAME: &str = "ui::input_name"; + pub const UI_INPUT_DESCRIPTION: &str = "ui::input_description"; + pub const UI_WIDGET_OVERRIDE: &str = "ui::widget_override"; + /// Prefix for `InputPersistentMetadata::input_data` entries. Full key: `ui::input_data::`. + pub const UI_INPUT_DATA_PREFIX: &str = "ui::input_data::"; + + pub const UI_OUTPUT_NAMES: &str = "ui::output_names"; + /// Lives on the *owning* node (the one with `Implementation::Network`), not on the nested network. + pub const UI_REFERENCE: &str = "ui::reference"; + + // Delta-level annotations (on `Delta.attributes`, not the registry). Local + mutable, excluded + // from the content-addressed `Rev`. + /// Marks the last delta of a user gesture, so the undo cursor steps per-gesture, not per-delta. + pub const GESTURE_END: &str = "compute::gesture_end"; +} + +/// A type-erased attribute value paired with the timestamp at which it was last set. 
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct Value { + pub value: serde_json::Value, + pub timestamp: TimeStamp, +} + +impl Value { + pub fn new(value: serde_json::Value, timestamp: TimeStamp) -> Self { + Self { value, timestamp } + } +} + +pub type Attributes = BTreeMap; + +/// Write helpers for `Attributes`. +pub trait AttributesExt { + /// Inserts a JSON value under `key`. + fn set(&mut self, key: &str, value: serde_json::Value, timestamp: TimeStamp); + + /// Serializes `value` and inserts it under `key`. + fn set_serialized(&mut self, key: &str, value: &T, timestamp: TimeStamp) -> Result<(), serde_json::Error>; + + /// Inserts only when `value != default`, so the read side falls back to the same default. + fn set_if_not_default(&mut self, key: &str, value: &T, default: &T, timestamp: TimeStamp) -> Result<(), serde_json::Error>; +} + +impl AttributesExt for Attributes { + fn set(&mut self, key: &str, value: serde_json::Value, timestamp: TimeStamp) { + self.insert(key.to_string(), Value { value, timestamp }); + } + + fn set_serialized(&mut self, key: &str, value: &T, timestamp: TimeStamp) -> Result<(), serde_json::Error> { + self.set(key, serde_json::to_value(value)?, timestamp); + Ok(()) + } + + fn set_if_not_default(&mut self, key: &str, value: &T, default: &T, timestamp: TimeStamp) -> Result<(), serde_json::Error> { + if value != default { + self.set_serialized(key, value, timestamp)?; + } + Ok(()) + } +} + +/// Typed read helpers for `Attributes`. +pub trait AttributesRead { + /// Deserializes the value under `key`, or `None` if missing or undecodable. + fn get_typed(&self, key: &str) -> Option; + + /// Same as `get_typed`, falling back to `default`. + fn get_or(&self, key: &str, default: T) -> T { + self.get_typed(key).unwrap_or(default) + } + + /// Same as `get_typed`, falling back to `T::default()`. 
+ fn get_or_default(&self, key: &str) -> T { + self.get_typed(key).unwrap_or_default() + } +} + +impl AttributesRead for Attributes { + fn get_typed(&self, key: &str) -> Option { + self.get(key).and_then(|v| serde_json::from_value(v.value.clone()).ok()) + } +} + +/// Fractional priority for ordering a resource's source chain. New sources are inserted by picking +/// a value strictly between two neighbors, so concurrent insertions elsewhere never collide; an +/// exact tie between two peers inserting at the same gap is broken by `PeerId` in [`SourceKey`]. +/// `f64` precision is ample for the short fallback chains resources carry in practice. +#[derive(Copy, Clone, Debug, Serialize, Deserialize)] +#[serde(try_from = "f64")] +pub struct Priority(f64); + +impl Priority { + /// Rejects non-finite input. The field is private and deserialization routes through here, so a + /// `Priority` is always finite, keeping its `Ord`/`Hash`/`Eq` agreement sound. + pub fn new(value: f64) -> Result { + if value.is_finite() { Ok(Self(value)) } else { Err(NonFinitePriority(value)) } + } + + pub fn value(self) -> f64 { + self.0 + } +} + +impl TryFrom for Priority { + type Error = NonFinitePriority; + fn try_from(value: f64) -> Result { + Self::new(value) + } +} + +/// A [`Priority`] was constructed from a `NaN` or infinite value. +#[derive(Debug, thiserror::Error)] +#[error("priority must be finite, got {0}")] +pub struct NonFinitePriority(pub f64); + +// `total_cmp` drives `Ord`, `Hash`, and `Eq` together so `Priority` is a sound `BTree`/`Hash` key: +// a derived `PartialEq` would disagree with this ordering on `-0.0` and `NaN`. 
+impl PartialEq for Priority { + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == std::cmp::Ordering::Equal + } +} + +impl Eq for Priority {} + +impl Ord for Priority { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.total_cmp(&other.0) + } +} + +impl PartialOrd for Priority { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::hash::Hash for Priority { + fn hash(&self, state: &mut H) { + self.0.to_bits().hash(state); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn priority_rejects_non_finite() { + assert!(Priority::new(f64::NAN).is_err()); + assert!(Priority::new(f64::INFINITY).is_err()); + assert!(Priority::new(-1.5).is_ok(), "negative finite priorities are valid"); + } + + /// Deserialization routes through `Priority::new`, so a non-finite value on disk is rejected rather + /// than silently producing an unsound map key. MessagePack (the storage format) can carry a + /// non-finite `f64`, unlike JSON, so this guards the real round-trip path. + #[test] + fn priority_deserialize_validates_finiteness() { + let finite = rmp_serde::to_vec(&3.5_f64).unwrap(); + assert!(rmp_serde::from_slice::(&finite).is_ok()); + + let non_finite = rmp_serde::to_vec(&f64::INFINITY).unwrap(); + assert!(rmp_serde::from_slice::(&non_finite).is_err(), "a non-finite priority on disk must be rejected"); + } +} diff --git a/document/graph-storage/src/crdt.rs b/document/graph-storage/src/crdt.rs new file mode 100644 index 0000000000..9eff36fe1d --- /dev/null +++ b/document/graph-storage/src/crdt.rs @@ -0,0 +1,191 @@ +use crate::{Attributes, AttributesExt, Network, NetworkId, Node, NodeId, NodeInput, PeerId, ResourceEntry, ResourceId, Rev, SourceKey, TimeStamp, UserId, Value, attr, compute_rev}; +use graphene_resource::ResourceHash; +use serde::{Deserialize, Serialize}; + +/// Content-addressed delta: `id` is `blake3_128(parents, author, timestamp, delta_type)`. 
+/// +/// `reverse` is state-dependent undo bookkeeping (it captures pre-state at the moment the forward +/// op was applied), so it's serialized for storage but excluded from the identity hash — two peers +/// observing the same forward delta against different local states would otherwise compute +/// different Revs for the same logical op. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Delta { + pub id: Rev, + pub parents: Vec, + pub author: PeerId, + pub timestamp: TimeStamp, + pub delta_type: RegistryDelta, + pub reverse: RegistryDelta, + /// Local, mutable annotations on this commit (gesture-end marker, future commit messages / labels). + /// Deliberately excluded from `compute_rev`: relabeling a commit must not change its content-addressed + /// identity, and two peers annotating the same op differently must still dedup to one `Rev`. + #[serde(default, skip_serializing_if = "Attributes::is_empty")] + pub attributes: Attributes, +} + +impl Delta { + pub fn new(parents: Vec, author: PeerId, timestamp: TimeStamp, delta_type: RegistryDelta, reverse: RegistryDelta) -> Self { + let id = compute_rev(&parents, author, timestamp, &delta_type); + Self { + id, + parents, + author, + timestamp, + delta_type, + reverse, + attributes: Attributes::default(), + } + } + + /// Mark this delta as the last op of a user gesture, so the undo cursor treats it as a checkpoint. + pub fn mark_gesture_end(&mut self, timestamp: TimeStamp) { + self.attributes.set(attr::GESTURE_END, serde_json::Value::Bool(true), timestamp); + } + + pub fn is_gesture_end(&self) -> bool { + self.attributes.get(attr::GESTURE_END).is_some_and(|marker| marker.value == serde_json::Value::Bool(true)) + } + + /// The content-addressed `Rev` this delta's identity fields hash to. Equals `id` for a delta built + /// via `new`; differs only if `id` was tampered with or the hash derivation changed. 
+ pub fn recomputed_id(&self) -> Rev { + compute_rev(&self.parents, self.author, self.timestamp, &self.delta_type) + } + + /// Whether `id` matches the recomputed content hash. `Delta` deserializes without checking this + /// (the hash is not cheap over a large history); callers verify explicitly when they don't trust + /// the source via [`Session::verify_history`]. + pub fn has_valid_id(&self) -> bool { + self.id == self.recomputed_id() + } +} + +/// Op payload. Timestamps live on the wrapping `Delta` — one per delta, applied to all LWW-eligible +/// writes within. See `notes/document-format-collaboration.md`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum RegistryDelta { + AddNode { + node_id: NodeId, + node: Node, + }, + /// `snapshot` lets the reverse `AddNode` rebuild without reading the (already-removed) node from + /// the registry, mirroring `RemoveNetwork`. + RemoveNode { + node_id: NodeId, + snapshot: Node, + }, + ChangeNodeInput { + node_id: NodeId, + input_idx: usize, + new_input: NodeInput, + }, + ChangeNodeAttribute { + node_id: NodeId, + delta: AttributeDelta, + }, + ChangeNodeInputAttribute { + node_id: NodeId, + input_idx: usize, + delta: AttributeDelta, + }, + /// LWW per slot. `target == None` removes the slot. + SetExport { + network: NetworkId, + slot: u32, + target: Option, + }, + /// Per-network attribute change, LWW per key. Mirrors `ChangeDocumentAttribute`. + ChangeNetworkAttribute { + network: NetworkId, + delta: AttributeDelta, + }, + AddNetwork { + network: NetworkId, + contents: Network, + }, + /// `snapshot` lets the reverse delta rebuild without re-walking history. + RemoveNetwork { + network: NetworkId, + snapshot: Network, + }, + /// Whole-list LWW; timestamp lives under `attr::EXPORTED_NODES_TS` on the document. + SetExportedNodes { + nodes: Vec, + }, + ChangeDocumentAttribute { + delta: AttributeDelta, + }, + /// Append-only registration of a device's `PeerId` against its owning `UserId`. 
+ /// First write wins; conflicting re-registration errors. Duplicate identical registration + /// is a no-op. Not LWW — the mapping is forever. + RegisterPeer { + peer: PeerId, + user: UserId, + }, + /// LWW on a resource's resolved content hash. Creates the resource entry if absent. + /// Concurrent resolves agree by construction (the hash is content-derived), so LWW is safe. + SetResourceHash { + id: ResourceId, + hash: Option, + }, + /// Add (or LWW-overwrite) one entry in a resource's source fallback chain. The source body is + /// type-erased; `key` carries the fractional priority + peer that order it. Add-wins: concurrent + /// adds at distinct keys all survive. Creates the resource entry if absent. + AddSource { + id: ResourceId, + key: SourceKey, + source: serde_json::Value, + }, + /// Remove one entry from a resource's source chain. LWW against the entry's timestamp. + RemoveSource { + id: ResourceId, + key: SourceKey, + }, + /// Register a whole resource entry at once. Overwrites any existing entry for `id`; the reverse + /// of `RemoveResource`, the way `AddNetwork` pairs with `RemoveNetwork`. + AddResource { + id: ResourceId, + entry: ResourceEntry, + }, + /// Remove a whole resource entry. `snapshot` is the state of the resource before it was removed. + RemoveResource { + id: ResourceId, + snapshot: ResourceEntry, + }, +} + +/// `value: None` means remove. The timestamp comes from the wrapping `Delta`. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct AttributeDelta { + pub key: String, + pub value: Option, +} + +pub(crate) fn reverse_attribute_delta(delta: &AttributeDelta, attributes: &Attributes) -> AttributeDelta { + AttributeDelta { + key: delta.key.clone(), + value: attributes.get(&delta.key).map(|previous| previous.value.clone()), + } +} + +pub(crate) fn apply_attribute_delta(delta: AttributeDelta, timestamp: TimeStamp, force: bool, attributes: &mut Attributes) { + let AttributeDelta { key, value } = delta; + match value { + Some(value) => match attributes.entry(key) { + std::collections::btree_map::Entry::Occupied(mut entry) => { + if force || timestamp > entry.get().timestamp { + entry.insert(Value { value, timestamp }); + } + } + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(Value { value, timestamp }); + } + }, + None => { + let should_remove = force || attributes.get(&key).is_none_or(|existing| timestamp > existing.timestamp); + if should_remove { + attributes.remove(&key); + } + } + } +} diff --git a/document/graph-storage/src/crdt_tests.rs b/document/graph-storage/src/crdt_tests.rs new file mode 100644 index 0000000000..858673cf0f --- /dev/null +++ b/document/graph-storage/src/crdt_tests.rs @@ -0,0 +1,812 @@ +use core_types::uuid::NodeId as RuntimeNodeId; +use graph_craft::ProtoNodeIdentifier; +use graph_craft::concrete; +use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeInput, NodeNetwork}; + +use crate::{Delta, Document, HotOp, Implementation, Network, NoMetadata, Node, NodeId, PeerId, ROOT_NETWORK, RegistryDelta, RegistryTarget, Session, TimeStamp}; + +fn fresh_document(peer: PeerId) -> Document { + Session::with_peer(peer).document +} + +fn remove_node_op(node_id: NodeId) -> RegistryDelta { + // The snapshot only matters for reverse computation; this op is used to test a no-op removal on an + // absent node, so a placeholder node is fine. 
+ let snapshot = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: Vec::new(), + inputs_attributes: Vec::new(), + attributes: crate::Attributes::new(), + network: ROOT_NETWORK, + }; + RegistryDelta::RemoveNode { node_id, snapshot } +} + +/// Commit a single op to a document as a retired delta. Mints a fresh timestamp, links to +/// current head, applies, records in history, advances head. +fn commit_op(document: &mut Document, op: RegistryDelta) { + let reverse = document.compute_reverse_delta(RegistryTarget::Working, &op).expect("compute_reverse_delta failed"); + let timestamp = document.clock.tick(); + let parents = if document.head == 0 { Vec::new() } else { vec![document.head] }; + let delta = Delta::new(parents, document.peer, timestamp, op, reverse); + let rev = delta.id; + document.apply_retired_delta(delta).expect("apply_retired_delta failed"); + document.head = rev; +} + +/// Every applied op must advance the local clock past the op's timestamp, so any subsequent +/// local tick is causally later than what we just observed. Locks in the invariant that +/// `apply_op` calls `clock.observe`, regardless of which apply entry point was used. 
+#[test] +fn apply_hot_op_advances_clock_past_observed_timestamp() { + let mut document = fresh_document(PeerId(1)); + assert_eq!(document.clock.counter, 0); + + let observed = TimeStamp { counter: 42, peer: PeerId(2) }; + let hot_op = HotOp { + op: remove_node_op(99), + timestamp: observed, + author: PeerId(2), + }; + + document.apply_hot_op(hot_op).expect("RemoveNode on absent node is a no-op, not an error"); + + assert!( + document.clock.counter >= observed.counter, + "clock counter {} did not advance past observed counter {}", + document.clock.counter, + observed.counter + ); + + let next = document.clock.tick(); + assert!( + next.counter > observed.counter, + "next tick {} must be strictly later than the observed timestamp {}", + next.counter, + observed.counter + ); +} + +/// `next_node_id` must never repeat across successive calls on the same document. The blake3 output +/// space is enormous, so any collision in a small loop is a counter-bumping bug, not a hash +/// collision. +#[test] +fn next_node_id_is_unique_within_a_document() { + let mut document = fresh_document(PeerId(1)); + + let mut seen = std::collections::HashSet::new(); + for _ in 0..1000 { + let id = document.next_node_id(); + assert!(seen.insert(id), "next_node_id repeated after {} calls", seen.len()); + } +} + +/// Two peers reading the same shared counter must produce different `NodeId`s. This is the whole +/// reason the counter can be shared across peers instead of being per-peer. 
+#[test] +fn next_node_id_differs_across_peers_at_same_counter() { + let mut document_a = fresh_document(PeerId(1)); + let mut document_b = fresh_document(PeerId(2)); + + let id_a = document_a.next_node_id(); + let id_b = document_b.next_node_id(); + assert_ne!(id_a, id_b, "peer-scoping is broken: two peers minted the same NodeId at counter 1"); +} + +fn tiny_network() -> NodeNetwork { + NodeNetwork { + exports: vec![NodeInput::node(RuntimeNodeId(0), 0)], + nodes: [( + RuntimeNodeId(0), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(u32), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + )] + .into_iter() + .collect(), + ..Default::default() + } +} + +/// `verify_history` passes on a normally built history and flags a delta whose content-addressed +/// `id` no longer matches its identity fields (corrupt or crafted history). +#[test] +fn verify_history_detects_rev_mismatch() { + let resources = graphene_resource::ResourceRegistry::new(); + + let mut session = Session::with_peer(PeerId(1)); + session.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("stage failed"); + let last_timestamp = session.hot_log().last().expect("staged a hot op").timestamp; + session.retire(last_timestamp).expect("retire failed"); + + session.verify_history().expect("a freshly built history must validate"); + + // Tamper one delta's stored id (the field, not its key) so it no longer matches its content hash. + let some_rev = *session.document.history.keys().next().expect("history is non-empty"); + session.document.history.get_mut(&some_rev).expect("delta exists").id = 0xdead_beef; + + assert!(matches!(session.verify_history(), Err(crate::CrdtError::RevMismatch { .. 
})), "a tampered delta id must be flagged"); +} + +/// `history_topological` emits parents before children and is a pure function of the delta set: +/// two sessions independently built from the same network produce byte-identical history order. +#[test] +fn history_topological_is_causal_and_deterministic() { + let resources = graphene_resource::ResourceRegistry::new(); + + let build = || { + let mut session = Session::with_peer(PeerId(1)); + session.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("stage failed"); + let last_timestamp = session.hot_log().last().expect("staged at least one hot op").timestamp; + session.retire(last_timestamp).expect("retire failed"); + session + }; + + let session_a = build(); + let session_b = build(); + + let order_a: Vec = session_a.history_topological().iter().map(|delta| delta.id).collect(); + let order_b: Vec = session_b.history_topological().iter().map(|delta| delta.id).collect(); + + assert!(order_a.len() > 1, "expected a multi-delta history to make ordering meaningful"); + assert_eq!(order_a, order_b, "same delta set must serialize in the same topological order"); + + // Every parent that's part of this history precedes its child. + let position: std::collections::HashMap = order_a.iter().enumerate().map(|(i, rev)| (*rev, i)).collect(); + for delta in session_a.history_topological() { + for parent in &delta.parents { + if let Some(parent_pos) = position.get(parent) { + assert!(*parent_pos < position[&delta.id], "parent {parent} must precede child {} in topological order", delta.id); + } + } + } +} + +/// Committing the same NodeNetwork twice must produce zero history entries on the second commit. +/// Without value-only diffing in compute_deltas, the second commit would emit spurious +/// ChangeNodeInput / ChangeNodeAttribute ops because self.registry has real timestamps while the +/// freshly-built `to` registry has TimeStamp::ORIGIN. 
+#[test] +fn stage_from_runtime_is_idempotent_for_unchanged_network() { + let mut session = Session::with_peer(PeerId(1)); + let network = tiny_network(); + + let resources = graphene_resource::ResourceRegistry::new(); + let (first, _) = session.stage_from_runtime(&network, &NoMetadata, &resources).expect("first stage failed"); + assert!(!first.is_empty(), "first stage should produce at least one hot op for the initial network"); + + let (second, _) = session.stage_from_runtime(&network, &NoMetadata, &resources).expect("second stage failed"); + assert_eq!(second.len(), 0, "second stage of unchanged network produced {} spurious hot ops: {:?}", second.len(), second); +} + +/// The peer's first contribution prepends a `RegisterPeer` op (establishing its `UserId` mapping); +/// later contributions don't re-register, and a no-op batch registers nothing. +#[test] +fn first_contribution_registers_the_peer() { + let mut session = Session::with_peer(PeerId(7)); + let resources = graphene_resource::ResourceRegistry::new(); + + assert!(session.registry().peer_users.is_empty(), "no registration before any contribution"); + + let (first, _) = session.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("first stage failed"); + let registrations = first.iter().filter(|hot_op| matches!(hot_op.op, RegistryDelta::RegisterPeer { .. })).count(); + assert_eq!(registrations, 1, "exactly one RegisterPeer on first contribution"); + assert!(matches!(first[0].op, RegistryDelta::RegisterPeer { .. }), "RegisterPeer must precede the edit ops"); + assert_eq!(session.registry().peer_users.get(&PeerId(7)), Some(&crate::UserId(7)), "peer mapped to its UserId"); + + // A second, distinct contribution must not re-register. 
+ let mut other_network = tiny_network(); + other_network.exports.clear(); + let (second, _) = session.stage_from_runtime(&other_network, &NoMetadata, &resources).expect("second stage failed"); + assert!( + !second.iter().any(|hot_op| matches!(hot_op.op, RegistryDelta::RegisterPeer { .. })), + "already-registered peer must not re-register" + ); + + // A no-op batch (re-staging an already-converged network) registers nothing on a fresh peer: + // registration rides a real edit, never a lone op. + let mut fresh = Session::with_peer(PeerId(8)); + fresh.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("seed stage failed"); + let peers_before = fresh.registry().peer_users.clone(); + let (empty, _) = fresh.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("no-op stage failed"); + assert!(empty.is_empty(), "an unchanged re-stage must produce no hot ops"); + assert_eq!(fresh.registry().peer_users, peers_before, "a no-op batch must not add a registration"); +} + +/// A SetExport against a removed network must restore the network from history rather than error. +#[test] +fn set_export_resurrects_absent_network() { + let mut document = fresh_document(PeerId(1)); + let network_id = 7; + + commit_op( + &mut document, + RegistryDelta::AddNetwork { + network: network_id, + contents: Network::default(), + }, + ); + commit_op( + &mut document, + RegistryDelta::RemoveNetwork { + network: network_id, + snapshot: Network::default(), + }, + ); + assert!(!document.registry.networks.contains_key(&network_id), "network should be removed before the resurrection test"); + + commit_op( + &mut document, + RegistryDelta::SetExport { + network: network_id, + slot: 0, + target: None, + }, + ); + + assert!(document.registry.networks.contains_key(&network_id), "SetExport should have resurrected the network"); +} + +/// Cascading resurrection: bringing a node back must also restore its owning network when absent. 
+#[test] +fn add_node_resurrects_owning_network() { + use crate::{Implementation, Node}; + + let mut document = fresh_document(PeerId(1)); + let network_id = 7; + let node_id = 42; + + commit_op( + &mut document, + RegistryDelta::AddNetwork { + network: network_id, + contents: Network::default(), + }, + ); + commit_op( + &mut document, + RegistryDelta::RemoveNetwork { + network: network_id, + snapshot: Network::default(), + }, + ); + + let node = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: Vec::new(), + inputs_attributes: Vec::new(), + attributes: crate::Attributes::new(), + network: network_id, + }; + commit_op(&mut document, RegistryDelta::AddNode { node_id, node }); + + assert!( + document.registry.networks.contains_key(&network_id), + "AddNode should have cascaded a resurrection of the owning network" + ); + assert!(document.registry.node_instances.contains_key(&node_id), "the node itself should also be present"); +} + +/// Reverting the same removal twice (the moral equivalent of two peers concurrently resurrecting +/// the same node) must not error on the second apply. Today the second revert hits +/// `apply_op(AddNode, false)` against a present node and returns `NodeAlreadyExists`. 
+#[test] +fn concurrent_resurrection_via_revert_is_idempotent() { + use crate::{Implementation, Node}; + + let mut document = fresh_document(PeerId(1)); + let network_id = 7; + let node_id = 42; + + commit_op( + &mut document, + RegistryDelta::AddNetwork { + network: network_id, + contents: Network::default(), + }, + ); + let node = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: Vec::new(), + inputs_attributes: Vec::new(), + attributes: crate::Attributes::new(), + network: network_id, + }; + commit_op(&mut document, RegistryDelta::AddNode { node_id, node: node.clone() }); + commit_op(&mut document, RegistryDelta::RemoveNode { node_id, snapshot: node }); + assert!(!document.registry.node_instances.contains_key(&node_id), "node should be removed before the resurrection test"); + + document.restore_node_from_history(RegistryTarget::Working, node_id).expect("first resurrection should succeed"); + assert!(document.registry.node_instances.contains_key(&node_id), "first resurrection should bring the node back"); + + let second = document.restore_node_from_history(RegistryTarget::Working, node_id); + assert!(second.is_ok(), "second resurrection of an already-present node should be a no-op, got {second:?}"); +} + +/// History-based resurrection must work when the matching delta is the *root* commit. The history +/// walk used to drop the root (its empty parent list short-circuited the iterator before yielding +/// it), so a node removed by the very first commit could not be restored. 
+#[test] +fn restore_node_from_root_commit() { + use crate::{Implementation, Node}; + + let mut document = fresh_document(PeerId(1)); + let node_id = 42; + + let node = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: Vec::new(), + inputs_attributes: Vec::new(), + attributes: crate::Attributes::new(), + network: ROOT_NETWORK, + }; + + // Seed the working state so the root commit can remove the node (its reverse is the `AddNode` the + // resurrection looks for). This `RemoveNode` is the only commit, so the match sits at the root. + document.registry.networks.insert(ROOT_NETWORK, Network::default()); + document.retired_snapshot.networks.insert(ROOT_NETWORK, Network::default()); + document.registry.node_instances.insert(node_id, node.clone()); + document.retired_snapshot.node_instances.insert(node_id, node.clone()); + commit_op(&mut document, RegistryDelta::RemoveNode { node_id, snapshot: node }); + assert!(!document.registry.node_instances.contains_key(&node_id), "node should be removed by the root commit"); + + document + .restore_node_from_history(RegistryTarget::Working, node_id) + .expect("resurrection from the root commit should succeed"); + assert!(document.registry.node_instances.contains_key(&node_id), "node must be restored from the root commit"); +} + +/// Erroring ops still bump the clock: we observed the timestamp on the wire, the fact that the +/// op was rejected locally doesn't unobserve it. 
+#[test] +fn apply_op_advances_clock_even_when_op_errors() { + let mut document = fresh_document(PeerId(1)); + + let observed = TimeStamp { counter: 17, peer: PeerId(2) }; + let failing_op = RegistryDelta::ChangeNodeInput { + node_id: 7, + input_idx: 0, + new_input: crate::NodeInput::Import { import_idx: 0 }, + }; + + let result = document.apply_op(failing_op, observed); + + assert!(result.is_err(), "op targeting a nonexistent node should be rejected"); + assert!(document.clock.counter >= observed.counter, "clock should advance on observation even when the op errors"); +} + +// --- Resource CRDT semantics --- + +use crate::{Priority, RegistryDelta as RD, ResourceHash, ResourceId, SourceKey}; + +fn source_key(priority: f64, peer: u64) -> SourceKey { + SourceKey { + priority: Priority::new(priority).expect("test priorities are finite"), + peer: PeerId(peer), + } +} + +fn ts(counter: u64, peer: u64) -> TimeStamp { + TimeStamp { counter, peer: PeerId(peer) } +} + +/// Two peers concurrently add a source to the same resource at distinct priorities. Both survive +/// (add-wins union), ordered by priority. +#[test] +fn concurrent_source_adds_at_distinct_priorities_both_survive() { + let mut document = fresh_document(PeerId(1)); + let id = ResourceId::new(); + + document + .apply_op( + RD::AddSource { + id, + key: source_key(0.5, 1), + source: serde_json::json!("embedded"), + }, + ts(1, 1), + ) + .unwrap(); + document + .apply_op( + RD::AddSource { + id, + key: source_key(0.75, 2), + source: serde_json::json!("url"), + }, + ts(1, 2), + ) + .unwrap(); + + let entry = document.registry.resources.get(&id).expect("resource entry exists"); + assert_eq!(entry.sources.len(), 2, "both concurrent additions survive"); + // The chain iterates in priority order. 
+ let bodies: Vec<_> = entry.sources.iter().map(|(_, v)| v.source.clone()).collect(); + assert_eq!(bodies, vec![serde_json::json!("embedded"), serde_json::json!("url")]); +} + +/// Re-adding the same source key is LWW on its timestamp: a later write wins, an earlier one is ignored. +#[test] +fn same_source_key_is_last_writer_wins() { + let mut document = fresh_document(PeerId(1)); + let id = ResourceId::new(); + let key = source_key(0.5, 1); + + document + .apply_op( + RD::AddSource { + id, + key, + source: serde_json::json!("old"), + }, + ts(5, 1), + ) + .unwrap(); + // Earlier timestamp: ignored. + document + .apply_op( + RD::AddSource { + id, + key, + source: serde_json::json!("stale"), + }, + ts(2, 1), + ) + .unwrap(); + // Later timestamp: wins. + document + .apply_op( + RD::AddSource { + id, + key, + source: serde_json::json!("new"), + }, + ts(9, 1), + ) + .unwrap(); + + let entry = document.registry.resources.get(&id).unwrap(); + assert_eq!(entry.source(&key).unwrap().source, serde_json::json!("new")); +} + +/// SetResourceHash is LWW on the hash; a later resolve wins, an earlier one is ignored. +#[test] +fn register_resource_hash_is_last_writer_wins() { + let mut document = fresh_document(PeerId(1)); + let id = ResourceId::new(); + let hash_a = ResourceHash::from(&b"alpha"[..]); + let hash_b = ResourceHash::from(&b"beta"[..]); + + document.apply_op(RD::SetResourceHash { id, hash: Some(hash_a) }, ts(5, 1)).unwrap(); + document.apply_op(RD::SetResourceHash { id, hash: Some(hash_b) }, ts(2, 1)).unwrap(); + assert_eq!(document.registry.resources.get(&id).unwrap().hash, Some(hash_a), "earlier resolve must not clobber later one"); + + document.apply_op(RD::SetResourceHash { id, hash: Some(hash_b) }, ts(9, 1)).unwrap(); + assert_eq!(document.registry.resources.get(&id).unwrap().hash, Some(hash_b), "later resolve wins"); +} + +/// The reverse delta of a RemoveSource restores the prior source body, and applying op-then-reverse +/// round-trips the source chain. 
+#[test] +fn remove_source_reverse_restores_prior() { + let mut document = fresh_document(PeerId(1)); + let id = ResourceId::new(); + let key = source_key(0.5, 1); + + commit_op( + &mut document, + RD::AddSource { + id, + key, + source: serde_json::json!("kept"), + }, + ); + + // Compute the reverse while the body is still present, then apply the removal. + let reverse = document.compute_reverse_delta(RegistryTarget::Working, &RD::RemoveSource { id, key }).unwrap(); + match &reverse { + RD::AddSource { source, .. } => assert_eq!(*source, serde_json::json!("kept"), "reverse of removal re-adds the body"), + other => panic!("expected AddSource reverse, got {other:?}"), + } + + document.apply_op(RD::RemoveSource { id, key }, ts(5, 1)).unwrap(); + assert!(document.registry.resources.get(&id).unwrap().sources.is_empty(), "source removed"); + + // Applying the reverse restores the chain. + document.apply_op(reverse, ts(6, 1)).unwrap(); + assert_eq!(document.registry.resources.get(&id).unwrap().source(&key).unwrap().source, serde_json::json!("kept")); +} + +/// AddSource on a fresh slot reverses to a RemoveSource; on an occupied slot it restores the prior body. +#[test] +fn add_source_reverse_depends_on_prior_state() { + let mut document = fresh_document(PeerId(1)); + let id = ResourceId::new(); + let key = source_key(0.5, 1); + + // Fresh slot: reverse removes. + let reverse_fresh = document + .compute_reverse_delta( + RegistryTarget::Working, + &RD::AddSource { + id, + key, + source: serde_json::json!("first"), + }, + ) + .unwrap(); + assert!(matches!(reverse_fresh, RD::RemoveSource { .. }), "reverse of add-to-empty is remove, got {reverse_fresh:?}"); + + // Occupy the slot, then reverse of a new add restores the existing body. 
+ document + .apply_op( + RD::AddSource { + id, + key, + source: serde_json::json!("existing"), + }, + ts(1, 1), + ) + .unwrap(); + let reverse_overwrite = document + .compute_reverse_delta( + RegistryTarget::Working, + &RD::AddSource { + id, + key, + source: serde_json::json!("overwrite"), + }, + ) + .unwrap(); + match reverse_overwrite { + RD::AddSource { source, .. } => assert_eq!(source, serde_json::json!("existing"), "reverse restores prior body"), + other => panic!("expected AddSource reverse, got {other:?}"), + } +} + +// --- compute_deltas resource diffing --- + +use crate::{ResourceEntry, ResourceStore, SourceValue}; + +fn entry_with_source(priority: f64, peer: u64, body: serde_json::Value, hash: Option) -> ResourceEntry { + ResourceEntry { + sources: vec![(source_key(priority, peer), SourceValue { source: body, timestamp: ts(1, peer) })], + hash, + hash_timestamp: ts(1, peer), + } +} + +fn registry_with_resources(resources: ResourceStore) -> crate::Registry { + crate::Registry { resources, ..Default::default() } +} + +/// An unchanged resource store produces zero deltas, even when timestamps differ (value-only diff). +#[test] +fn compute_deltas_ignores_unchanged_resources() { + let id = ResourceId::new(); + let hash = ResourceHash::from(&b"img"[..]); + + let mut from = ResourceStore::new(); + from.insert(id, entry_with_source(0.0, 1, serde_json::json!("embedded"), Some(hash))); + // Same value, different timestamps: must not count as a change. 
+ let mut to = ResourceStore::new(); + let mut to_entry = entry_with_source(0.0, 1, serde_json::json!("embedded"), Some(hash)); + to_entry.hash_timestamp = ts(99, 2); + to_entry.sources.iter_mut().for_each(|(_, v)| v.timestamp = ts(99, 2)); + to.insert(id, to_entry); + + let deltas = crate::delta::compute_deltas(®istry_with_resources(from), ®istry_with_resources(to)); + assert!(deltas.is_empty(), "unchanged resource (value-equal) produced deltas: {deltas:?}"); +} + +/// Adding, changing, and removing resources each produce the matching delta, and applying the diff +/// transforms `from` into a registry value-equal to `to`. +#[test] +fn compute_deltas_diffs_resources_and_round_trips() { + let kept = ResourceId::new(); + let removed = ResourceId::new(); + let added = ResourceId::new(); + let hash_old = ResourceHash::from(&b"old"[..]); + let hash_new = ResourceHash::from(&b"new"[..]); + + let mut from = ResourceStore::new(); + from.insert(kept, entry_with_source(0.0, 1, serde_json::json!("embedded"), Some(hash_old))); + from.insert(removed, entry_with_source(0.0, 1, serde_json::json!("gone"), None)); + + let mut to = ResourceStore::new(); + // `kept`: hash changes and a second source is added. + let mut kept_entry = entry_with_source(0.0, 1, serde_json::json!("embedded"), Some(hash_new)); + kept_entry.set_source( + source_key(1.0, 1), + SourceValue { + source: serde_json::json!("url"), + timestamp: ts(1, 1), + }, + ); + to.insert(kept, kept_entry); + // `added`: brand new resource. + to.insert(added, entry_with_source(0.0, 1, serde_json::json!("fresh"), None)); + + let deltas = crate::delta::compute_deltas(®istry_with_resources(from.clone()), ®istry_with_resources(to.clone())); + + // A brand-new resource is a single whole-entry AddResource, never a fan-out of per-source ops. + let added_deltas: Vec<_> = deltas.iter().filter(|d| matches!(d, RD::AddResource { id, .. 
} if *id == added)).collect(); + assert_eq!(added_deltas.len(), 1, "adding a resource should produce exactly one AddResource delta, got {added_deltas:?}"); + assert!( + !deltas.iter().any(|d| matches!(d, RD::AddSource { id, .. } | RD::SetResourceHash { id, .. } if *id == added)), + "a brand-new resource must not emit per-source or hash ops" + ); + // The removed resource is a single whole-entry RemoveResource. + assert_eq!( + deltas.iter().filter(|d| matches!(d, RD::RemoveResource { id, .. } if *id == removed)).count(), + 1, + "removing a resource should produce exactly one RemoveResource delta" + ); + + // Apply the diff to a document seeded with `from`, then check it matches `to` by value. + let mut document = fresh_document(PeerId(1)); + document.registry = registry_with_resources(from); + for op in deltas { + let timestamp = document.clock.tick(); + document.apply_op(op, timestamp).expect("apply resource delta"); + } + + assert!( + document.registry.value_equal(®istry_with_resources(to)), + "applying the resource diff did not reproduce the target registry" + ); +} + +/// Resource GC must keep an undone gesture's resources alive: undo removes a gesture's `AddResource` +/// from the working registry, but redo still needs those bytes. `all_referenced_resource_hashes` must +/// therefore report history-referenced resources even after they leave the current registry, so the +/// editor's GC "used" set doesn't evict them between an undo and a redo. +#[test] +fn all_referenced_resource_hashes_survives_undo() { + use crate::ResourceId; + + let mut session = Session::with_peer(PeerId(1)); + let resources = graphene_resource::ResourceRegistry::new(); + + // Base gesture: the first gesture is intentionally not undoable (the mount-base floor), so commit a + // network first. Undoing the later resource gesture then lands on this base rather than the root. 
+ session.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("stage base"); + let base_up_to = session.hot_log().last().expect("staged base").timestamp; + let base_revs = session.retire(base_up_to).expect("retire base"); + session.mark_gesture_end(*base_revs.last().expect("one base delta")); + + // Second gesture: add a resource and mark the retired delta as a gesture boundary. + let hash = ResourceHash::from(&b"declaration-bytes"[..]); + let id = ResourceId::new(); + let hot_ops = session.stage_embedded_resource(id, hash).expect("stage resource"); + let up_to = hot_ops.last().expect("staged one op").timestamp; + let revs = session.retire(up_to).expect("retire"); + session.mark_gesture_end(*revs.last().expect("one retired delta")); + + assert!(session.registry().resources.contains_key(&id), "resource is present after the gesture"); + assert!(session.all_referenced_resource_hashes().contains(&hash)); + + // Undo the gesture: the resource leaves the working registry but stays in history. + session.undo().expect("undo"); + assert!(!session.registry().resources.contains_key(&id), "undo drops the resource from the working registry"); + assert!( + session.all_referenced_resource_hashes().contains(&hash), + "the undone gesture's resource must still be reported so GC keeps its bytes for redo" + ); +} + +/// A commit that produces no deltas must not touch the redo stack. Redo is only abandoned by a real +/// new edit; a no-op commit (here `embed_resource_sources` over an empty id set) leaving it cleared +/// would silently disable redo after an undo. +#[test] +fn no_op_commit_preserves_redo_stack() { + let mut session = Session::with_peer(PeerId(1)); + let resources = graphene_resource::ResourceRegistry::new(); + + // Base gesture (the non-undoable mount floor), then a second gesture to undo onto it. 
+ session.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("stage base"); + let base_up_to = session.hot_log().last().expect("staged base").timestamp; + let base_revs = session.retire(base_up_to).expect("retire base"); + session.mark_gesture_end(*base_revs.last().expect("one base delta")); + + let hash = ResourceHash::from(&b"declaration-bytes"[..]); + let id = ResourceId::new(); + let hot_ops = session.stage_embedded_resource(id, hash).expect("stage resource"); + let up_to = hot_ops.last().expect("staged one op").timestamp; + let revs = session.retire(up_to).expect("retire"); + session.mark_gesture_end(*revs.last().expect("one retired delta")); + + session.undo().expect("undo"); + assert!(session.can_redo(), "undo must populate the redo stack"); + + // A commit over no resources produces no deltas; redo must survive it. + session.embed_resource_sources(std::iter::empty::()).expect("no-op embed"); + assert!(session.can_redo(), "a no-op commit must not clear the redo stack"); +} + +/// `embed_resource_sources` overwrites the working registry with the snapshot, valid only when no +/// unretired hot ops are present. Called with a non-empty hot log it must error rather than silently +/// drop the hot-zone edits. +#[test] +fn embed_resource_sources_rejects_unretired_hot_ops() { + let mut session = Session::with_peer(PeerId(1)); + let resources = graphene_resource::ResourceRegistry::new(); + + // Stage without retiring, leaving hot ops in the log. + session.stage_from_runtime(&tiny_network(), &NoMetadata, &resources).expect("stage"); + assert!(!session.hot_log().is_empty(), "staging should leave unretired hot ops"); + + let result = session.embed_resource_sources(std::iter::empty::()); + assert!(matches!(result, Err(crate::CrdtError::HotLogNotEmpty)), "expected HotLogNotEmpty, got {result:?}"); +} + +/// A delta's `Rev` is content-addressed, so two byte-equal deltas must hash identically regardless +/// of the order their attributes were inserted. 
This guards the `Attributes` map staying canonically +/// ordered (`BTreeMap`): a hash-randomized map would give the same logical delta different `Rev`s. +#[test] +fn add_node_rev_is_independent_of_attribute_insertion_order() { + use crate::{AttributesExt, Value}; + + let keys = ["ui::position", "ui::display_name", "ui::locked", "ui::pinned", "call_argument", "context_features"]; + + // Fixed implementation so the two nodes differ only in attribute insertion order. + let implementation = Implementation::ProtoNode(ResourceId::new()); + + let make_node = |insertion_order: &[&str]| { + let mut attributes = crate::Attributes::new(); + for &key in insertion_order { + attributes.set(key, serde_json::json!(key), TimeStamp::ORIGIN); + } + + let mut input_attributes = crate::Attributes::new(); + for &key in insertion_order { + input_attributes.insert(key.to_string(), Value::new(serde_json::json!(key), TimeStamp::ORIGIN)); + } + + Node { + implementation: implementation.clone(), + inputs: Vec::new(), + inputs_attributes: vec![input_attributes], + attributes, + network: ROOT_NETWORK, + } + }; + + let forward: Vec<&str> = keys.to_vec(); + let reversed: Vec<&str> = keys.iter().rev().copied().collect(); + + let parents = vec![1, 2]; + let author = PeerId(7); + let timestamp = TimeStamp { counter: 42, peer: PeerId(7) }; + + let delta_forward = Delta::new( + parents.clone(), + author, + timestamp, + RegistryDelta::AddNode { + node_id: 9, + node: make_node(&forward), + }, + RegistryDelta::AddNode { + node_id: 9, + node: make_node(&forward), + }, + ); + let delta_reversed = Delta::new( + parents, + author, + timestamp, + RegistryDelta::AddNode { + node_id: 9, + node: make_node(&reversed), + }, + RegistryDelta::AddNode { + node_id: 9, + node: make_node(&reversed), + }, + ); + + assert_eq!(delta_forward.id, delta_reversed.id, "Rev must not depend on attribute insertion order"); +} diff --git a/document/graph-storage/src/delta.rs b/document/graph-storage/src/delta.rs new file mode 
100644 index 0000000000..1f851ef9dd --- /dev/null +++ b/document/graph-storage/src/delta.rs @@ -0,0 +1,451 @@ +use std::collections::HashSet; + +use crate::{AttributeDelta, NetworkId, Node, NodeId, Registry, RegistryDelta, ResourceEntry, ResourceId}; + +/// Collect a `HashSet` walk (difference/intersection) into ascending order. The sets iterate in +/// random order, so sorting keeps `compute_deltas` emitting a deterministic delta sequence. +fn sorted<'a, T: Ord + Copy + 'a>(ids: impl Iterator) -> Vec { + let mut ids: Vec = ids.copied().collect(); + ids.sort_unstable(); + ids +} + +/// Minimal set of deltas to transform `from` into `to`. +/// +/// Emits timestamp-less op shapes; the caller (`Document::commit_local` or equivalent) wraps each +/// in a `Delta` with a fresh clock tick. +pub fn compute_deltas(from: &Registry, to: &Registry) -> Vec { + let mut deltas = Vec::new(); + + let from_network_ids: HashSet = from.networks.keys().copied().collect(); + let to_network_ids: HashSet = to.networks.keys().copied().collect(); + + // AddNetwork before any AddNode that references it. `HashSet` difference/intersection iterate in + // random order, so every set walk below is sorted to keep the emitted delta sequence (and thus the + // resulting `Rev` chain) deterministic across runs. 
+ for network_id in sorted(to_network_ids.difference(&from_network_ids)) { + deltas.push(RegistryDelta::AddNetwork { + network: network_id, + contents: to.networks[&network_id].clone(), + }); + } + + let from_node_ids: HashSet = from.node_instances.keys().copied().collect(); + let to_node_ids: HashSet = to.node_instances.keys().copied().collect(); + + for node_id in sorted(from_node_ids.difference(&to_node_ids)) { + deltas.push(RegistryDelta::RemoveNode { + node_id, + snapshot: from.node_instances[&node_id].clone(), + }); + } + + for node_id in sorted(to_node_ids.difference(&from_node_ids)) { + deltas.push(RegistryDelta::AddNode { + node_id, + node: to.node_instances[&node_id].clone(), + }); + } + + for node_id in sorted(from_node_ids.intersection(&to_node_ids)) { + let from_node = &from.node_instances[&node_id]; + let to_node = &to.node_instances[&node_id]; + + // No `ChangeImplementation` op; the only path is remove + re-add. Same for input-count and + // containing-network changes (a moved node has no in-place op either). `inputs_attributes` is + // checked too: the per-slot loops below `zip` only the shared prefix, so a length change there + // must force a remove + re-add rather than silently dropping the extra slots. + let structural_change = !nodes_have_same_implementation(from_node, to_node) + || from_node.inputs.len() != to_node.inputs.len() + || from_node.inputs_attributes.len() != to_node.inputs_attributes.len() + || from_node.network != to_node.network; + if structural_change { + deltas.push(RegistryDelta::RemoveNode { node_id, snapshot: from_node.clone() }); + deltas.push(RegistryDelta::AddNode { node_id, node: to_node.clone() }); + continue; + } + + // Compare by value, ignoring the per-slot timestamp. Timestamps are derived from the diff + // (assigned by the caller via clock.tick), not part of the diff itself: a slot whose value + // is unchanged but whose timestamp differs should not emit a delta. 
+ for (input_idx, (from_slot, to_slot)) in from_node.inputs.iter().zip(&to_node.inputs).enumerate() { + if from_slot.input != to_slot.input { + deltas.push(RegistryDelta::ChangeNodeInput { + node_id, + input_idx, + new_input: to_slot.input.clone(), + }); + } + } + + for delta in compute_attribute_deltas(&from_node.attributes, &to_node.attributes) { + deltas.push(RegistryDelta::ChangeNodeAttribute { node_id, delta }); + } + + for (input_idx, (from_attrs, to_attrs)) in from_node.inputs_attributes.iter().zip(&to_node.inputs_attributes).enumerate() { + for delta in compute_attribute_deltas(from_attrs, to_attrs) { + deltas.push(RegistryDelta::ChangeNodeInputAttribute { node_id, input_idx, delta }); + } + } + } + + for network_id in sorted(from_network_ids.difference(&to_network_ids)) { + deltas.push(RegistryDelta::RemoveNetwork { + network: network_id, + snapshot: from.networks[&network_id].clone(), + }); + } + + for network_id in sorted(from_network_ids.intersection(&to_network_ids)) { + let from_network = &from.networks[&network_id]; + let to_network = &to.networks[&network_id]; + + let max_len = from_network.exports.len().max(to_network.exports.len()); + for slot_idx in 0..max_len { + let from_slot = from_network.exports.get(slot_idx); + let to_slot = to_network.exports.get(slot_idx); + + let from_target = from_slot.and_then(|s| s.target.as_ref()); + let to_target = to_slot.and_then(|s| s.target.as_ref()); + if from_target != to_target { + deltas.push(RegistryDelta::SetExport { + network: network_id, + slot: slot_idx as u32, + target: to_target.cloned(), + }); + } + } + + // Per-network attributes. + for delta in compute_attribute_deltas(&from_network.attributes, &to_network.attributes) { + deltas.push(RegistryDelta::ChangeNetworkAttribute { network: network_id, delta }); + } + } + + // Document-level attributes (`ui::doc::*`, format version, ...). 
+ for delta in compute_attribute_deltas(&from.attributes, &to.attributes) { + deltas.push(RegistryDelta::ChangeDocumentAttribute { delta }); + } + + // Public library export list (whole-list LWW). + if from.exported_nodes != to.exported_nodes { + deltas.push(RegistryDelta::SetExportedNodes { nodes: to.exported_nodes.clone() }); + } + + compute_resource_deltas(from, to, &mut deltas); + + deltas +} + +/// Diff the resource store, emitting whole-entry add/remove for resources that appear or vanish and +/// fine-grained hash/source ops for resources present in both. Value-only: per-entry and per-source +/// timestamps are derived by the caller, so an unchanged resource emits nothing. +fn compute_resource_deltas(from: &Registry, to: &Registry, deltas: &mut Vec) { + let from_ids: HashSet = from.resources.keys().copied().collect(); + let to_ids: HashSet = to.resources.keys().copied().collect(); + + for id in sorted(from_ids.difference(&to_ids)) { + deltas.push(RegistryDelta::RemoveResource { + id, + snapshot: from.resources[&id].clone(), + }); + } + + for id in sorted(to_ids.difference(&from_ids)) { + deltas.push(RegistryDelta::AddResource { id, entry: to.resources[&id].clone() }); + } + + for id in sorted(from_ids.intersection(&to_ids)) { + diff_resource_entry(id, &from.resources[&id], &to.resources[&id], deltas); + } +} + +/// Per-entry diff for a resource present in both registries: hash change, then source chain +/// additions/changes/removals. +fn diff_resource_entry(id: ResourceId, from: &ResourceEntry, to: &ResourceEntry, deltas: &mut Vec) { + if from.hash != to.hash { + deltas.push(RegistryDelta::SetResourceHash { id, hash: to.hash }); + } + + for (key, _) in &from.sources { + if to.source(key).is_none() { + deltas.push(RegistryDelta::RemoveSource { id, key: *key }); + } + } + + // Compare source bodies only; the per-source timestamp is derived from the diff, not part of it. 
+ for (key, to_source) in &to.sources { + if from.source(key).is_none_or(|from_source| from_source.source != to_source.source) { + deltas.push(RegistryDelta::AddSource { + id, + key: *key, + source: to_source.source.clone(), + }); + } + } +} + +fn nodes_have_same_implementation(a: &Node, b: &Node) -> bool { + use crate::Implementation::*; + match (&a.implementation, &b.implementation) { + (ProtoNode(a_id), ProtoNode(b_id)) => a_id == b_id, + (Network(a_id), Network(b_id)) => a_id == b_id, + _ => false, + } +} + +fn compute_attribute_deltas(from: &crate::Attributes, to: &crate::Attributes) -> Vec { + let mut deltas = Vec::new(); + + for key in from.keys() { + if !to.contains_key(key) { + deltas.push(AttributeDelta { key: key.clone(), value: None }); + } + } + + // Compare by `value` only; the per-entry `timestamp` is derived from the diff, not part of it. + for (key, to_value) in to { + if from.get(key).is_none_or(|from_value| from_value.value != to_value.value) { + deltas.push(AttributeDelta { + key: key.clone(), + value: Some(to_value.value.clone()), + }); + } + } + + deltas +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Attributes, ExportSlot, Implementation, Network, Node, NodeInput, TimeStamp}; + + #[test] + fn test_compute_deltas_empty() { + let registry = Registry::default(); + + let deltas = compute_deltas(®istry, ®istry); + assert_eq!(deltas.len(), 0, "No deltas should be generated for identical registries"); + } + + /// The emitted delta sequence must not depend on `HashMap`/`HashSet` iteration order, which varies + /// per run and per compiler version. Building the same registry repeatedly (each `HashMap` gets a + /// fresh random seed) must yield identical `AddNode` order, since the diff sorts its set walks. 
+ #[test] + fn compute_deltas_emits_nodes_in_deterministic_order() { + let make_registry = || { + let mut registry = Registry::default(); + registry.networks.insert(0, Network::default()); + for node_id in [50, 3, 17, 999, 1, 42, 8, 256, 100, 7] { + registry.node_instances.insert( + node_id, + Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: vec![], + inputs_attributes: vec![], + attributes: Attributes::new(), + network: 0, + }, + ); + } + registry + }; + + let empty = Registry::default(); + let add_node_ids = |registry: &Registry| -> Vec { + compute_deltas(&empty, registry) + .into_iter() + .filter_map(|delta| match delta { + RegistryDelta::AddNode { node_id, .. } => Some(node_id), + _ => None, + }) + .collect() + }; + + let expected = vec![1, 3, 7, 8, 17, 42, 50, 100, 256, 999]; + for _ in 0..16 { + assert_eq!(add_node_ids(&make_registry()), expected, "AddNode order must be deterministic (ascending)"); + } + } + + #[test] + fn test_compute_deltas_add_node() { + let from = Registry::default(); + + let mut to = from.clone(); + let node = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: vec![], + inputs_attributes: vec![], + attributes: Attributes::new(), + network: 0, + }; + to.node_instances.insert(42, node); + + let deltas = compute_deltas(&from, &to); + assert_eq!(deltas.len(), 1); + assert!(matches!(deltas[0], RegistryDelta::AddNode { node_id: 42, .. })); + } + + /// A change in `inputs_attributes` length is structural: the per-slot diff only `zip`s the shared + /// prefix, so it must force a remove + re-add rather than dropping the extra attribute slots. + #[test] + fn compute_deltas_treats_inputs_attributes_length_change_as_structural() { + // Same implementation/inputs/network in both registries; only `inputs_attributes` length differs. 
+ let base = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: vec![], + inputs_attributes: vec![Attributes::new()], + attributes: Attributes::new(), + network: 0, + }; + + let mut from = Registry::default(); + from.node_instances.insert(42, base.clone()); + + let mut to = from.clone(); + to.node_instances.get_mut(&42).unwrap().inputs_attributes.push(Attributes::new()); + + let deltas = compute_deltas(&from, &to); + assert!( + deltas.iter().any(|delta| matches!(delta, RegistryDelta::RemoveNode { node_id: 42, .. })) && deltas.iter().any(|delta| matches!(delta, RegistryDelta::AddNode { node_id: 42, .. })), + "an inputs_attributes length change must emit RemoveNode + AddNode, got {deltas:?}" + ); + } + + #[test] + fn test_compute_deltas_change_network_attribute() { + use crate::{AttributesExt, TimeStamp}; + + let mut from = Registry::default(); + from.networks.insert(0, Network::default()); + + let mut to = from.clone(); + to.networks.get_mut(&0).unwrap().attributes.set("ui::nav::width", serde_json::json!(640.0), TimeStamp::ORIGIN); + + let deltas = compute_deltas(&from, &to); + assert_eq!(deltas.len(), 1, "a changed per-network attribute must emit one delta"); + assert!( + matches!(&deltas[0], RegistryDelta::ChangeNetworkAttribute { network: 0, delta } if delta.key == "ui::nav::width"), + "expected ChangeNetworkAttribute for ui::nav::width, got {:?}", + deltas[0] + ); + } + + #[test] + fn test_compute_deltas_remove_node() { + let mut from = Registry::default(); + + let node = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: vec![], + inputs_attributes: vec![], + attributes: Attributes::new(), + network: 0, + }; + from.node_instances.insert(42, node); + + let to = Registry::default(); + + let deltas = compute_deltas(&from, &to); + assert_eq!(deltas.len(), 1); + assert!(matches!(deltas[0], RegistryDelta::RemoveNode { node_id: 42, .. 
})); + } + + #[test] + fn test_compute_deltas_modify_attribute() { + let mut from = Registry::default(); + + let mut node = Node { + implementation: Implementation::ProtoNode(ResourceId::new()), + inputs: vec![], + inputs_attributes: vec![], + attributes: Attributes::new(), + network: 0, + }; + let stamp = |counter: u64| TimeStamp { counter, peer: crate::PeerId(0) }; + node.attributes.insert( + "test".to_string(), + crate::Value { + value: serde_json::json!("old"), + timestamp: stamp(0), + }, + ); + from.node_instances.insert(42, node); + + let mut to = from.clone(); + to.node_instances.get_mut(&42).unwrap().attributes.insert( + "test".to_string(), + crate::Value { + value: serde_json::json!("new"), + timestamp: stamp(1), + }, + ); + + let deltas = compute_deltas(&from, &to); + assert_eq!(deltas.len(), 1); + assert!(matches!( + &deltas[0], + RegistryDelta::ChangeNodeAttribute { node_id: 42, delta: AttributeDelta { key, value: Some(_) } } if key == "test" + )); + } + + /// Document-level attributes (the `Registry.attributes` bucket) must diff into + /// `ChangeDocumentAttribute` deltas, so a document-scoped attribute change reaches the commit path. + /// (Per-peer `ui::doc::*` view settings live in `session.json`, not here.) 
+ #[test] + fn test_compute_deltas_document_attribute() { + let stamp = |counter: u64| TimeStamp { counter, peer: crate::PeerId(0) }; + let from = Registry::default(); + + let mut to = from.clone(); + to.attributes.insert( + "doc::test_attribute".to_string(), + crate::Value { + value: serde_json::json!("value"), + timestamp: stamp(1), + }, + ); + + let deltas = compute_deltas(&from, &to); + assert_eq!(deltas.len(), 1); + assert!(matches!( + &deltas[0], + RegistryDelta::ChangeDocumentAttribute { delta: AttributeDelta { key, value: Some(_) } } if key == "doc::test_attribute" + )); + } + + #[test] + fn test_compute_deltas_network_changes() { + let make_slot = |id: u64| ExportSlot { + target: Some(NodeInput::Node { node_id: id, output_index: 0 }), + timestamp: TimeStamp::ORIGIN, + }; + + let mut from = Registry::default(); + from.networks.insert( + 0, + Network { + exports: vec![make_slot(1), make_slot(2)], + ..Default::default() + }, + ); + + let mut to = from.clone(); + to.networks.get_mut(&0).unwrap().exports.push(make_slot(3)); + + let deltas = compute_deltas(&from, &to); + // Only slot 2 changed (added). Slots 0 and 1 are unchanged so they don't emit ops. + assert_eq!(deltas.len(), 1); + assert!(matches!( + &deltas[0], + RegistryDelta::SetExport { + network: 0, + slot: 2, + target: Some(NodeInput::Node { node_id: 3, .. }), + .. 
+ } + )); + } +} diff --git a/document/graph-storage/src/document.rs b/document/graph-storage/src/document.rs new file mode 100644 index 0000000000..c60db1c979 --- /dev/null +++ b/document/graph-storage/src/document.rs @@ -0,0 +1,482 @@ +use crate::{ + CrdtError, Delta, ExportSlot, HotOp, LamportClock, MAX_EXPORT_SLOTS, NetworkId, NodeId, NodeInput, PeerId, Registry, RegistryDelta, ResourceEntry, Rev, SourceValue, TimeStamp, Value, + apply_attribute_delta, attr, mint_node_id, reverse_attribute_delta, +}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Document { + /// Working registry: retired state with the current hot ops applied on top. This is what live + /// reads and `registry()` observe, and what undo/redo force-apply against. + pub(crate) registry: Registry, + /// The registry as of the last retirement, with no un-retired hot ops applied. Retirement computes + /// each delta's `reverse` against this (so LWW reverses capture the true pre-op value, not the + /// hot-polluted working state) and advances it, stamping fields at the fresh `T_retire`. Kept equal + /// to `registry` *by value* whenever the hot log is empty (undo/redo resync it after moving the + /// cursor), but field timestamps can differ: retirement bumps the snapshot's to `T_retire` while the + /// working registry keeps the staging-time timestamps. Benign while the local monotonic clock makes + /// new edits win + pub(crate) retired_snapshot: Registry, + pub(crate) history: HashMap, + /// Live broadcast stream — applied to the registry on receive, GC'd at retirement. + /// Persisted for crash recovery so in-flight unretired work survives editor restarts. + pub(crate) hot_log: Vec, + /// User's cursor in their local chain. + pub(crate) head: Rev, + /// Revs undone past (most-recent last), so `redo` can re-apply them. Local-view state the DAG can't + /// recover (a parent may have several children). 
A new edit while non-empty clears it. + pub(crate) redo_stack: Vec, + pub(crate) clock: LamportClock, + pub(crate) peer: PeerId, + /// Latest retired commit on the local chain that has been broadcast to at least one peer. + /// Commits after this can be rewritten silently; commits at or before this are published + /// and require forward reverse-delta ops to undo. `None` means nothing broadcast yet. + pub(crate) last_broadcast_rev: Option, + /// Shared-monotonic counter feeding `next_node_id`. Bumped on every mint regardless of which + /// peer is calling; collision avoidance comes from hashing `(self.peer, counter)`, so two peers + /// reading the same counter still produce distinct IDs. + pub(crate) next_node_counter: u64, +} + +impl Document { + /// Mint a fresh `NodeId` scoped to this document's peer. The 64-bit ID is `blake3(peer, counter)` + /// truncated; the counter is shared across peers and persisted with the document. + pub fn next_node_id(&mut self) -> NodeId { + self.next_node_counter += 1; + mint_node_id(self.peer, self.next_node_counter) + } + + pub(crate) fn restore_node_from_history(&mut self, target: RegistryTarget, old_node_id: NodeId) -> Result<(), CrdtError> { + let delta = self + .history_iter() + .find(|d| matches!(d.reverse, RegistryDelta::AddNode { node_id, .. } if node_id == old_node_id)) + .ok_or(CrdtError::NodeNotInHistory(old_node_id))? + .clone(); + self.revert_delta(target, delta) + } + + pub(crate) fn restore_network_from_history(&mut self, target: RegistryTarget, network_id: NetworkId) -> Result<(), CrdtError> { + // Find the Delta whose forward op removed this network. Its `reverse` is `AddNetwork`, + // which is what we want to re-apply. + let delta = self + .history_iter() + .find(|d| matches!(&d.reverse, RegistryDelta::AddNetwork { network, .. } if *network == network_id)) + .ok_or(CrdtError::NetworkNotInHistory(network_id))? 
+ .clone(); + self.revert_delta(target, delta) + } + + /// Apply a delta's `reverse` as the new forward op (silent-zone undo). Force-applied: structural + /// ops are idempotent, and LWW arms assign the reverse value unconditionally even though it carries + /// the same timestamp as the forward op it undoes. + pub(crate) fn revert_delta(&mut self, target: RegistryTarget, mut delta: Delta) -> Result<(), CrdtError> { + std::mem::swap(&mut delta.delta_type, &mut delta.reverse); + for parent in &delta.parents { + if !self.history.contains_key(parent) { + return Err(CrdtError::NotFoundInHistory(*parent)); + } + } + self.apply_op_with(target, delta.delta_type, delta.timestamp, ApplyMode::Force) + } + + /// Apply a live broadcast op. Updates the registry via LWW and appends to the hot log. + /// Doesn't touch history or `head` — hot ops are transient. + pub fn apply_hot_op(&mut self, hot_op: HotOp) -> Result<(), CrdtError> { + self.apply_hot_op_with(hot_op, false) + } + + /// Replay a hot op recovered from persisted state. Idempotent on structural ops so that + /// re-applying an op whose effect is already reflected in the registry is a no-op rather + /// than an error. + pub fn replay_hot_op(&mut self, hot_op: HotOp) -> Result<(), CrdtError> { + self.apply_hot_op_with(hot_op, true) + } + + fn apply_hot_op_with(&mut self, hot_op: HotOp, idempotent: bool) -> Result<(), CrdtError> { + if idempotent { + self.apply_op_idempotent(hot_op.op.clone(), hot_op.timestamp)?; + } else { + self.apply_op(hot_op.op.clone(), hot_op.timestamp)?; + } + self.hot_log.push(hot_op); + Ok(()) + } + + /// Apply a retired commit. Idempotent on structural ops (AddNode/AddNetwork on existing + /// targets, Remove on missing ones) since hot ops already produced the structural state. + /// The point is to bump field timestamps to T_retire via the LWW arms. 
+ pub fn apply_retired_delta(&mut self, delta: Delta) -> Result<(), CrdtError> { + for parent in &delta.parents { + if !self.history.contains_key(parent) { + return Err(CrdtError::NotFoundInHistory(*parent)); + } + } + self.apply_op_idempotent(delta.delta_type.clone(), delta.timestamp)?; + self.history.insert(delta.id, delta); + Ok(()) + } + + /// The registry an apply reads and writes, resolved from the explicit [`RegistryTarget`]. + fn registry_mut(&mut self, target: RegistryTarget) -> &mut Registry { + match target { + RegistryTarget::Working => &mut self.registry, + RegistryTarget::Snapshot => &mut self.retired_snapshot, + } + } + + fn registry_ref(&self, target: RegistryTarget) -> &Registry { + match target { + RegistryTarget::Working => &self.registry, + RegistryTarget::Snapshot => &self.retired_snapshot, + } + } + + /// New local/remote op against the working registry: structural ops error on duplicate/missing + /// targets; LWW arms keep the newer-timestamp value (strict `>`). The common entry point for edits. + pub(crate) fn apply_op(&mut self, op: RegistryDelta, timestamp: TimeStamp) -> Result<(), CrdtError> { + self.apply_op_with(RegistryTarget::Working, op, timestamp, ApplyMode::Live) + } + + /// Replay/retire against the working registry: structural ops skip duplicate/missing targets (the + /// state is already present from hot ops or a prior snapshot); LWW arms still gate on strict `>`. + pub(crate) fn apply_op_idempotent(&mut self, op: RegistryDelta, timestamp: TimeStamp) -> Result<(), CrdtError> { + self.apply_op_with(RegistryTarget::Working, op, timestamp, ApplyMode::Idempotent) + } + + /// Silent-zone undo/redo rewind against the working registry: structural ops are idempotent, and + /// LWW arms assign unconditionally. We own the single-writer chain here, so the precomputed reverse + /// (undo) or forward (redo) value is authoritative even though its timestamp ties what it replaces. 
+ pub(crate) fn force_apply_op(&mut self, op: RegistryDelta, timestamp: TimeStamp) -> Result<(), CrdtError> { + self.apply_op_with(RegistryTarget::Working, op, timestamp, ApplyMode::Force) + } + + pub(crate) fn apply_op_with(&mut self, target: RegistryTarget, op: RegistryDelta, timestamp: TimeStamp, mode: ApplyMode) -> Result<(), CrdtError> { + // Advance the local clock past every observed op, including ones that subsequently no-op or + // error. Observation is about causality knowledge, not about whether the op took effect. + self.clock.observe(timestamp); + + // Structural ops skip (rather than error) on duplicate/missing targets when not a fresh edit; + // LWW arms assign unconditionally only under `Force`. + let idempotent = mode != ApplyMode::Live; + let force = mode == ApplyMode::Force; + + // Resurrect any concurrently-removed targets the op references before binding the registry + // (resurrection re-borrows `self` via history), so the mutation below holds one `registry` ref. + self.ensure_referenced_exist(target, &op)?; + + let registry = self.registry_mut(target); + match op { + RegistryDelta::AddNode { node_id, node } => { + if registry.node_instances.contains_key(&node_id) { + if idempotent { + // Hot ops already created this node; skip rather than error. + return Ok(()); + } + return Err(CrdtError::NodeAlreadyExists(node_id)); + } + registry.node_instances.insert(node_id, node); + } + RegistryDelta::RemoveNode { node_id, .. 
} => { + registry.node_instances.remove(&node_id); + } + RegistryDelta::ChangeNodeInput { node_id, input_idx, new_input } => { + let node = registry.node_instances.get_mut(&node_id).ok_or(CrdtError::TargetNodeDoesNotExist(node_id))?; + let slot = node.inputs.get_mut(input_idx).ok_or(CrdtError::InputIndexOutOfBounds(input_idx))?; + if force || timestamp > slot.timestamp { + slot.input = new_input; + slot.timestamp = timestamp; + } + } + RegistryDelta::ChangeNodeAttribute { node_id, delta } => { + let node = registry.node_instances.get_mut(&node_id).ok_or(CrdtError::TargetNodeDoesNotExist(node_id))?; + apply_attribute_delta(delta, timestamp, force, &mut node.attributes); + } + RegistryDelta::ChangeNodeInputAttribute { node_id, input_idx, delta } => { + let node = registry.node_instances.get_mut(&node_id).ok_or(CrdtError::TargetNodeDoesNotExist(node_id))?; + let input_attributes = node.inputs_attributes.get_mut(input_idx).ok_or(CrdtError::InputIndexOutOfBounds(input_idx))?; + apply_attribute_delta(delta, timestamp, force, input_attributes); + } + RegistryDelta::SetExport { network, slot, target: export_target } => { + let net = registry.networks.get_mut(&network).ok_or(CrdtError::NetworkDoesNotExist(network))?; + let slot_idx = slot as usize; + + if slot_idx >= net.exports.len() { + if slot_idx >= MAX_EXPORT_SLOTS { + return Err(CrdtError::ExportSlotOutOfBounds(slot)); + } + net.exports.resize( + slot_idx + 1, + ExportSlot { + target: None, + timestamp: TimeStamp::ORIGIN, + }, + ); + } + + let existing = &mut net.exports[slot_idx]; + if force || timestamp > existing.timestamp { + existing.target = export_target; + existing.timestamp = timestamp; + } + } + RegistryDelta::AddNetwork { network, contents } => { + if registry.networks.contains_key(&network) { + if idempotent { + return Ok(()); + } + return Err(CrdtError::NetworkAlreadyExists(network)); + } + registry.networks.insert(network, contents); + } + RegistryDelta::RemoveNetwork { network, .. 
} => { + registry.networks.remove(&network); + } + RegistryDelta::SetExportedNodes { nodes } => { + let current_ts = registry.attributes.get(attr::EXPORTED_NODES_TS).map(|v| v.timestamp).unwrap_or(TimeStamp::ORIGIN); + if force || timestamp > current_ts { + registry.exported_nodes = nodes; + registry.attributes.insert( + attr::EXPORTED_NODES_TS.to_string(), + Value { + value: serde_json::Value::Null, + timestamp, + }, + ); + } + } + RegistryDelta::ChangeNetworkAttribute { network, delta } => { + let net = registry.networks.get_mut(&network).ok_or(CrdtError::NetworkDoesNotExist(network))?; + apply_attribute_delta(delta, timestamp, force, &mut net.attributes); + } + RegistryDelta::ChangeDocumentAttribute { delta } => { + apply_attribute_delta(delta, timestamp, force, &mut registry.attributes); + } + RegistryDelta::RegisterPeer { peer, user } => match registry.peer_users.get(&peer) { + Some(existing) if *existing != user => return Err(CrdtError::PeerRegistrationConflict(peer)), + Some(_) => {} + None => { + registry.peer_users.insert(peer, user); + } + }, + RegistryDelta::SetResourceHash { id, hash } => { + let entry = registry.resources.entry(id).or_default(); + if force || timestamp > entry.hash_timestamp { + entry.hash = hash; + entry.hash_timestamp = timestamp; + } + } + RegistryDelta::AddSource { id, key, source } => { + let entry = registry.resources.entry(id).or_default(); + let value = SourceValue { source, timestamp }; + if force { entry.force_set_source(key, value) } else { entry.set_source(key, value) } + } + RegistryDelta::RemoveSource { id, key } => { + if let Some(entry) = registry.resources.get_mut(&id) { + if force { + entry.force_remove_source(&key); + } else { + entry.remove_source(&key, timestamp); + } + } + } + RegistryDelta::AddResource { id, entry } => { + registry.resources.insert(id, entry); + } + RegistryDelta::RemoveResource { id, .. 
} => { + registry.resources.remove(&id); + } + } + Ok(()) + } + + /// Resurrect (from history) any nodes/networks an op references that were concurrently removed, so + /// the op applies against a consistent registry. Cascading: a node's owning network is restored + /// before the node. No-op for ops that reference nothing absent. + fn ensure_referenced_exist(&mut self, target: RegistryTarget, op: &RegistryDelta) -> Result<(), CrdtError> { + match op { + RegistryDelta::AddNode { node, .. } => self.ensure_network_exists(target, node.network())?, + RegistryDelta::ChangeNodeInput { node_id, new_input, .. } => { + if let NodeInput::Node { node_id: referenced, .. } = new_input { + self.ensure_node_exists(target, *referenced)?; + } + self.ensure_node_exists(target, *node_id)?; + } + RegistryDelta::ChangeNodeAttribute { node_id, .. } | RegistryDelta::ChangeNodeInputAttribute { node_id, .. } => self.ensure_node_exists(target, *node_id)?, + RegistryDelta::SetExport { network, target: export_target, .. } => { + if let Some(NodeInput::Node { node_id: referenced, .. }) = export_target { + self.ensure_node_exists(target, *referenced)?; + } + self.ensure_network_exists(target, *network)?; + } + RegistryDelta::ChangeNetworkAttribute { network, .. } => self.ensure_network_exists(target, *network)?, + _ => {} + } + Ok(()) + } + + fn ensure_node_exists(&mut self, target: RegistryTarget, node_id: u64) -> Result<(), CrdtError> { + if !self.registry_ref(target).node_instances.contains_key(&node_id) { + self.restore_node_from_history(target, node_id)?; + } + Ok(()) + } + + fn ensure_network_exists(&mut self, target: RegistryTarget, network_id: NetworkId) -> Result<(), CrdtError> { + if !self.registry_ref(target).networks.contains_key(&network_id) { + self.restore_network_from_history(target, network_id)?; + } + Ok(()) + } + + /// Compute the inverse of `delta` against the registry named by `target`. 
Retirement passes + /// [`RegistryTarget::Snapshot`] so LWW reverses (export target, inputs, attributes, resource hash) + /// capture the true pre-op value rather than the hot-polluted working state. + pub(crate) fn compute_reverse_delta(&self, target: RegistryTarget, delta: &RegistryDelta) -> Result { + let registry = self.registry_ref(target); + Ok(match delta { + RegistryDelta::AddNode { node_id, node } => RegistryDelta::RemoveNode { + node_id: *node_id, + snapshot: node.clone(), + }, + RegistryDelta::RemoveNode { node_id, snapshot } => RegistryDelta::AddNode { + node_id: *node_id, + node: snapshot.clone(), + }, + &RegistryDelta::ChangeNodeInput { node_id, input_idx, .. } => { + let node = registry.node_instances.get(&node_id).ok_or(CrdtError::TargetNodeDoesNotExist(node_id))?; + let slot = node.inputs().get(input_idx).ok_or(CrdtError::InputIndexOutOfBounds(input_idx))?; + RegistryDelta::ChangeNodeInput { + node_id, + input_idx, + new_input: slot.input.clone(), + } + } + &RegistryDelta::ChangeNodeAttribute { node_id, ref delta } => { + let node = registry.node_instances.get(&node_id).ok_or(CrdtError::TargetNodeDoesNotExist(node_id))?; + RegistryDelta::ChangeNodeAttribute { + node_id, + delta: reverse_attribute_delta(delta, node.attributes()), + } + } + &RegistryDelta::ChangeNodeInputAttribute { node_id, input_idx, ref delta } => { + let node = registry.node_instances.get(&node_id).ok_or(CrdtError::TargetNodeDoesNotExist(node_id))?; + let input_attributes = node.inputs_attributes().get(input_idx).ok_or(CrdtError::InputIndexOutOfBounds(input_idx))?; + RegistryDelta::ChangeNodeInputAttribute { + node_id, + input_idx, + delta: reverse_attribute_delta(delta, input_attributes), + } + } + &RegistryDelta::SetExport { network, slot, .. } => { + // If the network is absent the forward op will resurrect it; the reverse is "set the export to None" + // since pre-forward there was no export to point at. 
+ let export_target = registry.networks.get(&network).and_then(|net| net.exports.get(slot as usize)).and_then(|s| s.target.clone()); + RegistryDelta::SetExport { network, slot, target: export_target } + } + RegistryDelta::AddNetwork { network, contents } => RegistryDelta::RemoveNetwork { + network: *network, + snapshot: contents.clone(), + }, + &RegistryDelta::RemoveNetwork { network, ref snapshot } => RegistryDelta::AddNetwork { network, contents: snapshot.clone() }, + RegistryDelta::SetExportedNodes { .. } => RegistryDelta::SetExportedNodes { + nodes: registry.exported_nodes.clone(), + }, + &RegistryDelta::ChangeNetworkAttribute { network, ref delta } => { + let current = registry.networks.get(&network).map(|net| &net.attributes).ok_or(CrdtError::NetworkDoesNotExist(network))?; + RegistryDelta::ChangeNetworkAttribute { + network, + delta: reverse_attribute_delta(delta, current), + } + } + RegistryDelta::ChangeDocumentAttribute { delta } => RegistryDelta::ChangeDocumentAttribute { + delta: reverse_attribute_delta(delta, ®istry.attributes), + }, + // Registrations are append-only and not user-undoable; reverse is the same op, + // which applies as a no-op on the already-registered PeerId. + &RegistryDelta::RegisterPeer { peer, user } => RegistryDelta::RegisterPeer { peer, user }, + &RegistryDelta::SetResourceHash { id, .. } => RegistryDelta::SetResourceHash { + id, + hash: registry.resources.get(&id).and_then(|entry| entry.hash), + }, + &RegistryDelta::AddSource { id, key, .. } => match registry.resources.get(&id).and_then(|entry| entry.source(&key)) { + // The slot already held a source: undo restores it. + Some(existing) => RegistryDelta::AddSource { + id, + key, + source: existing.source.clone(), + }, + // The slot was empty: undo removes what this op added. 
+ None => RegistryDelta::RemoveSource { id, key }, + }, + &RegistryDelta::RemoveSource { id, key } => match registry.resources.get(&id).and_then(|entry| entry.source(&key)) { + Some(existing) => RegistryDelta::AddSource { + id, + key, + source: existing.source.clone(), + }, + // Nothing to restore; reverse is a no-op removal. + None => RegistryDelta::RemoveSource { id, key }, + }, + &RegistryDelta::AddResource { id, .. } => match registry.resources.get(&id) { + // Overwrote an existing entry: undo restores it. + Some(existing) => RegistryDelta::AddResource { id, entry: existing.clone() }, + // Created a new entry: undo removes what this op added (snapshot is empty since there was nothing prior). + None => RegistryDelta::RemoveResource { + id, + snapshot: ResourceEntry::default(), + }, + }, + &RegistryDelta::RemoveResource { id, .. } => { + let snapshot = registry.resources.get(&id).cloned().unwrap_or_default(); + RegistryDelta::AddResource { id, entry: snapshot } + } + }) + } + + /// Retired-only walk from `head` along first parents. Hot ops are excluded by design. + fn history_iter(&self) -> HistoryIter<'_> { + HistoryIter { + document: self, + parent_rev: self.head, + } + } +} + +struct HistoryIter<'a> { + document: &'a Document, + parent_rev: Rev, +} + +impl<'a> Iterator for HistoryIter<'a> { + type Item = &'a Delta; + + fn next(&mut self) -> Option { + let delta = self.document.history.get(&self.parent_rev)?; + // First parent only for now. Local-chain walking (filter by author) is a follow-up. The root + // delta has no parents, so fall back to the `0` sentinel: the next `get` misses and ends the + // walk *after* yielding the root (using `?` here would drop the root instead). + self.parent_rev = delta.parents.first().copied().unwrap_or(0); + Some(delta) + } +} + +/// Which of a [`Document`]'s two registries an apply targets: the working copy (retired state plus +/// live hot ops) or the retired snapshot (retired deltas only). 
Retirement targets the snapshot so +/// reverses capture pre-op values; the hot path and undo/redo target the working copy. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) enum RegistryTarget { + Working, + Snapshot, +} + +/// How [`Document::apply_op_with`] resolves structural collisions and LWW timestamp ties. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) enum ApplyMode { + /// Fresh local/remote edit: structural ops error on duplicate/missing targets; LWW uses strict `>`. + Live, + /// Replay/retire: structural ops skip duplicate/missing targets; LWW still uses strict `>`. + Idempotent, + /// Silent-zone undo/redo rewind: structural ops are idempotent and LWW arms assign unconditionally. + Force, +} diff --git a/document/graph-storage/src/from_runtime.rs b/document/graph-storage/src/from_runtime.rs new file mode 100644 index 0000000000..efad0512f4 --- /dev/null +++ b/document/graph-storage/src/from_runtime.rs @@ -0,0 +1,573 @@ +use std::collections::HashMap; + +use core_types::Context; +use core_types::context::ContextDependencies; +use core_types::uuid::NodeId as RuntimeNodeId; +use graph_craft::concrete; +use graph_craft::document::value::TaggedValue; +use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeInput as GraphCraftNodeInput, NodeNetwork}; +use serde::Serialize; + +use crate::attr::*; +use crate::metadata_source::{NoMetadata, NodeMetadataSource}; +use crate::{AttributesExt, ExportSlot, Implementation, InputSlot, Network, NetworkId, Node, NodeId, NodeInput, PeerId, ProtoNode, ROOT_NETWORK, Registry, ResourceHash, ResourceId, TimeStamp}; + +fn map_serialization_error(key: &str) -> impl FnOnce(serde_json::Error) -> ConversionError + '_ { + move |e| ConversionError::SerializationError(format!("{key}: {e:?}")) +} + +/// Path to a node, used to mint stable global IDs by hashing. 
+/// +/// Hashing uses blake3 truncated to 64 bits with the document's `PeerId` mixed in, so two peers +/// converting runtime states that happen to share local IDs (e.g. both editors seeded the same +/// UUID RNG) still produce distinct global IDs. Determinism: same `(peer, path, local_id)` always +/// yields the same global ID, so a peer re-converting its own runtime state preserves IDs. +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +struct NodePath { + path: Vec<(NodeId, NetworkId)>, + local_id: NodeId, +} + +impl NodePath { + fn root(node_id: NodeId) -> Self { + Self { path: vec![], local_id: node_id } + } + + fn nested(parent_path: &NodePath, parent_node_id: NodeId, network_id: NetworkId, local_id: NodeId) -> Self { + let mut path = parent_path.path.clone(); + path.push((parent_node_id, network_id)); + Self { path, local_id } + } + + fn to_global_id(&self, peer: PeerId) -> NodeId { + let bytes = rmp_serde::to_vec(&(peer, self)).expect("NodePath must serialize"); + let digest = blake3::hash(&bytes); + let mut truncated = [0u8; 8]; + truncated.copy_from_slice(&digest.as_bytes()[..8]); + NodeId::from_le_bytes(truncated) + } + + /// Stable id of the network owned by the node at this path, derived purely from the (structural) + /// path and peer so it reproduces across `to_runtime` -> `from_runtime` round trips rather than + /// depending on traversal order. A domain tag keeps it from colliding with this node's own + /// `to_global_id`. The root network is `ROOT_NETWORK` and never goes through here. 
+ fn owned_network_id(&self, peer: PeerId) -> NetworkId { + let bytes = rmp_serde::to_vec(&("network", peer, self)).expect("NodePath must serialize"); + let digest = blake3::hash(&bytes); + let mut truncated = [0u8; 8]; + truncated.copy_from_slice(&digest.as_bytes()[..8]); + NetworkId::from_le_bytes(truncated) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum ConversionError { + #[error("Failed to serialize value: {0}")] + SerializationError(String), + #[error("Unsupported node implementation type")] + UnsupportedImplementation, + #[error("Invalid network structure: {0}")] + InvalidNetwork(String), +} + +/// Graph-only conversion (no editor metadata). Use [`Registry::from_runtime_with_metadata`] for +/// editor round-trips. +impl TryFrom<&NodeNetwork> for Registry { + type Error = ConversionError; + + /// Test/utility entry point: scopes IDs under `PeerId(0)`. Real editor conversions go through + /// `from_runtime_with_metadata` and pass the document's actual peer. + fn try_from(node_network: &NodeNetwork) -> Result { + Registry::from_runtime_with_metadata(node_network, &NoMetadata, &graphene_resource::ResourceRegistry::new(), PeerId(0)) + } +} + +/// Proto-node declaration bytes extracted during conversion, keyed by content hash, for the caller +/// to persist into its byte store. +pub type DeclarationBytes = HashMap>; + +/// A `from_runtime` conversion result: the reference-only [`Registry`] plus the proto-node +/// declaration *bytes* it extracted, keyed by content hash. `graph-storage` doesn't own a byte +/// store, so the caller (the `Gdd`) persists these into its content store; the registry only holds +/// the `ResourceId`/`ResourceHash` references. +pub struct RuntimeConversion { + pub registry: Registry, + pub declaration_bytes: DeclarationBytes, + /// Each network's runtime `metadata_path` mapped to its stable storage `NetworkId`, for associating + /// per-network, per-peer view state (`session.json`) without re-deriving ids. 
+ pub network_ids: HashMap, NetworkId>, +} + +impl RuntimeConversion { + /// Rebuild the [`Declarations`](crate::Declarations) map (`ResourceId` → [`ProtoNode`]) from the + /// extracted bytes, for callers that keep the bytes in hand instead of routing them through a + /// byte store (tests, the round-trip CLI). Editor/`Gdd` paths persist the bytes and resolve via + /// their byte store instead. + pub fn declarations(&self) -> Result { + self.declaration_bytes + .iter() + .map(|(hash, bytes)| { + let proto = decode_declaration(bytes).map_err(|error| ConversionError::SerializationError(format!("declaration {hash}: {error}")))?; + Ok((ResourceId::from_hash(hash), proto)) + }) + .collect() + } +} + +/// Encode a [`ProtoNode`] declaration to its content-addressed bytes: through a self-describing +/// `serde_json::Value` (so serde aliases keep working and the on-disk shape stays migratable), then +/// rmp-serialized (which encodes the intermediate `Value` compactly). Paired with [`decode_declaration`]. +pub fn encode_declaration(proto: &ProtoNode) -> Result, String> { + let value = serde_json::to_value(proto).map_err(|error| error.to_string())?; + rmp_serde::to_vec(&value).map_err(|error| error.to_string()) +} + +/// Decode a [`ProtoNode`] declaration from the bytes [`encode_declaration`] produced. +pub fn decode_declaration(bytes: &[u8]) -> Result { + let value: serde_json::Value = rmp_serde::from_slice(bytes).map_err(|error| error.to_string())?; + serde_json::from_value(value).map_err(|error| error.to_string()) +} + +impl Registry { + /// Convenience wrapper returning only the registry (declaration bytes discarded). For callers + /// that don't persist a byte store — e.g. the graph-only `TryFrom` and value-comparison tests. 
+ pub fn from_runtime_with_metadata(node_network: &NodeNetwork, metadata: &M, resources: &graphene_resource::ResourceRegistry, peer: PeerId) -> Result { + Ok(Self::convert_from_runtime(node_network, metadata, resources, peer)?.registry) + } + + /// Full conversion: returns the registry and the extracted declaration bytes for the caller to + /// persist. See [`RuntimeConversion`]. + pub fn convert_from_runtime( + node_network: &NodeNetwork, + metadata: &M, + resources: &graphene_resource::ResourceRegistry, + peer: PeerId, + ) -> Result { + let mut registry = Registry::default(); + let mut ctx = ConversionContext { + declaration_ids: HashMap::new(), + declaration_bytes: HashMap::new(), + network_ids: HashMap::new(), + metadata, + peer, + }; + + convert_network(node_network, ROOT_NETWORK, None, &[], &mut registry, &mut ctx)?; + + // Only snapshot resources the network actually references. The runtime resource cache also keeps + // resources alive across undo (so legacy redo can restore them), so it can contain orphans whose + // node was removed by an undo. Snapshotting those would re-introduce an `AddResource` on the next + // diff and let an undone resource resurface as a phantom edit. Declaration resources are added + // separately by `convert_network` and are always referenced, so they're unaffected by this filter. + let referenced = collect_referenced_resources(node_network); + convert_resources(resources, &referenced, peer, &mut registry)?; + + Ok(RuntimeConversion { + registry, + declaration_bytes: ctx.declaration_bytes, + network_ids: ctx.network_ids, + }) + } +} + +/// Snapshot the runtime [`ResourceRegistry`](graphene_resource::ResourceRegistry) into the storage +/// [`ResourceStore`](crate::ResourceStore). Each source's chain position becomes a fractional +/// [`Priority`](crate::Priority) (index-as-priority preserves order); the `DataSource` body is +/// stored type-erased as `serde_json::Value` so its on-disk shape can migrate freely. 
All +/// timestamps are `ORIGIN`, since this is a bootstrap snapshot, not an edit. +fn convert_resources(resources: &graphene_resource::ResourceRegistry, referenced: &std::collections::HashSet, peer: PeerId, registry: &mut Registry) -> Result<(), ConversionError> { + for id in resources.ids() { + if !referenced.contains(&id) { + continue; + } + let Some(info) = resources.info(&id) else { continue }; + + let mut entry = crate::ResourceEntry { + hash: info.hash.copied(), + hash_timestamp: TimeStamp::ORIGIN, + ..Default::default() + }; + for (position, source) in info.sources.iter().enumerate() { + let key = crate::SourceKey { + priority: crate::Priority::new(position as f64).expect("enumerate index is finite"), + peer, + }; + let body = serde_json::to_value(source).map_err(|error| ConversionError::SerializationError(error.to_string()))?; + entry.set_source( + key, + crate::SourceValue { + source: body, + timestamp: TimeStamp::ORIGIN, + }, + ); + } + + registry.resources.insert(id, entry); + } + Ok(()) +} + +/// Collect the `ResourceId`s referenced by `TaggedValue::Resource` inputs anywhere in the network +/// (recursively through nested networks). These are the resources the document actually uses; the +/// runtime cache may hold more (history-retained orphans) that shouldn't be snapshotted into storage. 
+fn collect_referenced_resources(network: &NodeNetwork) -> std::collections::HashSet { + let mut referenced = std::collections::HashSet::new(); + collect_referenced_resources_inner(network, &mut referenced); + referenced +} + +fn collect_referenced_resources_inner(network: &NodeNetwork, referenced: &mut std::collections::HashSet) { + for export in &network.exports { + collect_input_resource(export, referenced); + } + + for node in network.nodes.values() { + for input in &node.inputs { + collect_input_resource(input, referenced); + } + if let DocumentNodeImplementation::Network(nested) = &node.implementation { + collect_referenced_resources_inner(nested, referenced); + } + } +} + +fn collect_input_resource(input: &GraphCraftNodeInput, referenced: &mut std::collections::HashSet) { + if let GraphCraftNodeInput::Value { tagged_value, .. } = input + && let TaggedValue::Resource(id) = &**tagged_value + { + referenced.insert(*id); + } +} + +/// Register a proto-node declaration as a content-addressed resource: a single `DataSource::Embedded` +/// source resolved to `hash`. The bytes themselves are persisted by the caller's byte store. +fn register_declaration_resource(registry: &mut Registry, id: ResourceId, hash: ResourceHash, peer: PeerId) { + registry.resources.insert(id, crate::ResourceEntry::embedded(hash, peer, TimeStamp::ORIGIN)); +} + +struct ConversionContext<'m, M: NodeMetadataSource + ?Sized> { + /// Cache from proto-node identifier to its derived `ResourceId`, so repeated proto-nodes reuse + /// one id without re-serializing. (Identical content hashes to the same id anyway; this just + /// skips the work.) + declaration_ids: HashMap, + /// Extracted declaration content keyed by hash, handed back for the caller's byte store. 
+ declaration_bytes: DeclarationBytes, + /// Maps each network's runtime `metadata_path` to its stable storage `NetworkId`, so the caller can + /// associate per-network, per-peer view state (in `session.json`) with networks without re-deriving ids. + network_ids: HashMap, NetworkId>, + metadata: &'m M, + peer: PeerId, +} + +fn convert_network( + node_network: &NodeNetwork, + network_id: NetworkId, + parent_path: Option<&NodePath>, + metadata_path: &[RuntimeNodeId], + registry: &mut Registry, + ctx: &mut ConversionContext<'_, M>, +) -> Result<(), ConversionError> { + for (runtime_node_id, doc_node) in &node_network.nodes { + let local_id = runtime_node_id.0; + let node_path = child_path(parent_path, network_id, local_id); + let global_id = node_path.to_global_id(ctx.peer); + + let location = NodeLocation { + local_id, + network_id, + parent_path, + metadata_path, + runtime_node_id: *runtime_node_id, + }; + let mut node = convert_node(doc_node, location, registry, ctx)?; + node.attributes.set(ORIGINAL_NODE_ID, serde_json::json!(local_id), TimeStamp::ORIGIN); + registry.node_instances.insert(global_id, node); + } + + let exports = node_network + .exports + .iter() + .map(|export| { + Ok(ExportSlot { + target: Some(convert_input(export, parent_path, network_id, ctx.peer)?), + timestamp: TimeStamp::ORIGIN, + }) + }) + .collect::, ConversionError>>()?; + + let mut attributes = crate::Attributes::new(); + write_ui_network_attributes(&mut attributes, ctx.metadata, metadata_path, TimeStamp::ORIGIN)?; + write_scope_injections(&mut attributes, node_network, parent_path, network_id, ctx.peer, TimeStamp::ORIGIN)?; + + registry.networks.insert(network_id, Network { exports, attributes }); + ctx.network_ids.insert(metadata_path.to_vec(), network_id); + + Ok(()) +} + +/// Serialize a network's `scope_injections` onto its attributes as one whole-map LWW blob, remapping +/// each runtime-local node reference to its stable storage global ID so the reference survives a +/// round 
trip even if runtime IDs are later reshuffled. +fn write_scope_injections( + attributes: &mut crate::Attributes, + node_network: &NodeNetwork, + parent_path: Option<&NodePath>, + network_id: NetworkId, + peer: PeerId, + timestamp: TimeStamp, +) -> Result<(), ConversionError> { + if node_network.scope_injections.is_empty() { + return Ok(()); + } + + let stored: HashMap = node_network + .scope_injections + .iter() + .map(|(key, (runtime_id, ty))| { + let storage_id = child_path(parent_path, network_id, runtime_id.0).to_global_id(peer); + (key.clone(), (storage_id, ty.clone())) + }) + .collect(); + + attributes + .set_serialized(SCOPE_INJECTIONS, &stored, timestamp) + .map_err(map_serialization_error("compute::scope_injections")) +} + +fn child_path(parent_path: Option<&NodePath>, network_id: NetworkId, local_id: NodeId) -> NodePath { + match parent_path { + None => NodePath::root(local_id), + Some(parent) => NodePath::nested(parent, parent.local_id, network_id, local_id), + } +} + +/// Where a node sits in both the storage tree (`local_id`, `network_id`, `parent_path`) and the +/// runtime tree (`metadata_path`, `runtime_node_id`). `metadata_path` is the chain of runtime IDs +/// from the root down to (but not including) this node. 
+struct NodeLocation<'a> { + local_id: NodeId, + network_id: NetworkId, + parent_path: Option<&'a NodePath>, + metadata_path: &'a [RuntimeNodeId], + runtime_node_id: RuntimeNodeId, +} + +fn convert_node(doc_node: &DocumentNode, location: NodeLocation<'_>, registry: &mut Registry, ctx: &mut ConversionContext<'_, M>) -> Result { + let NodeLocation { + local_id, + network_id, + parent_path, + metadata_path, + runtime_node_id, + } = location; + + let node_path = child_path(parent_path, network_id, local_id); + let timestamp = TimeStamp::ORIGIN; + + let mut inputs = Vec::with_capacity(doc_node.inputs.len()); + let mut inputs_attributes = Vec::with_capacity(doc_node.inputs.len()); + for (input_index, input) in doc_node.inputs.iter().enumerate() { + inputs.push(InputSlot { + input: convert_input(input, parent_path, network_id, ctx.peer)?, + timestamp, + }); + + let mut input_attrs = convert_input_attributes(input)?; + write_ui_input_attributes(&mut input_attrs, ctx.metadata, metadata_path, runtime_node_id, input_index, timestamp)?; + inputs_attributes.push(input_attrs); + } + + // For nested networks, append this node onto the metadata path. + let mut extended_path = Vec::new(); + let child_metadata_path = if matches!(doc_node.implementation, DocumentNodeImplementation::Network(_)) { + extended_path.extend_from_slice(metadata_path); + extended_path.push(runtime_node_id); + extended_path.as_slice() + } else { + metadata_path + }; + let implementation = convert_implementation(&doc_node.implementation, &node_path, child_metadata_path, registry, ctx)?; + + // Defaults match `DocumentNode::default()`; `to_runtime` rehydrates absent keys from the same defaults. 
+ let mut attributes = crate::Attributes::new(); + attributes + .set_if_not_default(CALL_ARGUMENT, &doc_node.call_argument, &concrete!(Context), timestamp) + .map_err(map_serialization_error("call_argument"))?; + attributes + .set_if_not_default(CONTEXT_FEATURES, &doc_node.context_features, &ContextDependencies::default(), timestamp) + .map_err(map_serialization_error("context_features"))?; + attributes + .set_if_not_default(VISIBLE, &doc_node.visible, &true, timestamp) + .map_err(map_serialization_error("visible"))?; + attributes + .set_if_not_default(SKIP_DEDUPLICATION, &doc_node.skip_deduplication, &false, timestamp) + .map_err(map_serialization_error("skip_deduplication"))?; + + write_ui_attributes(&mut attributes, ctx.metadata, metadata_path, runtime_node_id, timestamp)?; + + Ok(Node { + implementation, + inputs, + inputs_attributes, + attributes, + network: network_id, + }) +} + +fn write_ui_attributes( + attributes: &mut crate::Attributes, + metadata: &M, + metadata_path: &[RuntimeNodeId], + runtime_node_id: RuntimeNodeId, + timestamp: TimeStamp, +) -> Result<(), ConversionError> { + if let Some(position) = metadata.position(metadata_path, runtime_node_id) { + attributes.set_serialized(UI_POSITION, &position, timestamp).map_err(map_serialization_error("ui::position"))?; + } + + // Bool flags are only emitted when true; absence reads as false. + for (key, value) in [ + (UI_IS_LAYER, metadata.is_layer(metadata_path, runtime_node_id)), + (UI_LOCKED, metadata.locked(metadata_path, runtime_node_id)), + (UI_PINNED, metadata.pinned(metadata_path, runtime_node_id)), + ] { + if value { + attributes.set(key, serde_json::Value::Bool(true), timestamp); + } + } + + if let Some(name) = metadata.display_name(metadata_path, runtime_node_id) + && !name.is_empty() + { + attributes.set(UI_DISPLAY_NAME, serde_json::Value::String(name.to_string()), timestamp); + } + + // One whole-vec attribute; per-slot LWW would be overkill for rename-on-output. 
+ let output_names = metadata.output_names(metadata_path, runtime_node_id); + if !output_names.is_empty() { + attributes + .set_serialized(UI_OUTPUT_NAMES, &output_names, timestamp) + .map_err(map_serialization_error("ui::output_names"))?; + } + + Ok(()) +} + +fn write_ui_network_attributes(attributes: &mut crate::Attributes, metadata: &M, network_path: &[RuntimeNodeId], timestamp: TimeStamp) -> Result<(), ConversionError> { + if let Some(reference) = metadata.reference(network_path) { + attributes.set(UI_REFERENCE, serde_json::Value::String(reference.to_string()), timestamp); + } + + Ok(()) +} + +/// Empty strings (the runtime's "unset" sentinel) and absent values are both skipped. +/// `input_data` entries each get their own `ui::input_data::` attribute for per-key LWW. +fn write_ui_input_attributes( + attributes: &mut crate::Attributes, + metadata: &M, + metadata_path: &[RuntimeNodeId], + runtime_node_id: RuntimeNodeId, + input_index: usize, + timestamp: TimeStamp, +) -> Result<(), ConversionError> { + let non_empty_string = |key: &'static str, value: Option<&str>, attributes: &mut crate::Attributes| { + if let Some(value) = value.filter(|s| !s.is_empty()) { + attributes.set(key, serde_json::Value::String(value.to_string()), timestamp); + } + }; + + non_empty_string(UI_INPUT_NAME, metadata.input_name(metadata_path, runtime_node_id, input_index), attributes); + non_empty_string(UI_INPUT_DESCRIPTION, metadata.input_description(metadata_path, runtime_node_id, input_index), attributes); + non_empty_string(UI_WIDGET_OVERRIDE, metadata.widget_override(metadata_path, runtime_node_id, input_index), attributes); + + for (sub_key, value) in metadata.input_data(metadata_path, runtime_node_id, input_index) { + attributes.set(&format!("{UI_INPUT_DATA_PREFIX}{sub_key}"), value, timestamp); + } + + Ok(()) +} + +fn convert_input(input: &GraphCraftNodeInput, parent_path: Option<&NodePath>, network_id: NetworkId, peer: PeerId) -> Result { + Ok(match input { + 
GraphCraftNodeInput::Node { node_id, output_index } => NodeInput::Node { + node_id: child_path(parent_path, network_id, node_id.0).to_global_id(peer), + output_index: *output_index, + }, + GraphCraftNodeInput::Value { tagged_value, exposed } => { + let value = serde_json::to_value(&**tagged_value).map_err(|e| ConversionError::SerializationError(format!("{e:?}")))?; + NodeInput::Value { value, exposed: *exposed } + } + GraphCraftNodeInput::Scope(s) => NodeInput::Scope(s.clone()), + GraphCraftNodeInput::Import { import_index, .. } => NodeInput::Import { import_idx: *import_index }, + GraphCraftNodeInput::Reflection(_) => NodeInput::Reflection, + // GPU-specific; not modeled in the Registry format. + GraphCraftNodeInput::Inline(_) => return Err(ConversionError::UnsupportedImplementation), + }) +} + +fn convert_input_attributes(input: &GraphCraftNodeInput) -> Result { + let mut attributes = crate::Attributes::new(); + let timestamp = TimeStamp::ORIGIN; + + match input { + GraphCraftNodeInput::Import { import_type, .. } => { + attributes.set_serialized(IMPORT_TYPE, import_type, timestamp).map_err(map_serialization_error("import_type"))?; + } + GraphCraftNodeInput::Reflection(metadata) => { + attributes + .set_serialized(REFLECTION_METADATA, metadata, timestamp) + .map_err(map_serialization_error("reflection_metadata"))?; + } + _ => {} + } + + Ok(attributes) +} + +fn convert_implementation( + implementation: &DocumentNodeImplementation, + current_node_path: &NodePath, + child_metadata_path: &[RuntimeNodeId], + registry: &mut Registry, + ctx: &mut ConversionContext<'_, M>, +) -> Result { + Ok(match implementation { + DocumentNodeImplementation::ProtoNode(identifier) => { + let identifier_str = identifier.as_str().to_string(); + + // Reuse a previously-converted proto-node's id; identical content hashes to the same id + // anyway, so this only skips re-serializing. 
+ if let Some(id) = ctx.declaration_ids.get(&identifier_str) { + return Ok(Implementation::ProtoNode(*id)); + } + + let proto = ProtoNode { + identifier: identifier_str.clone(), + code: None, + wasm: None, + attributes: Default::default(), + }; + // Content-address the declaration: serialize, hash, derive a deterministic id. + let bytes = encode_declaration(&proto).map_err(|error| ConversionError::SerializationError(format!("proto-node {identifier_str}: {error}")))?; + let hash = ResourceHash::from(bytes.as_slice()); + let id = ResourceId::from_hash(&hash); + + register_declaration_resource(registry, id, hash, ctx.peer); + ctx.declaration_bytes.insert(hash, bytes); + ctx.declaration_ids.insert(identifier_str, id); + + Implementation::ProtoNode(id) + } + DocumentNodeImplementation::Network(nested_network) => { + // Stable, traversal-order-independent id derived from the owning node's path, so a + // `to_runtime` -> `from_runtime` round trip reproduces the same `NetworkId` (and thus the + // same node-path hashes underneath it). + let nested_network_id = current_node_path.owned_network_id(ctx.peer); + convert_network(nested_network, nested_network_id, Some(current_node_path), child_metadata_path, registry, ctx)?; + Implementation::Network(nested_network_id) + } + // TODO: Support Extract in the Registry format. + DocumentNodeImplementation::Extract => return Err(ConversionError::UnsupportedImplementation), + }) +} diff --git a/document/graph-storage/src/ids.rs b/document/graph-storage/src/ids.rs new file mode 100644 index 0000000000..d042a80620 --- /dev/null +++ b/document/graph-storage/src/ids.rs @@ -0,0 +1,92 @@ +use crate::RegistryDelta; +use serde::{Deserialize, Serialize}; + +pub type NodeId = u64; +pub type NetworkId = u64; +/// Content-addressed identity for a `Delta`. +/// 128-bit blake3 truncation: comfortable collision headroom for any plausible document lifetime +/// without being adversarial-grade. Same delta content always produces the same `Rev`. 
+pub type Rev = u128;
+
+/// Root network ID. The renderable graph lives in `networks[&ROOT_NETWORK]`.
+pub const ROOT_NETWORK: NetworkId = 0;
+
+/// Upper bound on a network's export slot count, guarding `SetExport` against a malicious or corrupted
+/// slot index forcing an unbounded `exports` allocation. `1 << 16` is 65,536 slots.
+pub(crate) const MAX_EXPORT_SLOTS: usize = 1 << 16;
+
+/// Unified storage-side position. The valid variants depend on `attr::UI_IS_LAYER`:
+/// layers use `Absolute` or `Stack`; non-layer nodes use `Absolute` or `Chain`.
+// NOTE(review): the meaning of the `[i32; 2]` and `u32` payloads is not visible in this file; presumably
+// an x/y coordinate pair and a stack offset. Confirm against the editor-side position types.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub enum Position {
+    Absolute([i32; 2]),
+    Chain,
+    Stack(u32),
+}
+
+/// Per-device identity. Stable per `(device, document)`. Used for CRDT tiebreaking and `NodeId`
+/// scoping. Globally unique across all peers ever in a document.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize)]
+pub struct PeerId(pub u64);
+
+/// Per-human identity. Stable across devices (one user, many devices). Used for identity display
+/// and undo-chain walking. Derived from `PeerId` via `Registry.peer_users`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize)]
+pub struct UserId(pub u64);
+
+/// Lamport timestamp with a peer-ID tiebreak. Higher counter wins; ties broken by peer.
+///
+/// The ordering comes from the derived `Ord`, which compares fields in declaration order, so
+/// `counter` must stay declared before `peer`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Serialize, Deserialize)]
+pub struct TimeStamp {
+    pub counter: u64,
+    pub peer: PeerId,
+}
+
+impl TimeStamp {
+    /// Pre-edit origin. Used by initial `from_runtime` conversion before any edits have happened.
+    /// Same value as `TimeStamp::default()`, but usable in `const` contexts.
+    pub const ORIGIN: Self = TimeStamp { counter: 0, peer: PeerId(0) };
+}
+
+/// Local Lamport clock for a single peer: `tick` mints new `TimeStamp`s and `observe` catches the
+/// counter up to timestamps received from elsewhere.
+#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
+pub struct LamportClock {
+    /// Counter of the most recently minted or observed timestamp.
+    pub(crate) counter: u64,
+    /// Peer stamped onto every timestamp this clock mints.
+    peer: PeerId,
+}
+
+impl LamportClock {
+    /// Creates a clock for `peer` with the counter at 0; `tick` increments before minting, so the
+    /// first minted timestamp carries counter 1.
+    pub fn new(peer: PeerId) -> Self {
+        Self { counter: 0, peer }
+    }
+
+    /// Mints a fresh local timestamp.
+ pub fn tick(&mut self) -> TimeStamp { + self.counter += 1; + TimeStamp { + counter: self.counter, + peer: self.peer, + } + } + + /// Advances past an incoming op so future local ticks are causally later. + pub fn observe(&mut self, incoming: TimeStamp) { + self.counter = self.counter.max(incoming.counter); + } +} + +pub fn mint_node_id(peer: PeerId, counter: u64) -> NodeId { + let bytes = rmp_serde::to_vec(&(peer, counter)).expect("(PeerId, counter) must serialize"); + let digest = blake3::hash(&bytes); + let mut truncated = [0u8; 8]; + truncated.copy_from_slice(&digest.as_bytes()[..8]); + NodeId::from_le_bytes(truncated) +} + +/// Hash the identity-bearing fields of a `Delta` with blake3 and truncate to 128 bits. +pub(crate) fn compute_rev(parents: &[Rev], author: PeerId, timestamp: TimeStamp, delta_type: &RegistryDelta) -> Rev { + let mut hasher = blake3::Hasher::new(); + let bytes = rmp_serde::to_vec(&(parents, author, timestamp, delta_type)).expect("Delta identity fields must serialize"); + hasher.update(&bytes); + let digest = hasher.finalize(); + let mut truncated = [0u8; 16]; + truncated.copy_from_slice(&digest.as_bytes()[..16]); + Rev::from_le_bytes(truncated) +} diff --git a/document/graph-storage/src/lib.rs b/document/graph-storage/src/lib.rs new file mode 100644 index 0000000000..e7ddff34cc --- /dev/null +++ b/document/graph-storage/src/lib.rs @@ -0,0 +1,35 @@ +pub use graphene_resource::{ResourceHash, ResourceId}; + +pub mod attributes; +pub mod crdt; +pub mod delta; +pub mod document; +pub mod ids; +pub mod model; +pub mod registry; +pub mod resources; +pub mod session; + +#[cfg(any(feature = "conversion", test))] +pub mod from_runtime; +#[cfg(any(feature = "conversion", test))] +pub mod metadata_source; +#[cfg(any(feature = "conversion", test))] +pub mod to_runtime; + +pub use attributes::*; +pub use crdt::*; +pub use document::*; +pub use from_runtime::{RuntimeConversion, decode_declaration, encode_declaration}; +pub use ids::*; +pub use 
metadata_source::{InputMetadataEntry, NetworkMetadataEntry, NoMetadata, NodeMetadataEntry, NodeMetadataSource}; +pub use model::*; +pub use registry::*; +pub use resources::*; +pub use session::*; +pub use to_runtime::Declarations; + +#[cfg(test)] +mod crdt_tests; +#[cfg(test)] +mod round_trip_tests; diff --git a/document/graph-storage/src/metadata_source.rs b/document/graph-storage/src/metadata_source.rs new file mode 100644 index 0000000000..a32a4545cc --- /dev/null +++ b/document/graph-storage/src/metadata_source.rs @@ -0,0 +1,123 @@ +//! Lets `from_runtime` read editor-side per-node metadata without depending on the editor crate. +//! The editor implements this on `NodeNetworkInterface`; tests pass [`NoMetadata`]. +//! +//! `network_path` is the chain of runtime local `NodeId`s from the root down to (but not including) +//! the queried node, matching `NodeNetworkInterface::node_metadata(node_id, network_path)`. + +use std::collections::HashMap; + +use core_types::uuid::NodeId as RuntimeNodeId; + +use crate::Position; + +/// One node's editor-side metadata, produced by `Registry::to_runtime_with_metadata`. +#[derive(Clone, Debug, PartialEq)] +pub struct NodeMetadataEntry { + pub network_path: Vec, + pub local_id: RuntimeNodeId, + pub position: Option, + pub is_layer: bool, + pub display_name: Option, + pub locked: bool, + pub pinned: bool, + /// Always sized to match the runtime node's `inputs.len()`; absent slots use `Default`. The rebuild + /// returns an error if this length does not match the node's input count. + pub input_metadata: Vec, + pub output_names: Vec, +} + +impl NodeMetadataEntry { + pub fn is_empty(&self) -> bool { + self.position.is_none() + && !self.is_layer + && self.display_name.is_none() + && !self.locked + && !self.pinned + && self.output_names.is_empty() + && self.input_metadata.iter().all(InputMetadataEntry::is_empty) + } +} + +/// Per-network metadata (navigation, previewing). 
Separate from `NodeMetadataEntry` since these are +/// properties of a network, not of any node. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct NetworkMetadataEntry { + /// Owning-node chain from the root to (and including) the node containing this network. + /// Empty = root network. + pub network_path: Vec, + /// Stable storage id of this network. Lets the editor associate per-network, per-peer view state + /// (node-graph nav + previewing, in `session.json`) with a network across reparenting. + pub network_id: crate::NetworkId, + /// Matches the runtime's `NodeNetworkPersistentMetadata::reference` — definition lineage tag. + pub reference: Option, +} + +impl NetworkMetadataEntry { + pub fn is_empty(&self) -> bool { + self.reference.is_none() + } +} + +/// Per-input editor metadata. Mirrors `InputPersistentMetadata` but wraps strings in `Option` so +/// unset (`""` on the runtime side) is distinguishable from an explicit empty string. +#[derive(Clone, Debug, Default, PartialEq)] +pub struct InputMetadataEntry { + pub input_name: Option, + pub input_description: Option, + pub widget_override: Option, + /// Reassembled from `ui::input_data::` attributes. + pub input_data: HashMap, +} + +impl InputMetadataEntry { + pub fn is_empty(&self) -> bool { + self.input_name.is_none() && self.input_description.is_none() && self.widget_override.is_none() && self.input_data.is_empty() + } +} + +/// Editor-side metadata source. Methods default to "no data" so implementors only override what +/// they carry. Returns are JSON-shaped where the underlying types live editor-side (PTZ, etc.). 
+pub trait NodeMetadataSource { + fn position(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId) -> Option { + None + } + fn is_layer(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId) -> bool { + false + } + fn display_name(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId) -> Option<&str> { + None + } + fn locked(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId) -> bool { + false + } + fn pinned(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId) -> bool { + false + } + /// Empty vec = no overrides. Stored as a single `ui::output_names` attribute (whole-vec LWW). + fn output_names(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId) -> Vec { + Vec::new() + } + + fn input_name(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId, _input_index: usize) -> Option<&str> { + None + } + fn input_description(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId, _input_index: usize) -> Option<&str> { + None + } + fn widget_override(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId, _input_index: usize) -> Option<&str> { + None + } + /// Returns owned to stay object-safe. Each entry is stored as `ui::input_data::` for per-key LWW. + fn input_data(&self, _network_path: &[RuntimeNodeId], _local_id: RuntimeNodeId, _input_index: usize) -> HashMap { + HashMap::new() + } + + fn reference(&self, _network_path: &[RuntimeNodeId]) -> Option<&str> { + None + } +} + +/// No-op metadata source. Use when there's nothing to attach (synthetic networks, CLI tools). 
+pub struct NoMetadata; + +impl NodeMetadataSource for NoMetadata {} diff --git a/document/graph-storage/src/model.rs b/document/graph-storage/src/model.rs new file mode 100644 index 0000000000..6265c92013 --- /dev/null +++ b/document/graph-storage/src/model.rs @@ -0,0 +1,184 @@ +use crate::{Attributes, NetworkId, NodeId, ResourceId, TimeStamp, attributes_value_equal}; +use serde::{Deserialize, Serialize}; +use std::borrow::Cow; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct Node { + pub(crate) implementation: Implementation, + pub(crate) inputs: Vec, + pub(crate) inputs_attributes: Vec, + pub(crate) attributes: Attributes, + pub(crate) network: NetworkId, +} + +impl Node { + pub fn implementation(&self) -> &Implementation { + &self.implementation + } + pub fn inputs(&self) -> &[InputSlot] { + &self.inputs + } + pub fn inputs_attributes(&self) -> &[Attributes] { + &self.inputs_attributes + } + pub fn attributes(&self) -> &Attributes { + &self.attributes + } + pub fn network(&self) -> NetworkId { + self.network + } + + /// True if both nodes agree on every value-bearing field, ignoring slot/attribute timestamps. + pub fn value_equal(&self, other: &Self) -> bool { + if self.implementation != other.implementation || self.network != other.network { + return false; + } + if self.inputs.len() != other.inputs.len() { + return false; + } + if !self.inputs.iter().zip(&other.inputs).all(|(a, b)| a.input == b.input) { + return false; + } + if self.inputs_attributes.len() != other.inputs_attributes.len() { + return false; + } + if !self.inputs_attributes.iter().zip(&other.inputs_attributes).all(|(a, b)| attributes_value_equal(a, b)) { + return false; + } + attributes_value_equal(&self.attributes, &other.attributes) + } +} + +/// One positional input. The timestamp drives LWW on concurrent `ChangeNodeInput` ops targeting +/// the same `(node_id, input_idx)`. 
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct InputSlot { + pub input: NodeInput, + pub timestamp: TimeStamp, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum NodeInput { + Node { + node_id: NodeId, + output_index: usize, + }, + Value { + value: serde_json::Value, + exposed: bool, + }, + Scope(Cow<'static, str>), + Import { + import_idx: usize, + }, + /// Marker; the `DocumentNodeMetadata` lives in `inputs_attributes`. + Reflection, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum Implementation { + /// References a proto-node declaration resource (see [`ProtoNode`]); the binding to content lives + /// in `Registry.resources` like any other resource. + ProtoNode(ResourceId), + Network(NetworkId), +} + +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct Network { + pub exports: Vec, + /// Per-network `ui::*` state (navigation, previewing). Separate from `Node.attributes` so + /// view-state edits LWW independently. + pub attributes: Attributes, +} + +impl Network { + /// True if both networks agree on every value-bearing field, ignoring slot/attribute timestamps. + pub fn value_equal(&self, other: &Self) -> bool { + // Compare slot targets index-by-index, treating out-of-range slots as `None`. A `SetExport(None)` + // truncation leaves a trailing empty slot (a tombstone in the CRDT state) that is value-equal to + // the slot being absent, so trailing `None`s must not count as drift. Mirrors `compute_deltas` + // (emits nothing for them) and `to_runtime` (drops them). 
+ let max_len = self.exports.len().max(other.exports.len()); + for slot_idx in 0..max_len { + let self_target = self.exports.get(slot_idx).and_then(|slot| slot.target.as_ref()); + let other_target = other.exports.get(slot_idx).and_then(|slot| slot.target.as_ref()); + if self_target != other_target { + return false; + } + } + + attributes_value_equal(&self.attributes, &other.attributes) + } +} + +/// One positional export slot. `target == None` marks an empty/removed slot. Timestamp drives LWW +/// on concurrent `SetExport` ops. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ExportSlot { + pub target: Option, + pub timestamp: TimeStamp, +} + +/// Content of a proto-node declaration. Stored as a content-addressed resource (serialized bytes +/// keyed by `ResourceHash`, held by the `Gdd` byte store) and referenced from +/// `Implementation::ProtoNode(ResourceId)`. `graph-storage` itself only holds the reference. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct ProtoNode { + pub identifier: String, + pub code: Option, + pub wasm: Option>, + pub attributes: Attributes, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::TimeStamp; + + fn target_slot(node_id: u64) -> ExportSlot { + ExportSlot { + target: Some(NodeInput::Node { node_id, output_index: 0 }), + timestamp: TimeStamp::ORIGIN, + } + } + + fn empty_slot() -> ExportSlot { + ExportSlot { + target: None, + timestamp: TimeStamp { counter: 5, peer: crate::PeerId(1) }, + } + } + + /// A `SetExport(None)` truncation leaves a trailing empty slot. Such a network is value-equal to + /// the same network without that slot, so the soak oracle doesn't false-report drift. 
+ #[test] + fn trailing_empty_export_slot_is_value_equal() { + let compact = Network { + exports: vec![target_slot(1), target_slot(2)], + ..Default::default() + }; + let with_trailing_empty = Network { + exports: vec![target_slot(1), target_slot(2), empty_slot()], + ..Default::default() + }; + + assert!(compact.value_equal(&with_trailing_empty)); + assert!(with_trailing_empty.value_equal(&compact)); + } + + /// A `None` slot *between* live targets is a real value difference (a hole), not a trailing + /// tombstone, so it must still count as drift. + #[test] + fn interior_empty_export_slot_is_not_value_equal() { + let dense = Network { + exports: vec![target_slot(1), target_slot(2)], + ..Default::default() + }; + let with_hole = Network { + exports: vec![target_slot(1), empty_slot(), target_slot(2)], + ..Default::default() + }; + + assert!(!dense.value_equal(&with_hole)); + } +} diff --git a/document/graph-storage/src/registry.rs b/document/graph-storage/src/registry.rs new file mode 100644 index 0000000000..757061154e --- /dev/null +++ b/document/graph-storage/src/registry.rs @@ -0,0 +1,161 @@ +use crate::{Attributes, Network, NetworkId, Node, NodeId, PeerId, ResourceId, ResourceStore, SourceKey, TimeStamp, UserId}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct Registry { + pub node_instances: HashMap, + pub networks: HashMap, + /// Public library API: nodes an importing document can reference. + /// `library::*` attributes on each referenced node carry its display name, category, docs. + pub exported_nodes: Vec, + /// Append-only mapping from per-device `PeerId` to per-human `UserId`. + /// Registered by each device's first contribution via `RegistryDelta::RegisterPeer`. + pub peer_users: HashMap, + /// Content-addressable resources (images, fonts, eventually proto-node declarations) referenced + /// by `ResourceId`. See [`ResourceStore`]. 
+ pub resources: ResourceStore, + pub attributes: Attributes, +} + +impl Registry { + /// True if both registries agree on every value-bearing field, ignoring per-slot and + /// per-attribute timestamps. Mirrors `compute_deltas`'s value-only semantics, so unchanged + /// state at a stamped slot doesn't count as drift. `peer_users` is excluded: it isn't diffed by + /// `compute_deltas` (the mapping is injected on the commit path via `RegisterPeer`, never by a + /// fresh `from_runtime` conversion), so a committed registry and a fresh conversion legitimately + /// differ there without it counting as drift. + pub fn value_equal(&self, other: &Self) -> bool { + if self.exported_nodes != other.exported_nodes { + return false; + } + if !resources_value_equal(&self.resources, &other.resources) { + return false; + } + if !attributes_value_equal(&self.attributes, &other.attributes) { + return false; + } + + if self.node_instances.len() != other.node_instances.len() { + return false; + } + for (id, node) in &self.node_instances { + let Some(other_node) = other.node_instances.get(id) else { return false }; + if !node.value_equal(other_node) { + return false; + } + } + + if self.networks.len() != other.networks.len() { + return false; + } + for (id, network) in &self.networks { + let Some(other_network) = other.networks.get(id) else { return false }; + if !network.value_equal(other_network) { + return false; + } + } + + true + } + + /// True if the relative timestamp order on every shared timestamped slot agrees across + /// the two registries. Catches LWW-bookkeeping bugs that `value_equal` deliberately ignores. + /// + /// For every pair of shared keys (a, b), checks that `self[a].cmp(self[b])` and + /// `other[a].cmp(other[b])` are compatible: `Equal` on either side is always compatible; + /// otherwise both sides must agree on direction. Equality on one side imposes no order, so + /// a registry with all-equal timestamps trivially passes against any other. 
+ /// + /// Slots present in only one registry are skipped. O(N²) in the number of shared timestamped + /// slots; intended for debug-only use. + pub fn order_consistent(&self, other: &Self) -> bool { + let self_stamps = collect_timestamps(self); + let other_stamps = collect_timestamps(other); + + let shared: Vec<(TimestampKey, TimeStamp, TimeStamp)> = self_stamps.into_iter().filter_map(|(key, ts)| other_stamps.get(&key).map(|other_ts| (key, ts, *other_ts))).collect(); + + for i in 0..shared.len() { + for j in (i + 1)..shared.len() { + let self_order = shared[i].1.cmp(&shared[j].1); + let other_order = shared[i].2.cmp(&shared[j].2); + use std::cmp::Ordering::*; + let compatible = matches!((self_order, other_order), (Equal, _) | (_, Equal) | (Less, Less) | (Greater, Greater)); + if !compatible { + return false; + } + } + } + true + } +} + +pub(crate) fn attributes_value_equal(a: &Attributes, b: &Attributes) -> bool { + if a.len() != b.len() { + return false; + } + a.iter().all(|(key, value)| b.get(key).is_some_and(|other| value.value == other.value)) +} + +/// Value-level resource comparison: same resolved hashes and same source chains (keyed by +/// `SourceKey`, comparing source bodies), ignoring LWW timestamps. Mirrors `attributes_value_equal`. +pub(crate) fn resources_value_equal(a: &ResourceStore, b: &ResourceStore) -> bool { + if a.len() != b.len() { + return false; + } + a.iter().all(|(id, entry)| { + b.get(id).is_some_and(|other| { + entry.hash == other.hash + && entry.sources.len() == other.sources.len() + && entry.sources.iter().all(|(key, value)| other.source(key).is_some_and(|other_value| value.source == other_value.source)) + }) + }) +} + +/// Stable identity for any timestamped slot in a `Registry`. Used by `order_consistent`. 
+#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+enum TimestampKey {
+    /// `(node, input index)`: the input slot itself.
+    NodeInput(NodeId, usize),
+    /// `(node, input index, attribute key)`: one attribute of one input.
+    NodeInputAttribute(NodeId, usize, String),
+    /// `(node, attribute key)`.
+    NodeAttribute(NodeId, String),
+    /// `(network, export index)`.
+    NetworkExport(NetworkId, usize),
+    /// `(network, attribute key)`.
+    NetworkAttribute(NetworkId, String),
+    /// Registry-level attribute key.
+    DocumentAttribute(String),
+    /// The resolved hash of a resource.
+    ResourceHash(ResourceId),
+    /// One entry of a resource's source chain.
+    ResourceSource(ResourceId, SourceKey),
+}
+
+/// Flattens every timestamped slot of `registry` (node inputs and attributes, network exports and
+/// attributes, document attributes, resource hashes and sources) into one map keyed by
+/// `TimestampKey`, for the pairwise comparison in `order_consistent`.
+fn collect_timestamps(registry: &Registry) -> HashMap<TimestampKey, TimeStamp> {
+    let mut out = HashMap::new();
+    for (node_id, node) in &registry.node_instances {
+        for (i, slot) in node.inputs.iter().enumerate() {
+            out.insert(TimestampKey::NodeInput(*node_id, i), slot.timestamp);
+        }
+        for (i, attrs) in node.inputs_attributes.iter().enumerate() {
+            for (key, value) in attrs {
+                out.insert(TimestampKey::NodeInputAttribute(*node_id, i, key.clone()), value.timestamp);
+            }
+        }
+        for (key, value) in &node.attributes {
+            out.insert(TimestampKey::NodeAttribute(*node_id, key.clone()), value.timestamp);
+        }
+    }
+    for (network_id, network) in &registry.networks {
+        for (i, slot) in network.exports.iter().enumerate() {
+            out.insert(TimestampKey::NetworkExport(*network_id, i), slot.timestamp);
+        }
+        for (key, value) in &network.attributes {
+            out.insert(TimestampKey::NetworkAttribute(*network_id, key.clone()), value.timestamp);
+        }
+    }
+    for (key, value) in &registry.attributes {
+        out.insert(TimestampKey::DocumentAttribute(key.clone()), value.timestamp);
+    }
+    for (id, entry) in &registry.resources {
+        // The resolved hash and each source-chain entry are separate timestamped slots.
+        out.insert(TimestampKey::ResourceHash(*id), entry.hash_timestamp);
+        for (source_key, source_value) in &entry.sources {
+            out.insert(TimestampKey::ResourceSource(*id, *source_key), source_value.timestamp);
+        }
+    }
+    out
+}
diff --git a/document/graph-storage/src/resources.rs b/document/graph-storage/src/resources.rs
new file mode 100644
index 0000000000..3091f55d41
--- /dev/null
+++ b/document/graph-storage/src/resources.rs
@@ -0,0 +1,218 @@
+use crate::{PeerId, Priority, TimeStamp};
+use graphene_resource::{ResourceHash, ResourceId};
+use
serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Ordering key for an entry in a resource's source chain: fractional `priority`, with `peer` as +/// the tiebreak so concurrent insertions at the same priority converge deterministically. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +pub struct SourceKey { + pub priority: Priority, + pub peer: PeerId, +} + +/// One entry in a resource's source chain. The `source` body is type-erased (`serde_json::Value`) +/// so the on-disk `DataSource` shape can evolve through migrations without the storage layer +/// committing to a Rust enum; `timestamp` drives LWW on re-setting this same entry. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SourceValue { + pub source: serde_json::Value, + pub timestamp: TimeStamp, +} + +/// A single content-addressable resource: an ordered, conflict-mergeable chain of fallback sources +/// plus the resolved content hash. The source chain is an add-wins ordered set (concurrent +/// additions all survive); the hash is last-writer-wins (concurrent resolves of the same logical +/// resource agree by construction, since the hash is content-derived). +#[derive(Clone, Debug, Default, PartialEq, Serialize)] +pub struct ResourceEntry { + /// Fallback chain kept sorted by `SourceKey`, so iteration yields highest-priority first. + pub sources: Vec<(SourceKey, SourceValue)>, + pub hash: Option, + pub hash_timestamp: TimeStamp, +} + +impl<'de> Deserialize<'de> for ResourceEntry { + fn deserialize>(deserializer: D) -> Result { + // The `binary_search`-based accessors require `sources` sorted by `SourceKey` with unique keys. + // On-disk data (older writers, hand edits) can't be trusted to preserve either, so re-sort and + // collapse any duplicate keys, keeping the higher-timestamp value (LWW). 
+ #[derive(Deserialize)] + struct Raw { + sources: Vec<(SourceKey, SourceValue)>, + hash: Option, + hash_timestamp: TimeStamp, + } + + let Raw { mut sources, hash, hash_timestamp } = Raw::deserialize(deserializer)?; + sources.sort_by(|(a, _), (b, _)| a.cmp(b)); + sources.dedup_by(|(later_key, later_value), (kept_key, kept_value)| { + // `dedup_by` keeps the first of each run; sorting is stable, so resolve duplicates by LWW. + if later_key != kept_key { + return false; + } + if later_value.timestamp > kept_value.timestamp { + *kept_value = later_value.clone(); + } + true + }); + + Ok(Self { sources, hash, hash_timestamp }) + } +} + +impl ResourceEntry { + /// A resource backed by a single `DataSource::Embedded` fallback resolved to `hash`. Both the + /// source entry and the resolved hash carry `timestamp` so later LWW writes order against it. + /// The bytes themselves are persisted separately by the caller's byte store. + pub fn embedded(hash: ResourceHash, peer: PeerId, timestamp: TimeStamp) -> Self { + let embedded = serde_json::to_value(graphene_resource::DataSource::Embedded).expect("DataSource::Embedded serializes"); + let priority = Priority::new(0.).expect("0. is finite"); + let sources = vec![(SourceKey { priority, peer }, SourceValue { source: embedded, timestamp })]; + + Self { + sources, + hash: Some(hash), + hash_timestamp: timestamp, + } + } + + /// The source body and timestamp stored under `key`, if any. + pub fn source(&self, key: &SourceKey) -> Option<&SourceValue> { + self.sources.binary_search_by(|(candidate, _)| candidate.cmp(key)).ok().map(|index| &self.sources[index].1) + } + + /// Insert or LWW-overwrite the entry at `key`. A re-set at an existing key wins only if `value`'s + /// timestamp is strictly newer; a fresh key is inserted in sorted position. 
+    pub fn set_source(&mut self, key: SourceKey, value: SourceValue) {
+        match self.sources.binary_search_by_key(&key, |(existing_key, _)| *existing_key) {
+            // Unknown key: splice it in where the search says it belongs, keeping the chain sorted.
+            Err(slot) => self.sources.insert(slot, (key, value)),
+            // Known key: last-writer-wins, and a timestamp tie keeps what is already stored.
+            Ok(slot) => {
+                let stored = &mut self.sources[slot].1;
+                if value.timestamp > stored.timestamp {
+                    *stored = value;
+                }
+            }
+        }
+    }
+
+    /// Unconditional variant of [`set_source`](Self::set_source), used for silent-zone rewind: the
+    /// precomputed reverse/forward `value` is authoritative, so it replaces the stored entry even
+    /// when its timestamp does not beat the one it replaces. A fresh key is inserted in sorted position.
+    pub fn force_set_source(&mut self, key: SourceKey, value: SourceValue) {
+        let lookup = self.sources.binary_search_by_key(&key, |(existing_key, _)| *existing_key);
+        match lookup {
+            Ok(slot) => self.sources[slot].1 = value,
+            Err(slot) => self.sources.insert(slot, (key, value)),
+        }
+    }
+
+    /// LWW removal: drops the entry at `key` only when `timestamp` is strictly newer than the
+    /// entry's own timestamp. Returns `true` if an entry was removed, `false` if the key is absent
+    /// or the stored entry is not older.
+    pub fn remove_source(&mut self, key: &SourceKey, timestamp: TimeStamp) -> bool {
+        let Ok(slot) = self.sources.binary_search_by_key(key, |(existing_key, _)| *existing_key) else {
+            return false;
+        };
+
+        let removal_wins = timestamp > self.sources[slot].1.timestamp;
+        if removal_wins {
+            self.sources.remove(slot);
+        }
+        removal_wins
+    }
+
+    /// Unconditional variant of [`remove_source`](Self::remove_source), used for silent-zone rewind.
+    /// Returns `true` if an entry existed at `key` and was removed.
+    pub fn force_remove_source(&mut self, key: &SourceKey) -> bool {
+        let lookup = self.sources.binary_search_by_key(key, |(existing_key, _)| *existing_key);
+        if let Ok(slot) = lookup {
+            self.sources.remove(slot);
+        }
+        lookup.is_ok()
+    }
+
+    /// True if the chain already carries a `DataSource::Embedded` source. Decodes each source body into
+    /// `DataSource` so a shape change in the serialized form can't slip an embedded source past detection.
+    pub fn has_embedded_source(&self) -> bool {
+        self.sources
+            .iter()
+            // `&serde_json::Value` is itself a `Deserializer`, so each body is decoded by reference
+            // rather than cloned first.
+            .any(|(_, value)| matches!(graphene_resource::DataSource::deserialize(&value.source), Ok(graphene_resource::DataSource::Embedded)))
+    }
+
+    /// A `SourceKey` ordered strictly ahead of every current source, so an inserted entry becomes the
+    /// highest-precedence fallback. The key's priority is one less than the first (lowest-priority-value)
+    /// entry's, or `-1` when the chain is empty; `peer` is carried into the key unchanged.
+    pub fn highest_precedence_key(&self, peer: PeerId) -> SourceKey {
+        let min_priority = self.sources.first().map(|(key, _)| key.priority.value()).unwrap_or(0.);
+        SourceKey {
+            priority: Priority::new(min_priority - 1.).expect("finite priority minus one is finite"),
+            peer,
+        }
+    }
+}
+
+/// All resources referenced by the document, keyed by stable per-document [`ResourceId`]. Replicates
+/// through the normal CmRDT path; bytes live in content-addressed storage keyed by [`ResourceHash`].
+pub type ResourceStore = HashMap<ResourceId, ResourceEntry>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// `ResourceEntry`'s accessors rely on `sources` being sorted by `SourceKey`. Deserializing an
+    /// out-of-order chain (older writer, hand-edited file) must restore the invariant rather than leave
+    /// `binary_search` to silently misbehave.
+    #[test]
+    fn deserialize_sorts_sources() {
+        let source = |priority: f64| {
+            (
+                SourceKey {
+                    priority: Priority::new(priority).expect("finite"),
+                    peer: PeerId(1),
+                },
+                SourceValue {
+                    source: serde_json::json!(priority),
+                    timestamp: TimeStamp::ORIGIN,
+                },
+            )
+        };
+
+        // Serialize a deliberately unsorted chain through the raw shape, then deserialize as `ResourceEntry`.
+        let unsorted = serde_json::json!({
+            "sources": [source(2.), source(0.), source(1.)],
+            "hash": null,
+            "hash_timestamp": TimeStamp::ORIGIN,
+        });
+
+        let entry: ResourceEntry = serde_json::from_value(unsorted).expect("deserialize");
+        let priorities: Vec<f64> = entry.sources.iter().map(|(key, _)| key.priority.value()).collect();
+        assert_eq!(priorities, vec![0., 1., 2.], "sources must be sorted by SourceKey after deserialization");
+    }
+
+    /// Duplicate keys on disk violate the `binary_search` uniqueness invariant. Deserialization must
+    /// collapse them, keeping the higher-timestamp value (LWW).
+    #[test]
+    fn deserialize_dedups_sources_by_lww() {
+        let key = SourceKey {
+            priority: Priority::new(1.).expect("finite"),
+            peer: PeerId(1),
+        };
+        let entry = |counter: u64, body: &str| {
+            (
+                key,
+                SourceValue {
+                    source: serde_json::json!(body),
+                    timestamp: TimeStamp { counter, peer: PeerId(1) },
+                },
+            )
+        };
+
+        let with_duplicates = serde_json::json!({
+            "sources": [entry(5, "newer"), entry(1, "older")],
+            "hash": null,
+            "hash_timestamp": TimeStamp::ORIGIN,
+        });
+
+        let resource: ResourceEntry = serde_json::from_value(with_duplicates).expect("deserialize");
+        assert_eq!(resource.sources.len(), 1, "duplicate keys must collapse to one entry");
+        assert_eq!(resource.sources[0].1.source, serde_json::json!("newer"), "the higher-timestamp value must win");
+    }
+}
diff --git a/document/graph-storage/src/round_trip_tests.rs b/document/graph-storage/src/round_trip_tests.rs
new file mode 100644
index 0000000000..4e906467e5
--- /dev/null
+++ b/document/graph-storage/src/round_trip_tests.rs
@@ -0,0 +1,744 @@
+use std::borrow::Cow;
+use std::collections::HashMap;
+
+use core_types::context::ContextDependencies;
+use core_types::uuid::NodeId;
+use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeInput, NodeNetwork};
+use graph_craft::graphene_compiler::Compiler;
+use graph_craft::{ProtoNodeIdentifier, Type, concrete};
+
+use crate::{NodeMetadataSource, PeerId, Position, Registry};
+
+/// Helper function to verify a NodeNetwork can be compiled successfully.
+/// Note: This only works for complete networks with all inputs resolved.
+/// Test networks with Import inputs will fail compilation (which is expected).
+fn verify_network_compiles(network: &NodeNetwork) -> Result<(), String> {
+    let compiler = Compiler {};
+    compiler.compile_single(network.clone()).map_err(|e| format!("Compilation failed: {e:?}"))?;
+    Ok(())
+}
+
+/// Convert a runtime network to a storage `Registry`, returning the declarations alongside it.
+/// Proto-node declaration content is no longer stored in the registry (it lives in a byte store);
+/// these tests have no byte store, so they keep the extracted bytes in hand and rebuild a
+/// `Declarations` map for the back-conversion.
+fn to_registry(network: &NodeNetwork) -> (Registry, crate::Declarations) {
+    let conversion = Registry::convert_from_runtime(network, &crate::NoMetadata, &Default::default(), PeerId(0)).expect("Failed to convert NodeNetwork to Registry");
+    let declarations = conversion.declarations().expect("rebuild declarations");
+    (conversion.registry, declarations)
+}
+
+/// A one-node network whose single node references `id` via a `TaggedValue::Resource` input, so
+/// `convert_resources` (which only snapshots network-referenced resources) carries the resource.
+fn network_referencing_resource(id: graphene_resource::ResourceId) -> NodeNetwork {
+    network_referencing_resources(&[id])
+}
+
+/// A network with one node per resource, each referencing its resource via a `TaggedValue::Resource`
+/// input, so all listed resources are network-referenced and survive conversion.
+fn network_referencing_resources(ids: &[graphene_resource::ResourceId]) -> NodeNetwork { + use graph_craft::document::value::TaggedValue; + + let nodes = ids + .iter() + .enumerate() + .map(|(i, id)| { + ( + NodeId(i as u64), + DocumentNode { + inputs: vec![NodeInput::value(TaggedValue::Resource(*id), false)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + ) + }) + .collect(); + + NodeNetwork { nodes, ..Default::default() } +} + +fn create_simple_network() -> NodeNetwork { + NodeNetwork { + exports: vec![NodeInput::node(NodeId(1), 0)], + nodes: [ + ( + NodeId(0), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(u32), 0), NodeInput::import(concrete!(u32), 1)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::structural::ConsNode")), + ..Default::default() + }, + ), + ( + NodeId(1), + DocumentNode { + inputs: vec![NodeInput::node(NodeId(0), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::AddPairNode")), + ..Default::default() + }, + ), + ] + .into_iter() + .collect(), + ..Default::default() + } +} + +/// Creates a network with a nested sub-network +fn create_nested_network() -> NodeNetwork { + // Create a simple inner network + let inner_network = NodeNetwork { + exports: vec![NodeInput::node(NodeId(10), 0)], + nodes: [( + NodeId(10), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(u32), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + )] + .into_iter() + .collect(), + ..Default::default() + }; + + // Create outer network that uses the inner network + NodeNetwork { + exports: vec![NodeInput::node(NodeId(1), 0)], + nodes: [ + ( + NodeId(0), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(u32), 0)], + 
implementation: DocumentNodeImplementation::Network(inner_network), + ..Default::default() + }, + ), + ( + NodeId(1), + DocumentNode { + inputs: vec![NodeInput::node(NodeId(0), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + ), + ] + .into_iter() + .collect(), + ..Default::default() + } +} + +#[test] +fn test_simple_round_trip() { + let original_network = create_simple_network(); + + // Convert to Registry + let (registry, declarations) = to_registry(&original_network); + + // Convert back to NodeNetwork + let (converted_network, _) = registry.to_runtime_with_metadata(&declarations).expect("Failed to convert Registry back to NodeNetwork"); + + // Verify structure is preserved + assert_eq!(converted_network.nodes.len(), original_network.nodes.len(), "Node count should be preserved"); + assert_eq!(converted_network.exports.len(), original_network.exports.len(), "Export count should be preserved"); + + // Verify exports reference the correct nodes + match (&original_network.exports[0], &converted_network.exports[0]) { + ( + NodeInput::Node { + node_id: orig_id, + output_index: orig_idx, + }, + NodeInput::Node { + node_id: conv_id, + output_index: conv_idx, + }, + ) => { + assert_eq!(orig_id, conv_id, "Export should reference the same node"); + assert_eq!(orig_idx, conv_idx, "Export output index should match"); + } + _ => panic!("Exports should both be Node inputs"), + } + + // Verify node implementations are preserved + for (node_id, orig_node) in &original_network.nodes { + let conv_node = converted_network.nodes.get(node_id).expect("Node should exist after round-trip"); + + match (&orig_node.implementation, &conv_node.implementation) { + (DocumentNodeImplementation::ProtoNode(orig_ident), DocumentNodeImplementation::ProtoNode(conv_ident)) => { + assert_eq!(orig_ident.as_str(), conv_ident.as_str(), "ProtoNode identifier should be preserved"); + } + _ => 
panic!("Implementation type should be preserved"), + } + + // Verify input count is preserved + assert_eq!(conv_node.inputs.len(), orig_node.inputs.len(), "Input count should be preserved"); + } +} + +#[test] +fn test_nested_network_round_trip() { + let original_network = create_nested_network(); + + // Convert to Registry + let (registry, declarations) = to_registry(&original_network); + + // Convert back to NodeNetwork + let (converted_network, _) = registry.to_runtime_with_metadata(&declarations).expect("Failed to convert Registry back to NodeNetwork"); + + // Verify structure is preserved + assert_eq!(converted_network.nodes.len(), original_network.nodes.len(), "Node count should be preserved"); + + // Find the node with nested network + let orig_nested_node = original_network.nodes.get(&NodeId(0)).expect("Node 0 should exist"); + let conv_nested_node = converted_network.nodes.get(&NodeId(0)).expect("Node 0 should exist after round-trip"); + + // Verify nested network is preserved + match (&orig_nested_node.implementation, &conv_nested_node.implementation) { + (DocumentNodeImplementation::Network(orig_inner), DocumentNodeImplementation::Network(conv_inner)) => { + assert_eq!(orig_inner.nodes.len(), conv_inner.nodes.len(), "Inner network node count should be preserved"); + assert_eq!(orig_inner.exports.len(), conv_inner.exports.len(), "Inner network export count should be preserved"); + } + _ => panic!("Nested network should be preserved"), + } +} + +#[test] +fn test_registry_structure() { + let network = create_simple_network(); + + let (registry, _declarations) = to_registry(&network); + + assert!(registry.resources.len() >= 2, "Should have proto-node declaration resources"); + assert!(!registry.networks.is_empty(), "Should have at least one network"); + + let root_network = registry.networks.get(&crate::ROOT_NETWORK).expect("Root network should exist"); + assert_eq!(root_network.exports.len(), network.exports.len(), "Export count should match"); + + // 
Exports are first-class slots, no synthetic identity nodes in node_instances. + for slot in &root_network.exports { + assert!(slot.target.is_some(), "Round-tripped exports should have a target"); + } +} + +#[test] +fn test_nested_network_flattening() { + let network = create_nested_network(); + + let registry = Registry::try_from(&network).expect("Failed to convert to Registry"); + + // Outer network has 2 nodes, one of which contains a nested network with 1 node. + // No more identity-node padding, so node_instances has exactly the real nodes. + let expected_nodes = 3; + assert_eq!( + registry.node_instances.len(), + expected_nodes, + "Registry should have exactly {} nodes, found {}", + expected_nodes, + registry.node_instances.len() + ); + + // Two networks: root (ROOT_NETWORK) and nested (1). + assert!(registry.networks.len() >= 2, "Should have at least 2 networks (root + nested)"); +} + +#[test] +fn test_metadata_preservation() { + // Create a network with nodes that have non-default metadata + let context_features = ContextDependencies { + extract: core_types::context::ContextFeatures::FOOTPRINT | core_types::context::ContextFeatures::REAL_TIME, + ..Default::default() + }; + + let network = NodeNetwork { + exports: vec![NodeInput::node(NodeId(1), 0)], + nodes: [ + ( + NodeId(0), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(f64), 0), NodeInput::import(Type::Generic(Cow::Borrowed("T")), 1)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("test::NodeWithMetadata")), + call_argument: concrete!(String), + context_features, + visible: false, // Non-default value + skip_deduplication: true, // Non-default value + ..Default::default() + }, + ), + ( + NodeId(1), + DocumentNode { + inputs: vec![NodeInput::node(NodeId(0), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("test::OutputNode")), + call_argument: concrete!((u32, u32)), + ..Default::default() + }, + ), + ] + .into_iter() + 
.collect(), + ..Default::default() + }; + + // Convert to Registry and back + let (registry, declarations) = to_registry(&network); + let (converted, _) = registry.to_runtime_with_metadata(&declarations).expect("Failed to convert back to NodeNetwork"); + + // Verify call_argument is preserved + let orig_node_0 = network.nodes.get(&NodeId(0)).unwrap(); + let conv_node_0 = converted.nodes.get(&NodeId(0)).unwrap(); + assert_eq!(orig_node_0.call_argument, conv_node_0.call_argument, "call_argument for node 0 should be preserved"); + + let orig_node_1 = network.nodes.get(&NodeId(1)).unwrap(); + let conv_node_1 = converted.nodes.get(&NodeId(1)).unwrap(); + assert_eq!(orig_node_1.call_argument, conv_node_1.call_argument, "call_argument for node 1 should be preserved"); + + // Verify context_features is preserved + assert_eq!(orig_node_0.context_features, conv_node_0.context_features, "context_features should be preserved"); + + // Verify visible is preserved + assert_eq!(orig_node_0.visible, conv_node_0.visible, "visible should be preserved"); + + // Verify skip_deduplication is preserved + assert_eq!(orig_node_0.skip_deduplication, conv_node_0.skip_deduplication, "skip_deduplication should be preserved"); + + // Verify import_type is preserved for Import inputs + match (&orig_node_0.inputs[0], &conv_node_0.inputs[0]) { + (NodeInput::Import { import_type: orig_type, .. }, NodeInput::Import { import_type: conv_type, .. }) => { + assert_eq!(orig_type, conv_type, "import_type for first import should be preserved (f64)"); + } + _ => panic!("First input should be Import"), + } + + match (&orig_node_0.inputs[1], &conv_node_0.inputs[1]) { + (NodeInput::Import { import_type: orig_type, .. }, NodeInput::Import { import_type: conv_type, .. 
}) => { + assert_eq!(orig_type, conv_type, "import_type for second import should be preserved (generic T)"); + } + _ => panic!("Second input should be Import"), + } +} + +#[test] +fn test_demo_artwork_round_trip() { + use graph_craft::util::{DEMO_ART, load_from_name}; + + // Test each demo artwork + for artwork_name in DEMO_ART { + println!("Testing artwork: {}", artwork_name); + + let original_network = load_from_name(artwork_name); + + // Convert to Registry + let (registry, declarations) = to_registry(&original_network); + + // Convert back to NodeNetwork + let (converted_network, _) = registry + .to_runtime_with_metadata(&declarations) + .unwrap_or_else(|e| panic!("Failed to convert {} back to NodeNetwork: {:?}", artwork_name, e)); + + // Basic structural checks + assert_eq!(original_network.nodes.len(), converted_network.nodes.len(), "{}: Node count should be preserved", artwork_name); + + assert_eq!(original_network.exports.len(), converted_network.exports.len(), "{}: Export count should be preserved", artwork_name); + + // Verify each node's metadata is preserved + for (node_id, orig_node) in &original_network.nodes { + let conv_node = converted_network + .nodes + .get(node_id) + .unwrap_or_else(|| panic!("{}: Node {:?} should exist after round-trip", artwork_name, node_id)); + + // Check metadata fields + assert_eq!( + orig_node.call_argument, conv_node.call_argument, + "{}: call_argument should be preserved for node {:?}", + artwork_name, node_id + ); + assert_eq!( + orig_node.context_features, conv_node.context_features, + "{}: context_features should be preserved for node {:?}", + artwork_name, node_id + ); + assert_eq!(orig_node.visible, conv_node.visible, "{}: visible should be preserved for node {:?}", artwork_name, node_id); + assert_eq!( + orig_node.skip_deduplication, conv_node.skip_deduplication, + "{}: skip_deduplication should be preserved for node {:?}", + artwork_name, node_id + ); + + // Check input count + assert_eq!( + 
orig_node.inputs.len(), + conv_node.inputs.len(), + "{}: Input count should be preserved for node {:?}", + artwork_name, + node_id + ); + } + + // Verify the converted demo artwork can be compiled (demo artworks are complete networks) + verify_network_compiles(&converted_network).unwrap_or_else(|e| panic!("{}: Converted artwork should compile successfully: {}", artwork_name, e)); + + println!("✓ {} passed", artwork_name); + } +} + +/// Per-node UI state used by the in-test metadata source. Keyed by `(network_path, local_id)`. +#[derive(Clone, Debug, Default, PartialEq)] +struct UiState { + position: Option, + is_layer: bool, + display_name: Option, + locked: bool, + pinned: bool, +} + +/// In-test `NodeMetadataSource` backed by a `HashMap` keyed on the full `(network_path, local_id)` +/// addressing the editor would use. +struct TestMetadata { + entries: HashMap<(Vec, NodeId), UiState>, +} + +impl TestMetadata { + fn new() -> Self { + Self { entries: HashMap::new() } + } + + fn insert(&mut self, network_path: &[NodeId], local_id: NodeId, state: UiState) { + self.entries.insert((network_path.to_vec(), local_id), state); + } + + fn get(&self, network_path: &[NodeId], local_id: NodeId) -> Option<&UiState> { + self.entries.get(&(network_path.to_vec(), local_id)) + } +} + +impl NodeMetadataSource for TestMetadata { + fn position(&self, network_path: &[NodeId], local_id: NodeId) -> Option { + self.get(network_path, local_id).and_then(|s| s.position) + } + fn is_layer(&self, network_path: &[NodeId], local_id: NodeId) -> bool { + self.get(network_path, local_id).is_some_and(|s| s.is_layer) + } + fn display_name(&self, network_path: &[NodeId], local_id: NodeId) -> Option<&str> { + self.get(network_path, local_id).and_then(|s| s.display_name.as_deref()) + } + fn locked(&self, network_path: &[NodeId], local_id: NodeId) -> bool { + self.get(network_path, local_id).is_some_and(|s| s.locked) + } + fn pinned(&self, network_path: &[NodeId], local_id: NodeId) -> bool { + 
self.get(network_path, local_id).is_some_and(|s| s.pinned) + } +} + +/// Round-trips a nested network with editor metadata: layer + absolute position on one node, +/// node-in-chain on another, layer-in-stack inside a nested network. Asserts every entry comes +/// back unchanged and addressed by the correct `(network_path, local_id)`. +#[test] +fn test_ui_metadata_round_trip() { + let network = create_nested_network(); + + let mut metadata = TestMetadata::new(); + + // Root-network node 0 (the one with a nested network): a layer at an absolute position with + // a display name. Editor `network_path` for root-network nodes is empty. + metadata.insert( + &[], + NodeId(0), + UiState { + position: Some(Position::Absolute([3, 5])), + is_layer: true, + display_name: Some("Outer layer".into()), + locked: true, + pinned: false, + }, + ); + + // Root-network node 1: a plain node in a chain. + metadata.insert( + &[], + NodeId(1), + UiState { + position: Some(Position::Chain), + ..Default::default() + }, + ); + + // Nested-network node 10 (lives under node 0): a layer in a stack. + metadata.insert( + &[NodeId(0)], + NodeId(10), + UiState { + position: Some(Position::Stack(7)), + is_layer: true, + ..Default::default() + }, + ); + + let conversion = Registry::convert_from_runtime(&network, &metadata, &Default::default(), PeerId(0)).expect("Failed to convert to Registry with metadata"); + let declarations = conversion.declarations().expect("rebuild declarations"); + let registry = conversion.registry; + + let (converted, entries) = registry.to_runtime_with_metadata(&declarations).expect("Failed to convert Registry back with metadata"); + + // Graph structure still round-trips. + assert_eq!(converted.nodes.len(), network.nodes.len()); + + // Three entries — one per node we attached metadata to. + assert_eq!(entries.len(), 3, "expected 3 metadata entries, got {}: {entries:#?}", entries.len()); + + // Look entries back up by their address so we don't rely on emission order. 
+ let lookup: HashMap<(Vec, NodeId), &crate::NodeMetadataEntry> = entries.iter().map(|e| ((e.network_path.clone(), e.local_id), e)).collect(); + + let root_layer = lookup.get(&(vec![], NodeId(0))).expect("entry for root-network layer node missing"); + assert_eq!(root_layer.position, Some(Position::Absolute([3, 5]))); + assert!(root_layer.is_layer); + assert_eq!(root_layer.display_name.as_deref(), Some("Outer layer")); + assert!(root_layer.locked); + assert!(!root_layer.pinned); + + let root_node = lookup.get(&(vec![], NodeId(1))).expect("entry for root-network chain node missing"); + assert_eq!(root_node.position, Some(Position::Chain)); + assert!(!root_node.is_layer); + + let nested_layer = lookup.get(&(vec![NodeId(0)], NodeId(10))).expect("entry for nested layer-in-stack missing"); + assert_eq!(nested_layer.position, Some(Position::Stack(7))); + assert!(nested_layer.is_layer); +} + +/// A runtime `ResourceRegistry` (source chain + resolved hash) survives conversion into the storage +/// `Registry`: source bodies are preserved in priority order and the hash carries through. +#[test] +fn resources_round_trip_through_from_runtime() { + use graphene_resource::{DataSource, ResourceHash, ResourceId, ResourceRegistry}; + + let mut resources = ResourceRegistry::new(); + let id = ResourceId::new(); + // Two sources in chain order: an embedded fallback then a URL. + resources.push_source_back(&id, DataSource::Embedded); + resources.push_source_back(&id, DataSource::Url("https://example.com/img.png".parse().unwrap())); + let hash = ResourceHash::from(&b"image bytes"[..]); + resources.resolve(&id, hash); + + // The resource must be referenced by a node to be snapshotted: `convert_resources` only carries + // resources the network uses (orphans in the runtime cache, e.g. retained across undo, are dropped). 
+ let network = network_referencing_resource(id); + + let registry = Registry::from_runtime_with_metadata(&network, &crate::NoMetadata, &resources, PeerId(7)).expect("from_runtime failed"); + + let entry = registry.resources.get(&id).expect("resource entry present in storage registry"); + assert_eq!(entry.hash, Some(hash), "resolved hash carried through"); + assert_eq!(entry.sources.len(), 2, "both sources carried through"); + + // The chain iterates in priority order; decode bodies back to DataSource to compare. + let decoded: Vec = entry.sources.iter().map(|(_, v)| serde_json::from_value(v.source.clone()).expect("source body decodes")).collect(); + assert_eq!(decoded, vec![DataSource::Embedded, DataSource::Url("https://example.com/img.png".parse().unwrap())]); + + // All source keys carry the document peer. + assert!(entry.sources.iter().all(|(key, _)| key.peer == PeerId(7)), "source keys scoped to the document peer"); +} + +/// Full resource round-trip: a runtime `ResourceRegistry` converted into storage and back is equal +/// to the original (source chains in order, resolved hashes preserved). +#[test] +fn resource_registry_round_trips_runtime_to_storage_to_runtime() { + use graphene_resource::{DataSource, ResourceHash, ResourceId, ResourceRegistry}; + + let mut original = ResourceRegistry::new(); + + // A resolved resource with a two-entry fallback chain. + let image = ResourceId::new(); + original.push_source_back(&image, DataSource::Embedded); + original.push_source_back(&image, DataSource::Url("https://example.com/img.png".parse().unwrap())); + original.resolve(&image, ResourceHash::from(&b"image bytes"[..])); + + // An unresolved resource (sources but no hash yet). + let font = ResourceId::new(); + original.push_source_back( + &font, + DataSource::Font { + family: "Inter".into(), + style: Some("Bold".into()), + }, + ); + + // Both resources must be referenced by a node to be snapshotted (see `convert_resources`). 
+ let network = network_referencing_resources(&[image, font]); + + let registry = Registry::from_runtime_with_metadata(&network, &crate::NoMetadata, &original, PeerId(3)).expect("from_runtime failed"); + let restored = registry.to_resource_registry().expect("to_resource_registry failed"); + + // Compare the two document resources specifically; the referencing nodes' proto-node declarations + // also become resources in the registry, so the restored set is a superset of `original`. + for id in [image, font] { + assert_eq!( + restored.info(&id).map(|info| info.sources), + original.info(&id).map(|info| info.sources), + "sources for {id:?} did not survive the round-trip" + ); + assert_eq!( + restored.info(&id).and_then(|info| info.hash.copied()), + original.info(&id).and_then(|info| info.hash.copied()), + "resolved hash for {id:?} did not survive the round-trip" + ); + } +} + +/// A resource present in the runtime cache but not referenced by any node is *not* snapshotted into the +/// storage registry. This is the orphan case: undoing an image paste removes the node but the runtime +/// keeps the resource alive for redo, so a later diff must not see the orphan as a new `AddResource` +/// (which would resurface the undone paste as a phantom gesture). Regression guard for that divergence. +#[test] +fn unreferenced_runtime_resource_is_not_snapshotted() { + use graphene_resource::{DataSource, ResourceHash, ResourceId, ResourceRegistry}; + + let referenced = ResourceId::new(); + let orphan = ResourceId::new(); + + let mut resources = ResourceRegistry::new(); + for id in [referenced, orphan] { + resources.push_source_back(&id, DataSource::Embedded); + resources.resolve(&id, ResourceHash::from(&b"bytes"[..])); + } + + // Only `referenced` is wired to a node; `orphan` lingers in the cache (as it would after an undo). 
+ let network = network_referencing_resource(referenced); + + let registry = Registry::from_runtime_with_metadata(&network, &crate::NoMetadata, &resources, PeerId(1)).expect("from_runtime failed"); + + assert!(registry.resources.contains_key(&referenced), "the network-referenced resource must be snapshotted"); + assert!(!registry.resources.contains_key(&orphan), "the unreferenced (orphan) resource must not be snapshotted"); +} + +/// A node-input `TaggedValue::F64` must survive the storage round-trip bit-exact. Inputs are stored as a +/// self-describing `serde_json::Value` (encoded with the registry's MessagePack codec), so this guards +/// against any precision loss in the f64 -> serde_json::Number -> f64 path for a value with a full +/// 17-significant-digit mantissa. +#[test] +fn node_input_f64_round_trips_bit_exact() { + use graph_craft::document::value::TaggedValue; + + // A value whose exact f64 bits matter: 1/3-ish with a non-terminating binary expansion. + let precise = 107.33334350585939_f64; + let network = NodeNetwork { + nodes: [( + NodeId(0), + DocumentNode { + inputs: vec![NodeInput::value(TaggedValue::F64(precise), false)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + )] + .into_iter() + .collect(), + ..Default::default() + }; + + let (registry, declarations) = to_registry(&network); + let (converted, _) = registry.to_runtime_with_metadata(&declarations).expect("to_runtime"); + + let input = &converted.nodes.get(&NodeId(0)).expect("node 0").inputs[0]; + let NodeInput::Value { tagged_value, .. 
} = input else { + panic!("expected a value input, got {input:?}") + }; + let TaggedValue::F64(actual) = &**tagged_value else { + panic!("expected F64, got {:?}", tagged_value) + }; + + assert_eq!(actual.to_bits(), precise.to_bits(), "f64 node input drifted: {actual} != {precise}"); +} + +/// Two storage nodes in one network carrying the same `ORIGINAL_NODE_ID` both map to one runtime ID. +/// Conversion must reject this rather than silently collapse them and drop a node. +#[test] +fn duplicate_runtime_node_id_is_rejected() { + use crate::AttributesExt; + use crate::TimeStamp; + use crate::attr::ORIGINAL_NODE_ID; + use crate::to_runtime::ConversionError; + + let (mut registry, declarations) = to_registry(&create_simple_network()); + + // Force both root-network nodes onto the same runtime ID. + for node in registry.node_instances.values_mut() { + node.attributes.set(ORIGINAL_NODE_ID, serde_json::json!(7), TimeStamp::ORIGIN); + } + + let error = registry.to_runtime_with_metadata(&declarations).expect_err("duplicate runtime ID must error"); + assert!( + matches!(error, ConversionError::DuplicateRuntimeNodeId { runtime_id: 7, .. }), + "expected DuplicateRuntimeNodeId, got {error:?}" + ); +} + +/// A node input referencing a node in a different network can't be remapped to a valid local runtime +/// ID, so conversion must reject it rather than emit a dangling reference. +#[test] +fn cross_network_reference_is_rejected() { + use crate::to_runtime::ConversionError; + use crate::{Network, NodeInput}; + + let (mut registry, declarations) = to_registry(&create_simple_network()); + + // `create_simple_network` wires one node's input to another, both in the root network. Find the + // referenced storage ID, then move that node into a fresh second network so the reference crosses + // a network boundary. + let referenced_storage_id = registry + .node_instances + .values() + .flat_map(|node| node.inputs()) + .find_map(|slot| match slot.input { + NodeInput::Node { node_id, .. 
} => Some(node_id), + _ => None, + }) + .expect("simple network has a node-to-node reference"); + + let other_network = 999; + registry.networks.insert(other_network, Network::default()); + registry.node_instances.get_mut(&referenced_storage_id).expect("referenced node exists").network = other_network; + + let error = registry.to_runtime_with_metadata(&declarations).expect_err("cross-network reference must error"); + assert!(matches!(error, ConversionError::CrossNetworkReference { .. }), "expected CrossNetworkReference, got {error:?}"); +} + +/// A network's `scope_injections` (key -> (NodeId, Type)) must survive a storage round trip, with the +/// node reference resolved back to the same runtime-local ID it pointed at originally. +#[test] +fn scope_injections_round_trip() { + let mut network = create_simple_network(); + network.scope_injections.insert("editor-api".to_string(), (NodeId(0), concrete!(u32))); + + let (registry, declarations) = to_registry(&network); + let (converted, _) = registry.to_runtime_with_metadata(&declarations).expect("to_runtime"); + + let (node_id, ty) = converted.scope_injections.get("editor-api").expect("scope injection must survive the round trip"); + assert_eq!(*node_id, NodeId(0), "the injection's node reference must resolve back to its original runtime ID"); + assert_eq!(*ty, concrete!(u32), "the injection's type must be preserved"); +} + +/// A stored scope injection whose node reference no longer resolves (node removed, or moved to another +/// network) must error rather than emit an injection pointing at a nonexistent runtime node. +#[test] +fn dangling_scope_injection_is_rejected() { + use crate::AttributesExt; + use crate::TimeStamp; + use crate::to_runtime::ConversionError; + + let (mut registry, declarations) = to_registry(&create_simple_network()); + + // Store an injection pointing at a storage ID that no node carries, leaving the reference dangling + // while the rest of the graph stays valid. 
The root network is whichever one holds the nodes. + let root_network_id = registry.node_instances.values().next().expect("simple network has nodes").network(); + let injections: HashMap = [("editor-api".to_string(), (u64::MAX, concrete!(u32)))].into_iter().collect(); + registry + .networks + .get_mut(&root_network_id) + .expect("root network exists") + .attributes + .set_serialized(crate::attr::SCOPE_INJECTIONS, &injections, TimeStamp::ORIGIN) + .expect("serialize injections"); + + let error = registry.to_runtime_with_metadata(&declarations).expect_err("dangling scope injection must error"); + assert!(matches!(error, ConversionError::DanglingScopeInjection { .. }), "expected DanglingScopeInjection, got {error:?}"); +} diff --git a/document/graph-storage/src/session.rs b/document/graph-storage/src/session.rs new file mode 100644 index 0000000000..2a5a549014 --- /dev/null +++ b/document/graph-storage/src/session.rs @@ -0,0 +1,563 @@ +use crate::from_runtime; +use crate::{ApplyMode, AttributesExt, Delta, Document, LamportClock, NetworkId, NodeId, NodeMetadataSource, PeerId, Registry, RegistryDelta, RegistryTarget, ResourceEntry, Rev, TimeStamp, UserId}; +use graphene_resource::{ResourceHash, ResourceId}; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; + +pub mod attrs { + pub const UI_NAV_PTZ: &str = "ui::nav::ptz"; + pub const UI_NAV_TRANSFORM: &str = "ui::nav::transform"; + pub const UI_NAV_WIDTH: &str = "ui::nav::width"; + pub const UI_PREVIEWING: &str = "ui::previewing"; + + // Document-level editor chrome, stored in `Registry.attributes` (document scope). Each setting is + // its own key so concurrent edits to one don't clobber another. 
+ pub const UI_DOC_PTZ: &str = "ui::doc::ptz"; + pub const UI_DOC_RENDER_MODE: &str = "ui::doc::render_mode"; + pub const UI_DOC_OVERLAYS: &str = "ui::doc::overlays"; + pub const UI_DOC_RULERS_VISIBLE: &str = "ui::doc::rulers_visible"; + pub const UI_DOC_SNAPPING: &str = "ui::doc::snapping"; + pub const UI_DOC_COLLAPSED: &str = "ui::doc::collapsed"; +} + +/// A live editing session over a `Document`. Owns the document plus runtime collaboration +/// state that isn't persisted (currently just peer heartbeat tracking). +#[derive(Clone, Debug)] +pub struct Session { + pub(crate) document: Document, + /// Each peer's `retirement_tip` as reported by their most recent heartbeat. Drives + /// leader-eligibility computation (lowest PeerId among peers whose tip matches the session max). + #[expect(dead_code, reason = "Populated once heartbeat/leader-election transport lands; held now so the field and constructors are in place.")] + remote_tips: HashMap, +} + +impl Session { + /// Mints a fresh `PeerId` from the process-wide UUID generator and wraps an empty `Document`. + /// Two peers in the same process will collide (the generator is seeded once); use `with_peer` + /// in tests where determinism matters. + pub fn new() -> Self { + Self::with_peer(PeerId(core_types::uuid::generate_uuid())) + } + + /// Construct a session bound to a specific `PeerId`. Used by tests; production code wants + /// `Session::new`. 
+ pub fn with_peer(peer: PeerId) -> Self { + Self { + document: Document { + registry: Registry::default(), + retired_snapshot: Registry::default(), + history: HashMap::new(), + hot_log: Vec::new(), + head: 0, + redo_stack: Vec::new(), + clock: LamportClock::new(peer), + peer, + last_broadcast_rev: None, + next_node_counter: 0, + }, + remote_tips: HashMap::new(), + } + } + + pub fn peer(&self) -> PeerId { + self.document.peer + } + + pub fn registry(&self) -> &Registry { + &self.document.registry + } + + /// Diff the current registry against a fresh conversion of `network`, then commit each emitted + /// op as its own `Delta` on the local chain. One `clock.tick()` per op (strictly causal within + /// a commit). Returns the new `Rev`s in commit order (empty if nothing changed) plus the + /// proto-node declaration bytes the conversion extracted, keyed by content hash, for the caller + /// to persist into its byte store (`graph-storage` itself is byte-unaware). + /// + /// Stages the diff as hot ops rather than retired deltas: each op is applied to the registry and + /// pushed onto the hot log. The caller persists the returned hot frames and then calls `retire` + /// to promote them into durable history. + #[cfg(any(feature = "conversion", test))] + pub fn stage_from_runtime( + &mut self, + network: &graph_craft::document::NodeNetwork, + metadata: &M, + resources: &graphene_resource::ResourceRegistry, + ) -> Result<(Vec, from_runtime::DeclarationBytes), CommitError> { + let conversion = Registry::convert_from_runtime(network, metadata, resources, self.document.peer)?; + let ops = crate::delta::compute_deltas(&self.document.registry, &conversion.registry); + let hot_ops = self.stage_ops(ops)?; + Ok((hot_ops, conversion.declaration_bytes)) + } + + /// Resolve each runtime `network_path` to its stable [`NetworkId`] for this document's peer, so the + /// caller can key per-network, per-peer view state (`session.json`) by a stable id. 
Derived from the + /// network structure alone; resources/declarations are irrelevant to the ids. + #[cfg(any(feature = "conversion", test))] + pub fn network_ids(&self, network: &graph_craft::document::NodeNetwork, metadata: &M) -> Result, NetworkId>, CommitError> { + let conversion = Registry::convert_from_runtime(network, metadata, &graphene_resource::ResourceRegistry::new(), self.document.peer)?; + Ok(conversion.network_ids) + } + + /// Register a content-addressed resource as a single `DataSource::Embedded` source resolved to + /// `hash`, staged as one `AddResource` hot op. The caller owns `id` allocation, persists the + /// returned hot frame, retires, and persists the bytes into its byte store separately. + pub fn stage_embedded_resource(&mut self, id: ResourceId, hash: ResourceHash) -> Result, CrdtError> { + let entry = ResourceEntry::embedded(hash, self.document.peer, self.document.clock.tick()); + self.stage_ops([RegistryDelta::AddResource { id, entry }]) + } + + /// Commit an `AddSource(Embedded)` retired delta for each given resource, making it the highest- + /// precedence fallback. Skips resources that already have an `Embedded` source or no longer exist. + /// Used on a throwaway session clone at export time so the exported registry and history agree; + /// callers must guarantee the bytes are available in the export's resource store. 
+ pub fn embed_resource_sources(&mut self, ids: impl IntoIterator) -> Result, CrdtError> { + let embedded = serde_json::to_value(graphene_resource::DataSource::Embedded).expect("DataSource::Embedded serializes"); + + let mut ops = Vec::new(); + for id in ids { + let Some(entry) = self.document.registry.resources.get(&id) else { continue }; + if entry.has_embedded_source() { + continue; + } + let key = entry.highest_precedence_key(self.document.peer); + ops.push(RegistryDelta::AddSource { id, key, source: embedded.clone() }); + } + + // Caller contract: this runs on a throwaway export clone with no unretired hot ops, so the + // working registry equals the snapshot. Overwriting working with the advanced snapshot below + // would otherwise drop hot-zone edits, so reject the call rather than corrupt state. + if !self.document.hot_log.is_empty() { + return Err(CrdtError::HotLogNotEmpty); + } + + let revs = self.commit_ops(ops, false)?; + self.document.registry = self.document.retired_snapshot.clone(); + Ok(revs) + } + + /// Apply each op as a hot op with a freshly-ticked timestamp, returning the staged frames in + /// order. Each tick is strictly later than the last, so the final frame carries the latest + /// timestamp, which is what the caller passes to `retire`. + /// + /// The peer's first contribution is preceded by a `RegisterPeer` op, so the device's + /// `PeerId → UserId` mapping is established (and, under causal delivery, observed by other peers) + /// before any of its edits. A no-op batch doesn't register — registration rides a real edit. 
+ fn stage_ops(&mut self, ops: impl IntoIterator) -> Result, CrdtError> { + let mut pending: Vec = ops.into_iter().collect(); + if pending.is_empty() { + return Ok(Vec::new()); + } + + if !self.document.registry.peer_users.contains_key(&self.document.peer) { + let user = UserId(self.document.peer.0); + pending.insert(0, RegistryDelta::RegisterPeer { peer: self.document.peer, user }); + } + + let mut staged = Vec::with_capacity(pending.len()); + for op in pending { + let hot_op = HotOp { + op, + timestamp: self.document.clock.tick(), + author: self.document.peer, + }; + self.document.apply_hot_op(hot_op.clone())?; + staged.push(hot_op); + } + Ok(staged) + } + + /// Wrap each op as a `Delta`, apply it, and chain it onto the local history. One tick per op. + /// + /// Operates on the *retired snapshot*: reverses are computed against and forward ops applied to it, + /// so each `reverse` captures the true pre-op value rather than the hot-polluted working state. The + /// working registry already reflects these ops (they were staged as hot ops before retirement, or + /// equal the snapshot when there are none), so it is left untouched. + /// + /// `idempotent`: pass `true` when the snapshot already reflects the op (retirement of an already- + /// applied hot op) so duplicate structural inserts no-op rather than error. + fn commit_ops(&mut self, ops: impl IntoIterator, idempotent: bool) -> Result, CrdtError> { + let target = RegistryTarget::Snapshot; + let ops = ops.into_iter(); + let mut produced = Vec::with_capacity(ops.size_hint().0); + + for op in ops { + // A new edit abandons any undone-forward branch: those revs stay in the DAG but are no + // longer reachable via redo. (Mirrors the legacy editor clearing its redo history on + // commit.) Done on the first real op so a no-op commit doesn't silently disable redo. 
+ if produced.is_empty() { + self.document.redo_stack.clear(); + } + + let reverse = self.document.compute_reverse_delta(target, &op)?; + let timestamp = self.document.clock.tick(); + let parents = if self.document.head == 0 { Vec::new() } else { vec![self.document.head] }; + let author = self.document.peer; + + let delta = Delta::new(parents, author, timestamp, op, reverse); + let rev = delta.id; + + for parent in &delta.parents { + if !self.document.history.contains_key(parent) { + return Err(CrdtError::NotFoundInHistory(*parent)); + } + } + let mode = if idempotent { ApplyMode::Idempotent } else { ApplyMode::Live }; + self.document.apply_op_with(target, delta.delta_type.clone(), delta.timestamp, mode)?; + self.document.history.insert(rev, delta); + self.document.head = rev; + produced.push(rev); + } + + Ok(produced) + } + + /// Wrap an already-materialized snapshot. Trusts `registry` to match `history`; advances the + /// clock past every observed timestamp but does not re-apply ops. + pub fn load(peer: PeerId, registry: Registry, history: HashMap, head: Rev, redo_stack: Vec, next_node_counter: u64) -> Self { + let mut clock = LamportClock::new(peer); + for delta in history.values() { + clock.observe(delta.timestamp); + } + + Self { + document: Document { + // The persisted snapshot is the retired state; hot ops (replayed by the caller after + // `load`) build the working registry on top, leaving `retired_snapshot` at retired. + retired_snapshot: registry.clone(), + registry, + history, + hot_log: Vec::new(), + head, + redo_stack, + clock, + peer, + last_broadcast_rev: None, + next_node_counter, + }, + remote_tips: HashMap::new(), + } + } + + /// Rebuild the registry from scratch by applying every delta in causal order. + /// `deltas` must be in causal order (every parent before its children). 
+ pub fn replay_from_history(peer: PeerId, deltas: impl IntoIterator, next_node_counter: u64) -> Result { + let mut session = Self::with_peer(peer); + session.document.next_node_counter = next_node_counter; + + for delta in deltas { + let rev = delta.id; + session.document.apply_op_idempotent(delta.delta_type.clone(), delta.timestamp)?; + session.document.history.insert(rev, delta); + session.document.head = rev; + } + + // Pure retired-delta replay: no hot ops, so the working registry is fully retired. + session.document.retired_snapshot = session.document.registry.clone(); + Ok(session) + } + + /// Apply a hot op without going through the broadcast stream. + pub fn apply_hot_op(&mut self, hot_op: HotOp) -> Result<(), CrdtError> { + self.document.apply_hot_op(hot_op) + } + + /// Replay a persisted hot op. Idempotent on structural ops, suitable for crash recovery + /// where the registry may already reflect the op's effect from a prior retired snapshot. + pub fn replay_hot_op(&mut self, hot_op: HotOp) -> Result<(), CrdtError> { + self.document.replay_hot_op(hot_op) + } + + /// Promote hot ops with timestamp `≤ up_to` into retired deltas, re-applied with fresh + /// retirement timestamps so LWW arms bump field timestamps to `T_retire`. + /// + /// Today: one retired delta per hot op. Coarsening is a future step. + pub fn retire(&mut self, up_to: TimeStamp) -> Result, CrdtError> { + let mut drained = Vec::new(); + let mut remaining = Vec::with_capacity(self.document.hot_log.len()); + for hot_op in self.document.hot_log.drain(..) { + if hot_op.timestamp <= up_to { + drained.push(hot_op); + } else { + remaining.push(hot_op); + } + } + self.document.hot_log = remaining; + + self.commit_ops(drained.into_iter().map(|hot_op| hot_op.op), true) + } + + /// Mark a retired delta as the end of a user gesture, so the undo cursor treats it as a checkpoint. + /// Called once per gesture by the editor-facing commit path (not by resource/internal commits). 
+ pub fn mark_gesture_end(&mut self, rev: Rev) { + let timestamp = self.document.clock.tick(); + if let Some(delta) = self.document.history.get_mut(&rev) { + delta.mark_gesture_end(timestamp); + } + } + + /// Low-level: set a local annotation attribute (e.g. a commit message) on a retired delta in place. + /// Excluded from the delta's content-addressed `Rev`, so identity is unchanged. Returns whether the + /// delta was found. The `Gdd` layer re-persists the affected history frame after calling this. + pub fn annotate_delta(&mut self, rev: Rev, key: &str, value: serde_json::Value) -> bool { + let timestamp = self.document.clock.tick(); + self.document.history.get_mut(&rev).map(|delta| delta.attributes.set(key, value, timestamp)).is_some() + } + + /// Whether there is a retired commit at `head` that can be undone in the silent zone (a commit + /// after `last_broadcast_rev`). `head == 0` is the empty history; published commits aren't + /// silently undoable (that needs a forward reverse-delta op, deferred until transport lands). + /// + /// The earliest gesture (the document's loaded/created base) is *not* undoable: undoing it would + /// rewind into the pre-base state, which legacy never offers (opening a document gives an empty undo + /// history). We detect "head is on the earliest gesture" by walking `head`'s gesture back along + /// first-parents and checking whether it bottoms out at the root with no earlier gesture boundary to + /// land on. If so, there is nothing before this gesture to undo to, so undo is disabled. + pub fn can_undo(&self) -> bool { + if self.document.head == 0 || self.document.last_broadcast_rev == Some(self.document.head) { + return false; + } + self.gesture_start_parent(self.document.head).is_some_and(|parent| parent != 0) + } + + /// Walk the gesture containing `rev` back along first-parents to its first delta, returning that + /// delta's parent (the rev the cursor would rest on after undoing this gesture, or `0` for the root). 
+ /// Mirrors the boundary condition in [`undo`](Self::undo): stop when the parent is a `gesture_end` + /// boundary or the root. + fn gesture_start_parent(&self, rev: Rev) -> Option { + let mut current = rev; + loop { + let parent = self.document.history.get(¤t)?.parents.first().copied().unwrap_or(0); + if parent == 0 || self.document.history.get(&parent).is_some_and(|d| d.is_gesture_end()) { + return Some(parent); + } + current = parent; + } + } + + pub fn can_redo(&self) -> bool { + !self.document.redo_stack.is_empty() + } + + /// Silent-zone undo of one *gesture*: revert deltas walking `head` back along first-parents until + /// it reaches the previous gesture boundary (a delta marked `gesture_end`) or the empty root. One + /// gesture spans several deltas (one `commit_from_runtime` batch), so undo reverts the whole run, + /// not a single delta — matching the legacy per-gesture undo granularity. The undone gesture's + /// `head` rev is pushed onto the redo stack. Reflog semantics: the DAG is never rewritten. + pub fn undo(&mut self) -> Result { + if !self.can_undo() { + return Err(CrdtError::NothingToUndo); + } + let checkpoint = self.document.head; + + // Revert this gesture's last delta, then keep going back until `head` rests on the previous + // gesture's boundary (its `gesture_end` delta) or the root. + loop { + let rev = self.document.head; + let delta = self.document.history.get(&rev).ok_or(CrdtError::NotFoundInHistory(rev))?.clone(); + let parent = delta.parents.first().copied().unwrap_or(0); + + self.document.revert_delta(RegistryTarget::Working, delta)?; + self.document.head = parent; + + if parent == 0 || self.document.history.get(&parent).is_some_and(|d| d.is_gesture_end()) { + break; + } + } + + // Undo runs with an empty hot log, so keep the retired snapshot in lockstep with the rewound + // working registry (the next gesture's reverses are computed against it). 
+ self.document.retired_snapshot = self.document.registry.clone(); + self.document.redo_stack.push(checkpoint); + Ok(checkpoint) + } + + /// Redo the most-recently-undone gesture: re-apply every delta from the current `head` forward to + /// (and including) the checkpoint rev, advancing `head` to it. Collects the forward span by walking + /// parents back from the checkpoint to `head` (the chain is linear in the silent solo zone). + pub fn redo(&mut self) -> Result { + let checkpoint = self.document.redo_stack.pop().ok_or(CrdtError::NothingToRedo)?; + + let mut forward = Vec::new(); + let mut cursor = checkpoint; + while cursor != self.document.head { + let delta = self.document.history.get(&cursor).ok_or(CrdtError::NotFoundInHistory(cursor))?.clone(); + let parent = delta.parents.first().copied().unwrap_or(0); + forward.push(delta); + cursor = parent; + if cursor == 0 { + break; + } + } + + // Force-apply so each forward value wins the LWW tie against the reverse that undo force-applied + // at the same timestamp. Symmetric with `revert_delta`. + for delta in forward.into_iter().rev() { + self.document.force_apply_op(delta.delta_type.clone(), delta.timestamp)?; + } + self.document.head = checkpoint; + + // Redo runs with an empty hot log; keep the retired snapshot in lockstep with the working registry. + self.document.retired_snapshot = self.document.registry.clone(); + Ok(checkpoint) + } + + /// Build a synthetic linear history whose replay reproduces `registry`. Each op gets a + /// freshly-ticked clock timestamp and chains to the previous op's `Rev`. + pub fn bootstrap_from_registry(peer: PeerId, registry: Registry) -> Result { + let ops = crate::delta::compute_deltas(&Registry::default(), ®istry); + let mut session = Self::with_peer(peer); + session.commit_ops(ops, false)?; + // No hot ops on this path, so the working registry must mirror the freshly-built snapshot. 
+ session.document.registry = session.document.retired_snapshot.clone(); + Ok(session) + } + + pub fn history(&self) -> impl Iterator + '_ { + self.document.history.values() + } + + /// Verify that every delta's content-addressed `id` matches its recomputed hash. `Delta` skips this + /// on deserialize to keep loading cheap, so call this after loading history from an untrusted source + /// (it walks the whole history and rehashes each delta). Returns the first mismatch found. + pub fn verify_history(&self) -> Result<(), CrdtError> { + for (&stored, delta) in &self.document.history { + let expected = delta.recomputed_id(); + if stored != expected || delta.id != expected { + return Err(CrdtError::RevMismatch { stored, expected }); + } + } + Ok(()) + } + + /// Every resource hash referenced by the current registry *or* anywhere in history. Undo removes a + /// gesture's `AddResource` from the working registry, so a redoable (or re-undoable) gesture's + /// resources no longer appear in `registry().resources` even though redo still needs them. Resource GC + /// must keep this whole set alive, not just the current head's, or undo then redo loses declaration + /// bytes. Walks current resources plus each delta's `AddResource`/`RemoveResource` snapshot. + pub fn all_referenced_resource_hashes(&self) -> HashSet { + let mut hashes: HashSet = self.document.registry.resources.values().filter_map(|entry| entry.hash).collect(); + + for delta in self.document.history.values() { + match &delta.delta_type { + RegistryDelta::AddResource { entry, .. } => hashes.extend(entry.hash), + RegistryDelta::RemoveResource { snapshot, .. } => hashes.extend(snapshot.hash), + _ => {} + } + } + + hashes + } + + /// History in deterministic causal order: a topological sort with ties among + /// ready deltas broken by `Rev`. Every parent precedes its children, so the result is a valid + /// replay order. 
+ /// The order is a pure function of the delta set, so two peers holding the same + /// history serialize byte-identical output. Parents outside this history (already-known ancestors) + /// don't gate emission. O(V + E) in deltas and parent edges. + pub fn history_topological(&self) -> Vec<&Delta> { + let history = &self.document.history; + + // Unsatisfied in-history parent count per delta, plus reverse edges to decrement as parents emit. + let mut pending_parents: HashMap = HashMap::with_capacity(history.len()); + let mut children: HashMap> = HashMap::new(); + for (rev, delta) in history { + let in_history_parents = delta.parents.iter().filter(|parent| history.contains_key(parent)).count(); + pending_parents.insert(*rev, in_history_parents); + for parent in &delta.parents { + if history.contains_key(parent) { + children.entry(*parent).or_default().push(*rev); + } + } + } + + // Ready set as a min-heap on `Rev` (via `Reverse`) so ties resolve deterministically. + let mut ready: std::collections::BinaryHeap> = pending_parents.iter().filter(|(_, count)| **count == 0).map(|(rev, _)| std::cmp::Reverse(*rev)).collect(); + + let mut ordered = Vec::with_capacity(history.len()); + while let Some(std::cmp::Reverse(rev)) = ready.pop() { + ordered.push(&history[&rev]); + for child in children.get(&rev).into_iter().flatten() { + let count = pending_parents.get_mut(child).expect("child is in history"); + *count -= 1; + if *count == 0 { + ready.push(std::cmp::Reverse(*child)); + } + } + } + + ordered + } + + pub fn hot_log(&self) -> &[HotOp] { + &self.document.hot_log + } + + pub fn head_rev(&self) -> Rev { + self.document.head + } + + pub fn redo_stack(&self) -> &[Rev] { + &self.document.redo_stack + } + + pub fn next_node_counter(&self) -> u64 { + self.document.next_node_counter + } +} + +/// Errors from `Session::commit_from_runtime`. 
+#[derive(Debug, thiserror::Error)] +pub enum CommitError { + #[error("Failed to convert runtime network: {0}")] + Conversion(#[from] from_runtime::ConversionError), + #[error("Failed to apply commit: {0}")] + Crdt(#[from] CrdtError), +} + +impl Default for Session { + fn default() -> Self { + Self::new() + } +} + +/// One live op in the hot zone. Carries only enough to drive live LWW; no parents (transient), +/// no Rev (not content-addressed in the durable DAG). GC'd at retirement. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct HotOp { + pub op: RegistryDelta, + pub timestamp: TimeStamp, + pub author: PeerId, +} + +#[derive(Debug, thiserror::Error)] +pub enum CrdtError { + #[error("Target node {0} does not exist")] + TargetNodeDoesNotExist(NodeId), + #[error("Network {0} does not exist")] + NetworkDoesNotExist(NetworkId), + #[error("Input index {0} out of bounds")] + InputIndexOutOfBounds(usize), + #[error("Export slot index {0} out of bounds")] + ExportSlotOutOfBounds(u32), + #[error("Delta {0} not found in history")] + NotFoundInHistory(Rev), + #[error("No history entry resurrects node {0}")] + NodeNotInHistory(NodeId), + #[error("No history entry resurrects network {0}")] + NetworkNotInHistory(NetworkId), + #[error("Nothing to undo")] + NothingToUndo, + #[error("Nothing to redo")] + NothingToRedo, + #[error("Node {0} already exists")] + NodeAlreadyExists(NodeId), + #[error("Network {0} already exists")] + NetworkAlreadyExists(NetworkId), + /// PeerId is already registered to a different UserId. 
+ #[error("Peer {0:?} is already registered to a different user")] + PeerRegistrationConflict(PeerId), + #[error("Operation requires an empty hot log")] + HotLogNotEmpty, + #[error("Delta stored under {stored} hashes to {expected}")] + RevMismatch { stored: Rev, expected: Rev }, +} diff --git a/document/graph-storage/src/to_runtime.rs b/document/graph-storage/src/to_runtime.rs new file mode 100644 index 0000000000..7c5d56c403 --- /dev/null +++ b/document/graph-storage/src/to_runtime.rs @@ -0,0 +1,338 @@ +use std::borrow::Cow; +use std::collections::HashMap; + +use core_types::memo::MemoHash; +use core_types::uuid::NodeId as RuntimeNodeId; +use graph_craft::document::value::TaggedValue; +use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeInput as GraphCraftNodeInput, NodeNetwork}; +use graph_craft::{ProtoNodeIdentifier, Type, concrete}; +use rustc_hash::FxHashMap; + +use crate::attr::*; +use crate::metadata_source::{InputMetadataEntry, NetworkMetadataEntry, NodeMetadataEntry}; +use crate::{AttributesRead, Implementation, NetworkId, Node, NodeId, NodeInput, Position, ProtoNode, ROOT_NETWORK, Registry, ResourceId}; + +#[derive(Debug, thiserror::Error)] +pub enum ConversionError { + #[error("Network {0} not found")] + NetworkNotFound(NetworkId), + #[error("Node {0} not found")] + NodeNotFound(NodeId), + #[error("ProtoNode declaration {0} not found in provided declarations")] + DeclarationNotFound(ResourceId), + #[error("Deserialization error: {0}")] + DeserializationError(String), + #[error("Node {node:?} has {inputs} inputs but {attributes} input-attribute entries")] + InputAttributeCountMismatch { node: NodeId, inputs: usize, attributes: usize }, + #[error("Network {network} has two nodes mapping to runtime ID {runtime_id}")] + DuplicateRuntimeNodeId { network: NetworkId, runtime_id: u64 }, + #[error("Network {network} references node {referenced}, which lives in a different network")] + CrossNetworkReference { network: NetworkId, referenced: 
NodeId }, + #[error("Scope injection {key:?} in network {network} references node {referenced}, which is missing or in a different network")] + DanglingScopeInjection { network: NetworkId, key: String, referenced: NodeId }, +} + +/// Resolved proto-node declarations, keyed by the `ResourceId` that `Implementation::ProtoNode` +/// references. The caller resolves these from its byte store (`ResourceId` → `ResourceHash` → +/// stored `ProtoNode` bytes) before converting, since `graph-storage` holds only references. +pub type Declarations = std::collections::HashMap; + +impl Registry { + /// Returns the network plus per-node metadata entries (one per node carrying any `ui::*` attribute). + pub fn to_runtime_with_metadata(&self, declarations: &Declarations) -> Result<(NodeNetwork, Vec), ConversionError> { + let (network, node_entries, _) = self.to_runtime_with_full_metadata(declarations)?; + Ok((network, node_entries)) + } + + /// Like `to_runtime_with_metadata` but also returns per-network entries (navigation, previewing). + /// Used by the editor's full-rebuild path. + pub fn to_runtime_with_full_metadata(&self, declarations: &Declarations) -> Result<(NodeNetwork, Vec, Vec), ConversionError> { + let mut node_metadata = Some(Vec::new()); + let mut network_metadata = Some(Vec::new()); + + // Group nodes by their owning network in one pass, so each `convert_network` call (one per + // network, including nested ones) takes its node list by lookup instead of rescanning the whole + // flat `node_instances` map, which would be quadratic on graphs with many networks. 
/// Immutable shared context threaded through the recursive conversion. `nodes_by_network` is the
/// one-pass grouping of `registry.node_instances` by owning network, so each network's nodes are an
/// O(1) lookup rather than a full rescan.
struct ConversionContext<'a> {
	/// The stored registry being converted; networks and node instances are looked up here.
	registry: &'a Registry,
	/// Caller-resolved proto-node declarations, consulted for `Implementation::ProtoNode` nodes.
	declarations: &'a Declarations,
	/// Each network's nodes as `(storage ID, node)` pairs, built once before conversion starts.
	nodes_by_network: FxHashMap<NetworkId, Vec<(NodeId, &'a Node)>>,
}
We pull +/// the original local ID from `attr::ORIGINAL_NODE_ID` on each node and on each `NodeInput::Node` +/// reference. References only point within the same network, so per-network lookup suffices. +/// +/// **Exports:** the storage-side `Vec` is sparse (`None` slots are valid). Compacted +/// here into the runtime's dense `Vec` — slot stability is a storage-side concern. +/// +/// `metadata_path` is the owning-node chain naming *this* network (empty for the root). +fn convert_network( + context: &ConversionContext, + network_id: NetworkId, + metadata_path: &[RuntimeNodeId], + node_collector: &mut Option>, + network_collector: &mut Option>, +) -> Result { + let network = context.registry.networks.get(&network_id).ok_or(ConversionError::NetworkNotFound(network_id))?; + + if let Some(collector) = network_collector.as_mut() { + collector.push(extract_network_metadata(&network.attributes, metadata_path, network_id)); + } + + let mut nodes: FxHashMap = FxHashMap::default(); + for &(global_id, node) in context.nodes_by_network.get(&network_id).map(Vec::as_slice).unwrap_or_default() { + let local_id = node.attributes.get(ORIGINAL_NODE_ID).and_then(|v| v.value.as_u64()).unwrap_or(global_id); + let runtime_id = RuntimeNodeId(local_id); + + if node.inputs.len() != node.inputs_attributes.len() { + return Err(ConversionError::InputAttributeCountMismatch { + node: global_id, + inputs: node.inputs.len(), + attributes: node.inputs_attributes.len(), + }); + } + + if let Some(collector) = node_collector.as_mut() + && let Some(entry) = extract_ui_metadata(node, metadata_path, runtime_id) + { + collector.push(entry); + } + + let doc_node = convert_node(context, node, metadata_path, runtime_id, node_collector, network_collector)?; + + // Two storage nodes resolving to the same runtime ID would silently collapse into one on + // insert, dropping a node from the reconstructed graph. 
+ if nodes.insert(runtime_id, doc_node).is_some() { + return Err(ConversionError::DuplicateRuntimeNodeId { + network: network_id, + runtime_id: local_id, + }); + } + } + + // Input attributes aren't round-tripped for exports — Reflection/Import inputs don't appear there. + let empty_attrs = crate::Attributes::new(); + let exports: Vec = network + .exports + .iter() + .filter_map(|slot| slot.target.as_ref()) + .map(|input| convert_input(context.registry, network_id, input, &empty_attrs)) + .collect::, _>>()?; + + let scope_injections = read_scope_injections(context.registry, network_id, &network.attributes)?; + + Ok(NodeNetwork { + exports, + nodes, + scope_injections, + generated: false, + }) +} + +/// Rebuild a network's `scope_injections` from its serialized attribute blob, resolving each stored +/// storage node ID back to its runtime-local ID. Mirrors `from_runtime::write_scope_injections`. +fn read_scope_injections(registry: &Registry, network_id: NetworkId, attributes: &crate::Attributes) -> Result, ConversionError> { + let Some(stored) = attributes.get_typed::>(SCOPE_INJECTIONS) else { + return Ok(FxHashMap::default()); + }; + + stored + .into_iter() + .map(|(key, (storage_id, ty))| { + // The injection must point at a node in this same network, like any `NodeInput::Node`. + let referenced = registry.node_instances.get(&storage_id).filter(|node| node.network == network_id); + let Some(referenced) = referenced else { + return Err(ConversionError::DanglingScopeInjection { + network: network_id, + key, + referenced: storage_id, + }); + }; + + let local_id = referenced.attributes.get(ORIGINAL_NODE_ID).and_then(|v| v.value.as_u64()).unwrap_or(storage_id); + Ok((key, (RuntimeNodeId(local_id), ty))) + }) + .collect() +} + +/// Returns `None` when the node has no `ui::*` attributes at all so callers don't end up with +/// empty entries for unconverted-from-runtime nodes. 
`input_metadata` is always sized to match +/// `node.inputs.len()` for a strict slot-by-slot rebuild; empty slots use `InputMetadataEntry::default()`. +fn extract_ui_metadata(node: &crate::Node, network_path: &[RuntimeNodeId], local_id: RuntimeNodeId) -> Option { + let position: Option = node.attributes.get_typed(UI_POSITION); + let is_layer = node.attributes.get_or(UI_IS_LAYER, false); + let display_name: Option = node.attributes.get_typed(UI_DISPLAY_NAME); + let locked = node.attributes.get_or(UI_LOCKED, false); + let pinned = node.attributes.get_or(UI_PINNED, false); + let output_names: Vec = node.attributes.get_or_default(UI_OUTPUT_NAMES); + + let input_metadata: Vec = node.inputs_attributes.iter().map(extract_input_metadata).collect(); + + let entry = NodeMetadataEntry { + network_path: network_path.to_vec(), + local_id, + position, + is_layer, + display_name, + locked, + pinned, + input_metadata, + output_names, + }; + (!entry.is_empty()).then_some(entry) +} + +fn extract_network_metadata(attributes: &crate::Attributes, network_path: &[RuntimeNodeId], network_id: NetworkId) -> NetworkMetadataEntry { + NetworkMetadataEntry { + network_path: network_path.to_vec(), + network_id, + reference: attributes.get_typed(UI_REFERENCE), + } +} + +/// Reassembles `input_data` by scanning every attribute under `ui::input_data::` and stripping the prefix. 
+fn extract_input_metadata(attributes: &crate::Attributes) -> InputMetadataEntry { + let input_data: HashMap = attributes + .iter() + .filter_map(|(key, value)| key.strip_prefix(UI_INPUT_DATA_PREFIX).map(|sub_key| (sub_key.to_owned(), value.value.clone()))) + .collect(); + + InputMetadataEntry { + input_name: attributes.get_typed(UI_INPUT_NAME), + input_description: attributes.get_typed(UI_INPUT_DESCRIPTION), + widget_override: attributes.get_typed(UI_WIDGET_OVERRIDE), + input_data, + } +} + +fn convert_node( + context: &ConversionContext, + node: &crate::Node, + metadata_path: &[RuntimeNodeId], + runtime_node_id: RuntimeNodeId, + node_collector: &mut Option>, + network_collector: &mut Option>, +) -> Result { + let inputs = node + .inputs + .iter() + .zip(node.inputs_attributes.iter()) + .map(|(slot, input_attrs)| convert_input(context.registry, node.network, &slot.input, input_attrs)) + .collect::, _>>()?; + + // Defaults must match `DocumentNode::default()` (and the `set_if_not_default` calls in `from_runtime`). + Ok(DocumentNode { + inputs, + call_argument: node.attributes.get_or(CALL_ARGUMENT, concrete!(core_types::Context)), + implementation: convert_implementation(context, &node.implementation, metadata_path, runtime_node_id, node_collector, network_collector)?, + visible: node.attributes.get_or(VISIBLE, true), + skip_deduplication: node.attributes.get_or(SKIP_DEDUPLICATION, false), + context_features: node.attributes.get_or_default(CONTEXT_FEATURES), + // Regenerated during compilation; not stored. + original_location: Default::default(), + }) +} + +fn convert_input(registry: &Registry, network_id: NetworkId, input: &NodeInput, input_attributes: &crate::Attributes) -> Result { + Ok(match input { + NodeInput::Node { node_id, output_index } => { + let referenced = registry.node_instances.get(node_id).ok_or(ConversionError::NodeNotFound(*node_id))?; + + // Runtime references are local to one network. 
A cross-network reference would remap to a + // local ID that doesn't exist in the current runtime network, so reject it. + if referenced.network != network_id { + return Err(ConversionError::CrossNetworkReference { + network: network_id, + referenced: *node_id, + }); + } + + let local_id = referenced.attributes.get(ORIGINAL_NODE_ID).and_then(|v| v.value.as_u64()).unwrap_or(*node_id); + GraphCraftNodeInput::Node { + node_id: RuntimeNodeId(local_id), + output_index: *output_index, + } + } + NodeInput::Value { value, exposed } => { + let tagged_value: TaggedValue = serde_json::from_value(value.clone()).map_err(|e| ConversionError::DeserializationError(format!("TaggedValue: {e:?}")))?; + GraphCraftNodeInput::Value { + tagged_value: MemoHash::new(tagged_value), + exposed: *exposed, + } + } + NodeInput::Scope(s) => GraphCraftNodeInput::Scope(s.clone()), + NodeInput::Import { import_idx } => GraphCraftNodeInput::Import { + import_type: input_attributes.get_or(IMPORT_TYPE, Type::Generic(Cow::Borrowed("T"))), + import_index: *import_idx, + }, + NodeInput::Reflection => GraphCraftNodeInput::Reflection( + input_attributes + .get_typed(REFLECTION_METADATA) + .ok_or_else(|| ConversionError::DeserializationError("Missing reflection_metadata in input_attributes".to_string()))?, + ), + }) +} + +fn convert_implementation( + context: &ConversionContext, + implementation: &Implementation, + parent_metadata_path: &[RuntimeNodeId], + owning_runtime_id: RuntimeNodeId, + node_collector: &mut Option>, + network_collector: &mut Option>, +) -> Result { + Ok(match implementation { + Implementation::ProtoNode(id) => { + let proto = context.declarations.get(id).ok_or(ConversionError::DeclarationNotFound(*id))?; + DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::with_owned_string(proto.identifier.clone())) + } + Implementation::Network(net_id) => { + let mut child_path = Vec::with_capacity(parent_metadata_path.len() + 1); + child_path.extend_from_slice(parent_metadata_path); + 
child_path.push(owning_runtime_id); + DocumentNodeImplementation::Network(convert_network(context, *net_id, &child_path, node_collector, network_collector)?) + } + }) +} diff --git a/node-graph/libraries/resources/src/lib.rs b/node-graph/libraries/resources/src/lib.rs index 0fffabb324..516e186371 100644 --- a/node-graph/libraries/resources/src/lib.rs +++ b/node-graph/libraries/resources/src/lib.rs @@ -246,6 +246,16 @@ impl ResourceId { pub fn new() -> Self { Self(core_types::uuid::generate_uuid()) } + + /// Derive a deterministic ID from a content hash (first 8 bytes, little-endian). Used when + /// bootstrapping resources from an existing document so re-conversion is stable and identical + /// content maps to one ID. New resources created live should use [`ResourceId::new`] instead. + pub fn from_hash(hash: &ResourceHash) -> Self { + let bytes: [u8; 32] = hash.into(); + let mut truncated = [0u8; 8]; + truncated.copy_from_slice(&bytes[..8]); + Self(u64::from_le_bytes(truncated)) + } } impl From for ResourceId {