diff --git a/.gitignore b/.gitignore index dd89e49636..3791ae413c 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ vcpkg_installed/ Workspace CLAUDE.md .claude +docs/superpowers diff --git a/docs/vv_templates/deviation_template.md b/docs/vv_templates/deviation_template.md new file mode 100644 index 0000000000..d1e0428c6c --- /dev/null +++ b/docs/vv_templates/deviation_template.md @@ -0,0 +1,79 @@ +# Deviations from DREAM3D 6.5.171: `<FilterName>` + +This file lists every documented behavioral difference between this SIMPLNX filter and its DREAM3D 6.5.171 equivalent. + +Entries are referenced by stable ID (`<FilterName>-D<N>`) from the V&V report and from public migration guidance. The ID is stable across renames; the Filter UUID field is the permanent cross-reference anchor. + +--- + +## `<FilterName>-D1` + +| Field | Value | +|---|---| +| **Deviation ID** | `<FilterName>-D1` | +| **Filter UUID** | `<filter-uuid>` | +| **Status** | active *or* superseded by `<FilterName>-D<N>` *or* retired YYYY-MM-DD | + +**Symptom:** *`<user-visible symptom, one or two sentences>`* + +**Root cause:** *bug | precision | order of operations | library | algorithmic choice* +*One paragraph explaining the technical mechanism. Cite source files and line ranges where helpful.* + +**Affected users:** *`<which users or workflows are affected>`* + +**Recommendation:** *trust SIMPLNX | trust 6.5.171 | either acceptable within tolerance X | see quick-patch link for legacy-parity* +*One sentence justifying the recommendation.* + +--- + +## `<FilterName>-D2` + +| Field | Value | +|---|---| +| **Deviation ID** | `<FilterName>-D2` | +| **Filter UUID** | `<filter-uuid>` | +| **Status** | active | + +**Symptom:** ... + +**Root cause:** ... + +**Affected users:** ... + +**Recommendation:** ... + +--- + +## Examples (delete this section before sign-off) + +### Precision example + +| Field | Value | +|---|---| +| **Deviation ID** | `ComputeEulerAngles-D1` | +| **Filter UUID** | `aaaa1111-0000-0000-0000-000000000001` *(illustrative)* | +| **Status** | active | + +**Symptom:** Euler-angle output differs from 6.5.171 by up to 0.003° in orientations near grain boundaries. + +**Root cause:** Precision.
SIMPLNX performs the internal orientation-matrix operations in `double`; 6.5.171 performed the same operations in `float`. + +**Affected users:** Workflows that compute orientation statistics on features larger than ~10⁴ voxels, where accumulated float32 round-off becomes visible at the 10⁻³ degree level. Users who only visualize IPF colors will not notice. + +**Recommendation:** Trust SIMPLNX. The 6.5.171 output was limited by float32 round-off and is not materially more correct for any downstream calculation. + +### Legacy-bug example + +| Field | Value | +|---|---| +| **Deviation ID** | `SegmentFeatures-D2` | +| **Filter UUID** | `aaaa2222-0000-0000-0000-000000000002` *(illustrative)* | +| **Status** | active | + +**Symptom:** FeatureId count on a 50×50×50 block test pattern is 27 in SIMPLNX and 26 in 6.5.171. + +**Root cause:** Bug in 6.5.171. The outer segmentation loop used `< dimZ` where it should have used `<= dimZ`, silently dropping features that touched the +Z boundary. Corrected in SIMPLNX. + +**Affected users:** Anyone who ran `SegmentFeatures` on datasets where a feature touched the +Z volume boundary. The missing feature was always the one nearest +Z. + +**Recommendation:** Trust SIMPLNX. The 6.5.171 result was mathematically incorrect. diff --git a/docs/vv_templates/oracle_classes.md b/docs/vv_templates/oracle_classes.md new file mode 100644 index 0000000000..f958dddb01 --- /dev/null +++ b/docs/vv_templates/oracle_classes.md @@ -0,0 +1,234 @@ +# Oracle Classes — Detailed Reference + +The DREAM3D-NX V&V policy (see [`vv_policy.md`](./vv_policy.md)) names five oracle classes — five ways to establish what the *correct* output of a filter should be, independently of any DREAM3D implementation. The policy's table is a quick lookup. 
This document is the long-form explanation: what each class actually means in practice, what kinds of bugs it catches, what its weaknesses are, and how to choose between them when designing the oracle for a new (or retroactive) V&V pass. + +## What an oracle is, and why this matters + +An **oracle** is the source of truth you compare the filter against. The policy's "one ordering rule" is to pick the oracle *before* running any DREAM3D 6.5.171 comparison — because if the oracle is wrong, you will happily confirm a buggy filter. The oracle must be independent of the implementation it validates: it cannot be the filter's own output, and it cannot be the legacy DREAM3D's output (which is "trusted by reputation, not by proof"). + +The five classes differ in **what they trust** (math, a library, a paper, properties, or a person) and **how durable** that trust is over time. The policy ranks them by trust durability, and prefers oracles that are stable, reproducible, and external to the code being tested. + +## Class 1 — Analytical + +**What it is:** You can write down, on paper, the *exact* expected output as a closed-form function of the input. The math is small enough to do by hand or in a spreadsheet on a toy dataset. + +**Where the trust comes from:** Mathematics itself. Arithmetic doesn't drift. `2 + 2 == 4` today, tomorrow, and forever. + +**Examples:** + +- Thresholding: `output[i] = (input[i] > T)`. Hand-pick a few inputs above and below `T`; the expected output is obvious. +- Cropping: `output[i,j,k] = input[i+o_x, j+o_y, k+o_z]`, where `(o_x, o_y, o_z)` is the crop offset. Pick a single-slice 5×5 input and a 2×2 crop; you know exactly which input cells should appear in which output cells. +- Color conversion (e.g., RGB→HSV): the formulas are in any reference; apply them by hand to a few colors. +- Indirection lookups: `output[i] = input[map[i]]`. Trivial to verify on a 6-element example.
+- Grouping density: `density[i] = ParentVolumes[i] / Σ Volumes[k]` where `k` ranges over a set you compute by hand from the neighbor lists. Set-union sums + division — exactly the kind of thing that fits Class 1. + +**Strengths:** + +- Zero drift. The expected output is a function of the input, not of any library or external state. +- No external dependencies. The test runs in any environment. +- The derivation can be embedded as a comment next to the `REQUIRE(result == X)`, so future readers see both the truth and the assertion. +- Provable: if someone disputes the answer, they can re-derive it. + +**Weaknesses:** + +- Only works when the algorithm has a tractable closed form on small inputs. Many real algorithms don't (e.g., segmentation, optimization, iterative convergence). +- Toy datasets may not exercise all code paths in a complex filter. + +**How it's encoded in tests:** Usually inline `REQUIRE(result == 0.6428571)` with a comment showing the hand derivation. Or a tiny exemplar `.dream3d` whose values were hand-computed and the `.dream3d` is just a cache of the computation. + +## Class 2 — Reference implementation + +**What it is:** A trusted external library — one with broader user base, more testing, and longer history than your filter — computes the same operation on the same input. Its output becomes your expected output. + +**Where the trust comes from:** The library's reputation. NumPy has millions of users; if its `np.linalg.inv()` is wrong, the world notices. + +**Examples:** + +- NumPy / SciPy for general array operations (matrix multiply, FFT, sort, statistics). +- MTEX (MATLAB) for orientation math and texture analysis. +- EbsdLib upstream for crystallographic operations (the simplnx filter might use a *different version* of EbsdLib than the standalone library does). +- Eigen for linear algebra in C++ tests. +- A reference Python implementation of an algorithm published as a research paper companion. 
+ +**Workflow:** Write a small Python (or other) script that takes the toy input, runs the reference library, and saves the expected output. That script + the library version + any random seed used become part of the V&V archive. Future tests load the saved expected output and compare bit-for-bit (or with appropriate float tolerance). + +**Strengths:** + +- External cross-check: you're not just testing your filter against your own arithmetic. +- For libraries that implement the same algorithm (e.g., the simplnx filter calls EbsdLib for some orientation math, and you can run EbsdLib's standalone test on the same input), this can catch real implementation drift between versions. + +**Weaknesses:** + +- **HIGH library-version drift.** If NumPy 1.24 produces output X and NumPy 2.0 produces output Y (different rounding, different algorithm internals), your "expected output" silently changes. You must record the exact library version (and seed for any random parts) in the archive — and accept that re-running the script years later may produce a different "expected output" unless you've pinned the library. +- The library may have its own bugs. Trusting it transitively trusts every commit in its history. +- The library might solve a slightly *different* problem than your filter (e.g., "convex hull" can mean different things). Subtle semantics mismatches are a frequent source of false-positive deviations. + +**How it's encoded in tests:** A cached exemplar `.dream3d` file in the archive, generated by the Python/MATLAB script. The script and its version pin live in the archive too. Test compares filter output against the cached arrays. + +## Class 3 — Paper-based + +**What it is:** The filter implements an algorithm published in a peer-reviewed paper. The paper contains a worked example, figure, or equation showing the expected output. You reproduce that exact example. + +**Where the trust comes from:** Peer review. 
The paper survived a referee process and (typically) replication by others. + +**Examples:** + +- **Rowenhorst 2015** ("Consistent representations of and conversions between 3D rotations"): provides explicit worked examples of orientation-representation conversions (Euler → quaternion → axis-angle → rotation matrix → Rodrigues vector). simplnx's `ConvertOrientations` filter implements this — the paper's worked examples are direct Class 3 oracle fixtures. +- **Bunge texture analysis** for ODF (orientation distribution function) calculations. +- **Hoshen-Kopelman 1976** for connected-component labeling — the paper has small worked examples with labels you can verify against. +- Whatever paper your filter's algorithm cites in its header comment. + +**Strengths:** + +- The most authoritative source. The author of the algorithm isn't writing the test, eliminating the "tests unconsciously designed around the existing code" risk. +- Captures the algorithm's *intent* precisely (the paper says what it should do; the filter is supposed to do that). +- Often comes with multiple test cases (figures, tables, edge cases). + +**Weaknesses:** + +- Low-to-medium drift. Papers don't update, but later papers may publish errata or alternate formulations. You need to pin to the exact paper revision (DOI + edition). +- Extracting numbers from PDF figures/tables is tedious and error-prone (re-read the text twice; pixel-pick figure values). +- Only applicable to filters that implement a published algorithm. Custom internal algorithms don't have one. + +**How it's encoded in tests:** Inline `REQUIRE(result == X)` with a citation comment, e.g. `// Per Rowenhorst 2015, Eq. 7, p. 12: q = [0.7071, 0, 0.7071, 0]`. The paper PDF is embedded in the V&V archive for traceability. + +## Class 4 — Invariant-based + +**What it is:** The output must satisfy certain mathematical properties (invariants), regardless of input. You assert these properties as predicates instead of asserting specific values. 
+ +**Where the trust comes from:** Logical derivation from the algorithm's specification. The invariants follow from what the filter is *supposed to do*. + +**Examples:** + +- **Bounds:** `0.0 ≤ density ≤ 1.0` (when no sentinel is involved). +- **Sentinel signaling:** `density == -1.0f` exactly when no features touched the parent (no in-between values, no other negative numbers). +- **Range membership:** `FeatureIds[k] ∈ {0, 1, …, numFeatures-1}`. +- **Contiguity / completeness:** `FeatureIds` start at 1, no gaps (no orphan IDs from a segmentation filter). +- **Conservation:** total cell count out == total cell count in (no cells dropped silently). +- **Sum-to-one:** `Σ phase_fraction[p] == 1.0` for any cell. +- **Symmetry under transformation:** `Filter(Rotate(input)) == Rotate(Filter(input))` for rotation-equivariant filters. +- **Idempotence:** `Filter(Filter(input)) == Filter(input)` for filters that should converge in one pass. + +**Strengths:** + +- Cheap. No expected-output computation; you just write the property as a predicate. +- Zero drift. The invariants don't change unless the filter's specification changes. +- Catches whole *classes* of bugs at once. A boundary-handling regression that produces `density = 1.7` is caught by a single `REQUIRE(density ≤ 1.0)` regardless of what the actual correct value was. +- Works for *any* input, not just hand-picked fixtures. Especially powerful when paired with property-based testing (random input generation). + +**Weaknesses:** + +- **Does not fully specify behavior.** A buggy filter could satisfy every invariant and still be wrong. Example: a filter that always outputs `density = 0.5` would satisfy `0 ≤ density ≤ 1` but be obviously wrong. +- Best used as a **companion** to a stronger class (1, 2, or 3). On its own, Class 4 is necessary but not sufficient. +- Requires reasoning about what properties *must* hold, which is its own design skill. 
+ +**How it's encoded in tests:** Inline `REQUIRE(predicate)` assertions, often in a loop over output indices. No exemplar file needed. + +## Class 5 — Expert-visual + +**What it is:** A named domain expert visually inspects the output on canonical test cases and signs off that it looks correct. No formal computation of expected output; trust is placed in the expert's judgment. + +**Where the trust comes from:** A person's experience and reputation. + +**When it's used:** Last resort. Used when: + +- The output is genuinely subjective (e.g., "does this segmentation look reasonable?", "is this rendering visually pleasing?"). +- The algorithm is more art than math (heuristic image processing, aesthetic decisions). +- All other classes have been considered and rejected with documented reason. + +**Strengths:** + +- Captures expert intuition that's hard to formalize. +- Sometimes it's the only realistic option for visualization filters. + +**Weaknesses:** + +- **HIGHEST drift risk.** The expert disappears, changes their mind, or is replaced. Future maintainers can't reproduce the judgment. +- **Social drift.** Over time, the bar for "good output" shifts as the team adapts to what the filter actually produces — exactly the opposite of what an oracle should do. +- Reproducibility is poor: a screenshot signed off in 2024 may not be regenerable from the same input on different hardware/drivers in 2027. +- The policy **requires documented justification** for using Class 5 — you must explain why no Class 1–4 was feasible. + +**How it's encoded in tests:** A cached exemplar `.dream3d` (or screenshot folder) signed off by named expert + date. Comparison via `CompareDataArrays` against the cached output. + +## The drift-risk concept + +Every oracle answers the question: *what's the right answer for this input?* **Drift** is the risk that, over time, the oracle's answer changes without the filter's specification changing. 
+ +| Class | Name | Drift source | Drift severity | +|---|---|---|---| +| 1 | Analytical | Math itself doesn't drift; only the *derivation* can be miscopied | None (if derivation is preserved as a comment) | +| 4 | Invariant | Properties don't drift; only the *specification* could change | None | +| 3 | Paper-based | Papers don't update, but related papers might publish errata | Low–Medium | +| 2 | Reference impl | Library version changes silently | **High** | +| 5 | Expert-visual | The expert leaves; the team's standards shift | **High (social drift)** | + +The policy's preference order (1, 4 > 3 > 2 > 5) is essentially *prefer oracles with low drift, plus prefer oracles whose computation is transparent and reproducible.* + +## Combining classes (encouraged) + +Class 4 invariants are cheap — they cost almost nothing once you've done the work for a stronger class (1, 2, or 3). The recommended pattern is: + +- **Class 1 + Class 4** — hand-derive specific expected values *and* assert the invariants. The invariants act as a cheap sanity check against derivation typos; the specific values act as a tight bound on correctness. +- **Class 3 + Class 4** — cite the paper for specific values *and* assert the algorithm's invariants. The paper might have a transcription error in one figure; the invariants catch what slipped. +- **Class 2 + Class 4** — cross-check against a library *and* assert algorithm invariants. If the library output and the invariants disagree, you've found either a library bug or a misunderstanding of the algorithm. + +You almost never use Class 5 alone. If you must use Class 5, pair it with Class 4 to limit damage. + +## Decision tree + +``` +Is the output computable in closed form on a small input? + YES → Class 1 (Analytical). Cheapest, no drift. Default choice. + NO ↓ + +Are there mathematical properties the output MUST satisfy? + YES → Class 4 (Invariant). Add these regardless of which other class you pick, then continue ↓ + +Does the filter implement a published algorithm?
+ YES → Class 3 (Paper-based). Strong external authority. + NO ↓ + +Is there a trusted external library that solves the same problem? + YES → Class 2 (Reference impl). Watch for library version drift. + NO ↓ + +Is there a named domain expert willing to sign off, and can you document +why no Class 1-4 was feasible? + YES → Class 5 (Expert-visual). Last resort. Pair with Class 4. + NO ↓ + +Stop. You don't have an oracle. You cannot do V&V on this filter +until you find one. +``` + +## What is NOT an oracle + +A common trap is to mistake one of the following for an oracle. None of these qualify: + +- **The filter's own output**, captured on a previous date and saved as a "golden" exemplar. This is circular: any bug present at capture time becomes the new "correct" answer, and the test then confirms the bug forever. +- **The legacy DREAM3D 6.5.171's output.** Trusted by reputation, not by proof. Useful for diff explanation (see [`vv_policy.md`](./vv_policy.md)), not for correctness. +- **Another simplnx filter's output**, when both filters might share the same bug (e.g., both call into a buggy shared utility). +- **A previous version of the same filter** ("my SIMPL implementation works, so I'll just diff against that"). Identical to the legacy DREAM3D trap. + +If the existing exemplar in the data archive was generated from one of the above, it should be regenerated from a real oracle as part of the V&V pass — this is the "circular oracle" pattern called out in the retroactive audit's cross-cutting findings ([`docs/vv_retroactive_reports/INDEX.md`](../vv_retroactive_reports/INDEX.md)).
+ +## Quick reference: when each class shines + +| Class | Name | Best for | Default choice when | +|---|---|---|---| +| 1 | Analytical | Tight algorithms with closed-form output, hand-pickable toy inputs, small outputs | The math is short enough to do on paper | +| 2 | Reference impl | Algorithms where a trusted library exists and you trust it more than your own implementation | You're porting an algorithm that has a well-known library implementation elsewhere | +| 3 | Paper-based | Implementations of published algorithms (orientation math, classical algorithms with named authors) | The filter's header cites a paper | +| 4 | Invariant | Anything with conservation laws, range bounds, or structural constraints | Always — add Class 4 alongside whatever other class you pick | +| 5 | Expert-visual | Visualization quality, subjective image processing, rendering decisions | All Class 1–4 options have been ruled out with written justification | + +## Encoding the choice in the V&V report + +The per-filter V&V report's `## Oracle` section (per [`report_template.md`](./report_template.md)) requires four fields: + +- *Class* — one or more class numbers (e.g., "1 primary, 4 companion"). +- *Applied* — a one-line description of how the oracle generates expected output for *this* filter. +- *Encoded* — a citation of the specific `TEST_CASE` or fixture in the codebase that runs the oracle, with the number of fixtures. +- *Second-engineer review* — named reviewer + date, or "Skipped — <documented reason>". + +See [`report_gates.md`](./report_gates.md) for the exit criteria the Oracle section must satisfy before the V&V report can move from DRAFT to READY FOR REVIEW.
diff --git a/docs/vv_templates/provenance_template.md b/docs/vv_templates/provenance_template.md new file mode 100644 index 0000000000..b80c494c9a --- /dev/null +++ b/docs/vv_templates/provenance_template.md @@ -0,0 +1,75 @@ +# Exemplar Archive Provenance: `<archive_name>.tar.gz` + +This sidecar records how an exemplar archive used in unit tests was generated. It is the answer to "where did this gold-standard data come from?" + +One sidecar per archive. The archive name and SHA512 must match `download_test_data()` in `src/Plugins/<Plugin>/test/CMakeLists.txt`. + +--- + +## Archive identity + +| Field | Value | +|---|---| +| **Archive** | `<archive_name>.tar.gz` | +| **SHA512** | `<sha512>` | +| **Used by tests** | `<TestCaseName1>`, `<TestCaseName2>` | +| **Generated by** | *`<engineer name>`* | +| **Generated on** | *YYYY-MM-DD* | +| **Generated at commit** | *`<git commit SHA>`* | + +## How it was generated + +*Describe in 2–4 sentences how the archive's contents were produced. Reference any pipeline (`.d3dpipeline`) or script (`.py`, `.m`) by file path. State whether inputs were from a public dataset, synthetic generation, or hand-construction.* + +Example: +> The archive contains a 100×100×100 synthetic microstructure generated by `pipelines/generate_in100_subvol.d3dpipeline` plus the expected output of `ComputeGroupingDensityFilter` on that input. Inputs were generated synthetically (no external data); expected output was computed by the Class 3 Rowenhorst 2015 §4.2 worked example. + +## Canonical oracle output + +*Which arrays in the archive are the canonical oracle output (the thing tests compare against)?
Reference by DataPath.* + +| DataPath | Source of expected values | +|---|---| +| `//` | *Class 1 hand derivation* / *Class 2 script X.py* / *Class 3 paper ref* / *Class 4 invariant* / *Class 5 expert sign-off* | + +## Oracle provenance (Classes 2, 3, 5 only) + +*Classes 1 and 4 need no provenance block — the oracle lives in the test code directly.* + +### Class 2 — Reference implementation + +- **Library:** ** +- **Exact version:** ** +- **Runtime:** ** +- **Random seed:** ** +- **Script in archive:** ** +- *Optional:* **Output hash for drift detection:** ** + +### Class 3 — Paper-based + +- **Citation:** ** +- **DOI:** ** +- **Edition:** ** +- **Figure / Table / Equation #:** ** +- **Page #:** ** +- **Paper PDF in archive:** *`notes/.pdf`* +- **Reproduced figure (if any):** *`notes/_figN.png`* + +### Class 5 — Expert-visual + +- **Approving expert:** ** +- **Approval date:** *YYYY-MM-DD* +- **Signed-off outputs:** *`notes/expert_signoff/*.png`* +- **Class-5-only justification:** *Why no Class 1–4 oracle was feasible for this filter.* + +## Second-engineer oracle review + +- **Reviewer:** ** OR *skipped* +- **Date:** *YYYY-MM-DD* +- **Skip reason** (if skipped): ** + +## Regenerated to fix a circular-oracle situation? + +*If this archive was created to *replace* a prior archive that was regenerated from post-fix SIMPLNX output (a circular oracle), state that here with a reference to the prior archive.* + +> *Example: this archive replaces `compute_grouping_density.tar.gz` (retired YYYY-MM-DD), which was regenerated from SIMPLNX output in `` after the bug fix in `` and therefore could not be used as an independent oracle.* diff --git a/docs/vv_templates/report_gates.md b/docs/vv_templates/report_gates.md new file mode 100644 index 0000000000..a1a0fab96a --- /dev/null +++ b/docs/vv_templates/report_gates.md @@ -0,0 +1,104 @@ +# V&V Report — Section Gates + +A section of `src/Plugins/

/vv/.md` is "done" when all its gates pass. Sections may be worked in any order. + +**The only ordering rule:** the **Oracle** is chosen *before* any DREAM3D 6.5.171 comparison is run. 6.5.171 is never the source of truth — it is the *thing being compared against* the independently-established oracle. + +--- + +## Header table + +- [ ] Plugin, SIMPLNX UUID, legacy DREAM3D equivalent (or "None") filled in +- [ ] Status reflects current state: `DRAFT` | `READY FOR REVIEW` | `COMPLETE` +- [ ] Verified-commit field present (filled in at SBIR deliverable assembly) +- [ ] Sign-off line has named engineer(s) and date at sign-off + +## At a glance + +The dashboard a reviewer reads first. Lets a reviewer decide in 30 seconds whether they need to dig into the long-form sections below. + +- [ ] All 8 rows present: Algorithm Relationship, Oracle (confirmed), Code paths enumerated, Tests today, Exemplar archive, Legacy comparison, Bug flags, V&V phase +- [ ] Each cell is one sentence to one short paragraph — not a single word, not a full subsection. If a row needs more than ~3 sentences, that detail belongs in the long-form section and the dashboard summarizes it +- [ ] **Algorithm Relationship** row names the legacy equivalent (or "no legacy equivalent") and the classification — must agree with the long-form `## Algorithm Relationship` section +- [ ] **Oracle (confirmed)** row names the Class number(s) and the encoded test fixture(s). Use "confirmed" only when the oracle has been applied and the test passes; otherwise write "tentative" or "in progress" +- [ ] **Code paths enumerated** row states `N of M exercised` — agrees with the long-form `## Code path coverage` count +- [ ] **Tests today** row gives the test-case count and a one-phrase shape of coverage (parameter sweep, positive/negative/conversion, etc.) 
+- [ ] **Exemplar archive** row names the archive and flags retired/replaced archives (cross-reference the long-form `## Exemplar archive` SHA512) +- [ ] **Legacy comparison** row is `Run` / `Not run` / `Three-way (SIMPLNX vs 6.5.171 vs 6.5.172)` plus a one-sentence headline. "Not run" must include a brief reason ("design-by-inspection — pure port", "legacy binary unavailable", "deferred to Phase 9") +- [ ] **Bug flags** row is `None` or a list of deviation IDs flagged as suspected bugs (not all deviations are bugs; only those classified as bug under the root-cause taxonomy) +- [ ] **V&V phase** row lists which phases of the workflow are complete and what is outstanding — drives the Status field in the header table + +## Summary + +- [ ] 2–3 sentences only +- [ ] States what the filter does +- [ ] States the verification approach (one phrase, e.g., "Class 3 paper-based vs Rowenhorst 2015") +- [ ] States the headline result (e.g., "1 deviation, all tests pass") + +## Algorithm Relationship + +- [ ] One classification: Port | Minor changes | Rewrite | New filter +- [ ] One-line evidence (UUID inheritance, PR history, line-count diff with legacy) +- [ ] **Rewrite + outputs diverge from legacy** → explicit defense required in the Deviations file (same UUID is a claim of functional equivalence) +- [ ] *Optional but encouraged for Port / Minor changes:* numbered list of **Port-time deltas** — API swaps, library version differences, progress-reporting changes, normalization steps added. Each delta gets one sentence justifying why it does or does not change output. Forces the engineer to *enumerate* the structural diff instead of asserting "it's a port" with no evidence. +- [ ] *Optional:* **Material PRs since baseline** line — `(none identified for this filter)` is a valid answer. The discipline of looking is what matters; it surfaces drift introduced after the last V&V pass. 
+ +## Oracle + +For detailed explanations of each class — with examples, strengths and weaknesses, drift-risk analysis, and a decision tree for picking the right class — see [`oracle_classes.md`](./oracle_classes.md). The summary below is the gate checklist. + +- [ ] Class named (1–5) + - 1 = Analytical (closed-form expected output on toy input) + - 2 = Reference implementation (NumPy / SciPy / MTEX / EbsdLib upstream) + - 3 = Paper-based (published figure / table / equation) + - 4 = Invariant-based (derivable property the output must satisfy) + - 5 = Expert-visual (last resort, requires justification) +- [ ] If Class 5: justification block stating why no Class 1–4 oracle was feasible +- [ ] One-line description of how oracle was applied +- [ ] Encoded test reference: `::` exists and is greppable +- [ ] N fixtures stated; all pass at the verified commit +- [ ] Second-engineer review of oracle design, OR documented skip reason + +## Code path coverage + +- [ ] `Source:` line cites the algorithm `.cpp` with line count (e.g., `Source: src/Plugins/

/.../Algorithms/.cpp (181 lines).`) — anchors the reader to the file being audited +- [ ] *Optional 1–2 sentences* naming the algorithm's logical phases when it has staged structure (e.g., "(a) preflight scan, (b) per-cell accumulation, (c) per-feature finalize") so the `Phase` column reads in context +- [ ] All algorithm code paths enumerated — kernel choices, mask on/off, edge cases, error paths, cancel paths, background/sentinel branches +- [ ] Paths numbered via the `#` column so they have stable IDs for referencing from Test inventory, Deviations, and review comments +- [ ] *Recommended:* `Phase` (or `Pass` / `Stage`) column groups paths when the algorithm has distinct stages; drop the column when the algorithm is flat +- [ ] `N of M paths exercised.` count stated at the top of the section +- [ ] If `N < M`: each uncovered path appears as its own table row with `*Not directly tested. *` in the Test case cell — paths are **never silently omitted**. Acceptable reasons include: low-value loop-guard, exercised implicitly by shipping pipelines (name one), requires cancel-signal injection, deferred to integration test +- [ ] Each covered path maps to ≥1 named test case (`TEST_CASE` name or `DYNAMIC_SECTION` label as it appears in the test source) +- [ ] Parameter-dependent paths: every combination of interest represented (don't trust a single test case to cover the parameter cube) + +## Test inventory + +- [ ] Every `TEST_CASE` in the filter's test file listed (including `DYNAMIC_SECTION` variants that show up as separate ctest entries — list each one) +- [ ] Each marked: `kept` | `new-for-V&V` | `retired` (with one-line reason for retired) +- [ ] **Notes** column states what each test actually verifies (number of arrays compared, number of assertions, exemplar archive consumed, any expected-failure status). 
Don't leave Notes blank — "Validates 80 element-wise assertions against bundled exemplar" beats no entry +- [ ] If a test was modified for this V&V cycle (e.g., inline expected-array updates, exemplar bump), the Notes column records what changed and why (one line; cite the deviation ID if the change is traceable to one) +- [ ] All non-retired tests pass at the verified commit in **both** in-core and OOC builds + +## Exemplar archive + +- [ ] Archive name matches `download_test_data()` entry in `test/CMakeLists.txt` +- [ ] SHA512 in report matches SHA512 in `test/CMakeLists.txt` +- [ ] Provenance sidecar exists at `src/Plugins/

/vv/provenance/.md` and documents: + - who generated the archive + - when + - with what pipeline / script + - what oracle output was canonical +- [ ] If the archive was regenerated during V&V to fix a circular-oracle situation → documented in the sidecar + +## Deviations from DREAM3D 6.5.171 + +- [ ] Comparison was run on at least one fixture (named in the report) +- [ ] If no deviations: a one-line confirmation that 6.5.171 and SIMPLNX outputs matched within tolerance +- [ ] If deviations: each ID referenced in the report points to a fleshed-out entry in `src/Plugins/

/vv/deviations/.md` +- [ ] Each deviation entry has: + - stable ID (`-D`) + - filter UUID (permanent cross-reference anchor) + - symptom (user-visible) + - root cause: `bug` | `precision` | `order of operations` | `library` | `algorithmic choice` + - affected users + - recommendation: `trust SIMPLNX` | `trust 6.5.171` | `either acceptable within tolerance` | `see quick-patch link` diff --git a/docs/vv_templates/report_template.md b/docs/vv_templates/report_template.md new file mode 100644 index 0000000000..3d6020982b --- /dev/null +++ b/docs/vv_templates/report_template.md @@ -0,0 +1,90 @@ +# V&V Report: + +| | | +|--------|--------------| +| Plugin | | +| SIMPLNX UUID | | +| DREAM3D 6.5.171 equivalent | *or* None (new filter) | +| Verified commit | ** | +| Status | DRAFT | +| Sign-off | ** | + +## At a glance + +A scannable dashboard for reviewers. Each row is one sentence to one short paragraph — enough that a reader can decide whether they need to read the long-form sections below. + +| Aspect | Current state | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Algorithm Relationship | *Port \| Minor changes \| Rewrite \| New filter* — *one sentence naming the legacy equivalent (or "no legacy") + any material changes.* | +| Oracle (confirmed) | *Class N — one-sentence applied + one-sentence "encoded as" pointer (e.g., "5 fixtures in `test/Test.cpp`, all pass").* | +| Code paths enumerated | *N of M exercised; one phrase about any uncovered paths.* | +| Tests today | *N test cases — one phrase about coverage (e.g., "parameter sweep over (Tolerance, NumberOfNeighbors)", "1 positive + 1 negative + 1 SIMPL backward-compat").* | +| Exemplar archive | *`` — one phrase on what it provides (inputs only, inputs + outputs, retired/replaced).* | +| Legacy comparison | *Run / Not run / Three-way (SIMPLNX vs 6.5.171 vs 6.5.172) — one-sentence headline (bit-identical, N 
deviations).* | +| Bug flags | *None / list of deviation IDs flagged as suspected bugs.* | +| V&V phase | *Which phases of the V&V workflow are complete; what is outstanding before status promotion.* | + +For worked instances see `src/Plugins/OrientationAnalysis/vv/BadDataNeighborOrientationCheckFilter.md` and `src/Plugins/OrientationAnalysis/vv/ComputeAvgCAxesFilter.md` (on `topic/vv/compute_avg_caxis`). + +## Summary + +*2–3 sentences: what the filter does, how it was verified, headline result.* + +## Algorithm Relationship + +*One of:* Port | Minor changes | Rewrite | New filter + +*Evidence:* *one line — UUID inheritance, PR history, complexity comparison.* + +## Oracle + +*Class:* *N (1=Analytical, 2=Reference, 3=Paper, 4=Invariant, 5=Expert-visual)* + +*Applied:* *one line describing how the oracle generates expected output.* + +*Encoded:* *`::` — N fixtures, all pass.* + +*Second-engineer review:* ** OR *skipped — reason.* + +## Code path coverage + +*N of M paths exercised. If N < M: which paths are NOT covered, and why each gap is acceptable (or what would close it).* + +Source: *`src/Plugins/

<PluginName>/src/<PluginName>

/Filters/Algorithms/.cpp` ( lines).* + +*Optional 1–2 sentences naming the algorithm's logical phases (e.g., "(a) preflight scan, (b) per-cell accumulation, (c) per-feature finalize") so the `Phase` column reads in context.* + +| # | Phase | Path | Test case | +|----|-----------------|---------------------------------------------------|--------------------------------------------| +| 1 | *(a) Preflight* | *e.g. all phases non-Hex → return error `-76402`* | *`No_Hex_Phase`* | +| 2 | *(a) Preflight* | *e.g. mixed phases → push warning, proceed* | *`Class 1 Oracle`* | +| 3 | *(b) Per-cell* | *e.g. `featureId == 0` → skip* | *Not directly tested. * | +| 4 | *(b) Per-cell* | *e.g. normal accumulation branch* | *`Class 1 Oracle` — F1, F2, F3 checks* | +| 5 | *(c) Finalize* | *e.g. `cellCount == 0` → write NaN* | *`Class 1 Oracle` — F0, F5, F6 NaN checks* | + +When a path is intentionally not covered, write `*Not directly tested. *` in the Test case cell rather than omitting the row. Uncovered paths must still appear in the table so the reader can audit the gap. + +*The `Phase` column can be dropped (or renamed `Pass` / `Stage`) when the algorithm has no obvious staged structure. For a worked two-pass instance see `src/Plugins/OrientationAnalysis/vv/BadDataNeighborOrientationCheckFilter.md`; for a worked three-phase instance see `src/Plugins/OrientationAnalysis/vv/ComputeAvgCAxesFilter.md` (on `topic/vv/compute_avg_caxis`).* + +## Test inventory + +| Test case | Status | Notes | +|-----------|--------|-------| +| *TestName* | kept / new-for-V&V / retired | *one line if needed* | + +## Exemplar archive + +- **Archive:** *``* +- **SHA512:** *``* +- **Provenance:** *`src/Plugins/

/vv/provenance/.md`* + +## Deviations from DREAM3D 6.5.171 + +*Either:* + +- No deviations observed. Comparison run on **. + +*Or:* + +- `-D1` — *one-line symptom* — see `vv/deviations/.md` +- `-D2` — *one-line symptom* — see `vv/deviations/.md` diff --git a/docs/vv_templates/vv_policy.md b/docs/vv_templates/vv_policy.md new file mode 100644 index 0000000000..027aec3162 --- /dev/null +++ b/docs/vv_templates/vv_policy.md @@ -0,0 +1,95 @@ +# DREAM3D-NX Filter V&V Policy (v2) + +This is the short-form policy for filter Verification & Validation (V&V) in DREAM3D-NX. It exists to support MTR SBIR deliverables and the public DREAM3D → DREAM3D-NX migration guide. + +Read this once. The deliverable is a one-page report per filter; the working details live in the templates and gates docs alongside this file. + +--- + +## Why + +DREAM3D 6.5.171 is trusted by reputation, not by proof. It has been in the field long enough that users rely on its numbers, but it was never subjected to V&V rigor. We therefore cannot treat "matches 6.5.171" as a correctness check — it is a **diff-explanation** check. + +Each filter must first be shown correct **independently** of 6.5.171 against an oracle, and only then diffed against legacy to produce user-facing migration guidance. + +## The one ordering rule + +> **Pick the oracle before running any DREAM3D 6.5.171 comparison.** + +Everything else — code-path enumeration, test inventory, exemplar provenance, algorithm review, documentation — can be worked in any order. + +## Oracle classes + +Every filter must be verified against at least one of the following. Classes 1–4 are preferred; Class 5 requires a documented justification for why no Class 1–4 oracle was feasible. 
+ +| # | Class | What it is | Drift risk | +|---|---|---|---| +| 1 | Analytical | Closed-form expected output on toy input (threshold, crop, rotate, color conversion, array arithmetic) | None | +| 2 | Reference implementation | Trusted external library produces the expected output (NumPy / SciPy / MTEX / EbsdLib upstream / Eigen) | **High** — library version drift | +| 3 | Paper-based | Filter reproduces a published figure, table, or equation from a named reference | Low–Medium | +| 4 | Invariant | Derivable properties the output must satisfy (FeatureIds start at 1 and contiguous; sum of phase fractions = 1; mass conservation) | None | +| 5 | Expert-visual | Domain expert signs off on output for canonical test cases. Last resort. | **High** — social drift | + +**"Legacy 6.5.171 produced this output" is never a valid oracle for correctness.** + +The oracle and its design **really should** be reviewed by a second engineer. A wrong oracle silently confirms buggy filters, and the filter author is the least likely person to notice — test cases unconsciously get designed around the code that already exists. If skipped, record the reason in the provenance sidecar. + +For detailed explanations of each class — with examples, strengths and weaknesses, drift-risk analysis, common anti-patterns ("what is NOT an oracle"), and a decision tree for picking the right class for a given filter — see [`oracle_classes.md`](./oracle_classes.md). + +## Deliverables per filter + +Three artifacts live in the source tree at the filter's commit: + +| Artifact | Location | +|---|---| +| **V&V report** (1 page) | `src/Plugins//vv/.md` | +| **Deviations from 6.5.171** | `src/Plugins//vv/deviations/.md` | +| **Exemplar provenance** (per archive) | `src/Plugins//vv/provenance/.md` | + +The verified state is pinned by **(commit hash, archive SHA512)**. The commit captures source + tests + `download_test_data()` declaration; the SHA512 captures the archive contents. 
+ +## Templates and gates + +| File | What it is | +|---|---| +| [`report_template.md`](./report_template.md) | Empty report — copy into `src/Plugins/<PluginName>/vv/<FilterName>.md` | +| [`report_gates.md`](./report_gates.md) | Per-section "Done when:" checklists — reference while filling in the report | +| [`deviation_template.md`](./deviation_template.md) | Empty deviation file — copy into `src/Plugins/

<PluginName>/vv/deviations/<FilterName>.md` | +| [`provenance_template.md`](./provenance_template.md) | Empty exemplar-provenance sidecar — copy per exemplar archive | + +## Engineer workflow + +1. Read this policy doc once. +2. Decide the oracle class for this filter (write it down). +3. Run `python scripts/vv_init.py <FilterName>` to scaffold the report and deviation files in the plugin tree. +4. Open `report_gates.md` in a second tab. +5. Work each section in any order. A section is "done" when all its gates pass. +6. When all gates are green, set `Status: READY FOR REVIEW` and push a `vv/` branch. +7. After sign-off, set `Status: COMPLETE`. Verified commit hash is filled in at SBIR deliverable assembly. + +## Status tracking across filters + +Each report's Status line gives a fleet-wide view via one grep: + +```bash +grep -r '^| Status |' src/Plugins/*/vv/*.md | sort +``` + +## Algorithm Relationship — opening claim of every report + +Every report's Algorithm Relationship section uses one of these classifications. This sets reader expectations and frames the Deviation entries. + +- **Port** — line-by-line translation of the legacy algorithm. Differences should be minor and confined to type precision, library calls, or parallelization. +- **Minor changes** — same algorithm intent with small deliberate improvements (e.g., `float` → `double`, corrected boundary handling, Eigen instead of hand-rolled math). +- **Rewrite** — substantially different implementation under the same UUID. **A rewrite that produces materially different outputs is a red flag** — keeping the UUID is a claim of functional equivalence. The Deviations file must defend the claim. +- **New filter, no legacy equivalent** — legacy comparison is N/A. 
+ +## Root-cause categories for Deviation entries + +When SIMPLNX and 6.5.171 differ, each Deviation entry names one root cause (or a short compound like "precision + library"): + +- **Bug** — one implementation is mathematically wrong +- **Precision** — different floating-point width or intermediate-math type +- **Order of operations** — associativity differences in parallel reductions, different loop order, different accumulation +- **Library** — Eigen vs. hand-rolled, different EbsdLib version, different HDF5 versions +- **Algorithmic choice** — deliberate change in method (used only for Rewrite relationships) diff --git a/scripts/vv_init.py b/scripts/vv_init.py new file mode 100644 index 0000000000..09a76a7542 --- /dev/null +++ b/scripts/vv_init.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +"""Scaffold V&V working files for a DREAM3D-NX filter. + +Creates the following from the v2 templates under docs/vv_templates/: + + src/Plugins//vv/.md + src/Plugins//vv/deviations/.md + +Provenance sidecars are created per exemplar archive on demand and are not +scaffolded by this script. + +Usage: + python scripts/vv_init.py [--plugin ] [--force] + +If --plugin is omitted, the script searches src/Plugins/*/src/*/Filters for +the matching .cpp and uses that plugin. The SIMPLNX UUID is +extracted from the .cpp file when present. 
# Body of scripts/vv_init.py: scaffolds the V&V report and deviation files for
# one filter from the templates under docs/vv_templates/.

from __future__ import annotations

import argparse
import re
import sys
from datetime import date
from pathlib import Path


REPO_ROOT = Path(__file__).resolve().parent.parent
TEMPLATE_DIR = REPO_ROOT / "docs" / "vv_templates"
PLUGINS_DIR = REPO_ROOT / "src" / "Plugins"

# Matches the Uuid::FromString("...") literal and captures the 36-character
# UUID text. Compiled once at import time.
_UUID_PATTERN = re.compile(r'Uuid::FromString\(\s*"([0-9a-fA-F\-]{36})"')


def find_filter_source(filter_name: str, plugin_hint: str | None) -> tuple[Path, Path]:
    """Return ``(plugin_dir, filter_cpp_path)`` for the named filter.

    Args:
        filter_name: Filter class name; the file searched for is
            ``<filter_name>.cpp`` inside a ``Filters`` directory.
        plugin_hint: Plugin directory name under ``PLUGINS_DIR``, or ``None``
            (or empty) to search every plugin using the fixed
            ``*/src/*/Filters/`` layout.

    Returns:
        The plugin directory and the path of the single matching ``.cpp``.

    Raises:
        SystemExit: if the hinted plugin directory does not exist, if no
            candidate file is found, or if more than one is found.
    """
    hinted_dir: Path | None = None
    if plugin_hint:
        hinted_dir = PLUGINS_DIR / plugin_hint
        if not hinted_dir.is_dir():
            sys.exit(f"Plugin directory not found: {hinted_dir}")
        # Recursive search: with an explicit plugin the file may sit at any depth.
        candidates = sorted(hinted_dir.rglob(f"Filters/{filter_name}.cpp"))
    else:
        candidates = sorted(PLUGINS_DIR.glob(f"*/src/*/Filters/{filter_name}.cpp"))

    if not candidates:
        sys.exit(
            f"Could not locate source file for {filter_name}.cpp under "
            f"{PLUGINS_DIR}. Pass --plugin if the filter is in an "
            f"unusual location."
        )
    if len(candidates) > 1:
        # Candidates are sorted so the listing is the same on every platform.
        paths = "\n ".join(str(p) for p in candidates)
        sys.exit(f"Multiple candidates for {filter_name}.cpp:\n {paths}\nPass --plugin to disambiguate.")

    filter_cpp = candidates[0]
    if hinted_dir is not None:
        # The hint is authoritative. The recursive match can be at any depth, so
        # counting ancestors from the file could land on the wrong directory.
        plugin_dir = hinted_dir
    else:
        # The glob above pins the layout to <plugin>/src/<x>/Filters/<name>.cpp,
        # so the plugin directory is always the file's fourth ancestor.
        plugin_dir = filter_cpp.parents[3]
    return plugin_dir, filter_cpp


def extract_uuid(filter_cpp: Path) -> str:
    """Return the first ``Uuid::FromString("...")`` UUID in the file, or ``''``.

    The file is read as UTF-8 with undecodable bytes replaced, so a stray
    non-UTF-8 byte in the source cannot abort the scaffold.
    """
    text = filter_cpp.read_text(encoding="utf-8", errors="replace")
    match = _UUID_PATTERN.search(text)
    return match.group(1) if match else ""


def render(template_path: Path, replacements: dict[str, str]) -> str:
    """Return the template text with each placeholder replaced literally.

    Args:
        template_path: UTF-8 template file to read.
        replacements: Mapping of placeholder token to replacement text,
            applied in mapping order. Empty tokens are ignored.
    """
    text = template_path.read_text(encoding="utf-8")
    for token, value in replacements.items():
        if not token:
            # str.replace("", value) would insert value between every
            # character of the template, so an empty token is never applied.
            continue
        text = text.replace(token, value)
    return text


def write_if_absent(target: Path, content: str, force: bool) -> None:
    """Write ``content`` to ``target`` unless it already exists and ``force`` is off.

    Parent directories are created as needed. One status line is printed,
    showing the path relative to ``REPO_ROOT``: "exists, skipped", "wrote",
    or "overwrote".
    """
    # Record existence before writing: afterwards the file always exists, which
    # would make every forced write look like an overwrite.
    existed = target.exists()
    if existed and not force:
        print(f" exists, skipped: {target.relative_to(REPO_ROOT)}")
        return
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
    verb = "overwrote" if existed else "wrote"
    print(f" {verb}: {target.relative_to(REPO_ROOT)}")


def main() -> None:
    """Parse the command line and scaffold the V&V files for one filter."""
    parser = argparse.ArgumentParser(description="Scaffold V&V working files for a filter.")
    parser.add_argument("filter_name", help="Filter class name, e.g., ComputeGroupingDensityFilter")
    parser.add_argument("--plugin", help="Plugin name (auto-detected from source layout if omitted)")
    parser.add_argument("--force", action="store_true", help="Overwrite existing files")
    args = parser.parse_args()

    plugin_dir, filter_cpp = find_filter_source(args.filter_name, args.plugin)
    plugin_name = plugin_dir.name
    uuid_value = extract_uuid(filter_cpp)
    today = date.today().isoformat()

    report_template = TEMPLATE_DIR / "report_template.md"
    deviation_template = TEMPLATE_DIR / "deviation_template.md"
    for path in (report_template, deviation_template):
        if not path.is_file():
            sys.exit(f"Missing template: {path}")

    # NOTE(review): these placeholder tokens are reconstructed. In the reviewed
    # copy of this script all three keys were empty strings, which collapse to a
    # single dict entry and corrupt the rendered text. Confirm the tokens match
    # the ones actually used in docs/vv_templates/*.md.
    replacements = {
        "<FilterName>": args.filter_name,
        "<PluginName>": plugin_name,
        "<UUID>": uuid_value,
    }

    vv_dir = plugin_dir / "vv"
    report_target = vv_dir / f"{args.filter_name}.md"
    deviation_target = vv_dir / "deviations" / f"{args.filter_name}.md"

    print(f"Filter: {args.filter_name}")
    print(f"Plugin: {plugin_name}")
    print(f"UUID: {uuid_value}")
    print(f"Date: {today}")
    print(f"Source: {filter_cpp.relative_to(REPO_ROOT)}")
    print()
    print("Writing:")
    write_if_absent(report_target, render(report_template, replacements), args.force)
    write_if_absent(deviation_target, render(deviation_template, replacements), args.force)

    # Ensure the provenance dir exists so the engineer has a place to land sidecars.
    provenance_dir = vv_dir / "provenance"
    provenance_dir.mkdir(parents=True, exist_ok=True)

    print()
    print("Next steps:")
    print(f" 1. Open {report_target.relative_to(REPO_ROOT)} and fill in the header.")
    print(f" 2. Open {TEMPLATE_DIR.relative_to(REPO_ROOT) / 'report_gates.md'} alongside it.")
    print(" 3. Decide the oracle class (the one ordering rule).")
    print(" 4. As exemplar archives are touched, copy provenance_template.md into")
    print(f" {provenance_dir.relative_to(REPO_ROOT)}/.md")


if __name__ == "__main__":
    main()