From 7c46085a5da275e701700e188fb12493106c4146 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Sun, 17 May 2026 15:58:51 +0200 Subject: [PATCH 01/10] Document verbosity category and workspace.cif layout --- .../adr_workspace-root-project-category.md | 56 +++++++++- .../plan_workspace-root-project-category.md | 103 +++++++++++++++--- 2 files changed, 138 insertions(+), 21 deletions(-) diff --git a/docs/dev/ADR-suggestions/adr_workspace-root-project-category.md b/docs/dev/ADR-suggestions/adr_workspace-root-project-category.md index b53ead95..04ea8f88 100644 --- a/docs/dev/ADR-suggestions/adr_workspace-root-project-category.md +++ b/docs/dev/ADR-suggestions/adr_workspace-root-project-category.md @@ -73,6 +73,7 @@ workspace = ed.Workspace(project_id='lbco_hrpt') workspace.project.id workspace.project.title = 'La0.5Ba0.5CoO3 at HRPT@PSI' workspace.rendering.table_engine = 'rich' +workspace.verbosity = 'short' workspace.structures workspace.experiments workspace.analysis @@ -89,9 +90,29 @@ _project.last_modified _rendering.chart_engine _rendering.table_engine + +_verbosity.level +``` + +The saved directory is a workspace directory whose filesystem name is +chosen by the user. The canonical layout is: + +```text +/ +|-- workspace.cif +|-- structures/ +| `-- cosio.cif +|-- experiments/ +| `-- d20.cif +|-- analysis/ +| `-- analysis.cif +`-- summary/ + `-- summary.cif ``` -Do not introduce `_meta.*` CIF tags. +Do not introduce `_meta.*` CIF tags. Do not use `project.cif`, +`config.cif`, or `meta.cif` as the primary singleton configuration file +in the target layout. The intended naming split is: @@ -99,6 +120,7 @@ The intended naming split is: Workspace |-- project # information about the scientific project |-- rendering # rendering preferences +|-- verbosity # console/output verbosity preference |-- structures # real structure datablocks |-- experiments # real experiment datablocks |-- analysis # analysis section @@ -169,15 +191,25 @@ After this decision, each layer has a clear rule: This avoids one-off aliases such as `project.info` while preserving semantic CIF names. +### `workspace.cif` is clearer than `project.cif`, `config.cif`, or `meta.cif` + +The file stores singleton settings owned by the workspace: scientific +project information, rendering preferences, and verbosity. `project.cif` +overloads the project name again, while `config.cif` and `meta.cif` are +generic. `workspace.cif` names the owning layer and lets each category +inside the file keep its domain-specific name. + ## Consequences ### Positive - The root object and project-information category no longer share the same conceptual name. -- Public category access becomes uniform: `workspace.project`, - `workspace.rendering`, `workspace.analysis`. +- Public access becomes uniform: `workspace.project`, + `workspace.rendering`, `workspace.verbosity`, `workspace.analysis`. - CIF stays semantic and does not introduce `_meta.*`. +- Workspace-level preferences such as rendering and verbosity are + saved with the workspace instead of being hidden runtime-only state. - Project information can use short item names such as `id`, `title`, and `description`. - The top-level facade name better reflects active runtime @@ -190,6 +222,8 @@ semantic CIF names. updated from `Project` to `Workspace`. - Existing saved directories using `project.cif` must be migrated to `workspace.cif` if no compatibility loader is kept. +- Existing code that expected verbosity to be runtime-only must account + for it round-tripping through `workspace.cif`. - Users familiar with `Project` must learn the new root name. - `Workspace` can be confused with a filesystem workspace in some ecosystems, so documentation must define it clearly as the active @@ -254,6 +288,16 @@ This is readable, but it preserves a special-case category alias. The current goal is stronger consistency between public categories and CIF category concepts. +### Use `project.cif`, `config.cif`, or `meta.cif` for singleton settings + +Rejected for the target layout. + +`project.cif` repeats the overloaded term that this migration removes. +`config.cif` and `meta.cif` are too generic and do not say which layer +owns the settings. `workspace.cif` is more explicit while still allowing +semantic categories such as `_project`, `_rendering`, and `_verbosity` +inside the file. + ### Rename only internal files and keep public API unchanged Rejected for the target design. @@ -293,7 +337,8 @@ The high-level migration is: 8. Keep CIF tags `_project.*` and `_rendering.*`. 9. Rename saved singleton config file from `project.cif` to `workspace.cif`. -10. Update code, tests, scripts, tutorials, docs, and architecture +10. Persist workspace verbosity in `workspace.cif` as `_verbosity.level`. +11. Update code, tests, scripts, tutorials, docs, and architecture references. ## Post-Implementation ADR Update @@ -323,6 +368,7 @@ This ADR is satisfied when: - the saved directory path is exposed as `workspace.path`. - the public rendering category is `workspace.rendering`. - saved singleton configuration lives in `workspace.cif`. -- `workspace.cif` uses `_project.*` and `_rendering.*` tags. +- `workspace.cif` uses `_project.*`, `_rendering.*`, and + `_verbosity.level` tags. - no `_meta.*` tags are introduced for project information. - tutorials and architecture documentation use `Workspace`. diff --git a/docs/dev/plan_workspace-root-project-category.md b/docs/dev/plan_workspace-root-project-category.md index b57e318a..81c9b183 100644 --- a/docs/dev/plan_workspace-root-project-category.md +++ b/docs/dev/plan_workspace-root-project-category.md @@ -96,6 +96,7 @@ workspace = ed.Workspace(project_id='lbco_hrpt') workspace.project.id workspace.project.title = 'La0.5Ba0.5CoO3 at HRPT@PSI' workspace.rendering.table_engine = 'rich' +workspace.verbosity = 'short' workspace.structures workspace.experiments workspace.analysis @@ -121,10 +122,27 @@ _project.last_modified _rendering.chart_engine _rendering.table_engine + +_verbosity.level ``` Do not introduce `_meta.*` tags. +Target saved layout: + +```text +/ +|-- workspace.cif +|-- structures/ +| `-- cosio.cif +|-- experiments/ +| `-- d20.cif +|-- analysis/ +| `-- analysis.cif +`-- summary/ + `-- summary.cif +``` + ## Decisions Already Made Use these decisions unless the user explicitly changes the ADR before @@ -137,7 +155,12 @@ implementation: - The public rendering category remains `workspace.rendering`. - The project-information category keeps semantic CIF tags `_project.*`. - The rendering category keeps semantic CIF tags `_rendering.*`. +- The verbosity preference is serialized as `_verbosity.level`. - The saved singleton config file becomes `workspace.cif`. +- The saved root is a workspace directory with a user-chosen filesystem + name; do not use `project` as the conceptual root name in new docs. +- Do not use `project.cif`, `config.cif`, or `meta.cif` as the primary + singleton config file in the target layout. - The storage directory path belongs to `Workspace.path`, not `workspace.project.path`. - The old `Project` public API is removed unless the user explicitly @@ -185,7 +208,8 @@ src/easydiffraction/workspace/ |-- display.py # class WorkspaceDisplay `-- categories/ |-- project/ # ProjectInfo category - `-- rendering/ # Rendering category + |-- rendering/ # Rendering category + `-- verbosity/ # Verbosity category ``` Target public API: @@ -194,6 +218,7 @@ Target public API: workspace = ed.Workspace(project_id='my_project') workspace.project.title workspace.rendering.table_engine +workspace.verbosity = 'short' ``` ## Out Of Scope @@ -201,6 +226,8 @@ workspace.rendering.table_engine Do not do these in this migration: - Do not add `_meta.*` CIF tags. +- Do not use `project.cif`, `config.cif`, or `meta.cif` as the target + singleton settings file. - Do not redesign structure or experiment datablocks. - Do not change analysis fit-mode semantics. - Do not change calculator behavior. @@ -527,6 +554,8 @@ Rename the saved singleton configuration file from `project.cif` to - `src/easydiffraction/workspace/workspace.py` - `src/easydiffraction/io/cif/serialize.py` +- `src/easydiffraction/workspace/workspace_config.py` +- `src/easydiffraction/workspace/categories/verbosity/` - CLI entry points in `src/easydiffraction/__main__.py` - docs that describe saved project directories - test fixtures in Phase 2 @@ -556,24 +585,43 @@ Rename the saved singleton configuration file from `project.cif` to workspace.cif ``` -4. Do not add `project.cif` fallback unless the user approved a - compatibility loader. +4. Do not add `project.cif`, `config.cif`, or `meta.cif` fallbacks + unless the user approved a compatibility loader. + +5. Move the public workspace verbosity preference into the workspace + singleton configuration. Keep the simple public access path: + + ```python + workspace.verbosity = 'short' + ``` + + Serialize it as: + + ```cif + _verbosity.level short + ``` + + A small `Verbosity` category under `WorkspaceConfig` is preferred if + it follows the local category-owner pattern cleanly. If that is too + much for this migration, use a focused serializer/deserializer helper + and document the reason. -5. Keep the contents semantic: +6. Keep the contents semantic: ```cif _project.id _project.title _rendering.table_engine + _verbosity.level ``` -6. Update logging and console output from `project.cif` to +7. Update logging and console output from `project.cif` to `workspace.cif`. -7. Run grep: +8. Run grep: ```shell - rg -n "project\\.cif|project_config_to_cif|project_config_from_cif|project_to_cif" src docs tests + rg -n "project\\.cif|config\\.cif|meta\\.cif|project_config_to_cif|project_config_from_cif|project_to_cif|verbosity" src docs tests ``` In Phase 1, update source and docs only. Test files are handled in @@ -586,6 +634,8 @@ Stop and ask if: - repository fixtures or tutorials contain saved directories that must remain loadable without conversion; - the user wants a one-release compatibility loader. +- the verbosity setting cannot be represented as a category without + weakening the public `workspace.verbosity` API. ### Commit @@ -716,6 +766,7 @@ Do not edit these by hand: ```text workspace.project ProjectInfo workspace.rendering Rendering + workspace.verbosity str workspace.display WorkspaceDisplay ``` @@ -884,14 +935,17 @@ Add focused tests for: 3. `workspace.project.title` round-trips through `workspace.cif`. 4. `workspace.rendering.table_engine` round-trips through `workspace.cif`. -5. `Workspace.save()` writes `workspace.cif`. -6. `Workspace.load()` reads `workspace.cif`. -7. `workspace.cif` contains `_project.id`, not `_meta.project_id`. -8. `workspace.cif` contains `_rendering.table_engine`. -9. `workspace.path` is set after `save_as()` and `load()`. -10. `workspace.project` has no serialized path field. -11. `project.cif` is not written unless compatibility was approved. -12. `ed.Project` is absent unless compatibility was approved. +5. `workspace.verbosity` round-trips through `workspace.cif`. +6. `Workspace.save()` writes `workspace.cif`. +7. `Workspace.load()` reads `workspace.cif`. +8. `workspace.cif` contains `_project.id`, not `_meta.project_id`. +9. `workspace.cif` contains `_rendering.table_engine`. +10. `workspace.cif` contains `_verbosity.level`. +11. `workspace.path` is set after `save_as()` and `load()`. +12. `workspace.project` has no serialized path field. +13. `project.cif`, `config.cif`, and `meta.cif` are not written unless + compatibility was approved. +14. `ed.Project` is absent unless compatibility was approved. If compatibility alias was approved, add tests for: @@ -952,7 +1006,13 @@ rg -n "_meta\\.|_project\\." src tests docs tools README.md CONTRIBUTING.md Saved config file should be `workspace.cif`: ```shell -rg -n "project\\.cif|workspace\\.cif" src tests docs tools README.md CONTRIBUTING.md +rg -n "project\\.cif|config\\.cif|meta\\.cif|workspace\\.cif" src tests docs tools README.md CONTRIBUTING.md +``` + +Workspace verbosity should serialize as a workspace-level category: + +```shell +rg -n "_verbosity|verbosity" src tests docs tools README.md CONTRIBUTING.md ``` Generated docs should not be manually edited: @@ -1001,6 +1061,17 @@ Incorrect: _meta.project_title ``` +### Mistake: Keeping `project.cif` Or Switching To Generic File Names + +Do not use `project.cif`, `config.cif`, or `meta.cif` as the target +singleton settings file. The file belongs to the workspace layer. + +Correct: + +```text +workspace.cif +``` + ### Mistake: Blindly Replacing Every `project` Some uses of `project` should remain: From 2e81c0fce34b70a031a90170ceffae1b2f953efe Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Sun, 17 May 2026 16:20:27 +0200 Subject: [PATCH 02/10] Reorganize development documentation structure --- .github/copilot-instructions.md | 34 +- .../accepted/category-owner-sections.md} | 0 .../accepted/category-parameter-access.md | 44 + .../accepted/development-docs-structure.md | 66 + .../accepted/display-ux.md} | 0 .../accepted/enum-backed-closed-values.md | 34 + docs/dev/adrs/accepted/factory-contracts.md | 44 + docs/dev/adrs/accepted/factory-tag-naming.md | 48 + .../accepted/fit-mode-categories.md} | 0 .../adrs/accepted/free-flag-cif-encoding.md | 39 + .../accepted/guarded-public-properties.md | 44 + .../accepted/help-discoverability.md} | 0 .../accepted/immutable-experiment-type.md | 35 + .../accepted/lint-complexity-thresholds.md | 33 + docs/dev/adrs/accepted/notebook-generation.md | 34 + .../project-facade-and-persistence.md | 47 + .../accepted/property-docstring-template.md | 39 + docs/dev/adrs/accepted/runtime-fit-results.md | 36 + docs/dev/adrs/accepted/selector-families.md | 40 + .../string-paths-and-live-descriptors.md | 45 + .../adrs/accepted/switchable-category-api.md | 45 + docs/dev/adrs/accepted/test-strategy.md | 41 + .../accepted/type-neutral-adp-parameters.md | 42 + docs/dev/adrs/index.md | 49 + .../suggestions/analysis-cif-fit-state.md} | 8 +- .../parameter-correlation-persistence.md} | 0 .../parameter-posterior-summary.md} | 4 +- .../suggestions/undo-fit.md} | 2 +- .../workspace-root-project-category.md} | 23 +- docs/dev/architecture.md | 1631 ----------------- docs/dev/index.md | 29 + .../issues_closed.md => issues/closed.md} | 3 +- .../{Issues/issues_open.md => issues/open.md} | 12 +- .../full.md} | 0 .../short.md} | 0 .../workspace-root-project-category.md} | 28 +- docs/dev/{ => roadmap}/ROADMAP.md | 0 tools/generate_package_docs.py | 12 +- tools/param_consistency.py | 4 +- 39 files changed, 903 insertions(+), 1692 deletions(-) rename docs/dev/{ADRs/adr_category-owner-sections.md => adrs/accepted/category-owner-sections.md} (100%) create mode 100644 docs/dev/adrs/accepted/category-parameter-access.md create mode 100644 docs/dev/adrs/accepted/development-docs-structure.md rename docs/dev/{ADRs/adr_display-ux.md => adrs/accepted/display-ux.md} (100%) create mode 100644 docs/dev/adrs/accepted/enum-backed-closed-values.md create mode 100644 docs/dev/adrs/accepted/factory-contracts.md create mode 100644 docs/dev/adrs/accepted/factory-tag-naming.md rename docs/dev/{ADRs/adr_fit-mode-categories.md => adrs/accepted/fit-mode-categories.md} (100%) create mode 100644 docs/dev/adrs/accepted/free-flag-cif-encoding.md create mode 100644 docs/dev/adrs/accepted/guarded-public-properties.md rename docs/dev/{ADRs/adr_help-discoverability.md => adrs/accepted/help-discoverability.md} (100%) create mode 100644 docs/dev/adrs/accepted/immutable-experiment-type.md create mode 100644 docs/dev/adrs/accepted/lint-complexity-thresholds.md create mode 100644 docs/dev/adrs/accepted/notebook-generation.md create mode 100644 docs/dev/adrs/accepted/project-facade-and-persistence.md create mode 100644 docs/dev/adrs/accepted/property-docstring-template.md create mode 100644 docs/dev/adrs/accepted/runtime-fit-results.md create mode 100644 docs/dev/adrs/accepted/selector-families.md create mode 100644 docs/dev/adrs/accepted/string-paths-and-live-descriptors.md create mode 100644 docs/dev/adrs/accepted/switchable-category-api.md create mode 100644 docs/dev/adrs/accepted/test-strategy.md create mode 100644 docs/dev/adrs/accepted/type-neutral-adp-parameters.md create mode 100644 docs/dev/adrs/index.md rename docs/dev/{ADR-suggestions/adr_analysis-cif-fit-state.md => adrs/suggestions/analysis-cif-fit-state.md} (96%) rename docs/dev/{ADR-suggestions/adr_parameter-correlation-persistence.md => adrs/suggestions/parameter-correlation-persistence.md} (100%) rename docs/dev/{ADR-suggestions/adr_parameter-posterior-summary.md => adrs/suggestions/parameter-posterior-summary.md} (99%) rename docs/dev/{ADR-suggestions/adr_undo-fit.md => adrs/suggestions/undo-fit.md} (99%) rename docs/dev/{ADR-suggestions/adr_workspace-root-project-category.md => adrs/suggestions/workspace-root-project-category.md} (94%) delete mode 100644 docs/dev/architecture.md create mode 100644 docs/dev/index.md rename docs/dev/{Issues/issues_closed.md => issues/closed.md} (98%) rename docs/dev/{Issues/issues_open.md => issues/open.md} (99%) rename docs/dev/{package-structure-full.md => package-structure/full.md} (100%) rename docs/dev/{package-structure-short.md => package-structure/short.md} (100%) rename docs/dev/{plan_workspace-root-project-category.md => plans/workspace-root-project-category.md} (97%) rename docs/dev/{ => roadmap}/ROADMAP.md (100%) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 0a54cc94..af6f3606 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -77,7 +77,7 @@ ## Testing - Every new module, class, or bug fix ships with tests. See - `docs/dev/architecture.md` §10 for the full strategy. + `docs/dev/adrs/accepted/test-strategy.md` for the full strategy. - Unit tests mirror the source tree: `src/easydiffraction//.py` → `tests/unit/easydiffraction//test_.py`. Verify with @@ -102,8 +102,11 @@ - Before any structural/design change (new categories, factories, switchable-category wiring, datablocks, CIF serialisation), read - `docs/dev/architecture.md` and follow documented patterns. Localised - bug fixes or test updates need only this file. + `docs/dev/adrs/index.md` and the relevant accepted ADRs. Localised bug + fixes or test updates need only this file. +- Development documentation lives under `docs/dev/`. Use + `docs/dev/adrs/index.md` as the architecture and decision navigation + surface; there is no separate `architecture.md` source of truth. - Project is in beta: no legacy shims, no deprecation warnings — update tests and tutorials to the current API. - Minimal diffs; don't reformat working code. Fix only what's asked; @@ -137,21 +140,24 @@ Non-trivial changes use a two-phase workflow: -- **Phase 1 — Implementation.** Code, docs, and architecture updates - only. Do not create or run tests unless the user explicitly asks. When - done, present for review and iterate until approved. +- **Phase 1 — Implementation.** Code and docs updates only. Update ADRs + when the change affects architecture or documented decisions. Do not + create or run tests unless the user explicitly asks. When done, + present for review and iterate until approved. - **Phase 2 — Verification.** Add/update tests, then run `pixi run fix`, `pixi run check`, `pixi run unit-tests`, `pixi run integration-tests`, `pixi run script-tests`. Notes: -- `pixi run fix` regenerates `docs/dev/package-structure-*.md` - automatically — never edit those by hand. Don't review auto-fixes; - accept and move on. Then `pixi run check` until clean. +- `pixi run fix` regenerates `docs/dev/package-structure/full.md` and + `docs/dev/package-structure/short.md` automatically — never edit those + by hand. Don't review auto-fixes; accept and move on. Then + `pixi run check` until clean. - Open issues / design questions / planned improvements live in - `docs/dev/issues_open.md` (priority-ordered). On resolution, move to - `docs/dev/issues_closed.md` and update `architecture.md` if affected. + `docs/dev/issues/open.md` (priority-ordered). On resolution, move to + `docs/dev/issues/closed.md` and update the relevant ADR or + `docs/dev/adrs/index.md` if affected. ### Planning @@ -161,9 +167,9 @@ When asked to create a plan: all ambiguous or unclear questions in one concise batch; record unresolved questions in the plan if the user wants it saved before answering them. -- Save plans as `docs/dev/plan_.md` (lowercase, - dash-separated, e.g. `plan_background-refactor.md`). Use the same - `` for the implementation branch +- Save plans as `docs/dev/plans/.md` (lowercase, + dash-separated, e.g. `docs/dev/plans/background-refactor.md`). Use the + same `` for the implementation branch (`feature/`). Do not push the branch unless asked. - Include a status checklist with `[ ]` items; mark `[x]` as completed during implementation. diff --git a/docs/dev/ADRs/adr_category-owner-sections.md b/docs/dev/adrs/accepted/category-owner-sections.md similarity index 100% rename from docs/dev/ADRs/adr_category-owner-sections.md rename to docs/dev/adrs/accepted/category-owner-sections.md diff --git a/docs/dev/adrs/accepted/category-parameter-access.md b/docs/dev/adrs/accepted/category-parameter-access.md new file mode 100644 index 00000000..8c4b17d3 --- /dev/null +++ b/docs/dev/adrs/accepted/category-parameter-access.md @@ -0,0 +1,44 @@ +# ADR: Two-Level Category Parameter Access + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Core model. + +## Context + +EasyDiffraction models CIF concepts as datablocks, categories, category +collections, and parameters. Users need predictable navigation paths +from a structure or experiment to any parameter. + +## Decision + +Use at most two navigation levels from a datablock to a parameter: + +```python +structure.cell.length_a = 3.88 +experiment.instrument.setup_wavelength = 1.494 +structure.atom_sites['Si'].adp_iso = 0.47 +experiment.background['10'].y = 170 +``` + +The general forms are: + +- `DATABLOCK.CATEGORY_ITEM.PARAMETER` +- `DATABLOCK.CATEGORY_COLLECTION[ITEM_ID].PARAMETER` + +Categories are flat siblings under their owner. A category must not own +another category of a different type. + +## Consequences + +API paths remain short, regular, and close to CIF category structure. +Nested category designs are rejected because they make parameter access +depth depend on the domain area and obscure the CIF-like model. diff --git a/docs/dev/adrs/accepted/development-docs-structure.md b/docs/dev/adrs/accepted/development-docs-structure.md new file mode 100644 index 00000000..223fb71e --- /dev/null +++ b/docs/dev/adrs/accepted/development-docs-structure.md @@ -0,0 +1,66 @@ +# ADR: Development Documentation Structure + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Documentation. + +## Context + +Development notes, ADRs, roadmap material, package-structure snapshots, +and issue backlogs were all kept directly under `docs/dev` or under +mixed-case directories. That made the development documentation harder +to scan and created several competing naming conventions. + +The repository also contains `docs/docs`, which is the MkDocs source for +published user documentation. Development-only material should not be +mixed into that tree unless it is intentionally published. + +## Decision + +Keep development-only documentation under `docs/dev`. + +Use this structure: + +```text +docs/dev/ +|-- index.md +|-- adrs/ +| |-- index.md +| |-- accepted/ +| `-- suggestions/ +|-- issues/ +| |-- open.md +| `-- closed.md +|-- package-structure/ +| |-- full.md +| `-- short.md +|-- plans/ +`-- roadmap/ + `-- ROADMAP.md +``` + +Use lowercase directory names for new development-doc folders. Keep +`ROADMAP.md` uppercase because it may later be copied into published +user documentation where the conventional filename is useful. + +Use `docs/dev/adrs/index.md` as the architecture and decision navigation +surface. Do not keep a separate architecture overview that duplicates +ADR content. + +## Consequences + +- Development documentation remains under the documentation tree without + being part of the published MkDocs source by default. +- ADRs have one entry point for architecture navigation and separate + accepted and proposed areas. +- Package-structure snapshots have stable, script-friendly paths. +- Roadmap publication can later be implemented as a build-time copy from + `docs/dev/roadmap/ROADMAP.md` into `docs/docs`. diff --git a/docs/dev/ADRs/adr_display-ux.md b/docs/dev/adrs/accepted/display-ux.md similarity index 100% rename from docs/dev/ADRs/adr_display-ux.md rename to docs/dev/adrs/accepted/display-ux.md diff --git a/docs/dev/adrs/accepted/enum-backed-closed-values.md b/docs/dev/adrs/accepted/enum-backed-closed-values.md new file mode 100644 index 00000000..1e89cc7c --- /dev/null +++ b/docs/dev/adrs/accepted/enum-backed-closed-values.md @@ -0,0 +1,34 @@ +# ADR: Enum-Backed Closed Value Sets + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Core model. + +## Context + +Many attributes accept a finite set of values: experiment axes, factory +tags, fit modes, calculators, minimizers, and rendering engines. +String-only dispatch is hard to grep and easy to mistype. + +## Decision + +Represent every finite closed set with a `(str, Enum)` class. + +Use enum members as the internal source of truth for validation, +dispatch, default rules, and descriptions. User-facing setters may +accept either enum members or their string values, but internal code +compares against enum members. + +## Consequences + +Finite choices are discoverable, type-checkable, and greppable. +Validation can use enum membership instead of hand-written string +patterns. diff --git a/docs/dev/adrs/accepted/factory-contracts.md b/docs/dev/adrs/accepted/factory-contracts.md new file mode 100644 index 00000000..154f092c --- /dev/null +++ b/docs/dev/adrs/accepted/factory-contracts.md @@ -0,0 +1,44 @@ +# ADR: Factory Contracts and Metadata + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Factories. + +## Context + +Many domain categories have multiple implementations, and some currently +have only one implementation but may gain more later. Construction, +default selection, compatibility filtering, and supported-option display +need a common contract. + +## Decision + +Every category that can be constructed by framework code is created +through a factory, even when it currently has only one implementation. + +Concrete factory-created classes carry metadata appropriate to their +role: + +- `type_info` for stable tag lookup and user descriptions. +- `compatibility` for experiment-axis compatibility where relevant. +- `calculator_support` for calculation-engine support where relevant. + +Child rows that only exist inside a collection do not carry factory +metadata; the collection carries the metadata. + +Factory registration is triggered by explicit imports in package +`__init__.py` files. + +## Consequences + +Construction, default resolution, support tables, and compatibility +filtering stay uniform. Single-implementation categories are ready for +future alternatives without changing the public construction pattern. diff --git a/docs/dev/adrs/accepted/factory-tag-naming.md b/docs/dev/adrs/accepted/factory-tag-naming.md new file mode 100644 index 00000000..edf8a724 --- /dev/null +++ b/docs/dev/adrs/accepted/factory-tag-naming.md @@ -0,0 +1,48 @@ +# ADR: Factory Tag Naming + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Naming. + +## Context + +Factory tags are persisted and used for public selection. Inconsistent +abbreviations or ordering would make saved files and user code harder to +read. + +## Decision + +Canonical factory tags are: + +- lowercase +- hyphen-separated +- semantically ordered from general to specific +- unique within a factory + +Use standard abbreviations: + +| Concept | Abbreviation | +| ------------------- | ------------ | +| Powder | `pd` | +| Single crystal | `sc` | +| Constant wavelength | `cwl` | +| Time-of-flight | `tof` | +| Bragg scattering | `bragg` | +| Total scattering | `total` | + +Context-local aliases are allowed when the owning object already +disambiguates the choice, for example `pseudo-voigt` inside a CWL +experiment. + +## Consequences + +Saved tags are stable and greppable, while user-facing APIs can remain +short where context makes the canonical prefix redundant. diff --git a/docs/dev/ADRs/adr_fit-mode-categories.md b/docs/dev/adrs/accepted/fit-mode-categories.md similarity index 100% rename from docs/dev/ADRs/adr_fit-mode-categories.md rename to docs/dev/adrs/accepted/fit-mode-categories.md diff --git a/docs/dev/adrs/accepted/free-flag-cif-encoding.md b/docs/dev/adrs/accepted/free-flag-cif-encoding.md new file mode 100644 index 00000000..e9e7509b --- /dev/null +++ b/docs/dev/adrs/accepted/free-flag-cif-encoding.md @@ -0,0 +1,39 @@ +# ADR: Free-Flag CIF Encoding + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Persistence. + +## Context + +Fitting needs to persist whether a parameter is fixed or free. A +separate free-parameter list would duplicate state already attached to +individual parameter values and could become stale. + +## Decision + +Encode a parameter's free/fixed status in the CIF value uncertainty +syntax: + +```text +3.89 fixed +3.89(2) free with estimated standard deviation +3.89() free without estimated standard deviation +``` + +Do not persist a separate list of free parameters as the source of +truth. + +## Consequences + +Each parameter carries its own fit-state information in the same place +as the value. CIF round-trips are simpler because there is no separate +state table to reconcile. diff --git a/docs/dev/adrs/accepted/guarded-public-properties.md b/docs/dev/adrs/accepted/guarded-public-properties.md new file mode 100644 index 00000000..15fd6308 --- /dev/null +++ b/docs/dev/adrs/accepted/guarded-public-properties.md @@ -0,0 +1,44 @@ +# ADR: Guarded Public Properties + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Core model. + +## Context + +Most EasyDiffraction domain objects inherit from `GuardedBase`. +Descriptors and parameters are stored privately and exposed through +public properties. The public API needs a clear rule for whether a user +can assign to an attribute, and internal construction code still needs a +way to populate read-only values. + +## Decision + +Treat the presence of a property setter as the public writability +contract. + +- Public properties with setters are user-editable. +- Public properties without setters are read-only. +- Internal mutation of read-only properties uses explicit private + methods such as `_set_sample_form`. +- Unknown public attributes are rejected by `GuardedBase.__setattr__` + with diagnostics. + +Do not add a public setter only for internal use, because that makes the +attribute user-writable. + +## Consequences + +The public API stays predictable: reading a property returns the live +descriptor or parameter object, and assigning to the property is allowed +only when the class explicitly declares that assignment as part of the +API. Internal loaders and factories remain greppable because private +mutator methods are named directly. diff --git a/docs/dev/ADRs/adr_help-discoverability.md b/docs/dev/adrs/accepted/help-discoverability.md similarity index 100% rename from docs/dev/ADRs/adr_help-discoverability.md rename to docs/dev/adrs/accepted/help-discoverability.md diff --git a/docs/dev/adrs/accepted/immutable-experiment-type.md b/docs/dev/adrs/accepted/immutable-experiment-type.md new file mode 100644 index 00000000..9740b110 --- /dev/null +++ b/docs/dev/adrs/accepted/immutable-experiment-type.md @@ -0,0 +1,35 @@ +# ADR: Immutable Experiment Type + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Experiment model. + +## Context + +An experiment is defined by four orthogonal axes: sample form, +scattering type, beam mode, and radiation probe. Changing those axes +after creation can require replacing categories, calculators, data +collections, and validation rules. + +## Decision + +Experiment type is set at creation time and is immutable afterwards. + +Factory and CIF-loading code may set the type through private +construction methods, but users cannot mutate the type axes on an +existing experiment. + +## Consequences + +The model avoids partial transformations between fundamentally different +experiment configurations. Runtime switching remains available for +category implementations, such as background or peak profile, where the +state replacement is bounded and explicit. diff --git a/docs/dev/adrs/accepted/lint-complexity-thresholds.md b/docs/dev/adrs/accepted/lint-complexity-thresholds.md new file mode 100644 index 00000000..d2ba16b8 --- /dev/null +++ b/docs/dev/adrs/accepted/lint-complexity-thresholds.md @@ -0,0 +1,33 @@ +# ADR: Lint Complexity Thresholds + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Quality. + +## Context + +The repository enforces ruff PLR complexity rules. Raising thresholds or +adding local suppressions would make complex functions easier to merge +without addressing maintainability. + +## Decision + +Keep ruff's default PLR thresholds, except set `max-args` and +`max-positional-args` to 6 because ruff counts `self` and `cls`. + +Do not raise complexity thresholds or add `# noqa` comments to silence +complexity rules. Refactor code instead. + +## Consequences + +Complexity failures are treated as design feedback. Large refactors that +change public API should be planned explicitly instead of bypassing the +guardrail. diff --git a/docs/dev/adrs/accepted/notebook-generation.md b/docs/dev/adrs/accepted/notebook-generation.md new file mode 100644 index 00000000..47faa3b6 --- /dev/null +++ b/docs/dev/adrs/accepted/notebook-generation.md @@ -0,0 +1,34 @@ +# ADR: Notebook Generation Source of Truth + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Documentation. + +## Context + +Tutorial notebooks are published as `.ipynb` files, but editing notebook +JSON by hand creates noisy diffs and inconsistent formatting. + +## Decision + +Treat tutorial `.py` files under `docs/docs/tutorials/` as the editable +source of truth. Regenerate notebooks with: + +```shell +pixi run notebook-prepare +``` + +Do not edit generated `.ipynb` tutorial files by hand. + +## Consequences + +Tutorial diffs stay reviewable, and notebooks are regenerated through +the same script path used by the documentation workflow. diff --git a/docs/dev/adrs/accepted/project-facade-and-persistence.md b/docs/dev/adrs/accepted/project-facade-and-persistence.md new file mode 100644 index 00000000..67492d8d --- /dev/null +++ b/docs/dev/adrs/accepted/project-facade-and-persistence.md @@ -0,0 +1,47 @@ +# ADR: Project Facade and Persistence Layout + +## Status + +Accepted current design. + +## Date + +2026-05-17 + +## Group + +Persistence. + +## Context + +`Project` is the current top-level user facade. It owns project +metadata, structures, experiments, rendering preferences, display +helpers, analysis, summaries, verbosity, and save/load behavior. + +The persisted project directory needs to separate real CIF datablocks +from singleton project sections. + +## Decision + +Use `Project` as the current top-level facade and persist projects as a +directory of CIF files: + +```text +project_dir/ +|-- project.cif +|-- summary.cif +|-- structures/ +|-- experiments/ +`-- analysis/ + `-- analysis.cif +``` + +Real structures and experiments serialize as `data_` datablocks. +Singleton sections such as project configuration, analysis, and summary +serialize without fake `data_` headers. + +## Consequences + +The saved layout mirrors the current object graph while preserving the +semantic difference between real datablocks and singleton sections. A +proposed `Workspace` rename is tracked separately as an ADR suggestion. diff --git a/docs/dev/adrs/accepted/property-docstring-template.md b/docs/dev/adrs/accepted/property-docstring-template.md new file mode 100644 index 00000000..e78bd457 --- /dev/null +++ b/docs/dev/adrs/accepted/property-docstring-template.md @@ -0,0 +1,39 @@ +# ADR: Descriptor Property Docstring Template + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Documentation. + +## Context + +Public properties backed by `Parameter`, `NumericDescriptor`, or +`StringDescriptor` need consistent documentation and type hints. +Duplicating free-form text across docstrings and descriptor descriptions +creates drift. + +## Decision + +Use descriptor `description` fields as the source of truth for property +docstrings. Public properties follow the architecture template for: + +- getter summary text +- writable versus read-only getter body +- getter return annotation +- setter value annotation + +Setter docstrings are omitted because they are not rendered by the API +documentation pipeline. + +## Consequences + +Generated API documentation stays consistent with descriptor metadata. +The `param-consistency` tool can validate and repair property docstrings +mechanically. diff --git a/docs/dev/adrs/accepted/runtime-fit-results.md b/docs/dev/adrs/accepted/runtime-fit-results.md new file mode 100644 index 00000000..f7ee8389 --- /dev/null +++ b/docs/dev/adrs/accepted/runtime-fit-results.md @@ -0,0 +1,36 @@ +# ADR: Runtime Fit Results + +## Status + +Accepted current design. + +## Date + +2026-05-17 + +## Group + +Analysis and fitting. + +## Context + +Analysis settings are persisted in `analysis/analysis.cif`, but full fit +outputs can include large arrays, posterior samples, predictive caches, +and diagnostics. Persisting all of that by default would make project +directories heavy and would require a broader result schema. + +## Decision + +Persist fit configuration, not full runtime fit results. + +Per-experiment calculator selection lives in experiment files. Common +fit configuration and fit-mode settings live in `analysis/analysis.cif`. +Runtime fit outputs such as `analysis.fit_results`, posterior samples, +posterior predictive arrays, summaries, and diagnostics remain +runtime-only unless a later ADR narrows the persisted projection. + +## Consequences + +Saved projects remain focused on reusable model and analysis settings. +Result persistence can be added later through specific ADRs without +making the initial project layout carry large runtime artifacts. diff --git a/docs/dev/adrs/accepted/selector-families.md b/docs/dev/adrs/accepted/selector-families.md new file mode 100644 index 00000000..69899164 --- /dev/null +++ b/docs/dev/adrs/accepted/selector-families.md @@ -0,0 +1,40 @@ +# ADR: Selector Families + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +User-facing API. + +## Context + +Several public names use a selector-like shape, but they do not all mean +the same thing. Treating every `_type` as a switchable category +would blur distinct concepts. + +## Decision + +Recognize three selector families: + +| Family | User intent | Examples | +| ---------------------------- | ------------------------------- | --------------------------------------------------------------------------------- | +| Backend selector | Pick an execution backend | `fitting.minimizer_type`, `calculation.calculator_type`, `rendering.chart_engine` | +| Switchable-category selector | Swap a category implementation | `experiment.background_type`, `experiment.peak_profile_type` | +| Active-sibling selector | Pick the active sibling surface | `analysis.fitting_mode_type` | + +Backend selectors live on dedicated configuration categories. +Switchable-category selectors live on the host because they replace a +category instance. Active-sibling selectors live on the owner and decide +which sibling categories are visible, authoritative, and serialized. + +## Consequences + +Selector names stay familiar without forcing one implementation pattern +onto different concepts. Future selectors should declare which family +they belong to before adding API. diff --git a/docs/dev/adrs/accepted/string-paths-and-live-descriptors.md b/docs/dev/adrs/accepted/string-paths-and-live-descriptors.md new file mode 100644 index 00000000..fa8815a9 --- /dev/null +++ b/docs/dev/adrs/accepted/string-paths-and-live-descriptors.md @@ -0,0 +1,45 @@ +# ADR: String Paths and Live Descriptors + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +User-facing API. + +## Context + +Some APIs refer to persisted CIF-style fields, while other APIs refer to +one concrete live model parameter. Accepting both forms everywhere would +make call sites ambiguous and harder to validate. + +## Decision + +Use CIF-style string paths for setup-time, schema-level, and +cross-experiment selectors: + +```python +sequential_fit_extract.create(target='diffrn.ambient_temperature') +project.display.fit.series( + param=structure.cell.length_a, + versus='diffrn.ambient_temperature', +) +``` + +Use live descriptor or parameter objects when the call targets one exact +model quantity and needs its `unique_name`, description, units, or +runtime value. + +Do not add new APIs that accept both a string path and a live descriptor +unless a later ADR defines a stronger reason. + +## Consequences + +Persisted fields and live model parameters stay distinct. APIs that +operate across experiments can round-trip through CIF paths, while APIs +that operate on one fitted parameter keep direct object references. diff --git a/docs/dev/adrs/accepted/switchable-category-api.md b/docs/dev/adrs/accepted/switchable-category-api.md new file mode 100644 index 00000000..629f01f9 --- /dev/null +++ b/docs/dev/adrs/accepted/switchable-category-api.md @@ -0,0 +1,45 @@ +# ADR: Switchable Category API + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +User-facing API. + +## Context + +Some categories have multiple concrete implementations that users can +switch at runtime, such as background, peak profile, and extinction. +Other categories are fixed by experiment type or have only one current +implementation. + +## Decision + +For multi-type switchable categories, expose the selector on the owner: + +```python +experiment.background_type = 'chebyshev' +experiment.peak_profile_type = 'pseudo-voigt' +``` + +The category object itself remains a read-only property. Switching the +owner-level type replaces the underlying category object. + +Expose `show__types()` on the owner so supported choices can +be filtered by the owner context. + +Do not expose public `_type` selectors for fixed-at-creation categories +or single-implementation categories. Their factories and internal type +attributes may still exist for consistency. + +## Consequences + +Users can tell when a change replaces a whole category implementation. +The public API stays smaller for single-type categories while preserving +the internal factory pattern. diff --git a/docs/dev/adrs/accepted/test-strategy.md b/docs/dev/adrs/accepted/test-strategy.md new file mode 100644 index 00000000..17194c51 --- /dev/null +++ b/docs/dev/adrs/accepted/test-strategy.md @@ -0,0 +1,41 @@ +# ADR: Test Strategy + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Quality. + +## Context + +EasyDiffraction combines core model logic, factories, calculators, +display helpers, tutorials, notebooks, and filesystem persistence. +Regressions can appear at several levels. + +## Decision + +Use layered tests: + +- unit tests for isolated classes and functions +- functional tests for multi-component workflows without heavy external + dependencies +- integration tests for end-to-end behavior with real calculation + engines and data +- script tests for tutorial `.py` files +- notebook tests for generated tutorials + +The unit-test tree mirrors the source tree where practical. The +`test-structure-check` script tracks expected test locations and known +aliases. + +## Consequences + +New features should add focused tests at the lowest useful layer and +broader tests when behavior crosses module boundaries. The mirrored +structure makes missing coverage easier to spot. diff --git a/docs/dev/adrs/accepted/type-neutral-adp-parameters.md b/docs/dev/adrs/accepted/type-neutral-adp-parameters.md new file mode 100644 index 00000000..5609c7a2 --- /dev/null +++ b/docs/dev/adrs/accepted/type-neutral-adp-parameters.md @@ -0,0 +1,42 @@ +# ADR: Type-Neutral ADP Parameters + +## Status + +Accepted. + +## Date + +2026-05-17 + +## Group + +Structure model. + +## Context + +Atomic displacement parameters support CIF-standard B/U and +isotropic/anisotropic variants. Type-specific public names such as +`b_iso` and `u_iso` would require replacing parameter objects when +`adp_type` changes, breaking references, constraints, and free flags. + +## Decision + +Use type-neutral parameter names: + +- `atom_site.adp_iso` +- `atom_site_aniso.adp_11` +- `atom_site_aniso.adp_22` +- `atom_site_aniso.adp_33` +- `atom_site_aniso.adp_12` +- `atom_site_aniso.adp_13` +- `atom_site_aniso.adp_23` + +Use `atom_site.adp_type` to determine whether those values are B or U, +isotropic or anisotropic. Keep `atom_site_aniso` as an always-present +sibling collection and synchronize it from `atom_sites`. + +## Consequences + +Parameter object identity remains stable across ADP type switches. +Serialization and calculator code can branch on `adp_type` without +replacing public parameter objects. diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md new file mode 100644 index 00000000..555b3b54 --- /dev/null +++ b/docs/dev/adrs/index.md @@ -0,0 +1,49 @@ +# Architecture Decision Records + +Project-specific ADRs are kept in this repository. Organization-wide ADR +indexing may exist separately, but repository decisions should stay near +the code they explain. + +The index follows the EasyScience discussion guidance from +[discussion #47](https://github.com/orgs/easyscience/discussions/47): +group decisions by topic first, then use titles and short descriptions +to keep the list easy to scan. If a group becomes too broad, split it; +if a group stays small, keep it as a group rather than adding deeper +folders. + +## Accepted ADRs + +| Group | Title | Short description | Link | +| -------------------- | ----------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | +| Analysis and fitting | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | +| Analysis and fitting | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | +| Core model | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | +| Core model | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | +| Core model | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | +| Core model | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. | [`category-parameter-access.md`](accepted/category-parameter-access.md) | +| Documentation | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | +| Documentation | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | +| Documentation | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | +| Documentation | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | +| Experiment model | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | +| Factories | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | +| Naming | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | +| Persistence | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | +| Persistence | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. | [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | +| Quality | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | +| Quality | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | +| Structure model | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. | [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | +| User-facing API | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | +| User-facing API | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | +| User-facing API | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | +| User-facing API | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | + +## ADR Suggestions + +| Group | Title | Short description | Link | +| -------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | +| Analysis and fitting | Analysis CIF Fit State | Proposes a persisted scalar projection of fit state in `analysis.cif`. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | +| Analysis and fitting | Parameter-Level Posterior Projection and Bayesian Persistence | Proposes saved Bayesian summaries and canonical posterior storage. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | +| Analysis and fitting | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | +| Workspace model | Workspace Root and Project Information Category | Proposes renaming the top-level facade from `Project` to `Workspace` and reserving `project` for project metadata. | [`workspace-root-project-category.md`](suggestions/workspace-root-project-category.md) | diff --git a/docs/dev/ADR-suggestions/adr_analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md similarity index 96% rename from docs/dev/ADR-suggestions/adr_analysis-cif-fit-state.md rename to docs/dev/adrs/suggestions/analysis-cif-fit-state.md index c39fc192..d471bb11 100644 --- a/docs/dev/ADR-suggestions/adr_analysis-cif-fit-state.md +++ b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md @@ -33,7 +33,7 @@ This separation matters because: - `analysis.fit_results` already changes by fit type, but its persisted projection should have a stable analysis-owned home -The current architecture document still describes fit results as +The accepted runtime-fit-results ADR describes fit results as runtime-only. This ADR proposes a narrower persisted projection of the latest fit state, not a direct dump of backend runtime objects. @@ -137,7 +137,7 @@ and Bayesian fitting. Fit-type-specific extensions are layered on top: - Bayesian persistence extends this with `_bayesian_*` categories and an - HDF5 sidecar, as described in `adr_parameter-posterior-summary.md`. + HDF5 sidecar, as described in `parameter-posterior-summary.md`. - Future fit-specific summaries should follow the same pattern: generic shared fields in `_fit_result`, specialized fields in separate categories. @@ -204,7 +204,7 @@ _fit_result.reduced_chi_square 1.031 ### Trade-offs -- The architecture document must be updated because fit state is no +- The runtime fit-results ADR must be updated because fit state is no longer entirely runtime-only. - Analysis persistence becomes more stateful and must be kept in sync with live parameter objects. @@ -214,7 +214,7 @@ _fit_result.reduced_chi_square 1.031 ## Deferred Work - Bayesian-specific categories and HDF5 sidecar details remain in - `adr_parameter-posterior-summary.md`. + `parameter-posterior-summary.md`. - Undo semantics for `start_value` and `start_uncertainty` are defined in a separate ADR. - Correlation-matrix persistence is defined in a separate ADR. diff --git a/docs/dev/ADR-suggestions/adr_parameter-correlation-persistence.md b/docs/dev/adrs/suggestions/parameter-correlation-persistence.md similarity index 100% rename from docs/dev/ADR-suggestions/adr_parameter-correlation-persistence.md rename to docs/dev/adrs/suggestions/parameter-correlation-persistence.md diff --git a/docs/dev/ADR-suggestions/adr_parameter-posterior-summary.md b/docs/dev/adrs/suggestions/parameter-posterior-summary.md similarity index 99% rename from docs/dev/ADR-suggestions/adr_parameter-posterior-summary.md rename to docs/dev/adrs/suggestions/parameter-posterior-summary.md index f81e6447..3de39bf3 100644 --- a/docs/dev/ADR-suggestions/adr_parameter-posterior-summary.md +++ b/docs/dev/adrs/suggestions/parameter-posterior-summary.md @@ -12,8 +12,8 @@ single scalar used by calculations. Bayesian DREAM currently keeps posterior state only on `analysis.fit_results` via `BayesianFitResults`, including `posterior_samples`, `posterior_parameter_summaries`, -`posterior_predictive`, diagnostics, and sampler settings. The current -architecture document describes this state as runtime-only and not +`posterior_predictive`, diagnostics, and sampler settings. The accepted +runtime-fit-results ADR describes this state as runtime-only and not serialized. `analysis.fit_results` already changes by analysis type: deterministic diff --git a/docs/dev/ADR-suggestions/adr_undo-fit.md b/docs/dev/adrs/suggestions/undo-fit.md similarity index 99% rename from docs/dev/ADR-suggestions/adr_undo-fit.md rename to docs/dev/adrs/suggestions/undo-fit.md index 29974130..578d43a3 100644 --- a/docs/dev/ADR-suggestions/adr_undo-fit.md +++ b/docs/dev/adrs/suggestions/undo-fit.md @@ -82,7 +82,7 @@ and is deferred. The minimum persisted state required for clean cross-session undo is the pair of `_fit_parameter.start_value` and `_fit_parameter.start_uncertainty` defined in -`adr_analysis-cif-fit-state.md`. +`analysis-cif-fit-state.md`. If a parameter has no saved `start_value`, `undo_fit()` leaves that parameter unchanged. diff --git a/docs/dev/ADR-suggestions/adr_workspace-root-project-category.md b/docs/dev/adrs/suggestions/workspace-root-project-category.md similarity index 94% rename from docs/dev/ADR-suggestions/adr_workspace-root-project-category.md rename to docs/dev/adrs/suggestions/workspace-root-project-category.md index 04ea8f88..c0d91b9b 100644 --- a/docs/dev/ADR-suggestions/adr_workspace-root-project-category.md +++ b/docs/dev/adrs/suggestions/workspace-root-project-category.md @@ -208,8 +208,8 @@ inside the file keep its domain-specific name. - Public access becomes uniform: `workspace.project`, `workspace.rendering`, `workspace.verbosity`, `workspace.analysis`. - CIF stays semantic and does not introduce `_meta.*`. -- Workspace-level preferences such as rendering and verbosity are - saved with the workspace instead of being hidden runtime-only state. +- Workspace-level preferences such as rendering and verbosity are saved + with the workspace instead of being hidden runtime-only state. - Project information can use short item names such as `id`, `title`, and `description`. - The top-level facade name better reflects active runtime @@ -318,7 +318,7 @@ save/load, display, and analysis orchestration. The implementation should follow: ```text -docs/dev/plan_workspace-root-project-category.md +docs/dev/plans/workspace-root-project-category.md ``` The high-level migration is: @@ -337,9 +337,9 @@ The high-level migration is: 8. Keep CIF tags `_project.*` and `_rendering.*`. 9. Rename saved singleton config file from `project.cif` to `workspace.cif`. -10. Persist workspace verbosity in `workspace.cif` as `_verbosity.level`. -11. Update code, tests, scripts, tutorials, docs, and architecture - references. +10. Persist workspace verbosity in `workspace.cif` as + `_verbosity.level`. +11. Update code, tests, scripts, tutorials, docs, and ADR references. ## Post-Implementation ADR Update @@ -352,10 +352,11 @@ When implementation is complete: 3. Record whether a temporary or permanent `Project` compatibility alias was approved. 4. Record any deviations from the migration plan. -5. Move this file from `docs/dev/ADR-suggestions/` to `docs/dev/ADRs/` - if that is the repository convention for accepted decisions. -6. Update `docs/dev/architecture.md`. -7. Update or close related items in `docs/dev/Issues/issues_open.md`. +5. Move this file from `docs/dev/adrs/suggestions/` to + `docs/dev/adrs/accepted/` if the decision is accepted. +6. Update `docs/dev/adrs/index.md` and related accepted ADRs if the ADR + map changes. +7. Update or close related items in `docs/dev/issues/open.md`. ## Acceptance Criteria @@ -371,4 +372,4 @@ This ADR is satisfied when: - `workspace.cif` uses `_project.*`, `_rendering.*`, and `_verbosity.level` tags. - no `_meta.*` tags are introduced for project information. -- tutorials and architecture documentation use `Workspace`. +- tutorials and accepted ADRs use `Workspace`. diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md deleted file mode 100644 index 768531e7..00000000 --- a/docs/dev/architecture.md +++ /dev/null @@ -1,1631 +0,0 @@ -# EasyDiffraction Architecture - -**Version:** 1.0 -**Date:** 2026-03-24 -**Status:** Living document — updated as the project evolves - ---- - -## 1. Overview - -EasyDiffraction is a Python library for crystallographic diffraction -analysis (Rietveld refinement, pair-distribution-function fitting, -etc.). It models the domain using **CIF-inspired abstractions** — -datablocks, categories, and parameters — while providing a high-level, -user-friendly API through a single `Project` façade. - -### 1.1 Supported Experiment Dimensions - -Every experiment is fully described by four orthogonal axes: - -| Axis | Options | Enum | -| --------------- | ----------------------------------- | -------------------- | -| Sample form | powder, single crystal | `SampleFormEnum` | -| Scattering type | Bragg, total (PDF) | `ScatteringTypeEnum` | -| Beam mode | constant wavelength, time-of-flight | `BeamModeEnum` | -| Radiation probe | neutron, X-ray | `RadiationProbeEnum` | - -> **Planned extensions:** 1D / 2D data dimensionality, polarised / -> unpolarised neutron beam. - -### 1.2 Calculation Engines - -External libraries perform the heavy computation: - -| Engine | Scope | -| --------- | ----------------- | -| `cryspy` | Bragg diffraction | -| `crysfml` | Bragg diffraction | -| `pdffit2` | Total scattering | - ---- - -## 2. Core Abstractions - -All core types live in `core/` which contains **only** base classes and -utilities — no domain logic. - -### 2.1 Object Hierarchy - -```shell -GuardedBase # Controlled attribute access, parent linkage, identity -├── CategoryItem # Single CIF category row (e.g. Cell, Peak, Instrument) -├── CollectionBase # Ordered name→item container -│ ├── CategoryCollection # CIF loop (e.g. AtomSites, Background, Data) -│ └── DatablockCollection # Top-level container (e.g. Structures, Experiments) -└── CategoryOwner # Flat category owner (e.g. Analysis, DatablockItem) - └── DatablockItem # Real CIF data block (e.g. Structure, Experiment) -``` - -`CollectionBase` provides a unified dict-like API over an ordered item -list with name-based indexing. All key operations — `__getitem__`, -`__setitem__`, `__delitem__`, `__contains__`, `remove()` — resolve keys -through a single `_key_for(item)` method that returns -`category_entry_name` for category items or `datablock_entry_name` for -datablock items. Subclasses `CategoryCollection` and -`DatablockCollection` inherit this consistently. - -### 2.2 GuardedBase — Controlled Attribute Access - -`GuardedBase` is the root ABC. It enforces that only **declared -`@property` attributes** are accessible publicly: - -- **`__getattr__`** rejects any attribute not declared as a `@property` - on the class hierarchy. Shows diagnostics with closest-match - suggestions on typos. -- **`__setattr__`** distinguishes: - - **Private** (`_`-prefixed) — always allowed, no diagnostics. - - **Read-only public** (property without setter) — blocked with a - clear error. - - **Writable public** (property with setter) — goes through the - property setter, which is where validation happens. - - **Unknown** — blocked with diagnostics showing allowed writable - attrs. -- **Parent linkage** — when a `GuardedBase` child is assigned to - another, the child's `_parent` is set automatically, forming an - implicit ownership tree. -- **Identity** — every instance gets an `_identity: Identity` object for - lazy CIF-style name resolution (`datablock_entry_name`, - `category_code`, `category_entry_name`) by walking the `_parent` - chain. - -**Key design rule:** if a parameter has a public setter, it is writable -for the user. If only a getter — it is read-only. If internal code needs -to set it, a private method (underscore prefix) is used. See § 2.2.1 -below for the full pattern. - -#### 2.2.1 Public Property Convention — Editable vs Read-Only - -Every public parameter or descriptor exposed on a `GuardedBase` subclass -follows one of two patterns: - -| Kind | Getter | Setter | Internal mutation | -| ------------- | ------ | ------ | ---------------------------------- | -| **Editable** | yes | yes | Via the public setter | -| **Read-only** | yes | no | Via a private `_set_` method | - -**Editable property** — the user can both read and write the value. The -setter runs through `GuardedBase.__setattr__` and into the property -setter, where validation happens: - -```python -@property -def name(self) -> str: - """Human-readable name of the experiment.""" - return self._name - - -@name.setter -def name(self, new: str) -> None: - self._name = new -``` - -**Read-only property** — the user can read but cannot assign. Any -attempt to set the attribute is blocked by `GuardedBase.__setattr__` -with a clear error message. If _internal_ code (factory builders, CIF -loaders, etc.) needs to set the value, it calls a private `_set_` -method instead of exposing a public setter: - -```python -@property -def sample_form(self) -> StringDescriptor: - """Sample form descriptor (read-only for the user).""" - return self._sample_form - - -def _set_sample_form(self, value: str) -> None: - """Internal setter used by factory/CIF code during construction.""" - self._sample_form.value = value -``` - -**Why this matters:** - -- `GuardedBase.__setattr__` uses the presence of a setter to decide - writability. Adding a setter "just for internal use" would open the - attribute to users. -- Private `_set_` methods keep the public API surface minimal and - intention-clear, while remaining greppable and type-safe. -- The pattern avoids string-based dispatch — every mutator has an - explicit named method. - -### 2.3 CategoryItem and CategoryCollection - -**Parameter access pattern.** Users reach any parameter through at most -two levels of navigation from the datablock: - -```python -# CategoryItem → Parameter -structure.cell.length_a = 3.88 -experiment.instrument.setup_wavelength = 1.494 - -# CategoryCollection[item_id] → Parameter -structure.atom_sites['Si'].adp_iso = 0.47 -experiment.background['10'].y = 170 -``` - -The general forms are: - -- `DATABLOCK.CATEGORY_ITEM.PARAMETER` -- `DATABLOCK.CATEGORY_COLLECTION[ITEM_ID].PARAMETER` - -This two-level convention (category then parameter) is a deliberate -design constraint. Categories are never nested inside other categories -(see § 9.7), which keeps the path depth uniform and the API predictable. - -| Aspect | `CategoryItem` | `CategoryCollection` | -| --------------- | ---------------------------------- | ----------------------------------------- | -| CIF analogy | Single category row | Loop (table) of rows | -| Examples | Cell, SpaceGroup, Instrument, Peak | AtomSites, Background, Data, LinkedPhases | -| Parameters | All `GenericDescriptorBase` attrs | Aggregated from all child items | -| Serialisation | `as_cif` / `from_cif` | `as_cif` / `from_cif` | -| Update hook | `_update(called_by_minimizer=)` | `_update(called_by_minimizer=)` | -| Update priority | `_update_priority` (default 10) | `_update_priority` (default 10) | -| Display | `show()` on concrete subclasses | `show()` on concrete subclasses | -| Building items | N/A | `add(item)`, `create(**kwargs)` | - -**Update priority:** lower values run first. This ensures correct -execution order within a datablock (e.g. background before data). - -### 2.4 CategoryOwner, DatablockItem, and DatablockCollection - -`CategoryOwner` is the shared base class for objects that own flat -category siblings. It provides category discovery, category sorting by -`_update_priority`, parameter aggregation, `_need_categories_update` -tracking, and category-body CIF serialization without a `data_` header. - -`DatablockItem` extends `CategoryOwner` for real CIF `data_` blocks. -`Structure` and `ExperimentBase` subclasses are real datablocks. -`Analysis` is also a `CategoryOwner`, but it serializes as a singleton -section body in `analysis/analysis.cif` and does not emit a fake -`data_analysis` header. - -| Aspect | `DatablockItem` | `DatablockCollection` | -| ------------------ | ------------------------------------------- | -------------------------------------------------------- | -| CIF analogy | A single `data_` block | Collection of data blocks | -| Examples | Structure, BraggPdExperiment | Structures, Experiments | -| Category discovery | Scans `vars(self)` for categories | N/A | -| Update cascade | `_update_categories()` — sorted by priority | N/A | -| Parameters | Aggregated from all categories | Aggregated from all datablocks | -| Fittable params | N/A | `Parameter`s not blocked by user or symmetry constraints | -| Free params | N/A | Fittable + `free == True` | -| Dirty flag | `_need_categories_update` | N/A | - -When any `Parameter.value` is set, it propagates -`_need_categories_update = True` up to the owning `CategoryOwner`. -Serialisation (`as_cif`) and plotting trigger `_update_categories()` if -the flag is set. - -### 2.5 Variable System — Parameters and Descriptors - -```shell -GuardedBase -└── GenericDescriptorBase # name, value (validated via AttributeSpec), description - ├── GenericStringDescriptor # _value_type = DataTypes.STRING - └── GenericNumericDescriptor # _value_type = DataTypes.NUMERIC, + units - └── GenericParameter # + free, uncertainty, fit_min, fit_max, user_constrained, symmetry_constrained -``` - -CIF-bound concrete classes add a `CifHandler` for serialisation: - -| Class | Base | Use case | -| ------------------- | -------------------------- | ---------------------------- | -| `StringDescriptor` | `GenericStringDescriptor` | Read-only or writable text | -| `NumericDescriptor` | `GenericNumericDescriptor` | Read-only or writable number | -| `Parameter` | `GenericParameter` | Fittable numeric value | - -**Initialisation rule:** all Parameters/Descriptors are initialised with -their default values from `value_spec` (an `AttributeSpec`) **without -any validation** — we trust internal definitions. Changes go through -public property setters, which run both type and value validation. - -**Mixin safety:** Parameter/Descriptor classes must not have init -arguments so they can be used as mixins safely (e.g. -`PdTofDataPointMixin`). - -### 2.6 Validation - -`AttributeSpec` bundles `default`, `data_type`, `validator`, -`allow_none`. Validators include: - -| Validator | Purpose | -| --------------------- | -------------------------------------- | -| `TypeValidator` | Checks Python type against `DataTypes` | -| `RangeValidator` | `ge`, `le`, `gt`, `lt` bounds checking | -| `MembershipValidator` | Value must be in an allowed set | -| `RegexValidator` | Value must match a pattern | - -### 2.7 String Paths vs Live Descriptors in Public APIs - -Public APIs reference parameters in one of two ways. The choice is not -stylistic — it follows the call site's role: - -- **Setup-time / schema-level APIs use string paths** (CIF-style - `'category.attribute'`). The targeted descriptor may not yet exist on - any concrete object (e.g. an extraction rule applies uniformly to - files about to be loaded), and the value must round-trip through CIF. - Examples: - `sequential_fit_extract.create(target='diffrn.ambient_temperature', ...)`, - alias/constraint definitions persisted in project CIF. -- **Cross-experiment selectors use the same string paths at runtime.** - `project.display.fit.series(..., versus=...)` selects a persisted - `diffrn.*` column in `analysis/results.csv` and the matching field - across experiments; it does not use one experiment's current live - descriptor value. Example: - `project.display.fit.series(param=structure.cell.length_a, versus='diffrn.ambient_temperature')`. -- **Concrete model parameters still use live descriptors.** The call - needs the parameter's `unique_name`, `description`, and `units`, and - it refers to one exact fitted quantity in the model. Example: - `param=structure.cell.length_a` in `project.display.fit.series(...)`. - -When adding a new public API, place it on one side of this rule rather -than accepting both. Use string paths when the value names a persisted -field or cross-experiment selector, and use live descriptors when the -value names one concrete model parameter. - ---- - -## 3. Experiment System - -### 3.1 Experiment Type - -An experiment's type is defined by the four enum axes and is **immutable -after creation**. This avoids the complexity of transforming all -internal state when the experiment type changes. The type is stored in -an `ExperimentType` category with four `StringDescriptor`s validated by -`MembershipValidator`s. Public properties are read-only; factory and -CIF-loading code use private setters (`_set_sample_form`, -`_set_beam_mode`, `_set_radiation_probe`, `_set_scattering_type`) during -construction only. - -### 3.2 Experiment Hierarchy - -```shell -DatablockItem -└── ExperimentBase # name, type: ExperimentType, as_cif - ├── PdExperimentBase # + linked_phases, excluded_regions, peak, data - │ ├── BraggPdExperiment # + instrument, background (both via factories) - │ └── TotalPdExperiment # (no extra categories yet) - └── ScExperimentBase # + linked_crystal, extinction, instrument, data - ├── CwlScExperiment - └── TofScExperiment -``` - -Each concrete experiment class carries: - -- `type_info: TypeInfo` — tag and description for factory lookup -- `compatibility: Compatibility` — which enum axis values it supports - -### 3.3 Category Ownership - -Every experiment owns its categories as private attributes with public -read-only or read-write properties: - -```python -# Read-only — user cannot replace the object, only modify its contents -experiment.linked_phases # CategoryCollection -experiment.excluded_regions # CategoryCollection -experiment.instrument # CategoryItem -experiment.peak # CategoryItem -experiment.data # CategoryCollection (powder / total only) -experiment.refln # CategoryCollection (Bragg powder + single crystal) - -# Type-switchable — recreates the underlying object -experiment.background_type = 'chebyshev' # triggers BackgroundFactory.create(...) -experiment.peak_profile_type = 'thompson-cox-hastings' # triggers PeakFactory.create(...) -experiment.extinction_type = 'becker-coppens' # triggers ExtinctionFactory.create(...) -``` - -**Type switching pattern:** `expt.background_type = 'chebyshev'` rather -than `expt.background.type = 'chebyshev'`. This keeps the API at the -experiment level and makes it clear that the entire category object is -being replaced. - ---- - -## 4. Structure System - -### 4.1 Structure Hierarchy - -```shell -DatablockItem -└── Structure # name, cell, space_group, atom_sites, atom_site_aniso -``` - -A `Structure` contains four categories: - -- `Cell` — unit cell parameters (`CategoryItem`) -- `SpaceGroup` — symmetry information (`CategoryItem`) -- `AtomSites` — atomic positions and isotropic ADP - (`CategoryCollection`) -- `AtomSiteAniso` — anisotropic ADP tensor components - (`CategoryCollection`) - -Symmetry constraints (cell metric, atomic coordinates, ADPs) are applied -via the `crystallography` module during `_update_categories()`. - -Parameters that are fully determined by symmetry (e.g. `lattice_b` in -cubic, `fract_y` of an atom on a 4-fold axis, off-diagonal ADPs forced -to zero by site symmetry) are flagged as `symmetry_constrained = True` -on the `Parameter`. This forces `free = False`; any subsequent attempt -to set `free = True` on such a parameter is ignored with a warning. -Flags are recomputed on every `_update_categories()` so that changing -the space group, Wyckoff letter, or ADP type re-evaluates which -parameters are symmetry constrained. Surface helpers -`cell_symmetry_constrained_flags(...)` and -`atom_site_symmetry_constrained_flags(...)` in `crystallography` expose -the per-key flags. - -### 4.2 Atomic Displacement Parameters (ADP) - -ADP support covers four CIF-standard types: **Biso**, **Uiso**, -**Bani**, **Uani**. The design uses **type-neutral parameter names** so -that switching type is a one-line operation on `adp_type`: parameter -objects stay stable while their values and CIF output names change. - -**Two sibling collections.** Following CIF conventions (`_atom_site` and -`_atom_site_aniso` are separate loops), isotropic and anisotropic data -live in separate collections on `Structure`. Every atom always has an -entry in both collections; when `adp_type` is isotropic, the aniso -parameters hold `0.0` and are ignored by calculators. - -```shell -Structure -├── cell (CategoryItem) -├── space_group (CategoryItem) -├── atom_sites (CategoryCollection of AtomSite) -└── atom_site_aniso (CategoryCollection of AtomSiteAniso) -``` - -**Type-neutral names.** `atom_site.adp_iso` is the isotropic ADP -parameter. Its physical meaning (B or U) is determined by -`atom_site.adp_type`. Similarly, `atom_site_aniso.adp_11`...`adp_23` are -type-neutral tensor components. - -| Parameter | Location | CIF names | -| ---------- | --------------- | -------------------------------------------------------- | -| `adp_type` | `AtomSite` | `_atom_site.adp_type` | -| `adp_iso` | `AtomSite` | `_atom_site.B_iso_or_equiv`, `_atom_site.U_iso_or_equiv` | -| `adp_11` | `AtomSiteAniso` | `_atom_site_aniso.B_11`, `_atom_site_aniso.U_11` | -| `adp_22` | `AtomSiteAniso` | `_atom_site_aniso.B_22`, `_atom_site_aniso.U_22` | -| `adp_33` | `AtomSiteAniso` | `_atom_site_aniso.B_33`, `_atom_site_aniso.U_33` | -| `adp_12` | `AtomSiteAniso` | `_atom_site_aniso.B_12`, `_atom_site_aniso.U_12` | -| `adp_13` | `AtomSiteAniso` | `_atom_site_aniso.B_13`, `_atom_site_aniso.U_13` | -| `adp_23` | `AtomSiteAniso` | `_atom_site_aniso.B_23`, `_atom_site_aniso.U_23` | - -`adp_type` is a `StringDescriptor` validated against `AdpTypeEnum` -(`Biso`, `Uiso`, `Bani`, `Uani`). - -**Dual CIF names.** Each parameter's `CifHandler` carries both CIF name -variants (e.g. -`['_atom_site.B_iso_or_equiv', '_atom_site.U_iso_or_equiv']`). Reading -tries each name until a match is found; writing uses `names[0]`. The -`adp_type` setter reorders the list so the correct tag is emitted. - -For example, switching from `Biso` to `Uiso` makes -`_atom_site.U_iso_or_equiv` the first CIF name on `adp_iso`; switching -from `Bani` to `Uani` does the same for all six `_atom_site_aniso.U_*` -tensor tags. No dynamic CIF handler is needed. - -**Auto-conversion.** Setting `adp_type` triggers value conversion: B ↔ U -via `B = 8π²U`; iso → ani seeds the diagonal; ani → iso averages the -diagonal. - -| Transition | Conversion rule | -| ---------- | --------------------------------------------------------------------- | -| B ↔ U | `B = 8π²U` | -| Iso → Ani | `adp_11 = adp_22 = adp_33 = adp_iso`; off-diagonal terms become `0.0` | -| Ani → Iso | `adp_iso = (adp_11 + adp_22 + adp_33) / 3` | - -**Collection sync.** `Structure._update_categories()` reconciles the two -collections: adds missing aniso entries, removes stale ones, and rekeys -on label rename. Sync follows the datablock dirty-flag pattern: category -or parameter changes mark the structure as needing an update, and the -next serialisation, plot, or fit call performs the reconciliation. - -| Event | Sync action | -| ---------------------------------- | --------------------------------------------------------- | -| Atom added to `atom_sites` | Create matching `AtomSiteAniso` entry with `0.0` defaults | -| Atom removed from `atom_sites` | Remove matching `AtomSiteAniso` entry | -| Atom label renamed in `atom_sites` | Rekey the matching `AtomSiteAniso` entry | - -**User-facing access.** ADP parameters follow the same two-level access -pattern as other category parameters: - -```python -structure.atom_sites['Si'].adp_type = 'Biso' -structure.atom_sites['Si'].adp_iso = 0.47 -structure.atom_site_aniso['Si'].adp_11 = 0.05 -``` - -Creating an atom uses `adp_iso`; the matching `atom_site_aniso` entry is -created by `_update_categories()`: - -```python -structure.atom_sites.create( - label='Si', - type_symbol='Si', - fract_x=0.0, - fract_y=0.0, - fract_z=0.0, - adp_type='Biso', - adp_iso=0.47, -) -``` - -**Design rule.** ADP parameter names are type-neutral because -type-specific names (`b_iso`, `u_iso`, etc.) would require replacing -parameter objects when the ADP type changes, which would break -constraints, free flags, and existing references. The always-present -`atom_site_aniso` collection avoids conditional branches in -serialisation, calculators, parameter tables, constraint wiring, and UI. - ---- - -## 5. Factory System - -### 5.1 FactoryBase - -All factories inherit from `FactoryBase`, which provides: - -| Feature | Method / Attribute | Description | -| ------------------ | ---------------------------- | ------------------------------------------------- | -| Registration | `@Factory.register` | Class decorator, appends to `_registry` | -| Supported map | `_supported_map()` | `{tag: class}` from all registered classes | -| Creation | `create(tag)` | Instantiate by tag string | -| Default resolution | `default_tag(**conditions)` | Largest-subset matching on `_default_rules` | -| Context creation | `create_default_for(**cond)` | Resolve tag → create | -| Filtered query | `supported_for(**filters)` | Filter by `Compatibility` and `CalculatorSupport` | -| Display | `show_supported(**filters)` | Pretty-print table of type + description | -| Tag listing | `supported_tags()` | List of all registered tags | - -Each `__init_subclass__` gives every factory its own independent -`_registry` and `_default_rules`. - -### 5.2 Default Rules - -`_default_rules` maps frozensets of `(axis_name, enum_value)` tuples to -tag strings (preferably enum values for type safety): - -```python -class PeakFactory(FactoryBase): - _default_rules = { - frozenset({ - ('scattering_type', ScatteringTypeEnum.BRAGG), - ('beam_mode', BeamModeEnum.CONSTANT_WAVELENGTH), - }): PeakProfileTypeEnum.CWL_PSEUDO_VOIGT, - frozenset({ - ('scattering_type', ScatteringTypeEnum.BRAGG), - ('beam_mode', BeamModeEnum.TIME_OF_FLIGHT), - }): PeakProfileTypeEnum.TOF_JORGENSEN, - frozenset({ - ('scattering_type', ScatteringTypeEnum.TOTAL), - }): PeakProfileTypeEnum.TOTAL_GAUSSIAN_DAMPED_SINC, - } -``` - -Resolution uses **largest-subset matching**: the rule whose frozenset is -the biggest subset of the given conditions wins. `frozenset()` acts as a -universal fallback. - -### 5.3 Metadata on Registered Classes - -Every `@Factory.register`-ed class carries three frozen dataclass -attributes: - -```python -@PeakFactory.register -class CwlPseudoVoigt(PeakBase, CwlBroadeningMixin): - type_info = TypeInfo( - tag=PeakProfileTypeEnum.CWL_PSEUDO_VOIGT.value, - description=PeakProfileTypeEnum.CWL_PSEUDO_VOIGT.description(), - ) - compatibility = Compatibility( - scattering_type=frozenset({ScatteringTypeEnum.BRAGG}), - beam_mode=frozenset({BeamModeEnum.CONSTANT_WAVELENGTH}), - ) - calculator_support = CalculatorSupport( - calculators=frozenset({CalculatorEnum.CRYSPY, CalculatorEnum.CRYSFML}), - ) -``` - -| Metadata | Purpose | -| ------------------- | ------------------------------------------------------- | -| `TypeInfo` | Stable tag for lookup/serialisation + human description | -| `Compatibility` | Which enum axis values this class works with | -| `CalculatorSupport` | Which calculation engines support this class | - -### 5.4 Registration Trigger - -Concrete classes use `@Factory.register` decorators. To trigger -registration, each package's `__init__.py` must **explicitly import** -every concrete class: - -```python -# datablocks/experiment/categories/background/__init__.py -from .chebyshev import ChebyshevPolynomialBackground -from .line_segment import LineSegmentBackground -``` - -### 5.5 All Factories - -| Factory | Domain | Tags resolve to | -| ------------------------ | ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `BackgroundFactory` | Background categories | `LineSegmentBackground`, `ChebyshevPolynomialBackground` | -| `PeakFactory` | Peak profiles | `CwlPseudoVoigt`, `TofJorgensen`, `TofJorgensenVonDreele`, … | -| `InstrumentFactory` | Instruments | `CwlPdInstrument`, `TofPdInstrument`, … | -| `DataFactory` | Data collections | `PdCwlData`, `PdTofData`, `TotalData` | -| `ReflnFactory` | Reflection collections | `ReflnData`, `PowderCwlReflnData`, `PowderTofReflnData` | -| `ExtinctionFactory` | Extinction models | `BeckerCoppensExtinction` | -| `LinkedCrystalFactory` | Linked-crystal refs | `LinkedCrystal` | -| `ExcludedRegionsFactory` | Excluded regions | `ExcludedRegions` | -| `LinkedPhasesFactory` | Linked phases | `LinkedPhases` | -| `ExperimentTypeFactory` | Experiment descriptors | `ExperimentType` | -| `CellFactory` | Unit cells | `Cell` | -| `SpaceGroupFactory` | Space groups | `SpaceGroup` | -| `AtomSitesFactory` | Atom sites | `AtomSites` | -| `AtomSiteAnisoFactory` | Anisotropic ADPs | `AtomSiteAnisoCollection` | -| `AliasesFactory` | Parameter aliases | `Aliases` | -| `ConstraintsFactory` | Parameter constraints | `Constraints` | -| `FitModeFactory` | Fit-mode category | `FitMode` | -| `JointFitFactory` | Joint-fit weights | `JointFitCollection` | -| `CalculatorFactory` | Calculation engines | `CryspyCalculator`, `CrysfmlCalculator`, `PdffitCalculator` | -| `MinimizerFactory` | Minimisers | `LmfitMinimizer`, `LmfitLeastsqMinimizer`, `LmfitLeastSquaresMinimizer`, `DfolsMinimizer`, `BumpsMinimizer`, `BumpsLmMinimizer`, `BumpsDreamMinimizer`, `BumpsAmoebaMinimizer`, `BumpsDEMinimizer` | - -> **Note:** `ExperimentFactory` and `StructureFactory` are _builder_ -> factories with `from_cif_path`, `from_cif_str`, `from_data_path`, and -> `from_scratch` classmethods. `ExperimentFactory` inherits -> `FactoryBase` and uses `@register` on all four concrete experiment -> classes; `_resolve_class` looks up the registered class via -> `default_tag()` + `_supported_map()`. `StructureFactory` is a plain -> class without `FactoryBase` inheritance (only one structure type -> exists today). - -### 5.6 Tag Naming Convention - -Canonical tags are the stable identifiers for factory lookup and -serialisation. User-facing APIs may expose context-local aliases when -the owner object already provides enough context to disambiguate the -choice (for example, `experiment.peak_profile_type = 'pseudo-voigt'` -inside a CWL or TOF experiment). Canonical tags must be: - -- **Consistent** — use the same abbreviations everywhere. -- **Hyphen-separated** — all lowercase, words joined by hyphens. -- **Semantically ordered** — from general to specific. -- **Unique within a factory** — but may overlap across factories. - -#### Standard Abbreviations - -| Concept | Abbreviation | Never use | -| ------------------- | ------------ | --------------------------- | -| Powder | `pd` | `powder` | -| Single crystal | `sc` | `single-crystal` | -| Constant wavelength | `cwl` | `cw`, `constant-wavelength` | -| Time-of-flight | `tof` | `time-of-flight` | -| Bragg (scattering) | `bragg` | | -| Total (scattering) | `total` | | - -#### Complete Tag Registry - -**Background tags** - -| Tag | Class | -| -------------- | ------------------------------- | -| `line-segment` | `LineSegmentBackground` | -| `chebyshev` | `ChebyshevPolynomialBackground` | - -**Peak tags** - -Canonical peak tags are globally unique within `PeakFactory`. The -experiment-facing `peak_profile_type` getter, setter, and supported-type -display use context-local aliases so users do not need to type `cwl-`, -`tof-`, or `total-` when the experiment context already disambiguates -the choice. - -| Canonical tag | Local alias | Class | -| -------------------------------------- | ------------------------------------ | ---------------------------------- | -| `cwl-pseudo-voigt` | `pseudo-voigt` | `CwlPseudoVoigt` | -| `cwl-pseudo-voigt-empirical-asymmetry` | `pseudo-voigt + empirical asymmetry` | `CwlPseudoVoigtEmpiricalAsymmetry` | -| `cwl-thompson-cox-hastings` | `thompson-cox-hastings` | `CwlThompsonCoxHastings` | -| `tof-pseudo-voigt` | `pseudo-voigt` | `TofPseudoVoigt` | -| `tof-jorgensen` | `jorgensen` | `TofJorgensen` | -| `tof-jorgensen-von-dreele` | `jorgensen-von-dreele` | `TofJorgensenVonDreele` | -| `tof-double-jorgensen-von-dreele` | `double-jorgensen-von-dreele` | `TofDoubleJorgensenVonDreele` | -| `total-gaussian-damped-sinc` | `gaussian-damped-sinc` | `TotalGaussianDampedSinc` | - -**Instrument tags** - -| Tag | Class | -| -------- | ----------------- | -| `cwl-pd` | `CwlPdInstrument` | -| `cwl-sc` | `CwlScInstrument` | -| `tof-pd` | `TofPdInstrument` | -| `tof-sc` | `TofScInstrument` | - -**Data tags** - -| Tag | Class | -| -------------- | ----------- | -| `bragg-pd` | `PdCwlData` | -| `bragg-pd-tof` | `PdTofData` | -| `total-pd` | `TotalData` | - -**Refln tags** - -| Tag | Class | -| -------------------- | -------------------- | -| `bragg-sc` | `ReflnData` | -| `bragg-pd-refln` | `PowderCwlReflnData` | -| `bragg-pd-tof-refln` | `PowderTofReflnData` | - -**Extinction tags** - -| Tag | Class | -| ---------------- | ------------------------- | -| `becker-coppens` | `BeckerCoppensExtinction` | - -**Linked-crystal tags** - -| Tag | Class | -| --------- | --------------- | -| `default` | `LinkedCrystal` | - -**Experiment tags** - -| Tag | Class | -| -------------- | ------------------- | -| `bragg-pd` | `BraggPdExperiment` | -| `total-pd` | `TotalPdExperiment` | -| `bragg-sc-cwl` | `CwlScExperiment` | -| `bragg-sc-tof` | `TofScExperiment` | - -**Calculator tags** - -| Tag | Class | -| --------- | ------------------- | -| `cryspy` | `CryspyCalculator` | -| `crysfml` | `CrysfmlCalculator` | -| `pdffit` | `PdffitCalculator` | - -**Minimizer tags** - -| Tag | Class | -| ----------------------- | ---------------------------- | -| `lmfit` | `LmfitMinimizer` | -| `lmfit (leastsq)` | `LmfitLeastsqMinimizer` | -| `lmfit (least_squares)` | `LmfitLeastSquaresMinimizer` | -| `dfols` | `DfolsMinimizer` | -| `bumps` | `BumpsMinimizer` | -| `bumps (lm)` | `BumpsLmMinimizer` | -| `bumps (dream)` | `BumpsDreamMinimizer` | -| `bumps (amoeba)` | `BumpsAmoebaMinimizer` | -| `bumps (de)` | `BumpsDEMinimizer` | - -### 5.7 Metadata Classification — Which Classes Get What - -#### The Rule - -> **If a concrete class is created by a factory, it gets `type_info`, -> `compatibility`, and `calculator_support`.** -> -> **If a `CategoryItem` only exists as a child row inside a -> `CategoryCollection`, it does NOT get these attributes — the -> collection does.** - -#### Rationale - -A `LineSegment` item (a single background control point) is never -selected, created, or queried by a factory. It is always instantiated -internally by its parent `LineSegmentBackground` collection. The -meaningful unit of selection is the _collection_, not the item. The user -picks "line-segment background" (the collection type), not individual -line-segment points. - -#### Singleton CategoryItems — factory-created (get all three) - -| Class | Factory | -| ---------------------------------- | ----------------------- | -| `CwlPdInstrument` | `InstrumentFactory` | -| `CwlScInstrument` | `InstrumentFactory` | -| `TofPdInstrument` | `InstrumentFactory` | -| `TofScInstrument` | `InstrumentFactory` | -| `CwlPseudoVoigt` | `PeakFactory` | -| `CwlPseudoVoigtEmpiricalAsymmetry` | `PeakFactory` | -| `CwlThompsonCoxHastings` | `PeakFactory` | -| `TofJorgensen` | `PeakFactory` | -| `TofJorgensenVonDreele` | `PeakFactory` | -| `TofDoubleJorgensenVonDreele` | `PeakFactory` | -| `TotalGaussianDampedSinc` | `PeakFactory` | -| `BeckerCoppensExtinction` | `ExtinctionFactory` | -| `LinkedCrystal` | `LinkedCrystalFactory` | -| `Cell` | `CellFactory` | -| `SpaceGroup` | `SpaceGroupFactory` | -| `ExperimentType` | `ExperimentTypeFactory` | -| `FitMode` | `FitModeFactory` | - -#### CategoryCollections — factory-created (get all three) - -| Class | Factory | -| ------------------------------- | ------------------------ | -| `LineSegmentBackground` | `BackgroundFactory` | -| `ChebyshevPolynomialBackground` | `BackgroundFactory` | -| `PdCwlData` | `DataFactory` | -| `PdTofData` | `DataFactory` | -| `TotalData` | `DataFactory` | -| `ReflnData` | `ReflnFactory` | -| `PowderCwlReflnData` | `ReflnFactory` | -| `PowderTofReflnData` | `ReflnFactory` | -| `ExcludedRegions` | `ExcludedRegionsFactory` | -| `LinkedPhases` | `LinkedPhasesFactory` | -| `AtomSites` | `AtomSitesFactory` | -| `AtomSiteAnisoCollection` | `AtomSiteAnisoFactory` | -| `Aliases` | `AliasesFactory` | -| `Constraints` | `ConstraintsFactory` | -| `JointFitCollection` | `JointFitFactory` | - -#### CategoryItems that are ONLY children of collections (NO metadata) - -| Class | Parent collection | -| ---------------- | ------------------------------- | -| `LineSegment` | `LineSegmentBackground` | -| `PolynomialTerm` | `ChebyshevPolynomialBackground` | -| `AtomSite` | `AtomSites` | -| `AtomSiteAniso` | `AtomSiteAnisoCollection` | -| `PdCwlDataPoint` | `PdCwlData` | -| `PdTofDataPoint` | `PdTofData` | -| `TotalDataPoint` | `TotalData` | -| `Refln` | `ReflnData` | -| `LinkedPhase` | `LinkedPhases` | -| `ExcludedRegion` | `ExcludedRegions` | -| `Alias` | `Aliases` | -| `Constraint` | `Constraints` | -| `JointFitItem` | `JointFitCollection` | - -#### Non-category classes — factory-created (get `type_info` only) - -| Class | Factory | Notes | -| ------------------- | ------------------- | -------------------------------------------------------- | -| `CryspyCalculator` | `CalculatorFactory` | No `compatibility` — limitations expressed on categories | -| `CrysfmlCalculator` | `CalculatorFactory` | (same) | -| `PdffitCalculator` | `CalculatorFactory` | (same) | -| `LmfitMinimizer` | `MinimizerFactory` | `type_info` only | -| `DfolsMinimizer` | `MinimizerFactory` | (same) | -| `BumpsMinimizer` | `MinimizerFactory` | (same) | -| `BraggPdExperiment` | `ExperimentFactory` | `type_info` + `compatibility` (no `calculator_support`) | -| `TotalPdExperiment` | `ExperimentFactory` | (same) | -| `CwlScExperiment` | `ExperimentFactory` | (same) | -| `TofScExperiment` | `ExperimentFactory` | (same) | - ---- - -## 6. Analysis - -### 6.1 Calculator - -The calculator performs the actual diffraction computation. It is -attached per-experiment on the `ExperimentBase` object. Each experiment -auto-resolves its calculator on first access based on the experiment's -active support category (`data` for powder, `refln` for Bragg -single-crystal) `calculator_support` metadata and -`CalculatorFactory._default_rules`. The `CalculatorFactory` filters its -registry by `engine_imported` (whether the third-party library is -available in the environment). - -The experiment exposes a dedicated `calculation` category: - -- `calculation.calculator_type` — getter + setter for the calculator - backend tag -- `calculation.calculator` — read-only access to the live backend - instance -- `calculation.show_calculator_types()` — filtered by the active support - category and marks the current type - -### 6.2 Minimiser - -The minimiser drives the optimisation loop. `MinimizerFactory` creates -instances by tag (e.g. `'lmfit'`, `'dfols'`, `'bumps'`). - -### 6.3 Fitter - -`Fitter` wraps a minimiser instance and orchestrates the fitting -workflow: - -1. Collect `free_parameters` from structures + experiments. -2. Record start values. -3. Build an objective function that calls the calculator. -4. Delegate to `minimizer.fit()`. -5. Sync results (values + uncertainties) back to parameters. - -### 6.4 Analysis Object - -`Analysis` is bound to a `Project` and provides the high-level API: - -- Singleton section: `Analysis` is a `CategoryOwner`, not a - `DatablockItem`. It owns sibling categories and serializes as the body - of `analysis/analysis.cif` without a `data_` header. -- Fit configuration: `fitting` (`CategoryItem` with `minimizer_type`). - `fitting.minimizer_type` selects the minimizer backend. The active - fitting mode lives on the owner as `analysis.fitting_mode_type`, not - as a nested child category field. `fitting.show_minimizer_types()` - lists supported minimizers. -- Joint-fit weights: `joint_fit` (`CategoryCollection` of per-experiment - weight entries); sibling of `fitting`, not a child. -- Fit results: `analysis.fit_results` stores the latest runtime result - object. This is `FitResults` for deterministic fits and - `BayesianFitResults` for Bayesian DREAM runs. -- Parameter tables: `show_all_params()`, `show_fittable_params()`, - `show_free_params()`, `how_to_access_parameters()` Compact - summary-style parameter displays intentionally hide the large - loop-backed experiment categories `pd_data`, `total_data`, and `refln` - in `all()`, `access()`, and `cif_uids()` so the output stays readable. -- Fitting: `fitting.minimizer_type` stores the shared minimizer - selection; `fitting_mode_type` stores the active mode on `Analysis` - itself; `fit()` dispatches to the current mode using the persisted - sibling categories `joint_fit`, `sequential_fit`, and - `sequential_fit_extract`. `display.fit_results()` dispatches through - the active runtime result object. -- Aliases and constraints (single-type categories; no public `_type` - getter or setter) - -#### 6.4.1 Bayesian DREAM Runtime Results - -Bayesian sampling is integrated as a normal minimizer selection with tag -`'bumps (dream)'`. It does not create a parallel `Analysis` stack or a -new persisted results category. - -- `BayesianFitResults` extends `FitResults` with runtime-only posterior - state such as `posterior_samples`, `posterior_parameter_summaries`, - `posterior_predictive`, `diagnostics`, and `sampler_settings`. -- Posterior arrays and predictive caches remain runtime-only; they are - not serialized into CIF or project directories. -- `sampler_settings` records the resolved stochastic settings actually - used for the run, including `random_seed`, `steps`, `burn`, `thin`, - `pop`, and `parallel`. -- The current user-facing DREAM controls live on the active minimizer - object, for example `project.analysis.fitting.minimizer.steps`, - `burn`, `thin`, `pop`, `parallel`, and `init`. -- `plot_param_correlations()` uses posterior samples when available and - otherwise falls back to deterministic covariance or engine-derived - correlations. -- Bayesian-only plotting methods are exposed explicitly rather than by - overloading deterministic plot calls: `plot_posterior_pairs()`, - `plot_param_distribution(param)`, and - `plot_posterior_predictive(expt_name, ...)`. - ---- - -## 7. Project — The Top-Level Façade - -`Project` is the single entry point for the user: - -```python -import easydiffraction as ed - -project = ed.Project(name='my_project') -``` - -It owns and coordinates all components: - -| Property | Type | Description | -| --------------------- | ---------------- | ---------------------------------------- | -| `project.info` | `ProjectInfo` | Metadata: name, title, description, path | -| `project.structures` | `Structures` | Collection of structure datablocks | -| `project.experiments` | `Experiments` | Collection of experiment datablocks | -| `project.rendering` | `Rendering` | Plot/table engine selection | -| `project.display` | `ProjectDisplay` | Pattern/report facade | -| `project.analysis` | `Analysis` | Minimiser, fitting, aliases, constraints | -| `project.summary` | `Summary` | Report generation | -| `project.verbosity` | `str` | Console output level (full/short/silent) | - -Internally, `Project` keeps project-scoped singleton categories under a -private `ProjectConfig(CategoryOwner)` object. That owner currently -holds `project.info` (`ProjectInfo`) and `project.rendering` -(`Rendering`), while the public access paths stay flat on `Project` for -user discoverability. - -### 7.1 Data Flow - -``` -Parameter.value set - → AttributeSpec validation (type + value) - → _need_categories_update = True (on parent CategoryOwner) - -Plot / CIF export / fit objective evaluation - → _update_categories() - → categories sorted by _update_priority - → each category._update() - → background: interpolate/evaluate → write to data - → calculator: compute pattern → write to data - → _need_categories_update = False -``` - -### 7.2 Persistence - -Projects are saved as a directory of CIF files: - -```shell -project_dir/ -├── project.cif # ProjectConfig categories (info + rendering) -├── summary.cif # Summary report -├── structures/ -│ └── lbco.cif # One file per structure -├── experiments/ -│ └── hrpt.cif # One file per experiment -└── analysis/ - └── analysis.cif # Analysis settings -``` - -`project.cif` serializes the private `ProjectConfig` owner without a -`data_` header. It carries both the `_project.*` metadata category and -the `_rendering.*` engine preferences (`chart_engine`, `table_engine`), -so a saved project re-opens with the same display backends. -Per-experiment calculator selection (`_calculation.calculator_type`) -lives in each experiment file, and fit configuration -(`_fitting.minimizer_type`, `_fitting.mode_type`) lives in -`analysis/analysis.cif`. Runtime fit outputs, including -`analysis.fit_results`, posterior chains, posterior predictive -summaries, and convergence diagnostics, are not serialized. - -### 7.3 Verbosity - -`Project.verbosity` controls how much console output operations produce. -It is backed by `VerbosityEnum` (in `utils/enums.py`) and accepts three -values: - -| Level | Enum member | Behaviour | -| -------- | ---------------------- | -------------------------------------------------- | -| `full` | `VerbosityEnum.FULL` | Multi-line output with headers, tables, and detail | -| `short` | `VerbosityEnum.SHORT` | One-line status message per action | -| `silent` | `VerbosityEnum.SILENT` | No console output | - -The default is `'full'`. - -```python -project.verbosity = 'short' -``` - -**Resolution order:** methods that produce console output (e.g. -`analysis.fit()`, `experiments.add_from_data_path()`) read -`project.verbosity`. - -```python -# Use project-level default for all operations -project.verbosity = 'short' -project.analysis.fit() # → short mode - -# Override for one fit, then restore the project default -original_verbosity = project.verbosity -project.verbosity = 'silent' -project.analysis.fit() # → silent -project.verbosity = original_verbosity -``` - -**Output styles per level:** - -- **Data loading** — `full`: paragraph header + detail line; `short`: - `✅ Data loaded: Experiment 🔬 'name'. N points.`; `silent`: nothing. -- **Fitting** — `full`: per-iteration progress table with improvement - percentages; `short`: one-row-per-experiment summary table; `silent`: - nothing. - ---- - -## 8. User-Facing API Patterns - -All examples below are drawn from the actual tutorials (`tutorials/`). - -> **Notebook workflow:** Jupyter notebooks (`*.ipynb`) in -> `docs/docs/tutorials/` are generated artifacts. Edit only the -> corresponding `*.py` script, then run `pixi run notebook-prepare` to -> regenerate the notebook. Never edit `*.ipynb` files by hand. - -### 8.1 Project Setup - -```python -import easydiffraction as ed - -project = ed.Project(name='lbco_hrpt') -project.info.title = 'La0.5Ba0.5CoO3 at HRPT@PSI' -project.save_as(dir_path='lbco_hrpt', temporary=True) -``` - -### 8.2 Define Structures - -```python -# Create a structure datablock -project.structures.create(name='lbco') - -# Set space group and unit cell -project.structures['lbco'].space_group.name_h_m = 'P m -3 m' -project.structures['lbco'].cell.length_a = 3.88 - -# Add atom sites -project.structures['lbco'].atom_sites.create( - label='La', - type_symbol='La', - fract_x=0, - fract_y=0, - fract_z=0, - wyckoff_letter='a', - adp_iso=0.5, - occupancy=0.5, -) - -# Show as CIF -project.structures['lbco'].show_as_cif() -``` - -### 8.3 Define Experiments - -```python -# Download data and create experiment from a data file -data_path = ed.download_data(id=3, destination='data') -project.experiments.add_from_data_path( - name='hrpt', - data_path=data_path, - sample_form='powder', - beam_mode='constant wavelength', - radiation_probe='neutron', -) - -# Set instrument parameters -project.experiments['hrpt'].instrument.setup_wavelength = 1.494 -project.experiments['hrpt'].instrument.calib_twotheta_offset = 0.6 - -# Browse and select peak profile type -project.experiments['hrpt'].show_peak_profile_types() -project.experiments['hrpt'].peak_profile_type = 'pseudo-voigt' - -# Set peak profile parameters -project.experiments['hrpt'].peak.broad_gauss_u = 0.1 -project.experiments['hrpt'].peak.broad_gauss_v = -0.1 - -# Browse and select background type -project.experiments['hrpt'].show_background_types() -project.experiments['hrpt'].background_type = 'line-segment' - -# Add background points -project.experiments['hrpt'].background.create(id='10', x=10, y=170) -project.experiments['hrpt'].background.create(id='50', x=50, y=170) - -# Link structure to experiment -project.experiments['hrpt'].linked_phases.create(id='lbco', scale=10.0) -``` - -### 8.4 Analysis and Fitting - -```python -# Calculator is auto-resolved per experiment; override if needed -project.experiments['hrpt'].calculation.show_calculator_types() -project.experiments['hrpt'].calculation.calculator_type = 'cryspy' -project.analysis.fitting.minimizer_type = 'lmfit' - -# Plot before fitting -project.display.pattern(expt_name='hrpt') - -# Select free parameters -project.structures['lbco'].cell.length_a.free = True -project.experiments['hrpt'].linked_phases['lbco'].scale.free = True -project.experiments['hrpt'].instrument.calib_twotheta_offset.free = True -project.experiments['hrpt'].background['10'].y.free = True - -# Inspect free parameters -project.display.parameters.free() - -# Fit and show results -project.analysis.fit() -project.display.fit.results() - -# Plot after fitting -project.display.pattern(expt_name='hrpt') - -# Save -project.save() -``` - -### 8.4.1 Bayesian Refinement - -```python -# Deterministic pre-fit remains explicit -project.analysis.fitting.minimizer_type = 'bumps (lm)' -project.analysis.fit() - -# Switch to Bayesian sampling using the same entry point -project.analysis.fitting.minimizer_type = 'bumps (dream)' -project.analysis.fitting.minimizer.steps = 1000 -project.analysis.fitting.minimizer.parallel = 0 -project.analysis.fit() - -# Runtime-only Bayesian summaries and plots -project.display.fit.results() -project.display.fit.correlations() -project.display.posterior.pairs() -project.display.posterior.distribution(param) -project.display.posterior.predictive(expt_name='hrpt') -``` - -### 8.5 TOF Experiment (tutorial ed-7) - -```python -expt = ExperimentFactory.from_data_path( - name='sepd', - data_path=data_path, - beam_mode='time-of-flight', -) -expt.instrument.calib_d_to_tof_offset = 0.0 -expt.instrument.calib_d_to_tof_linear = 7476.91 -expt.peak_profile_type = 'jorgensen' -expt.peak.broad_gauss_sigma_0 = 3.0 -``` - -### 8.6 Total Scattering / PDF (tutorial ed-12) - -```python -project.experiments.add_from_data_path( - name='xray_pdf', - data_path=data_path, - sample_form='powder', - beam_mode='constant wavelength', - radiation_probe='xray', - scattering_type='total', -) -project.experiments['xray_pdf'].peak_profile_type = 'gaussian-damped-sinc' -# Calculator is auto-resolved to 'pdffit' for total scattering experiments -``` - ---- - -## 9. Design Principles - -### 9.1 Naming and CIF Conventions - -- Follow CIF naming conventions where possible. Deviate for better API - design when necessary, but keep the spirit of CIF names. -- Reuse the concept of datablocks and categories from CIF. -- `DatablockItem` = one CIF `data_` block, `DatablockCollection` = set - of blocks. -- `CategoryItem` = one CIF category, `CategoryCollection` = CIF loop. -- **Free-flag encoding**: A parameter's free/fixed status is encoded in - CIF via uncertainty brackets. `3.89` = fixed, `3.89(2)` = free with - esd, `3.89()` = free without esd. There is no separate list of free - parameters; the brackets are the single source of truth. - -### 9.2 Immutability of Experiment Type - -The experiment type (the four enum axes) can only be set at creation -time. It cannot be changed afterwards. This avoids the complexity of -maintaining different state transformations when switching between -fundamentally different experiment configurations. - -### 9.3 Category Type Switching - -In contrast to experiment type, categories that have multiple -implementations (peak profiles, backgrounds, instruments) can be -switched at runtime by the user. The API pattern uses a type property on -the **experiment**, not on the category itself: - -```python -# ✅ Correct — type property on the experiment -expt.background_type = 'chebyshev' - -# ❌ Not used — type property on the category -expt.background.type = 'chebyshev' -``` - -This makes it clear that the entire category object is being replaced -and simplifies maintenance. - -### 9.4 Switchable-Category Convention - -Categories whose concrete implementation can be swapped at runtime -(background, peak profile, etc.) are called **switchable categories**. -**Every category must be factory-based** — even if only one -implementation exists today. This ensures uniform construction, -consistent metadata, and makes adding a second implementation trivial. - -For categories with **multiple implementations** (multi-type), the owner -exposes the full switchable API: - -| Facet | Naming pattern | Example | -| --------------- | -------------------------------------------- | ------------------------------------------------ | -| Current object | `` property (read-only) | `expt.background`, `expt.peak` | -| Active type tag | `_type` property (getter + setter) | `expt.background_type`, `expt.peak_profile_type` | -| Show types | `show__types()` | `expt.show_background_types()` | - -Multi-type categories: - -- **Experiment:** `background_type`, `peak_profile_type`, - `extinction_type`. - -Categories that are **fixed at creation** (determined by the experiment -type and never changed) expose only a read-only `` property -with no `_type` getter, setter, or show methods: - -- **Experiment:** `instrument`, `data`, `refln`. - -For categories with **only one implementation** (single-type), the -`_type` getter, setter, and show methods are omitted from the public API -to avoid clutter. The factory and `_type` attribute still exist -internally for consistency and future extensibility. - -Single-type categories (no public `_type` property): - -- **Experiment:** `diffrn`, `linked_crystal`, `excluded_regions`, - `linked_phases`. -- **Structure:** `cell`, `space_group`, `atom_sites`, `atom_site_aniso`. -- **Analysis:** `aliases`, `constraints`, `fitting`, `sequential_fit`. - -`fitting` is a dedicated analysis configuration category, but the fit -mode selector lives on the owner as `analysis.fitting_mode_type`. This -is the project's active-sibling selector pattern: the owner stores the -authoritative mode and decides which sibling categories are active, -shown in help, and serialized. `joint_fit`, `sequential_fit`, and -`sequential_fit_extract` remain direct `Analysis` siblings even when -inactive. See the fit-mode ADR for the full contract: -[`adr_fit-mode-categories.md`](ADRs/adr_fit-mode-categories.md). -Likewise, `calculation` is a dedicated experiment category that owns -calculator selection — `experiment.calculation.calculator_type` and -`experiment.calculation.show_calculator_types()` — instead of the -selector being exposed at the experiment owner level. The same pattern -applies to `display` on `Project`, which owns `chart_engine` and -`table_engine` (see §9.4.1). - -**Design decisions:** - -- The **experiment owns** the `_type` setter because switching replaces - the entire category object - (`self._background = BackgroundFactory.create(...)`). -- The **experiment owns** the `show_*` methods because they are - one-liners that delegate to `Factory.show_supported(...)` and can pass - experiment-specific context (e.g. `scattering_type`, `beam_mode` for - peak filtering). -- Concrete category subclasses provide a public `show()` method for - displaying the current content (not on the base - `CategoryItem`/`CategoryCollection`). - -#### 9.4.1 Selector Families - -Not every `_type` attribute represents the same kind of choice. The API -recognises three distinct selector families. They share a similar -`_type` shape so the user can inspect and set them uniformly, but -their intent and ownership differ: - -| Family | User intent | Examples | CIF | -| ---------------------------------- | ------------------------------- | --------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------ | -| Backend selector | Pick an execution backend | `fitting.minimizer_type`, `calculation.calculator_type`, `rendering.chart_engine` | `_fitting.minimizer_type`, `_calculation.calculator_type`, `_rendering.chart_engine` | -| Switchable-category impl. selector | Swap a category implementation | `experiment.background_type`, `experiment.peak_profile_type` | category-owned type tag such as `_peak.profile_type` | -| Active-sibling selector | Pick the active sibling surface | `analysis.fitting_mode_type` | owner-owned tag such as `_fitting.mode_type` | - -Backend selectors live on a dedicated configuration category (`fitting`, -`calculation`, `rendering`). Switchable-category implementation -selectors are owned by the host (typically the experiment) because -switching them replaces the category instance, as described in §9.3. -Active-sibling selectors are also owner-level, but they do not swap one -category implementation for another. Instead, they select which sibling -category family is authoritative while the shared configuration category -keeps a stable shape. - -### 9.5 Discoverable Supported Options - -The user can always discover what is supported for the current -experiment: - -```python -expt.show_peak_profile_types() -expt.show_background_types() -expt.calculation.show_calculator_types() -expt.show_extinction_types() -project.analysis.fitting.show_minimizer_types() -project.analysis.show_fitting_mode_types() -project.rendering.show_chart_engines() -project.rendering.show_table_engines() -``` - -Available calculators are filtered by `engine_imported` (whether the -library is installed) and by the experiment's active support category -`calculator_support` metadata. - -### 9.6 Enums for Finite Value Sets - -Every attribute, descriptor, or configuration option that accepts a -**finite, closed set of values** must be represented by a `(str, Enum)` -class. This applies to: - -- Factory tags (§5.6) — e.g. `PeakProfileTypeEnum`, `CalculatorEnum`. -- Experiment-axis values — e.g. `SampleFormEnum`, `BeamModeEnum`. -- Category descriptors with enumerated choices — e.g. fit mode - (`FitModeEnum.SINGLE`, `FitModeEnum.JOINT`, `FitModeEnum.SEQUENTIAL`). - -The enum serves as the **single source of truth** for valid values, -their user-facing string representations, and their descriptions. -Benefits: - -- **Autocomplete and typo safety** — IDEs list valid members; - misspellings are caught at assignment time. -- **Greppable** — searching for `FitModeEnum.JOINT` finds every code - path that handles joint fitting. -- **Type-safe dispatch** — `if mode == FitModeEnum.JOINT:` is checked by - type checkers; `if mode == 'joint':` is not. -- **Consistent validation** — use `MembershipValidator` with the enum - members instead of `RegexValidator` with hand-written patterns. - -**Rule:** internal code must compare against enum members, never raw -strings. User-facing setters accept either the enum member or its string -value (because `str(EnumMember) == EnumMember.value` for `(str, Enum)`), -but internal dispatch always uses the enum: - -```python -# ✅ Correct — compare with enum -if self._fitting_mode_type is FitModeEnum.JOINT: - -# ❌ Wrong — compare with raw string -if self._fitting_mode_type == 'joint': -``` - -### 9.7 Flat Category Structure — No Nested Categories - -Following CIF conventions, categories are **flat siblings** within their -owner (`CategoryOwner`). A category must never be a child of another -category of a different type. Categories can reference each other via -IDs, but the ownership hierarchy is always: - -``` -Owner (CategoryOwner) -├── CategoryA (CategoryItem or CategoryCollection) -├── CategoryB (CategoryItem or CategoryCollection) -└── CategoryC (CategoryItem or CategoryCollection) -``` - -Never: - -``` -Owner -└── CategoryA - └── CategoryB ← WRONG: CategoryB is a child of CategoryA -``` - -**Example — `fit` and `joint_fit`:** `fit` is a `CategoryItem` holding -the active minimizer and fitting mode. `joint_fit` is a separate -`CategoryCollection` holding per-experiment weights. Both are direct -children of `Analysis`, not nested: - -```python -# ✅ Correct — sibling categories on Analysis -project.analysis.fitting_mode_type = 'joint' -project.analysis.joint_fit['npd'].weight = 0.7 - -# ❌ Wrong — joint_fit as a child of fit -project.analysis.fitting.joint_fit['npd'].weight = 0.7 -``` - -In CIF output, sibling categories appear as independent blocks: - -``` -_fitting.mode_type joint -_fitting.minimizer_type lmfit - -loop_ -_joint_fit.experiment_id -_joint_fit.weight -npd 0.7 -xrd 0.3 -``` - -### 9.8 Property Docstring and Type-Hint Template - -Every public property backed by a private `Parameter`, -`NumericDescriptor`, or `StringDescriptor` attribute must follow the -template below. The `description` field on the descriptor is the -**single source of truth**; docstrings and type hints are mechanically -derived from it. - -**Definitions:** - -| Symbol | Meaning | -| --------- | -------------------------------------------------------------------------------------- | -| `{desc}` | `description` string without trailing period | -| `{units}` | `units` string; omit the `({units})` parenthetical when absent/empty | -| `{Type}` | Descriptor class name: `Parameter`, `NumericDescriptor`, or `StringDescriptor` | -| `{ann}` | Setter value annotation: `float` for numeric descriptors, `str` for string descriptors | - -**Template — writable property:** - -```python -@property -def length_a(self) -> Parameter: - """Length of the a axis of the unit cell (Å). - - Reading this property returns the underlying ``Parameter`` - object. Assigning to it updates the parameter value. - """ - return self._length_a - -@length_a.setter -def length_a(self, value: float) -> None: - self._length_a.value = value -``` - -**Template — read-only property:** - -```python -@property -def length_a(self) -> Parameter: - """Length of the a axis of the unit cell (Å). - - Reading this property returns the underlying ``Parameter`` - object. - """ - return self._length_a -``` - -**Quick-reference table:** - -| Element | Text | -| ---------------------- | -------------------------------------------------------------------------------------------------------------- | -| Getter summary line | `"""{desc} ({units}).` (or `"""{desc}.` when unitless) | -| Getter body (writable) | `Reading this property returns the underlying ``{Type}`` object. Assigning to it updates the parameter value.` | -| Getter body (readonly) | `Reading this property returns the underlying ``{Type}`` object.` | -| Setter docstring | _(none — not rendered by griffe / MkDocs)_ | -| Getter annotation | `-> {Type}` | -| Setter annotation | `value: {ann}` and `-> None` | - -**Notes:** - -- Getter docstrings have **no** `Args:` or `Returns:` sections. -- Setters have **no** docstring. -- Avoid markdown emphasis (`*a*`) in docstrings; use plain text to stay - in sync with the `description` field. -- The CI tool `pixi run param-consistency-check` validates compliance; - `pixi run param-consistency-fix` auto-fixes violations. - -### 9.9 Lint Complexity Thresholds - -The Pylint-style complexity limits in `pyproject.toml` are **intentional -code-quality guardrails**, not arbitrary numbers. A violation is a -signal that the function or class needs refactoring — not that the -threshold needs raising. - -The project uses **ruff's defaults** for all PLR thresholds, with one -exception: `max-args` and `max-positional-args` are set to **6** instead -of the ruff default of 5, because ruff counts `self`/`cls` while -traditional pylint does not. Setting 6 in ruff matches pylint's standard -limit of 5 real parameters per function. - -| Threshold | Value | Rule | -| --------------------- | ----- | ------- | -| `max-args` | 6 | PLR0913 | -| `max-positional-args` | 6 | PLR0917 | -| `max-branches` | 12 | PLR0912 | -| `max-statements` | 50 | PLR0915 | -| `max-locals` | 15 | PLR0914 | -| `max-nested-blocks` | 5 | PLR1702 | -| `max-returns` | 6 | PLR0911 | -| `max-public-methods` | 20 | PLR0904 | - -**Rules:** - -- **Do not raise thresholds.** The current values represent the - project's design intent for maximum acceptable complexity. -- **Do not add `# noqa` comments** (or any other mechanism) to silence - complexity rules such as `PLR0912`, `PLR0913`, `PLR0914`, `PLR0915`, - `PLR0917`, `PLR1702`. -- **Refactor the code instead:** extract helper functions, introduce - parameter objects, flatten nesting, use early returns, etc. -- **For complex refactors** that touch many lines or change public API, - propose a refactoring plan and wait for approval before proceeding. - ---- - -## 10. Test Strategy - -Every new feature, category, factory, or bug fix must ship with tests. -The project enforces a multi-layered testing approach that catches -regressions at different levels of abstraction. - -### 10.1 Test Layers - -| Layer | Location | Runner command | Scope | -| --------------------- | ----------------------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------- | -| **Unit** | `tests/unit/` | `pixi run unit-tests` | Single class or function in isolation. Fast, no I/O, no external engines. | -| **Functional** | `tests/functional/` | `pixi run functional-tests` | Multi-component workflows (e.g. create experiment → load data → fit). No external data files beyond tiny test stubs. | -| **Integration** | `tests/integration/` | `pixi run integration-tests` | End-to-end pipelines using real calculation engines (cryspy, crysfml, pdffit2) and real data files from `data/`. | -| **Script (tutorial)** | `tools/test_scripts.py` | `pixi run script-tests` | Runs each tutorial `*.py` script under `docs/docs/tutorials/` as a subprocess and checks for a zero exit code. | -| **Notebook** | `docs/docs/tutorials/` | `pixi run notebook-tests` | Executes every Jupyter notebook end-to-end via `nbmake`. | - -### 10.2 Directory Structure Convention - -The unit-test tree **mirrors** the source tree: - -``` -src/easydiffraction//.py - → tests/unit/easydiffraction//test_.py -``` - -Two additional patterns are recognised: - -1. **Supplementary coverage files** — `test__coverage.py`, - `test__more.py`, etc. sit beside the main test file and add - extra scenarios. -2. **Parent-level roll-up** — for category packages that contain only - `default.py` and `factory.py`, a single `test_.py` one - directory up covers the whole package (e.g. - `categories/test_experiment_type.py` covers - `categories/experiment_type/default.py` and - `categories/experiment_type/factory.py`). - -The CI tool `pixi run test-structure-check` validates that every source -module has a corresponding test file and reports any gaps. Explicit name -aliases (e.g. `variable.py` tested by `test_parameters.py`) are declared -in `KNOWN_ALIASES` inside the tool script. - -### 10.3 What to Test per Source Module Type - -| Source module type | Required tests | -| -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | -| **Core base class** (`core/`) | Instantiation, public properties, validation edge cases, identity wiring. | -| **Factory** (`factory.py`) | Registration check, `supported_tags()`, `default_tag()`, `create()` for each tag, `show_supported()` output, invalid-tag handling. | -| **Category** (`default.py`) | Instantiation, all public properties (read + write where applicable), CIF round-trip (`as_cif` → `from_cif`), parameter enumeration. | -| **Enum** (`enums.py`) | Membership of all members, `default()` method, `description()` for every member, `StrEnum` string equality. | -| **Datablock item** (`base.py`) | Construction, switchable-category full API (``, `_type` get/set, `show__types`), `show`/`show_as_cif`. | -| **Collection** (`collection.py`) | `create`, `add`, `remove`, `names`, `show_names`, `show_params`, iteration, duplicate-name handling. | -| **Calculator / Minimizer** | `can_handle()` with compatible and incompatible experiment types, `_compute()` stub or mock. | -| **Display / IO** | Input → output for representative cases; file-not-found and malformed-input error paths. | - -### 10.4 Test Conventions - -- **No test-ordering dependence.** Each test must be self-contained. Use - `monkeypatch` to set `Logger._reaction` when the test expects a raised - exception (another test may have leaked WARN mode via the global - `Logger` singleton). -- **Error paths are tested explicitly.** Use `pytest.raises()` (with - `monkeypatch` for Logger RAISE mode) for `log.error()` calls that - specify `exc_type`. -- **`@typechecked` setters raise `typeguard.TypeCheckError`**, not - `TypeError`. Tests must catch the correct exception. -- **Use `capsys` / `capfd`** for asserting console output from `show_*` - methods. -- **Prefer `tmp_path`** (pytest fixture) for file-system tests. -- **No sleeping, no network calls, no real calculation engines** in unit - tests. -- Test files carry the SPDX license header and a module-level docstring. - They are exempt from most lint rules (ANN, D, DOC, INP001, S101, etc.) - per `pyproject.toml`. - -### 10.5 Coverage Threshold - -The minimum line-coverage threshold is **70 %** (`fail_under = 70` in -`pyproject.toml`). The project aspires to test every code path; the -threshold is a safety net, not a target. - -Run `pixi run unit-tests-coverage` for a per-module report. - ---- - -## 11. Issues - -- **Open:** [`issues_open.md`](Issues/issues_open.md) — prioritised - backlog. -- **Closed:** [`issues_closed.md`](Issues/issues_closed.md) — resolved - items for reference. - -When a resolution affects the architecture described above, the relevant -sections of this document are updated accordingly. diff --git a/docs/dev/index.md b/docs/dev/index.md new file mode 100644 index 00000000..424140f2 --- /dev/null +++ b/docs/dev/index.md @@ -0,0 +1,29 @@ +# Development Documentation + +This directory contains development-only project documentation. Keep it +under `docs/dev` rather than a repository-root `dev/` directory so all +documentation lives under one tree, while published user documentation +remains isolated under `docs/docs`. + +## Structure + +| Path | Purpose | +| ---------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | +| [`adrs/index.md`](adrs/index.md) | Architecture and decision navigation, grouped by topic. | +| [`issues/open.md`](issues/open.md) | Prioritized open development issues and design questions. | +| [`issues/closed.md`](issues/closed.md) | Closed development issues retained for history. | +| [`package-structure/short.md`](package-structure/short.md) | Generated compact package tree. | +| [`package-structure/full.md`](package-structure/full.md) | Generated package tree with top-level classes. | +| [`plans/`](plans/) | Implementation plans for larger migrations. | +| [`roadmap/ROADMAP.md`](roadmap/ROADMAP.md) | Development roadmap. This may later be copied into `docs/docs` during the published-docs build. | + +## Rules + +- Put architecture and decision history in ADR files. +- Put proposed decisions in `adrs/suggestions/` until accepted. +- Move accepted suggestions to `adrs/accepted/` and update + `adrs/index.md`. +- Do not edit generated package-structure files by hand; run + `pixi run update-package-diagrams`. +- Keep development documentation out of `docs/docs` unless it is meant + to become user-facing documentation. diff --git a/docs/dev/Issues/issues_closed.md b/docs/dev/issues/closed.md similarity index 98% rename from docs/dev/Issues/issues_closed.md rename to docs/dev/issues/closed.md index 0e6db4dc..c2e60653 100644 --- a/docs/dev/Issues/issues_closed.md +++ b/docs/dev/issues/closed.md @@ -12,7 +12,8 @@ that do not inherit the guarded object hierarchy: `project.display`, `project.display.posterior`, `analysis.display`, and `project.summary`. Introduced `render_object_help()` so these helpers share the same property and method table style as `GuardedBase.help()`. Documented the -convention in `docs/dev/ADRs/adr_help-discoverability.md`. +convention in +[`help-discoverability.md`](../adrs/accepted/help-discoverability.md). --- diff --git a/docs/dev/Issues/issues_open.md b/docs/dev/issues/open.md similarity index 99% rename from docs/dev/Issues/issues_open.md rename to docs/dev/issues/open.md index 2e6142e8..6c96f87e 100644 --- a/docs/dev/Issues/issues_open.md +++ b/docs/dev/issues/open.md @@ -3,8 +3,8 @@ Prioritised list of issues, improvements, and design questions to address. Items are ordered by a combination of user impact, blocking potential, and implementation readiness. When an item is fully -implemented, remove it from this file and update `architecture.md` if -needed. +implemented, remove it from this file and update +[`adrs/index.md`](../adrs/index.md) or the relevant ADR if needed. **Legend:** 🔴 High · 🟡 Medium · 🟢 Low @@ -200,7 +200,7 @@ duplicate rules writing the same target, and how additional supported prefixes should be introduced when new environment categories appear. **Fix:** pin the allowed target grammar and duplicate-target behaviour -in architecture and validation rules. +in an ADR and validation rules. **Depends on:** nothing. @@ -1452,8 +1452,8 @@ Two manual workflow steps are required between releases/changes: 2. `pixi run notebook-prepare` — regenerate tutorial notebooks from scripts. -Document these in `CONTRIBUTING.md` or the architecture doc so they are -not forgotten. +Document these in `CONTRIBUTING.md` or a relevant ADR so they are not +forgotten. **Depends on:** nothing. @@ -1600,7 +1600,7 @@ Should print: **Type:** CI / Tooling CodeFactor flags TODO comments as unresolved issues (rule C100) in PRs. -Since TODOs are tracked in `issues_open.md`, the CodeFactor check adds +Since TODOs are tracked in `issues/open.md`, the CodeFactor check adds noise. Disable the C100 rule or configure CodeFactor to ignore TODO comments. diff --git a/docs/dev/package-structure-full.md b/docs/dev/package-structure/full.md similarity index 100% rename from docs/dev/package-structure-full.md rename to docs/dev/package-structure/full.md diff --git a/docs/dev/package-structure-short.md b/docs/dev/package-structure/short.md similarity index 100% rename from docs/dev/package-structure-short.md rename to docs/dev/package-structure/short.md diff --git a/docs/dev/plan_workspace-root-project-category.md b/docs/dev/plans/workspace-root-project-category.md similarity index 97% rename from docs/dev/plan_workspace-root-project-category.md rename to docs/dev/plans/workspace-root-project-category.md index 81c9b183..07f608a2 100644 --- a/docs/dev/plan_workspace-root-project-category.md +++ b/docs/dev/plans/workspace-root-project-category.md @@ -7,13 +7,13 @@ Branch: `feature/workspace-root-project-category` ADR suggestion: ```text -docs/dev/ADR-suggestions/adr_workspace-root-project-category.md +docs/dev/adrs/suggestions/workspace-root-project-category.md ``` Two-phase workflow from `.github/copilot-instructions.md`: -- Phase 1 - Implementation. Code, docs, and architecture updates only. - Do not create or run tests unless the user explicitly asks. +- Phase 1 - Implementation. Code and docs updates only. Do not create or + run tests unless the user explicitly asks. - Phase 2 - Verification. Add/update tests, then run the verification commands listed near the end of this plan. @@ -737,31 +737,31 @@ new root object and project-information category. ### Files Likely To Change -- `docs/dev/architecture.md` -- `docs/dev/Issues/issues_open.md` -- `docs/dev/ADRs/*.md` -- `docs/dev/ADR-suggestions/*.md` +- `docs/dev/adrs/index.md` +- `docs/dev/issues/open.md` +- `docs/dev/adrs/accepted/*.md` +- `docs/dev/adrs/suggestions/*.md` - `docs/docs/tutorials/*.py` - `README.md` - `CONTRIBUTING.md` only if it contains API examples Do not edit these by hand: -- `docs/dev/package-structure-full.md` -- `docs/dev/package-structure-short.md` +- `docs/dev/package-structure/full.md` +- `docs/dev/package-structure/short.md` - generated tutorial notebooks - generated `docs/site/` files ### Steps -1. Update architecture section 7: +1. Update the relevant accepted ADRs: ```text - Project - The Top-Level Facade - -> Workspace - The Top-Level Facade + Project Facade and Persistence Layout + -> Workspace Facade and Persistence Layout ``` -2. Update the architecture table to use: +2. Update the affected ADR examples to use: ```text workspace.project ProjectInfo @@ -1018,7 +1018,7 @@ rg -n "_verbosity|verbosity" src tests docs tools README.md CONTRIBUTING.md Generated docs should not be manually edited: ```shell -git diff -- docs/site docs/dev/package-structure-full.md docs/dev/package-structure-short.md +git diff -- docs/site docs/dev/package-structure/full.md docs/dev/package-structure/short.md ``` If package-structure docs changed because of `pixi run fix`, that is diff --git a/docs/dev/ROADMAP.md b/docs/dev/roadmap/ROADMAP.md similarity index 100% rename from docs/dev/ROADMAP.md rename to docs/dev/roadmap/ROADMAP.md diff --git a/tools/generate_package_docs.py b/tools/generate_package_docs.py index 1d860557..78915c2f 100644 --- a/tools/generate_package_docs.py +++ b/tools/generate_package_docs.py @@ -3,9 +3,9 @@ """Generate project package structure markdown files. -Outputs two docs under docs/dev/: - - package-structure-short.md (folders/files only) - - package-structure-full.md (folders/files and classes) +Outputs two docs under docs/dev/package-structure/: + - short.md (folders/files only) + - full.md (folders/files and classes) Run (from repo root): pixi run python tools/generate_package_docs.py @@ -21,7 +21,7 @@ REPO_ROOT = Path(__file__).resolve().parents[1] SRC_ROOT = REPO_ROOT / 'src' / 'easydiffraction' -DOCS_OUT_DIR = REPO_ROOT / 'docs' / 'dev' +DOCS_OUT_DIR = REPO_ROOT / 'docs' / 'dev' / 'package-structure' IGNORE_DIRS = { @@ -132,8 +132,8 @@ def _render(node: Node, prefix: str = '') -> None: def write_markdown(short_lines: List[str], full_lines: List[str]) -> None: DOCS_OUT_DIR.mkdir(parents=True, exist_ok=True) - short_md = DOCS_OUT_DIR / 'package-structure-short.md' - full_md = DOCS_OUT_DIR / 'package-structure-full.md' + short_md = DOCS_OUT_DIR / 'short.md' + full_md = DOCS_OUT_DIR / 'full.md' short_content = [ '# Package Structure (short)', diff --git a/tools/param_consistency.py b/tools/param_consistency.py index 1c459e05..a9d266ba 100644 --- a/tools/param_consistency.py +++ b/tools/param_consistency.py @@ -8,8 +8,8 @@ python param_consistency.py --fix python param_consistency.py src/mypackage/ --check -Template (see architecture.md §9.8 for the full spec) ------------------------------------------------------- +Template (see docs/dev/adrs/accepted/property-docstring-template.md) +----------------------------------------------------------------------- Given ``description='Length of the a axis of the unit cell.'``, ``units='Å'``, and type ``Parameter``: From f81c95e8ebe34f7de31e860e240eefd101cc045e Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Sun, 17 May 2026 16:35:40 +0200 Subject: [PATCH 03/10] Tighten workspace migration plan for autonomous implementation --- .../workspace-root-project-category.md | 7 +- .../plans/workspace-root-project-category.md | 183 ++++++++++++++---- 2 files changed, 155 insertions(+), 35 deletions(-) diff --git a/docs/dev/adrs/suggestions/workspace-root-project-category.md b/docs/dev/adrs/suggestions/workspace-root-project-category.md index c0d91b9b..a6c94cba 100644 --- a/docs/dev/adrs/suggestions/workspace-root-project-category.md +++ b/docs/dev/adrs/suggestions/workspace-root-project-category.md @@ -338,7 +338,8 @@ The high-level migration is: 9. Rename saved singleton config file from `project.cif` to `workspace.cif`. 10. Persist workspace verbosity in `workspace.cif` as - `_verbosity.level`. + `_verbosity.level`, owned by a first-class `Verbosity` category + under `WorkspaceConfig` (parallel to `Rendering`). 11. Update code, tests, scripts, tutorials, docs, and ADR references. ## Post-Implementation ADR Update @@ -371,5 +372,9 @@ This ADR is satisfied when: - saved singleton configuration lives in `workspace.cif`. - `workspace.cif` uses `_project.*`, `_rendering.*`, and `_verbosity.level` tags. +- workspace verbosity is owned by a registered `Verbosity` category + alongside `Rendering`. +- `ProjectInfo.path` is removed; the saved directory path is exposed + only as `workspace.path`. - no `_meta.*` tags are introduced for project information. - tutorials and accepted ADRs use `Workspace`. diff --git a/docs/dev/plans/workspace-root-project-category.md b/docs/dev/plans/workspace-root-project-category.md index 07f608a2..346a72aa 100644 --- a/docs/dev/plans/workspace-root-project-category.md +++ b/docs/dev/plans/workspace-root-project-category.md @@ -165,6 +165,18 @@ implementation: `workspace.project.path`. - The old `Project` public API is removed unless the user explicitly approves an alias before implementation. +- The category class name `ProjectInfo` is kept; only the public + attribute name (`info` → `project`) changes. +- `ProjectInfo.path` is removed; the saved directory path lives on + `Workspace.path` only. +- A first-class `Verbosity` category under `WorkspaceConfig` is + required (not optional) so that `_verbosity.level` round-trips + through `workspace.cif` like other singleton categories. +- Source-tree imports may be temporarily inconsistent between phases + (for example, Phase 1 leaves `project_config_to_cif` imports until + Phase 4 renames the serializer functions). This is acceptable + because tests are not run in Phase 1. Each phase must still leave + the source importable at the end of the phase. ## Current Shape @@ -277,7 +289,7 @@ Rename the top-level runtime facade and package from `project` to ### Steps -1. Move the source package: +1. Move the source package with `git mv` so history is preserved: ```text src/easydiffraction/project/ @@ -302,25 +314,57 @@ Rename the top-level runtime facade and package from `project` to ProjectDisplay -> WorkspaceDisplay ``` -4. Update top-level import: +4. Rename class-level state inside the facade: + + ```text + Project._current_project -> Workspace._current_workspace + Project._loading -> Workspace._loading # name kept + Project.current_project_path() -> Workspace.current_workspace_path() + ``` + + Update the `ClassVar` annotation accordingly and update all + internal references (`type(self)._current_project = self`, + `cls._current_project`). + +5. Update the `varname()` fallback inside `__init__` so the default + variable name becomes `'workspace'` instead of `'project'`: + + ```python + self._varname = 'workspace' if type(self)._loading else varname() + ``` + +6. Update top-level import: ```python from easydiffraction.workspace.workspace import Workspace ``` -5. Remove the old top-level `Project` import unless the user approved an +7. Remove the old top-level `Project` import unless the user approved an alias. -6. Update type-checking imports: +8. Update type-checking imports: ```python from easydiffraction.workspace.workspace import Workspace ``` -7. Update docstrings from "Project facade" to "Workspace facade" where - they describe the root object. +9. Update docstrings from "Project facade" to "Workspace facade" where + they describe the root object. Keep wording that talks about the + scientific project (titles, descriptions, identity) unchanged. + +10. Rename `io/ascii.py::extract_project_from_zip` to + `extract_workspace_from_zip` and update its re-exports in + `src/easydiffraction/io/__init__.py` and + `src/easydiffraction/__init__.py`. The function extracts a saved + workspace directory, not scientific project information. -8. Run a source-only grep. Do not run tests in Phase 1: +11. Note: `src/easydiffraction/io/cif/serialize.py` still defines + `project_config_to_cif`, `project_config_from_cif`, and + `project_to_cif` at this point. Leave those imports as-is in + `workspace.py`; they are renamed in Phase 4. The function + `project_info_to_cif` keeps its name. + +12. Run a source-only grep. Do not run tests in Phase 1: ```shell rg -n "easydiffraction\\.project|\\bProject\\b|ProjectDisplay|ProjectConfig" src @@ -454,12 +498,17 @@ the workspace location and is not serialized project information. ### Steps -1. In `ProjectInfo`, rename the public identity property: +1. In `ProjectInfo`, rename the public identity property and its + setter: ```text - name -> id + name (getter) -> id (getter) + name (setter) -> id (setter) ``` + Do not also rename the internal descriptor attribute + `self._project_id`; it already matches the new public name. + 2. Keep the underlying CIF tag unchanged: ```python @@ -468,10 +517,12 @@ the workspace location and is not serialized project information. 3. Update `ProjectInfo.unique_name` to return `self.id`. -4. Update `project_info_to_cif()` and CIF loading helpers to use +4. Keep `ProjectInfo._identity.category_code = 'project'` as-is. + +5. Update `project_info_to_cif()` and CIF loading helpers to use `info.id`. -5. Rename constructor arguments: +6. Rename constructor arguments: ```text name -> project_id @@ -482,10 +533,15 @@ the workspace location and is not serialized project information. - `WorkspaceConfig.__init__` - `ProjectInfo.__init__` - `ProjectInfoFactory.create(...)` call sites + - Default value: `'untitled_project'` (unchanged value, just the + parameter name changes) -6. Add `Workspace.path` as the runtime storage path. +7. Add `Workspace.path` as the runtime storage path. Initialize + `self._path: pathlib.Path | None = None` in `Workspace.__init__`. - Suggested shape: + Suggested shape (match the surrounding `GuardedBase` pattern; do + not add `@typechecked` here because the setter accepts both `str` + and `pathlib.Path`): ```python @property @@ -495,13 +551,14 @@ the workspace location and is not serialized project information. @path.setter def path(self, value: object) -> None: - self._path = pathlib.Path(value) + self._path = pathlib.Path(value) if value is not None else None ``` -7. Remove `ProjectInfo.path` unless explicitly approved as a - compatibility alias. +8. Remove `ProjectInfo.path` (property, setter, and the `self._path` + attribute inside `ProjectInfo.__init__`) unless explicitly approved + as a compatibility alias. -8. Update save/load logic: +9. Update save/load logic across the codebase: ```text workspace.path @@ -510,17 +567,22 @@ the workspace location and is not serialized project information. should replace: ```text - workspace.project.path + project.info.path # old + workspace.project.path # never used; do not introduce ``` -9. Update messages and string representations: + Concrete call sites include `Workspace.save`, `Workspace.load`, + `Workspace.current_workspace_path`, and any consumers in + `analysis/`, `display/`, `summary/`, and `io/`. + +10. Update messages and string representations: ```text Workspace '' (...) Saving workspace '' to ... ``` -10. Run grep: +11. Run grep: ```shell rg -n "\\.name\\b|\\.path\\b|project_id|Project identifier" src/easydiffraction/workspace src/easydiffraction/io src/easydiffraction/display src/easydiffraction/summary @@ -562,14 +624,20 @@ Rename the saved singleton configuration file from `project.cif` to ### Steps -1. Rename serializer functions if they still use project-root naming: +1. Rename serializer functions in + `src/easydiffraction/io/cif/serialize.py` and every call site: ```text - project_config_to_cif -> workspace_config_to_cif + project_config_to_cif -> workspace_config_to_cif project_config_from_cif -> workspace_config_from_cif - project_to_cif -> workspace_to_cif + project_to_cif -> workspace_to_cif ``` + Call sites include `workspace.py` (formerly `project.py`), + `workspace_config.py`, and the serializer itself (the `project_to_cif` + body calls `project_config_to_cif`). After this step the imports + that were intentionally left stale in Phase 1 must compile cleanly. + Do not rename `project_info_to_cif`; it serializes the `_project` category and that name remains correct. @@ -588,24 +656,37 @@ Rename the saved singleton configuration file from `project.cif` to 4. Do not add `project.cif`, `config.cif`, or `meta.cif` fallbacks unless the user approved a compatibility loader. -5. Move the public workspace verbosity preference into the workspace - singleton configuration. Keep the simple public access path: +5. Add a `Verbosity` category under + `src/easydiffraction/workspace/categories/verbosity/` following the + same shape as `rendering/` (a `default.py` with a `Verbosity` + `CategoryItem`, a `factory.py` with `VerbosityFactory`, and an + `__init__.py` that imports both to trigger registration). + + The category owns one descriptor: + + ```python + CifHandler(names=['_verbosity.level']) + ``` + + Bind it in `WorkspaceConfig.__init__` next to `_rendering`, and + expose it from `Workspace` so that the public access path remains: ```python workspace.verbosity = 'short' + workspace.verbosity # -> 'short' ``` + The public `verbosity` getter/setter on `Workspace` reads and + writes the category's `level` descriptor (validated against + `VerbosityEnum`) and replaces the current `self._verbosity: + VerbosityEnum` runtime-only attribute. Remove that attribute. + Serialize it as: ```cif _verbosity.level short ``` - A small `Verbosity` category under `WorkspaceConfig` is preferred if - it follows the local category-owner pattern cleanly. If that is too - much for this migration, use a focused serializer/deserializer helper - and document the reason. - 6. Keep the contents semantic: ```cif @@ -624,8 +705,16 @@ Rename the saved singleton configuration file from `project.cif` to rg -n "project\\.cif|config\\.cif|meta\\.cif|project_config_to_cif|project_config_from_cif|project_to_cif|verbosity" src docs tests ``` - In Phase 1, update source and docs only. Test files are handled in - Phase 2 unless the user explicitly asks otherwise. + Update source and docs only. Test files are handled in Phase 2 + unless the user explicitly asks otherwise. + +9. Saved on-disk fixtures under `data/` and `projects/` still contain + `project.cif` files (for example `data/lbco_project/project.cif`, + `projects/cosio/project.cif`). Do **not** edit or regenerate them + in Phase 1. They are inputs to integration/script tests and will + either be regenerated in Phase 2 or the relevant tests will be + updated to write fresh workspace directories. Flag any that block + Phase 2 in the review gate. ### Stop Conditions @@ -673,14 +762,21 @@ category or the scientific project itself. analysis.project -> analysis.workspace ``` -2. Rename display internals: + This includes the constructor argument, the stored attribute, and + any public read-only property exposing the parent workspace. + +2. Rename display internals in `WorkspaceDisplay` (formerly + `ProjectDisplay`) and in any other class that stores a back- + reference to the root facade: ```text self._project -> self._workspace _set_project(...) -> _set_workspace(...) ``` - Only do this when the object is the top-level runtime facade. + Only do this when the stored object is the top-level runtime + facade. Do not touch `self._project_id` inside `ProjectInfo` or + `_project.*` CIF tags. 3. Rename local variables in runtime code: @@ -946,6 +1042,11 @@ Add focused tests for: 13. `project.cif`, `config.cif`, and `meta.cif` are not written unless compatibility was approved. 14. `ed.Project` is absent unless compatibility was approved. +15. `Verbosity` category is registered via its factory and reachable + through `WorkspaceConfig` (parallel to `Rendering`). +16. `ed.extract_workspace_from_zip` is importable and + `ed.extract_project_from_zip` is not (unless compatibility + approved). If compatibility alias was approved, add tests for: @@ -1088,6 +1189,20 @@ Only root-facade uses should become `workspace` or `Workspace`. The saved directory path belongs to the workspace runtime state. It should be `workspace.path`, not `workspace.project.path`. +### Mistake: Forgetting Facade Class-Level State + +When renaming `Project` to `Workspace`, the `ClassVar` `_current_project`, +the `current_project_path()` classmethod, and the `varname()` fallback +string `'project'` all live on the class itself and are easy to miss +with a single search-and-replace. Rename them to `_current_workspace`, +`current_workspace_path()`, and `'workspace'` respectively. + +### Mistake: Renaming `ProjectInfo._project_id` + +The internal descriptor attribute `self._project_id` inside +`ProjectInfo` already matches the new public name `id` and stays as +is. Only the public `name` property/setter becomes `id`. + ### Mistake: Editing Generated Notebooks Directly Tutorial notebooks are generated artifacts. Edit tutorial `.py` files, From e0eb9cb3b6a7c5dd0bd42a29098eaf49d71dd1fc Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 11:57:34 +0200 Subject: [PATCH 04/10] Consolidate ADR index into single table --- docs/dev/adrs/index.md | 67 ++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 555b3b54..ed0940bf 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -11,39 +11,36 @@ to keep the list easy to scan. If a group becomes too broad, split it; if a group stays small, keep it as a group rather than adding deeper folders. -## Accepted ADRs +## ADR Index -| Group | Title | Short description | Link | -| -------------------- | ----------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- | -| Analysis and fitting | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | -| Analysis and fitting | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Core model | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | -| Core model | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | -| Core model | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | -| Core model | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. | [`category-parameter-access.md`](accepted/category-parameter-access.md) | -| Documentation | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | -| Documentation | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | -| Documentation | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | -| Documentation | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | -| Experiment model | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | -| Factories | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | -| Naming | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | -| Persistence | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | -| Persistence | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. | [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | -| Quality | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | -| Quality | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | -| Structure model | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. | [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | -| User-facing API | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | -| User-facing API | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | -| User-facing API | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | -| User-facing API | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | - -## ADR Suggestions - -| Group | Title | Short description | Link | -| -------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------ | -| Analysis and fitting | Analysis CIF Fit State | Proposes a persisted scalar projection of fit state in `analysis.cif`. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | -| Analysis and fitting | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | -| Analysis and fitting | Parameter-Level Posterior Projection and Bayesian Persistence | Proposes saved Bayesian summaries and canonical posterior storage. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | -| Analysis and fitting | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | -| Workspace model | Workspace Root and Project Information Category | Proposes renaming the top-level facade from `Project` to `Workspace` and reserving `project` for project metadata. | [`workspace-root-project-category.md`](suggestions/workspace-root-project-category.md) | +| Group | Status | Title | Short description | Link | +| -------------------- | ---------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------- | +| Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | +| Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | +| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes a persisted scalar projection of fit state in `analysis.cif`. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | +| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection and Bayesian Persistence | Proposes saved Bayesian summaries and canonical posterior storage. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | +| Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | +| Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | +| Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | +| Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | +| Core model | Accepted | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. | [`category-parameter-access.md`](accepted/category-parameter-access.md) | +| Documentation | Accepted | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | +| Documentation | Accepted | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | +| Documentation | Accepted | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | +| Documentation | Accepted | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | +| Experiment model | Accepted | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | +| Factories | Accepted | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | +| Naming | Accepted | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | +| Persistence | Accepted | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | +| Persistence | Accepted | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. | [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | +| Persistence | Suggestion | Loop Category Keys and Identity Naming | Documents current loop collection keys and proposes naming rules aligned with CIF category keys. | [`loop-category-key-identity.md`](suggestions/loop-category-key-identity.md) | +| Persistence | Suggestion | Python and CIF Category Correspondence | Compares current Python paths and CIF tags, then proposes scoped one-to-one mapping for project-level categories. | [`python-cif-category-correspondence.md`](suggestions/python-cif-category-correspondence.md) | +| Quality | Accepted | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | +| Quality | Accepted | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | +| Structure model | Accepted | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. | [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | +| User-facing API | Accepted | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | +| User-facing API | Accepted | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | +| User-facing API | Accepted | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | +| User-facing API | Accepted | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | +| Workspace model | Suggestion | Workspace Root and Project Information Category | Proposes renaming the top-level facade from `Project` to `Workspace` and reserving `project` for project metadata. | [`workspace-root-project-category.md`](suggestions/workspace-root-project-category.md) | From c4e810d5ca0cdb8209d0d329398e7e5856796a3e Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 12:10:16 +0200 Subject: [PATCH 05/10] Add loop category key identity ADR and plan --- .github/copilot-instructions.md | 27 +- .../suggestions/loop-category-key-identity.md | 155 +++++ .../python-cif-category-correspondence.md | 314 +++++++++ docs/dev/plans/loop-category-key-identity.md | 620 ++++++++++++++++++ .../plans/workspace-root-project-category.md | 106 +-- 5 files changed, 1165 insertions(+), 57 deletions(-) create mode 100644 docs/dev/adrs/suggestions/loop-category-key-identity.md create mode 100644 docs/dev/adrs/suggestions/python-cif-category-correspondence.md create mode 100644 docs/dev/plans/loop-category-key-identity.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index af6f3606..d5f96553 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -34,7 +34,10 @@ - One class per file when substantial; group small related classes. - No `**kwargs` — use explicit keyword arguments. - No string-based dispatch (e.g. `getattr(self, f'_{name}')`); write - named methods (`_set_sample_form`, `_set_beam_mode`). + named methods (`_set_sample_form`, `_set_beam_mode`). Narrow framework + metadata lookups are allowed when the attribute name is a class-level + declaration, is not user input, and is validated in one central place; + for example, `CategoryItem._category_entry_name`. - Public attrs are either editable (getter+setter property) or read-only (getter only). For internal mutation of read-only props, use a private `_set_` method, not a public setter. @@ -115,7 +118,10 @@ resolves them. - Never remove or replace existing functionality without explicit confirmation — highlight every removal and wait for approval. -- When renaming, grep the entire project (code, tests, tutorials, docs). +- When renaming or auditing usages, search the entire project (code, + tests, tutorials, docs). Use `git grep -n` because all contributors + have Git; do not assume `rg` is installed. If `git grep` is + unavailable, fall back to `find ... -type f` plus `grep -n`. - Each change is atomic and single-commit-sized: make one change, suggest the commit message, then stop and wait for confirmation. - When in doubt, ask. @@ -163,14 +169,22 @@ Notes: When asked to create a plan: +- Start the plan by referencing this file: + `.github/copilot-instructions.md`. State any deliberate exception to + these instructions in the plan itself. - First gather enough repository context to make the plan concrete. Ask all ambiguous or unclear questions in one concise batch; record unresolved questions in the plan if the user wants it saved before answering them. - Save plans as `docs/dev/plans/.md` (lowercase, - dash-separated, e.g. `docs/dev/plans/background-refactor.md`). Use the - same `` for the implementation branch - (`feature/`). Do not push the branch unless asked. + dash-separated, e.g. `docs/dev/plans/background-refactor.md`). When a + plan implements one ADR, use the same slug as the ADR file; for + example, `docs/dev/adrs/suggestions/foo.md` maps to + `docs/dev/plans/foo.md`. If a plan has no corresponding ADR or spans + multiple ADRs, choose a concise feature slug and list all related ADRs + in the plan. Use the same `` for the implementation + branch (`feature/`). Do not push the branch unless + asked. - Include a status checklist with `[ ]` items; mark `[x]` as completed during implementation. - Apply the two-phase workflow (Phase 1 implementation, Phase 2 @@ -190,6 +204,9 @@ When asked to create a plan: files likely to change, decisions already made, open questions, verification commands for Phase 2, and a short suggested commit message or branch name when useful. +- Before saving a plan, verify that referenced files, directories, + scripts, and task names exist locally when that is practical. If a + referenced tool is optional or missing, include an available fallback. - End every plan with a "Suggested Pull Request" section containing a short PR title and a brief end-user-oriented description. Keep this section non-technical enough for scientists and other users to diff --git a/docs/dev/adrs/suggestions/loop-category-key-identity.md b/docs/dev/adrs/suggestions/loop-category-key-identity.md new file mode 100644 index 00000000..f9324416 --- /dev/null +++ b/docs/dev/adrs/suggestions/loop-category-key-identity.md @@ -0,0 +1,155 @@ +# ADR: Loop Category Keys and Identity Naming + +**Status:** Proposed +**Date:** 2026-05-18 + +## Context + +CIF dictionaries can declare the key column for a loop category through +`_category_key.name`. For example, crystallographic categories commonly +use domain-specific identity tags such as `_atom_site.label`, while +other loop categories may use an explicit `id` tag. + +EasyDiffraction currently models the same runtime concept with +`item._identity.category_entry_name`. `CategoryCollection` uses this +value as the collection key, and category items use it in their +`unique_name` path: + +```text +.. +``` + +The design question is whether the current `category_entry_name` +approach is enough, and how closely Python-facing identity names should +follow CIF key tags. + +## Assessment + +The current approach is directionally good. It gives every loop item a +stable collection key without hard-coding the key field into +`CategoryCollection`, and most current loop categories derive that key +from the same field that is serialized into CIF. + +It is not explicit enough yet. The key field is encoded as a lambda on +each item, not as declarative metadata on the category or descriptor. +Nothing validates that the runtime key corresponds to a serialized CIF +field. The main visible example is `Constraint`: the current collection +key is derived from the left-hand side of `_constraint.expression`, but +no separate `_constraint.id` field is persisted. + +## Decision + +Keep `category_entry_name` as the runtime analogue of CIF +`_category_key.name`, and formalize the rule: + +1. Every concrete `CategoryCollection` should have a documented key + field. +2. The key should normally be a public descriptor on the item. +3. The key descriptor should normally be serialized to CIF. +4. Standard CIF categories should keep CIF names in Python where those + names are already meaningful to scientists. +5. Custom categories should prefer `id` only for opaque machine + identity. Use `label` or `*_id` when the value has clearer domain + meaning. + +The `constraint` category should add an explicit `id` field and use it +as the collection key: + +```text +analysis.constraints[id].id -> _constraint.id +``` + +The existing `lhs_alias` and `rhs_expr` properties should remain derived +helpers from `_constraint.expression`, not the row identity. + +This argues against a blanket Python API change to use `id` everywhere. +For scientists moving between notebooks and saved CIF, `atom_site.label` +in Python and `_atom_site.label` in CIF is less surprising than +`atom_site.id` in Python and `_atom_site.label` in CIF. + +## Loop Category Keys + +Rows are sorted by the chosen Python key style: `id`, then `*_id`, then +`label`. This table lists fields that drive `category_entry_name`; +identifier-like fields that are not collection keys are listed in the +next section. + +| Python key | Area | Collection class | Category code | CIF key tag | Source | Decision | +| --------------- | ---------- | ------------------------------------------- | ------------------------ | ---------------------------- | --------------------------- | ---------------------------------------------------------------------------------------------------- | +| `id` | Analysis | `Constraints` | `constraint` | `_constraint.id` | Custom category | Add this key. Current implementation derives the key from the left side of `_constraint.expression`. | +| `id` | Analysis | `SequentialFitExtractCollection` | `sequential_fit_extract` | `_sequential_fit_extract.id` | Custom category | Keep. It is an explicit row identifier for extraction rules. | +| `id` | Experiment | `LineSegmentBackground` | `background` | `_pd_background.id` | Powder CIF-style category | Keep. The row identity is opaque and already serialized. | +| `id` | Experiment | `ChebyshevPolynomialBackground` | `background` | `_pd_background.id` | Powder CIF-style category | Keep. The row identity is opaque and shared with other background variants. | +| `id` | Experiment | `LinkedPhases` | `linked_phases` | `_pd_phase_block.id` | Powder CIF-style category | Keep. Consider `phase_id` only if the public API later standardizes foreign-key names. | +| `id` | Experiment | `ExcludedRegions` | `excluded_regions` | `_excluded_region.id` | Custom category | Keep. It is a simple custom loop row identifier. | +| `id` | Experiment | `ReflnData` | `refln` | `_refln.id` | CIF-style category | Keep for the current reflection table shape. | +| `id` | Experiment | `PowderCwlReflnData` / `PowderTofReflnData` | `refln` | `_refln.id` | CIF-style category | Keep; `phase_id` remains a separate field, not the row key. | +| `experiment_id` | Analysis | `JointFitCollection` | `joint_fit` | `_joint_fit.experiment_id` | Custom category | Keep. The key is a reference to an experiment, so `id` alone would lose context. | +| `point_id` | Experiment | `PdCwlData` / `PdTofData` | `pd_data` | `_pd_data.point_id` | Powder CIF-style category | Keep. It is clearer than `id` for dense measured/calculated data points. | +| `point_id` | Experiment | `TotalData` | `total_data` | `_pd_data.point_id` | Current powder-data mapping | Keep. Revisit the CIF tag only when total-scattering-specific CIF tags are introduced. | +| `label` | Analysis | `Aliases` | `alias` | `_alias.label` | Custom category | Keep. It is the user-visible symbol referenced by expressions, not an opaque row id. | +| `label` | Structure | `AtomSites` | `atom_site` | `_atom_site.label` | CIF core category | Keep. This is a well-known crystallographic identity field. | +| `label` | Structure | `AtomSiteAnisoCollection` | `atom_site_aniso` | `_atom_site_aniso.label` | CIF core category | Keep. It intentionally matches and references the atom-site label. | + +## Non-Key Identity And Reference Fields + +These fields are serialized in loop rows and look identity-like, but +they do not define the collection key. + +| Python field | Area | Collection class | Category code | CIF tag | Role | +| ------------------- | ---------- | ------------------------------------------- | ------------- | -------------------------- | ------------------------------------------------------------------------------------- | +| `phase_id` | Experiment | `PowderCwlReflnData` / `PowderTofReflnData` | `refln` | `_refln.phase_id` | References the linked phase for a calculated reflection. Row key remains `_refln.id`. | +| `param_unique_name` | Analysis | `Aliases` | `alias` | `_alias.param_unique_name` | References the target parameter. Row key remains `_alias.label`. | + +## Naming Guidance + +Use `label` when the identity is a user-visible, domain-specific symbol. +This applies to atom sites and aliases. A label is not weaker than an +`id` if the category defines it as the key. + +Use `id` when the identity is an opaque row identifier without a richer +domain word. This applies to excluded regions, background terms, +sequential-fit extraction rules, and current reflection rows. + +Use `*_id` when the identity is primarily a reference to another object. +This applies to `experiment_id` and `point_id`. It keeps the public API +clearer than a generic `id` while still expressing uniqueness within the +collection. + +Avoid renaming standard CIF identity fields in Python unless the CIF +name is actively hostile to the user-facing model. Each intentional +Python/CIF mismatch adds translation cost for users who move between +Jupyter, CLI output, and saved CIF files. + +## Implementation Notes + +The current `category_entry_name` mechanism can stay, but it should be +made easier to audit. A future implementation should add metadata that +identifies the key descriptor for each `CategoryCollection`, or at least +tests that the resolved key comes from a serialized descriptor. + +For constraints, add a descriptor-backed `id` property serialized as +`_constraint.id`, and change `category_entry_name` to resolve from that +descriptor. Keep `_constraint.expression` for the full equation. Keep +`lhs_alias` and `rhs_expr` as derived convenience properties. + +When reading older CIF files that only contain `_constraint.expression`, +derive a deterministic fallback `id` from the old `lhs_alias` key, then +write `_constraint.id` on the next save. + +## Consequences + +Keeping CIF names where possible improves notebook-to-CIF continuity and +makes saved files easier to inspect. It also reduces the amount of +documentation needed to explain common crystallographic terms such as +`atom_site.label`. + +Allowing custom categories to use `id`, `label`, or `*_id` means the API +will not be mechanically uniform, but it will be semantically clearer. +Uniformity should come from documented key metadata and predictable +collection behavior, not from forcing every row key to be named `id`. + +## References + +- [COMCIFS `cif_core.dic`](https://github.com/COMCIFS/cif_core/blob/main/cif_core.dic) +- [IUCr Core CIF dictionary browser](https://www.iucr.org/resources/cif/dictionaries/browse/cif_core) diff --git a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md new file mode 100644 index 00000000..6f970a0e --- /dev/null +++ b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md @@ -0,0 +1,314 @@ +# ADR: Python and CIF Category Correspondence + +**Status:** Proposed +**Date:** 2026-05-17 + +## Context + +EasyDiffraction exposes a Python object graph and persists state in CIF +files. The public Python API should be easy for scientists to predict, +while CIF output should remain readable and semantically useful. + +The current public root is `Project`, and project-level configuration is +saved in: + +```text +project.cif +``` + +Inside that file, generic category names such as `_info.*`, +`_rendering.*`, and `_verbosity.*` are less ambiguous than they would be +in a single monolithic CIF file. This opens the option of a strict +one-to-one correspondence for project-owned singleton categories: + +```text +project.info.title -> project.cif: _info.title +project.rendering.engine -> project.cif: _rendering.engine +project.verbosity.level -> project.cif: _verbosity.level +``` + +The design question is whether this rule should be applied only to +project-level configuration, or more broadly across analysis, +experiments, structures, and calculated data. + +## Scope Of Comparison + +The comparison below is category-level and public-API oriented. It lists +all currently persisted Python category surfaces found in the source +code, with complete field sets where the category is small and compact +field groups where a CIF loop has many repeated parameters. + +Shorthand names such as `analysis`, `experiment`, and `structure` refer +to objects reached from the current `Project` root, for example +`project.analysis`, `project.experiments[name]`, and +`project.structures[name]`. + +## Current Persistence Layout + +| Current Python surface | Current saved location | Current CIF block form | Notes | +| ----------------------------------- | ------------------------ | ---------------------- | ----------------------------------------------------------------------------------- | +| `project.info`, `project.rendering` | `project.cif` | bare categories | Project-level singleton config. | +| `project.verbosity` | not persisted | none | Runtime-only string property backed by `VerbosityEnum`; no `_verbosity` category. | +| `project.structures[name]` | `structures/.cif` | `data_` | Each structure is one CIF data block. | +| `project.experiments[name]` | `experiments/.cif` | `data_` | Each experiment is one CIF data block. | +| `project.analysis` | `analysis/analysis.cif` | bare categories | Loader also accepts legacy root-level `analysis.cif`. | +| `project.summary` | `summary.cif` | placeholder text | Summary persistence exists as a file but `summary_to_cif()` is not implemented yet. | + +## Current Correspondence + +### Project-Level Configuration + +| Current Python path | Current CIF path | Match? | Notes | +| -------------------------------- | ------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- | +| `project.info.name` | `_project.id` | No | Python uses user-facing `name`; CIF uses `id`; category is `info` in Python but `_project` in CIF. | +| `project.info.title` | `_project.title` | Partly | Field name matches, category name does not. | +| `project.info.description` | `_project.description` | Partly | Field name matches, category name does not. | +| `project.info.created` | `_project.created` | Partly | Field name matches, category name does not. | +| `project.info.last_modified` | `_project.last_modified` | Partly | Field name matches, category name does not. | +| `project.info.path` | none | No | Runtime storage path, not a CIF field. | +| `project.rendering.chart_engine` | `_rendering.chart_engine` | Yes | Direct category and field mapping. | +| `project.rendering.table_engine` | `_rendering.table_engine` | Yes | Direct category and field mapping. | +| `project.verbosity` | none | No | Runtime-only string convenience property; current code has no `project.verbosity.level` category and no `_verbosity.level` tag. | + +### Analysis Configuration + +| Current Python path | Current CIF path | Match? | Notes | +| ------------------------------------------------- | ---------------------------------- | ------ | ------------------------------------------------------------------------------------------------ | +| `analysis.fitting.minimizer_type` | `_fitting.minimizer_type` | Yes | Direct category mapping. | +| `analysis.fitting_mode_type` | `_fitting.mode_type` | No | Public selector is owner-level state serialized into the `_fitting` category. | +| `analysis.joint_fit[experiment_id].experiment_id` | `_joint_fit.experiment_id` | Yes | Collection key is also stored as a field. | +| `analysis.joint_fit[experiment_id].weight` | `_joint_fit.weight` | Yes | Direct field mapping. | +| `analysis.sequential_fit.data_dir` | `_sequential_fit.data_dir` | Yes | Direct category mapping. | +| `analysis.sequential_fit.file_pattern` | `_sequential_fit.file_pattern` | Yes | Direct category mapping. | +| `analysis.sequential_fit.max_workers` | `_sequential_fit.max_workers` | Yes | Direct category mapping. | +| `analysis.sequential_fit.chunk_size` | `_sequential_fit.chunk_size` | Yes | Direct category mapping. | +| `analysis.sequential_fit.reverse` | `_sequential_fit.reverse` | Yes | Direct category mapping. | +| `analysis.sequential_fit_extract[id].id` | `_sequential_fit_extract.id` | Yes | Direct collection mapping. | +| `analysis.sequential_fit_extract[id].target` | `_sequential_fit_extract.target` | Yes | Direct collection mapping. | +| `analysis.sequential_fit_extract[id].pattern` | `_sequential_fit_extract.pattern` | Yes | Direct collection mapping. | +| `analysis.sequential_fit_extract[id].required` | `_sequential_fit_extract.required` | Yes | Direct collection mapping. | +| `analysis.aliases[label].label` | `_alias.label` | Partly | Python collection is plural; CIF row category is singular. | +| `analysis.aliases[label].param_unique_name` | `_alias.param_unique_name` | Partly | Python collection is plural; CIF row category is singular. | +| `analysis.constraints[lhs_alias].expression` | `_constraint.expression` | Partly | Collection key is derived from the expression; there is no separate `_constraint.lhs_alias` tag. | + +### Experiment Configuration + +| Current Python path | Current CIF path | Match? | Notes | +| --------------------------------------------- | ---------------------------------------------------------------------------------- | ------ | ------------------------------------------------------------------------------------ | +| `experiment.type.sample_form` | `_expt_type.sample_form` | Partly | Python uses the user-facing word `type`; CIF uses abbreviated `_expt_type`. | +| `experiment.type.beam_mode` | `_expt_type.beam_mode` | Partly | Python uses the user-facing word `type`; CIF uses abbreviated `_expt_type`. | +| `experiment.type.radiation_probe` | `_expt_type.radiation_probe` | Partly | Python uses the user-facing word `type`; CIF uses abbreviated `_expt_type`. | +| `experiment.type.scattering_type` | `_expt_type.scattering_type` | Partly | Python uses the user-facing word `type`; CIF uses abbreviated `_expt_type`. | +| `experiment.calculation.calculator_type` | `_calculation.calculator_type` | Yes | Direct category mapping. | +| `experiment.diffrn.ambient_temperature` | `_diffrn.ambient_temperature` | Yes | Direct category mapping. | +| `experiment.diffrn.ambient_pressure` | `_diffrn.ambient_pressure` | Yes | Direct category mapping. | +| `experiment.diffrn.ambient_magnetic_field` | `_diffrn.ambient_magnetic_field` | Yes | Direct category mapping. | +| `experiment.diffrn.ambient_electric_field` | `_diffrn.ambient_electric_field` | Yes | Direct category mapping. | +| `experiment.instrument.setup_wavelength` | `_instr.wavelength` | Partly | Python name exposes setup role; CIF tag uses compact instrument name. | +| `experiment.instrument.calib_twotheta_offset` | `_instr.2theta_offset` | Partly | Python name exposes calibration role; CIF tag uses compact instrument name. | +| `experiment.instrument.setup_twotheta_bank` | `_instr.2theta_bank` | Partly | Python name exposes setup role; CIF tag uses compact instrument name. | +| `experiment.instrument.calib_d_to_tof_offset` | `_instr.d_to_tof_offset` | Partly | Python name exposes calibration role; CIF tag uses compact instrument name. | +| `experiment.instrument.calib_d_to_tof_linear` | `_instr.d_to_tof_linear` | Partly | Python name exposes calibration role; CIF tag uses compact instrument name. | +| `experiment.instrument.calib_d_to_tof_quad` | `_instr.d_to_tof_quad` | Partly | Python name exposes calibration role; CIF tag uses compact instrument name. | +| `experiment.instrument.calib_d_to_tof_recip` | `_instr.d_to_tof_recip` | Partly | Python name exposes calibration role; CIF tag uses compact instrument name. | +| `experiment.peak.profile_type` | `_peak.profile_type` | Yes | The active peak category stores its own type tag. | +| `experiment.peak_profile_type` | `_peak.profile_type` | No | Public selector is an owner-level convenience alias. | +| `experiment.peak.broad_gauss_u` | `_peak.broad_gauss_u` | Yes | CWL peak field. | +| `experiment.peak.broad_gauss_v` | `_peak.broad_gauss_v` | Yes | CWL peak field. | +| `experiment.peak.broad_gauss_w` | `_peak.broad_gauss_w` | Yes | CWL peak field. | +| `experiment.peak.broad_lorentz_x` | `_peak.broad_lorentz_x` | Yes | CWL peak field. | +| `experiment.peak.broad_lorentz_y` | `_peak.broad_lorentz_y` | Yes | CWL peak field. | +| `experiment.peak.asym_empir_1..4` | `_peak.asym_empir_1..4` | Yes | CWL peak field group. | +| `experiment.peak.asym_fcj_1..2` | `_peak.asym_fcj_1..2` | Yes | CWL peak field group. | +| `experiment.peak.broad_gauss_sigma_0..2` | `_peak.gauss_sigma_0..2` | Partly | Python prefixes the family with `broad_`; CIF tags omit that grouping prefix. | +| `experiment.peak.broad_lorentz_gamma_0..2` | `_peak.lorentz_gamma_0..2` | Partly | Python prefixes the family with `broad_`; CIF tags omit that grouping prefix. | +| `experiment.peak.exp_rise_alpha_0..1` | `_peak.rise_alpha_0..1` | Partly | Python prefixes the family with `exp_`; CIF tags omit that grouping prefix. | +| `experiment.peak.exp_decay_beta_0..1` | `_peak.decay_beta_0..1` | Partly | Python prefixes the family with `exp_`; CIF tags omit that grouping prefix. | +| `experiment.peak.dexp_*` | `_peak.dexp_*` | Yes | TOF double-exponential peak field group. | +| `experiment.peak.damp_q` | `_peak.damp_q` | Yes | Total-scattering peak field. | +| `experiment.peak.broad_q` | `_peak.broad_q` | Yes | Total-scattering peak field. | +| `experiment.peak.cutoff_q` | `_peak.cutoff_q` | Yes | Total-scattering peak field. | +| `experiment.peak.sharp_delta_1` | `_peak.sharp_delta_1` | Yes | Total-scattering peak field. | +| `experiment.peak.sharp_delta_2` | `_peak.sharp_delta_2` | Yes | Total-scattering peak field. | +| `experiment.peak.damp_particle_diameter` | `_peak.damp_particle_diameter` | Yes | Total-scattering peak field. | +| `experiment.background[id].id` line segment | `_pd_background.id` | Partly | Python category is `background`; CIF uses powder-background category. | +| `experiment.background[id].x` line segment | `_pd_background.line_segment_X` or `_pd_background_line_segment_X` | Partly | Python uses compact `x`; CIF tag encodes powder-background line-segment meaning. | +| `experiment.background[id].y` line segment | `_pd_background.line_segment_intensity` or `_pd_background_line_segment_intensity` | Partly | Python uses compact `y`; CIF tag encodes powder-background line-segment meaning. | +| `experiment.background[id].id` Chebyshev | `_pd_background.id` | Partly | Python category is `background`; CIF uses powder-background category. | +| `experiment.background[id].order` Chebyshev | `_pd_background.Chebyshev_order` | Partly | CIF tag encodes polynomial type and uses CIF-style capitalization. | +| `experiment.background[id].coef` Chebyshev | `_pd_background.Chebyshev_coef` | Partly | CIF tag encodes polynomial type and uses CIF-style capitalization. | +| `experiment.background_type` | implied by active background category | No | There is no standalone selector tag. | +| `experiment.extinction.model` | `_extinction.model` | Yes | Direct category mapping. | +| `experiment.extinction.mosaicity` | `_extinction.mosaicity` | Yes | Direct category mapping. | +| `experiment.extinction.radius` | `_extinction.radius` | Yes | Direct category mapping. | +| `experiment.extinction_type` | `_extinction.model` | Partly | Public selector chooses the category; persisted model tag lives inside the category. | +| `experiment.linked_phases[id].id` | `_pd_phase_block.id` | Partly | Python name is user-facing; CIF tag follows powder phase-block convention. | +| `experiment.linked_phases[id].scale` | `_pd_phase_block.scale` | Partly | Python name is user-facing; CIF tag follows powder phase-block convention. | +| `experiment.linked_crystal.id` | `_sc_crystal_block.id` | Partly | Python name is user-facing; CIF tag follows single-crystal block convention. | +| `experiment.linked_crystal.scale` | `_sc_crystal_block.scale` | Partly | Python name is user-facing; CIF tag follows single-crystal block convention. | +| `experiment.excluded_regions[id].id` | `_excluded_region.id` | Partly | Python collection is plural; CIF row category is singular. | +| `experiment.excluded_regions[id].start` | `_excluded_region.start` | Partly | Python collection is plural; CIF row category is singular. | +| `experiment.excluded_regions[id].end` | `_excluded_region.end` | Partly | Python collection is plural; CIF row category is singular. | + +### Experiment Data And Calculated Results + +| Current Python path | Current CIF path | Match? | Notes | +| ---------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | ------ | -------------------------------------------------------------------- | +| `experiment.data[point_id].point_id` | `_pd_data.point_id` | Partly | Python row key maps to a powder-data CIF tag. | +| `experiment.data[point_id].d_spacing` Bragg powder | `_pd_proc.d_spacing` | Partly | Python name is analysis-oriented; CIF tag is processed powder data. | +| `experiment.data[point_id].intensity_meas` Bragg powder | `_pd_meas.intensity_total` or `_pd_proc.intensity_norm` | Partly | CIF can use measured or processed intensity tags. | +| `experiment.data[point_id].intensity_meas_su` Bragg powder | `_pd_meas.intensity_total_su` or `_pd_proc.intensity_norm_su` | Partly | CIF can use measured or processed uncertainty tags. | +| `experiment.data[point_id].intensity_calc` Bragg powder | `_pd_calc.intensity_total` | Partly | Python name is analysis-oriented; CIF tag is calculated powder data. | +| `experiment.data[point_id].intensity_bkg` Bragg powder | `_pd_calc.intensity_bkg` | Partly | Python name is analysis-oriented; CIF tag is calculated background. | +| `experiment.data[point_id].calc_status` Bragg powder | `_pd_data.refinement_status` | Partly | Python says calculation status; CIF tag says refinement status. | +| `experiment.data[point_id].two_theta` Bragg powder | `_pd_proc.2theta_scan` or `_pd_meas.2theta_scan` | Partly | CIF can use processed or measured x-coordinate tags. | +| `experiment.data[point_id].time_of_flight` Bragg powder | `_pd_meas.time_of_flight` | Partly | CIF tag is measurement-specific. | +| `experiment.data[point_id].r` total powder | `_pd_proc.r` | Partly | Current code comments say PDF-specific CIF names are still needed. | +| `experiment.data[point_id].g_r_meas` total powder | `_pd_meas.intensity_total` | Partly | Current code comments say PDF-specific CIF names are still needed. | +| `experiment.data[point_id].g_r_meas_su` total powder | `_pd_meas.intensity_total_su` | Partly | Current code comments say PDF-specific CIF names are still needed. | +| `experiment.data[point_id].g_r_calc` total powder | `_pd_calc.intensity_total` | Partly | Current code comments say PDF-specific CIF names are still needed. | +| `experiment.data[point_id].calc_status` total powder | `_pd_data.refinement_status` | Partly | Current code comments say PDF-specific CIF names are still needed. | +| `experiment.refln[id]` single crystal | `_refln.{id,d_spacing,sin_theta_over_lambda,index_h,index_k,index_l,intensity_meas,intensity_meas_su,intensity_calc,wavelength}` | Yes | Direct reflection-loop mapping. | +| `experiment.refln[id]` powder calculated reflections | `_refln.{id,phase_id,d_spacing,sin_theta_over_lambda,index_h,index_k,index_l,f_calc,f_squared_calc,two_theta,time_of_flight}` | Yes | Direct calculated-reflection loop mapping. | + +### Structure Configuration + +| Current Python path | Current CIF path | Match? | Notes | +| ------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------ | --------------------------------------------------------------------------------- | +| `structure.cell.length_a` | `_cell.length_a` | Yes | Direct category mapping. | +| `structure.cell.length_b` | `_cell.length_b` | Yes | Direct category mapping. | +| `structure.cell.length_c` | `_cell.length_c` | Yes | Direct category mapping. | +| `structure.cell.angle_alpha` | `_cell.angle_alpha` | Yes | Direct category mapping. | +| `structure.cell.angle_beta` | `_cell.angle_beta` | Yes | Direct category mapping. | +| `structure.cell.angle_gamma` | `_cell.angle_gamma` | Yes | Direct category mapping. | +| `structure.space_group.name_h_m` | `_space_group.name_H-M_alt`, `_space_group_name_H-M_alt`, `_symmetry.space_group_name_H-M`, or `_symmetry_space_group_name_H-M` | Partly | CIF naming follows crystallographic conventions and supports legacy alternatives. | +| `structure.space_group.it_coordinate_system_code` | `_space_group.IT_coordinate_system_code`, `_space_group_IT_coordinate_system_code`, `_symmetry.IT_coordinate_system_code`, or `_symmetry_IT_coordinate_system_code` | Partly | CIF naming follows crystallographic conventions and supports legacy alternatives. | +| `structure.atom_sites[label].label` | `_atom_site.label` | Yes | Direct row-field mapping. | +| `structure.atom_sites[label].type_symbol` | `_atom_site.type_symbol` | Yes | Direct row-field mapping. | +| `structure.atom_sites[label].fract_x` | `_atom_site.fract_x` | Yes | Direct row-field mapping. | +| `structure.atom_sites[label].fract_y` | `_atom_site.fract_y` | Yes | Direct row-field mapping. | +| `structure.atom_sites[label].fract_z` | `_atom_site.fract_z` | Yes | Direct row-field mapping. | +| `structure.atom_sites[label].wyckoff_letter` | `_atom_site.Wyckoff_letter` or `_atom_site.Wyckoff_symbol` | Partly | CIF uses capitalized/legacy Wyckoff tags. | +| `structure.atom_sites[label].occupancy` | `_atom_site.occupancy` | Yes | Direct row-field mapping. | +| `structure.atom_sites[label].adp_iso` | `_atom_site.B_iso_or_equiv` or `_atom_site.U_iso_or_equiv` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | +| `structure.atom_sites[label].adp_type` | `_atom_site.adp_type` | Yes | Direct row-field mapping. | +| `structure.atom_site_aniso[label].label` | `_atom_site_aniso.label` | Yes | Direct row-field mapping. | +| `structure.atom_site_aniso[label].adp_11` | `_atom_site_aniso.B_11` or `_atom_site_aniso.U_11` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | +| `structure.atom_site_aniso[label].adp_22` | `_atom_site_aniso.B_22` or `_atom_site_aniso.U_22` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | +| `structure.atom_site_aniso[label].adp_33` | `_atom_site_aniso.B_33` or `_atom_site_aniso.U_33` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | +| `structure.atom_site_aniso[label].adp_12` | `_atom_site_aniso.B_12` or `_atom_site_aniso.U_12` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | +| `structure.atom_site_aniso[label].adp_13` | `_atom_site_aniso.B_13` or `_atom_site_aniso.U_13` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | +| `structure.atom_site_aniso[label].adp_23` | `_atom_site_aniso.B_23` or `_atom_site_aniso.U_23` | No | Python uses type-neutral ADP name; CIF uses B/U-specific tags. | + +### Not Yet Mapped + +| Current Python path | Current CIF status | Notes | +| ------------------- | --------------------- | -------------------------------------------- | +| `project.summary` | placeholder text only | `summary_to_cif()` currently returns a stub. | + +## Decision To Discuss + +Adopt a scoped one-to-one rule for project-level configuration: + +```text +project.. -> project.cif: _. +``` + +This ADR does not propose renaming the public root object. The current +root object is already `Project`; the proposal is about category and tag +correspondence inside project-owned singleton configuration. + +Target project-level mappings if the current Python names are kept: + +| Python path | Target CIF path | Current state | +| -------------------------------- | ------------------------- | ------------------------------------------ | +| `project.info.name` | `_info.name` | Currently `_project.id`. | +| `project.info.title` | `_info.title` | Currently `_project.title`. | +| `project.info.description` | `_info.description` | Currently `_project.description`. | +| `project.info.created` | `_info.created` | Currently `_project.created`. | +| `project.info.last_modified` | `_info.last_modified` | Currently `_project.last_modified`. | +| `project.rendering.chart_engine` | `_rendering.chart_engine` | Already matches. | +| `project.rendering.table_engine` | `_rendering.table_engine` | Already matches. | +| `project.verbosity.level` | `_verbosity.level` | Currently no persisted verbosity category. | + +Alternative target if the project identity field should be called `id` +rather than `name`: + +```text +project.info.id -> _info.id +``` + +Do not force strict one-to-one correspondence globally where CIF-domain +names are clearer or where the Python API intentionally abstracts over +CIF details. + +## Rationale + +### Project-Level Categories Are Repository-Owned + +Project-level configuration categories are not external crystallographic +CIF categories. They are EasyDiffraction project-file categories, so the +repository can optimize them for API/persistence symmetry. + +### `project.cif` Scopes Generic Categories + +`_info.title` is generic in isolation, but inside `project.cif` it reads +as project information. This is similar to `_verbosity.level`: the file +scope tells the reader this is project-level verbosity. + +### The Current `Project` Root Already Matches User Language + +The current public root object is already `Project`. Keeping it avoids a +user-facing `workspace.project.*` nesting and aligns with scientific +workflows where a project is the container for structures, experiments, +analysis, and saved files. + +### Scientific CIF/Domain Categories Should Stay Domain-Oriented + +For structures, experiments, measured data, and calculated results, many +CIF names are standard or domain-specific. Exact Python mirroring would +make those tags less meaningful to CIF readers and could weaken +compatibility with scientific conventions. + +### Some Python Names Are Deliberate Abstractions + +Type-neutral ADP parameters, owner-level switchable selectors, +active-sibling selectors, and analysis-friendly data names intentionally +do not mirror individual CIF fields. These should remain exceptions +unless a separate ADR changes the underlying API pattern. + +## Consequences + +### Positive + +- Project-level configuration becomes easier to explain and inspect. +- Users can predict project-level CIF tags from Python paths. +- The decision can focus on project-owned singleton config without + forcing scientific CIF categories to mirror Python convenience names. + +### Trade-Offs + +- `_info.*` is less self-describing if copied out of `project.cif`. +- Existing `_project.*` project files would need migration or a + deliberate compatibility decision. +- If verbosity is persisted, `project.verbosity` would either need to + become a category object or remain as a convenience alias for a new + `project.verbosity.level` field. +- Collapsing rendering to `project.rendering.engine` would simplify the + API, but only if chart and table renderers are intended to share one + backend choice. + +## Open Questions + +- Should the project identity remain `project.info.name`, or should it + become `project.info.id` to mirror the saved identifier field? +- Should project metadata move from `_project.*` to `_info.*`, or is + `_project.*` clearer even inside `project.cif`? +- Should `project.rendering.chart_engine` and + `project.rendering.table_engine` remain separate, or should the public + API and CIF collapse to one `engine` field? +- Should `_project.*` be accepted as a read-only legacy fallback when + loading older saved projects? +- Should `project.verbosity = 'short'` remain as a convenience alias for + `project.verbosity.level = 'short'`, or should strict correspondence + remove the alias? diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md new file mode 100644 index 00000000..ff435823 --- /dev/null +++ b/docs/dev/plans/loop-category-key-identity.md @@ -0,0 +1,620 @@ +# Loop Category Key Identity Implementation Plan + +## Status + +Workflow instructions: + +```text +.github/copilot-instructions.md +``` + +Related ADR suggestion: + +```text +docs/dev/adrs/suggestions/loop-category-key-identity.md +``` + +This plan implements two related changes: + +1. Move category identity declarations from per-instance assignments to + class-level declarations. +2. Add an explicit persisted `id` field to the constraints loop. + +Status checklist. Mark `[x]` only while implementing: + +```text +Phase 1 - Implementation +[ ] Add class-level identity declarations to CategoryItem. +[ ] Teach Identity to resolve declared category entry names. +[ ] Rebuild collection indexes after CIF loop loading. +[ ] Add _category_code to all current CategoryItem subclasses. +[ ] Add _category_entry_name to all current loop CategoryItem subclasses. +[ ] Remove direct self._identity.category_code assignments. +[ ] Remove direct self._identity.category_entry_name lambda assignments. +[ ] Add Constraint.id descriptor serialized as _constraint.id. +[ ] Change constraints collection keys from lhs_alias to id. +[ ] Preserve default constraints.create(expression=...) behavior by using lhs_alias as the default id. +[ ] Add backward-compatible loading for old CIF loops without _constraint.id. +[ ] Update constraints display. +[ ] Update loop-category-key-identity.md if implementation details differ from the ADR. +[ ] Phase 1 review gate: present diff for approval. + +Phase 2 - Verification +[ ] Add tests for the base declarative identity behavior. +[ ] Add parametrized tests for current loop category identity declarations. +[ ] Update constraints tests. +[ ] Update existing round-trip tests that compare constraints CIF. +[ ] Run formatting. +[ ] Run targeted unit tests. +[ ] Run broader checks. +``` + +## Commit Discipline + +When an AI agent follows this plan, every completed Phase 1 +implementation step must be staged with explicit paths and committed +locally before moving to the next implementation step or to the Phase 1 +review gate. + +Follow the **Commits** section of `.github/copilot-instructions.md`. + +Rules: + +- One commit per implementation step. +- Keep each commit atomic and single-purpose. +- Stage explicit paths only. Do not use `git add .`. +- Do not stage unrelated user changes. +- Do not stage generated artifacts unless the user explicitly asks. +- If a serious uncovered design issue appears, stop and ask before + continuing. + +Suggested branch: + +```text +feature/loop-category-key-identity +``` + +Suggested commit messages: + +```text +Add declarative category identity resolution +Declare category identities on current items +Persist explicit constraint identifiers +Update ADR for declarative category identity +Add declarative category identity tests +``` + +## Goal + +Replace repeated constructor code like this: + +```python +self._identity.category_code = 'atom_site' +self._identity.category_entry_name = lambda: str(self.label.value) +``` + +with class-level declarations: + +```python +class AtomSite(CategoryItem): + _category_code = 'atom_site' + _category_entry_name = 'label' +``` + +The name `_category_entry_name` is intentionally kept because this is +the preferred project terminology. In this plan it means "the name of +the item attribute used to resolve the entry name". The resolved entry +value is still obtained through: + +```python +item._identity.category_entry_name +``` + +For example, `AtomSite._category_entry_name == 'label'`, while +`atom_site._identity.category_entry_name == 'Ba1'`. + +## Non-Goals + +Do not change these in this migration: + +- Do not rename `Identity.category_entry_name`. +- Do not rename `CategoryCollection`. +- Do not change public collection access syntax. +- Do not change CIF tags except adding `_constraint.id`. +- Do not rename `label` to `id` for atom sites or aliases. +- Do not make `phase_id` the row key for powder reflection loops. + +## Design + +This plan intentionally uses a narrow metadata lookup based on +`_category_entry_name`. That is an allowed exception to the general "no +string-based dispatch" rule because the attribute name is a class-level +declaration, not user input, and resolution is centralized in +`CategoryItem`. + +### CategoryItem Declarations + +Add these class attributes to `CategoryItem` in +`src/easydiffraction/core/category.py`: + +```python +class CategoryItem(GuardedBase): + _category_code: str | None = None + _category_entry_name: str | None = None +``` + +Update `CategoryItem.__init__()` so it assigns `_category_code` once: + +```python +def __init__(self) -> None: + super().__init__() + if self._category_code is not None: + self._identity.category_code = self._category_code +``` + +Do not try to resolve `_category_entry_name` in `CategoryItem.__init__`. +Many item classes create descriptors after `super().__init__()` returns, +and some mixin-based classes run `CategoryItem.__init__()` before their +descriptors are created. + +Add a resolver method to `CategoryItem`: + +```python +def _resolve_category_entry_name(self) -> str | None: + attr_name = self._category_entry_name + if attr_name is None: + return None + + value = getattr(self, attr_name) + if isinstance(value, GenericDescriptorBase): + value = value.value + return str(value) +``` + +Import `GenericDescriptorBase` from `easydiffraction.core.variable` in +`category.py` if it is not already in scope. + +### Identity Resolution + +Update `Identity._resolve_up()` in +`src/easydiffraction/core/identity.py` so that category entries can be +resolved from the owning object before walking to the parent. + +Add this logic after checking direct callable/string values and before +climbing to the parent: + +```python +if attr == 'category_entry': + resolver = getattr(self._owner, '_resolve_category_entry_name', None) + if callable(resolver): + resolved = resolver() + if resolved is not None: + return resolved +``` + +Keep the existing `category_entry_name` setter. It remains useful as an +escape hatch and keeps old code compatible during migration. + +### Collection Index Rebuild After CIF Loading + +Update `category_collection_from_cif()` in +`src/easydiffraction/io/cif/serialize.py`. + +Currently `_adopt_items()` rebuilds the index before loop values are +loaded into each item. After declarative keys are resolved from +descriptor values, the index must be rebuilt after parameters are +loaded. + +After the row population loop, run any collection hook and rebuild: + +```python +after_from_cif = getattr(self, '_after_from_cif', None) +if callable(after_from_cif): + after_from_cif() + +self._rebuild_index() +``` + +The hook is needed for constraints to backfill missing ids from old CIF +files. + +## Category Migration Table + +Add `_category_code` to each listed class. Add `_category_entry_name` +only when the class is a loop item and currently has a collection key. +Then remove matching constructor assignments. + +| File | Class | `_category_code` | `_category_entry_name` | Notes | +| ----------------------------------------------------------------------------------- | -------------------------- | ------------------------ | ---------------------- | -------------------------------------------- | +| `src/easydiffraction/project/categories/info/default.py` | `ProjectInfo` | `project` | none | Singleton category. | +| `src/easydiffraction/project/categories/rendering/default.py` | `Rendering` | `rendering` | none | Singleton category. | +| `src/easydiffraction/analysis/categories/fitting/default.py` | `Fitting` | `fitting` | none | Singleton category. | +| `src/easydiffraction/analysis/categories/sequential_fit/default.py` | `SequentialFit` | `sequential_fit` | none | Singleton category. | +| `src/easydiffraction/analysis/categories/aliases/default.py` | `Alias` | `alias` | `label` | Loop key stays `_alias.label`. | +| `src/easydiffraction/analysis/categories/constraints/default.py` | `Constraint` | `constraint` | `id` | Add the id descriptor first. | +| `src/easydiffraction/analysis/categories/joint_fit/default.py` | `JointFitItem` | `joint_fit` | `experiment_id` | Loop key stays `_joint_fit.experiment_id`. | +| `src/easydiffraction/analysis/categories/sequential_fit_extract/default.py` | `SequentialFitExtractItem` | `sequential_fit_extract` | `id` | Loop key stays `_sequential_fit_extract.id`. | +| `src/easydiffraction/datablocks/structure/categories/cell/default.py` | `Cell` | `cell` | none | Singleton category. | +| `src/easydiffraction/datablocks/structure/categories/space_group/default.py` | `SpaceGroup` | `space_group` | none | Singleton category. | +| `src/easydiffraction/datablocks/structure/categories/atom_sites/default.py` | `AtomSite` | `atom_site` | `label` | Loop key stays `_atom_site.label`. | +| `src/easydiffraction/datablocks/structure/categories/atom_site_aniso/default.py` | `AtomSiteAniso` | `atom_site_aniso` | `label` | Loop key stays `_atom_site_aniso.label`. | +| `src/easydiffraction/datablocks/experiment/categories/experiment_type/default.py` | `ExperimentType` | `expt_type` | none | Singleton category. | +| `src/easydiffraction/datablocks/experiment/categories/calculation/default.py` | `Calculation` | `calculation` | none | Singleton category. | +| `src/easydiffraction/datablocks/experiment/categories/diffrn/default.py` | `DefaultDiffrn` | `diffrn` | none | Singleton category. | +| `src/easydiffraction/datablocks/experiment/categories/instrument/base.py` | `InstrumentBase` | `instrument` | none | Subclasses inherit this code. | +| `src/easydiffraction/datablocks/experiment/categories/peak/base.py` | `PeakBase` | `peak` | none | Subclasses inherit this code. | +| `src/easydiffraction/datablocks/experiment/categories/extinction/becker_coppens.py` | `BeckerCoppensExtinction` | `extinction` | none | Singleton category. | +| `src/easydiffraction/datablocks/experiment/categories/linked_crystal/default.py` | `LinkedCrystal` | `linked_crystal` | none | Singleton category. | +| `src/easydiffraction/datablocks/experiment/categories/linked_phases/default.py` | `LinkedPhase` | `linked_phases` | `id` | Loop key stays `_pd_phase_block.id`. | +| `src/easydiffraction/datablocks/experiment/categories/background/line_segment.py` | `LineSegment` | `background` | `id` | Loop key stays `_pd_background.id`. | +| `src/easydiffraction/datablocks/experiment/categories/background/chebyshev.py` | `PolynomialTerm` | `background` | `id` | Loop key stays `_pd_background.id`. | +| `src/easydiffraction/datablocks/experiment/categories/excluded_regions/default.py` | `ExcludedRegion` | `excluded_regions` | `id` | Loop key stays `_excluded_region.id`. | +| `src/easydiffraction/datablocks/experiment/categories/refln/bragg_sc.py` | `Refln` | `refln` | `id` | Powder reflection rows inherit this key. | +| `src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py` | `PdCwlDataPoint` | `pd_data` | `point_id` | CWL powder data row. | +| `src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py` | `PdTofDataPoint` | `pd_data` | `point_id` | TOF powder data row. | +| `src/easydiffraction/datablocks/experiment/categories/data/total_pd.py` | `TotalDataPoint` | `total_data` | `point_id` | Total-scattering data row. | + +Do not add `_category_code` or `_category_entry_name` to +`PowderReflnBase`, `PowderCwlRefln`, or `PowderTofRefln`; they inherit +the `refln` identity declarations from `Refln`. + +## Constraints Migration + +### Target Shape + +`Constraint` should become: + +```python +class Constraint(CategoryItem): + _category_code = 'constraint' + _category_entry_name = 'id' + + def __init__(self) -> None: + super().__init__() + + self._id = StringDescriptor( + name='id', + description='Identifier for this constraint.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_]*$'), + ), + cif_handler=CifHandler(names=['_constraint.id']), + ) + self._expression = StringDescriptor(...) +``` + +Define the public property: + +```python +@property +def id(self) -> StringDescriptor: + """Identifier for this constraint.""" + return self._id + +@id.setter +def id(self, value: str) -> None: + self._id.value = value +``` + +Keep `lhs_alias` and `rhs_expr` as derived read-only properties. + +### Create API + +Change `Constraints.create()` from: + +```python +def create(self, *, expression: str) -> None: +``` + +to: + +```python +def create(self, *, expression: str, id: str | None = None) -> None: +``` + +Implementation order: + +```python +item = Constraint() +item.expression = expression +item.id = id if id is not None else item.lhs_alias +self.add(item) +self._enabled = True +``` + +This preserves the current default user experience: + +```python +analysis.constraints.create(expression='biso_Ba = biso_La') +analysis.constraints['biso_Ba'] +``` + +It also allows explicit ids: + +```python +analysis.constraints.create( + id='constraint_1', + expression='biso_Ba = biso_La', +) +analysis.constraints['constraint_1'] +``` + +### Backward-Compatible CIF Loading + +Old CIF files only contain: + +```cif +loop_ +_constraint.expression +biso_Ba = biso_La +``` + +New CIF files should contain: + +```cif +loop_ +_constraint.id +_constraint.expression +biso_Ba "biso_Ba = biso_La" +``` + +Add a hook on `Constraints`: + +```python +def _after_from_cif(self) -> None: + for index, item in enumerate(self._items, start=1): + if item.id.value in {'', '_'}: + fallback = item.lhs_alias or f'constraint_{index}' + item.id = fallback +``` + +The generic `category_collection_from_cif()` hook described above must +call this before rebuilding the index. + +### Constraints Display + +Update `Constraints.show()` to include the id: + +```text +id | expression +``` + +Keep the existing empty warning behavior. + +## Required Code Searches + +After migration, these searches should return no category-item +constructor assignments: + +```shell +git grep -n -E "self\\._identity\\.category_code =" -- src/easydiffraction +git grep -n -E "self\\._identity\\.category_entry_name =" -- src/easydiffraction +git grep -n -E "category_entry_name = lambda" -- src/easydiffraction +``` + +The following reads are expected to remain: + +```shell +git grep -n "_identity\\.category_entry_name" -- src/easydiffraction +``` + +Those reads are used by collections, display, reporting, and parameter +unique names. + +Do not use `rg` in this plan; it is not available in every contributor +environment. Use `git grep` for repository searches. + +## Tests To Add Or Update + +### Base Identity Tests + +Add or update tests under `tests/unit/easydiffraction/core/`. + +Test a fake category item: + +```python +class FakeItem(CategoryItem): + _category_code = 'fake' + _category_entry_name = 'id' + + def __init__(self): + super().__init__() + self._id = StringDescriptor(...) + + @property + def id(self): + return self._id +``` + +Assert: + +```python +item._identity.category_code == 'fake' +item._identity.category_entry_name == item.id.value +item.id.unique_name.endswith('.fake..id') +``` + +Also test that a singleton-like item with `_category_code` and no +`_category_entry_name` resolves category code but returns no entry name. + +### Current Category Parametrized Tests + +Add a parametrized test that instantiates representative loop item +classes and verifies category code plus entry: + +```text +Alias -> alias, label +JointFitItem -> joint_fit, experiment_id +SequentialFitExtractItem -> sequential_fit_extract, id +AtomSite -> atom_site, label +AtomSiteAniso -> atom_site_aniso, label +LinkedPhase -> linked_phases, id +LineSegment -> background, id +PolynomialTerm -> background, id +ExcludedRegion -> excluded_regions, id +Refln -> refln, id +PdCwlDataPoint -> pd_data, point_id +PdTofDataPoint -> pd_data, point_id +TotalDataPoint -> total_data, point_id +``` + +Do not instantiate heavy calculator-backed owner objects for this test; +instantiate item classes directly. + +### Constraints Tests + +Update +`tests/unit/easydiffraction/analysis/categories/test_constraints.py`. + +Required assertions: + +```python +c = Constraint() +c.expression = 'a = b + c' +c.id = 'constraint_a' +assert c.id.value == 'constraint_a' +assert c.lhs_alias == 'a' +assert c.rhs_expr == 'b + c' +assert c._identity.category_entry_name == 'constraint_a' +``` + +Default id behavior: + +```python +coll = Constraints() +coll.create(expression='a = b + c') +assert 'a' in coll.names +assert coll['a'].id.value == 'a' +assert coll['a'].rhs_expr == 'b + c' +``` + +Explicit id behavior: + +```python +coll = Constraints() +coll.create(id='c1', expression='a = b + c') +assert 'c1' in coll.names +assert coll['c1'].lhs_alias == 'a' +``` + +CIF serialization: + +```python +cif = coll.as_cif +assert '_constraint.id' in cif +assert '_constraint.expression' in cif +``` + +Backward-compatible CIF loading: + +```python +cif = ''' +data_analysis + +loop_ +_constraint.expression +"a = b + c" +''' +``` + +Load through the existing analysis/constraints loader and assert the +loaded collection has key `a`. + +### Existing Round-Trip Tests + +Update tests that compare constraint CIF or constraint collection keys: + +- `tests/unit/easydiffraction/project/test_project_load.py` +- `tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py` +- `tests/integration/fitting/test_project_load.py` + +Expected changes: + +- New saved analysis CIF includes `_constraint.id`. +- Old expression-only fixtures, if any, still load. +- Existing constraint workflows still work when no explicit id is + supplied. + +## Verification Commands + +Run the smallest useful checks first: + +```shell +pixi run python -m pytest tests/unit/easydiffraction/analysis/categories/test_constraints.py +pixi run python -m pytest tests/unit/easydiffraction/core/ +pixi run python -m pytest tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py +``` + +Then run broader checks: + +```shell +pixi run unit-tests +pixi run integration-tests +pixi run check +``` + +If the modified-file Prettier helper is still missing, format changed +Markdown directly: + +```shell +npx prettier --write --config=prettierrc.toml docs/dev/plans/loop-category-key-identity.md docs/dev/adrs/suggestions/loop-category-key-identity.md +``` + +## Local Availability Check + +The plan was checked against the current repository state: + +- `.github/copilot-instructions.md` exists. +- `src/easydiffraction/core/category.py`, + `src/easydiffraction/core/identity.py`, and + `src/easydiffraction/io/cif/serialize.py` exist. +- `tests/unit/easydiffraction/analysis/categories/test_constraints.py` + exists. +- `tests/unit/easydiffraction/core/` exists. +- `tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py` + exists. +- `pixi.toml` defines `fix`, `check`, `unit-tests`, `integration-tests`, + and `test-structure-check`. +- `prettierrc.toml` and local Prettier are available. +- `tools/nonpy_prettier_modified.py` is not present, so use the direct + `npx prettier --write --config=prettierrc.toml ...` fallback for + touched Markdown files. + +## Acceptance Criteria + +The implementation is done when all of these are true: + +- All current category codes are declared as `_category_code` on item + classes. +- All current loop collection keys are declared as + `_category_entry_name` on item classes. +- No current category item sets `self._identity.category_code` in its + constructor. +- No current category item sets `self._identity.category_entry_name` in + its constructor. +- `item._identity.category_entry_name` still works for all loop rows. +- Descriptor `unique_name` values still include datablock, category, + entry, and descriptor name. +- Constraints persist `_constraint.id`. +- Constraints created without an explicit id still default to the left + hand alias. +- Old constraints CIF without `_constraint.id` still loads and receives + deterministic ids. +- Collection indexes are correct after CIF loading. + +## Suggested Pull Request + +Title: + +```text +Use declarative category identity metadata +``` + +Description: + +```text +This change makes category and loop-row identities easier to audit and +keeps saved CIF identifiers explicit. Constraint rows gain a stable +identifier while existing constraint expressions continue to work. +``` diff --git a/docs/dev/plans/workspace-root-project-category.md b/docs/dev/plans/workspace-root-project-category.md index 346a72aa..778fd758 100644 --- a/docs/dev/plans/workspace-root-project-category.md +++ b/docs/dev/plans/workspace-root-project-category.md @@ -169,14 +169,14 @@ implementation: attribute name (`info` → `project`) changes. - `ProjectInfo.path` is removed; the saved directory path lives on `Workspace.path` only. -- A first-class `Verbosity` category under `WorkspaceConfig` is - required (not optional) so that `_verbosity.level` round-trips - through `workspace.cif` like other singleton categories. +- A first-class `Verbosity` category under `WorkspaceConfig` is required + (not optional) so that `_verbosity.level` round-trips through + `workspace.cif` like other singleton categories. - Source-tree imports may be temporarily inconsistent between phases (for example, Phase 1 leaves `project_config_to_cif` imports until - Phase 4 renames the serializer functions). This is acceptable - because tests are not run in Phase 1. Each phase must still leave - the source importable at the end of the phase. + Phase 4 renames the serializer functions). This is acceptable because + tests are not run in Phase 1. Each phase must still leave the source + importable at the end of the phase. ## Current Shape @@ -322,8 +322,8 @@ Rename the top-level runtime facade and package from `project` to Project.current_project_path() -> Workspace.current_workspace_path() ``` - Update the `ClassVar` annotation accordingly and update all - internal references (`type(self)._current_project = self`, + Update the `ClassVar` annotation accordingly and update all internal + references (`type(self)._current_project = self`, `cls._current_project`). 5. Update the `varname()` fallback inside `__init__` so the default @@ -366,14 +366,15 @@ Rename the top-level runtime facade and package from `project` to 12. Run a source-only grep. Do not run tests in Phase 1: - ```shell - rg -n "easydiffraction\\.project|\\bProject\\b|ProjectDisplay|ProjectConfig" src - ``` +```shell +rg -n "easydiffraction\\.project|\\bProject\\b|ProjectDisplay|ProjectConfig" src +``` + +For every match, decide whether it refers to: - For every match, decide whether it refers to: - - the old root object, which should become `Workspace`; - - the project-information category, which should remain project; - - historical text that should be updated in docs later. +- the old root object, which should become `Workspace`; +- the project-information category, which should remain project; +- historical text that should be updated in docs later. ### Stop Conditions @@ -498,8 +499,7 @@ the workspace location and is not serialized project information. ### Steps -1. In `ProjectInfo`, rename the public identity property and its - setter: +1. In `ProjectInfo`, rename the public identity property and its setter: ```text name (getter) -> id (getter) @@ -539,9 +539,9 @@ the workspace location and is not serialized project information. 7. Add `Workspace.path` as the runtime storage path. Initialize `self._path: pathlib.Path | None = None` in `Workspace.__init__`. - Suggested shape (match the surrounding `GuardedBase` pattern; do - not add `@typechecked` here because the setter accepts both `str` - and `pathlib.Path`): + Suggested shape (match the surrounding `GuardedBase` pattern; do not + add `@typechecked` here because the setter accepts both `str` and + `pathlib.Path`): ```python @property @@ -572,15 +572,15 @@ the workspace location and is not serialized project information. ``` Concrete call sites include `Workspace.save`, `Workspace.load`, - `Workspace.current_workspace_path`, and any consumers in - `analysis/`, `display/`, `summary/`, and `io/`. + `Workspace.current_workspace_path`, and any consumers in `analysis/`, + `display/`, `summary/`, and `io/`. 10. Update messages and string representations: - ```text - Workspace '' (...) - Saving workspace '' to ... - ``` +```text +Workspace '' (...) +Saving workspace '' to ... +``` 11. Run grep: @@ -634,9 +634,10 @@ Rename the saved singleton configuration file from `project.cif` to ``` Call sites include `workspace.py` (formerly `project.py`), - `workspace_config.py`, and the serializer itself (the `project_to_cif` - body calls `project_config_to_cif`). After this step the imports - that were intentionally left stale in Phase 1 must compile cleanly. + `workspace_config.py`, and the serializer itself (the + `project_to_cif` body calls `project_config_to_cif`). After this step + the imports that were intentionally left stale in Phase 1 must + compile cleanly. Do not rename `project_info_to_cif`; it serializes the `_project` category and that name remains correct. @@ -676,10 +677,10 @@ Rename the saved singleton configuration file from `project.cif` to workspace.verbosity # -> 'short' ``` - The public `verbosity` getter/setter on `Workspace` reads and - writes the category's `level` descriptor (validated against - `VerbosityEnum`) and replaces the current `self._verbosity: - VerbosityEnum` runtime-only attribute. Remove that attribute. + The public `verbosity` getter/setter on `Workspace` reads and writes + the category's `level` descriptor (validated against `VerbosityEnum`) + and replaces the current `self._verbosity: VerbosityEnum` + runtime-only attribute. Remove that attribute. Serialize it as: @@ -705,16 +706,16 @@ Rename the saved singleton configuration file from `project.cif` to rg -n "project\\.cif|config\\.cif|meta\\.cif|project_config_to_cif|project_config_from_cif|project_to_cif|verbosity" src docs tests ``` - Update source and docs only. Test files are handled in Phase 2 - unless the user explicitly asks otherwise. + Update source and docs only. Test files are handled in Phase 2 unless + the user explicitly asks otherwise. 9. Saved on-disk fixtures under `data/` and `projects/` still contain `project.cif` files (for example `data/lbco_project/project.cif`, - `projects/cosio/project.cif`). Do **not** edit or regenerate them - in Phase 1. They are inputs to integration/script tests and will - either be regenerated in Phase 2 or the relevant tests will be - updated to write fresh workspace directories. Flag any that block - Phase 2 in the review gate. + `projects/cosio/project.cif`). Do **not** edit or regenerate them in + Phase 1. They are inputs to integration/script tests and will either + be regenerated in Phase 2 or the relevant tests will be updated to + write fresh workspace directories. Flag any that block Phase 2 in the + review gate. ### Stop Conditions @@ -762,8 +763,8 @@ category or the scientific project itself. analysis.project -> analysis.workspace ``` - This includes the constructor argument, the stored attribute, and - any public read-only property exposing the parent workspace. + This includes the constructor argument, the stored attribute, and any + public read-only property exposing the parent workspace. 2. Rename display internals in `WorkspaceDisplay` (formerly `ProjectDisplay`) and in any other class that stores a back- @@ -774,9 +775,9 @@ category or the scientific project itself. _set_project(...) -> _set_workspace(...) ``` - Only do this when the stored object is the top-level runtime - facade. Do not touch `self._project_id` inside `ProjectInfo` or - `_project.*` CIF tags. + Only do this when the stored object is the top-level runtime facade. + Do not touch `self._project_id` inside `ProjectInfo` or `_project.*` + CIF tags. 3. Rename local variables in runtime code: @@ -1191,17 +1192,18 @@ should be `workspace.path`, not `workspace.project.path`. ### Mistake: Forgetting Facade Class-Level State -When renaming `Project` to `Workspace`, the `ClassVar` `_current_project`, -the `current_project_path()` classmethod, and the `varname()` fallback -string `'project'` all live on the class itself and are easy to miss -with a single search-and-replace. Rename them to `_current_workspace`, -`current_workspace_path()`, and `'workspace'` respectively. +When renaming `Project` to `Workspace`, the `ClassVar` +`_current_project`, the `current_project_path()` classmethod, and the +`varname()` fallback string `'project'` all live on the class itself and +are easy to miss with a single search-and-replace. Rename them to +`_current_workspace`, `current_workspace_path()`, and `'workspace'` +respectively. ### Mistake: Renaming `ProjectInfo._project_id` The internal descriptor attribute `self._project_id` inside -`ProjectInfo` already matches the new public name `id` and stays as -is. Only the public `name` property/setter becomes `id`. +`ProjectInfo` already matches the new public name `id` and stays as is. +Only the public `name` property/setter becomes `id`. ### Mistake: Editing Generated Notebooks Directly From 93f0394b73c7bf581bb00dd5246c7941bf965739 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 12:30:53 +0200 Subject: [PATCH 06/10] Add declarative category identity resolution --- docs/dev/plans/loop-category-key-identity.md | 6 +++--- src/easydiffraction/core/category.py | 18 ++++++++++++++++++ src/easydiffraction/core/identity.py | 7 +++++++ src/easydiffraction/io/cif/serialize.py | 6 ++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md index ff435823..231974ac 100644 --- a/docs/dev/plans/loop-category-key-identity.md +++ b/docs/dev/plans/loop-category-key-identity.md @@ -24,9 +24,9 @@ Status checklist. Mark `[x]` only while implementing: ```text Phase 1 - Implementation -[ ] Add class-level identity declarations to CategoryItem. -[ ] Teach Identity to resolve declared category entry names. -[ ] Rebuild collection indexes after CIF loop loading. +[x] Add class-level identity declarations to CategoryItem. +[x] Teach Identity to resolve declared category entry names. +[x] Rebuild collection indexes after CIF loop loading. [ ] Add _category_code to all current CategoryItem subclasses. [ ] Add _category_entry_name to all current loop CategoryItem subclasses. [ ] Remove direct self._identity.category_code assignments. diff --git a/src/easydiffraction/core/category.py b/src/easydiffraction/core/category.py index 0804d7d4..d2d95fb8 100644 --- a/src/easydiffraction/core/category.py +++ b/src/easydiffraction/core/category.py @@ -22,6 +22,13 @@ class CategoryItem(GuardedBase): # CategoryCollection and use them when serializing to CIF! # TODO: Common for all categories _update_priority = 10 # Default. Lower values run first. + _category_code: str | None = None + _category_entry_name: str | None = None + + def __init__(self) -> None: + super().__init__() + if self._category_code is not None: + self._identity.category_code = self._category_code def __str__(self) -> str: """Human-readable representation of this component.""" @@ -37,6 +44,17 @@ def _update( # noqa: PLR6301 ) -> None: del called_by_minimizer + def _resolve_category_entry_name(self) -> str | None: + """Resolve the declared category entry value for this item.""" + attr_name = self._category_entry_name + if attr_name is None: + return None + + value = getattr(self, attr_name) + if isinstance(value, GenericDescriptorBase): + value = value.value + return str(value) + @property def unique_name(self) -> str: """Fully qualified name: datablock, category, entry.""" diff --git a/src/easydiffraction/core/identity.py b/src/easydiffraction/core/identity.py index f6aca8cb..b6b9e57a 100644 --- a/src/easydiffraction/core/identity.py +++ b/src/easydiffraction/core/identity.py @@ -41,6 +41,13 @@ def _resolve_up(self, attr: str, visited: set[int] | None = None) -> str | None: if isinstance(value, str): return value + if attr == 'category_entry': + resolver = getattr(self._owner, '_resolve_category_entry_name', None) + if callable(resolver): + resolved = resolver() + if resolved is not None: + return resolved + # Climb to parent if available parent = getattr(self._owner, '__dict__', {}).get('_parent') if parent and hasattr(parent, '_identity'): diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index f7625330..5c3a29c1 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -912,3 +912,9 @@ def category_collection_from_cif( # param_from_cif _set_param_from_raw_cif_value(param, array[row_idx][col_idx]) break + + after_from_cif = getattr(self, '_after_from_cif', None) + if callable(after_from_cif): + after_from_cif() + + self._rebuild_index() From feee0465079050a8a6ac8c7756cfb64f779eb978 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 12:56:08 +0200 Subject: [PATCH 07/10] Declare category identities on current items --- docs/dev/plans/loop-category-key-identity.md | 4 ++-- .../analysis/categories/aliases/default.py | 6 +++--- .../analysis/categories/constraints/default.py | 4 ++-- .../analysis/categories/fitting/default.py | 4 ++-- .../analysis/categories/joint_fit/default.py | 6 +++--- .../analysis/categories/sequential_fit/default.py | 4 ++-- .../categories/sequential_fit_extract/default.py | 6 +++--- .../experiment/categories/background/chebyshev.py | 6 +++--- .../experiment/categories/background/line_segment.py | 6 +++--- .../experiment/categories/calculation/default.py | 4 ++-- .../datablocks/experiment/categories/data/bragg_pd.py | 10 ++++++---- .../datablocks/experiment/categories/data/total_pd.py | 6 +++--- .../datablocks/experiment/categories/diffrn/default.py | 4 ++-- .../experiment/categories/excluded_regions/default.py | 5 +++-- .../experiment/categories/experiment_type/default.py | 4 ++-- .../experiment/categories/extinction/becker_coppens.py | 4 ++-- .../experiment/categories/instrument/base.py | 5 +++-- .../experiment/categories/linked_crystal/default.py | 4 ++-- .../experiment/categories/linked_phases/default.py | 6 +++--- .../datablocks/experiment/categories/peak/base.py | 3 ++- .../datablocks/experiment/categories/refln/bragg_sc.py | 6 +++--- .../structure/categories/atom_site_aniso/default.py | 6 +++--- .../structure/categories/atom_sites/default.py | 6 +++--- .../datablocks/structure/categories/cell/default.py | 4 ++-- .../structure/categories/space_group/default.py | 4 ++-- src/easydiffraction/project/categories/info/default.py | 4 ++-- .../project/categories/rendering/default.py | 4 ++-- 27 files changed, 70 insertions(+), 65 deletions(-) diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md index 231974ac..123f2b69 100644 --- a/docs/dev/plans/loop-category-key-identity.md +++ b/docs/dev/plans/loop-category-key-identity.md @@ -27,9 +27,9 @@ Phase 1 - Implementation [x] Add class-level identity declarations to CategoryItem. [x] Teach Identity to resolve declared category entry names. [x] Rebuild collection indexes after CIF loop loading. -[ ] Add _category_code to all current CategoryItem subclasses. +[x] Add _category_code to all current CategoryItem subclasses. [ ] Add _category_entry_name to all current loop CategoryItem subclasses. -[ ] Remove direct self._identity.category_code assignments. +[x] Remove direct self._identity.category_code assignments. [ ] Remove direct self._identity.category_entry_name lambda assignments. [ ] Add Constraint.id descriptor serialized as _constraint.id. [ ] Change constraints collection keys from lhs_alias to id. diff --git a/src/easydiffraction/analysis/categories/aliases/default.py b/src/easydiffraction/analysis/categories/aliases/default.py index eef6201a..5dd764ce 100644 --- a/src/easydiffraction/analysis/categories/aliases/default.py +++ b/src/easydiffraction/analysis/categories/aliases/default.py @@ -30,6 +30,9 @@ class Alias(CategoryItem): ``unique_name`` for CIF serialization. """ + _category_code = 'alias' + _category_entry_name = 'label' + def __init__(self) -> None: super().__init__() @@ -56,9 +59,6 @@ def __init__(self) -> None: # Stored via object.__setattr__ to avoid parent-chain mutation. object.__setattr__(self, '_param_ref', None) - self._identity.category_code = 'alias' - self._identity.category_entry_name = lambda: str(self.label.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/analysis/categories/constraints/default.py b/src/easydiffraction/analysis/categories/constraints/default.py index fc1d74fd..accb11a5 100644 --- a/src/easydiffraction/analysis/categories/constraints/default.py +++ b/src/easydiffraction/analysis/categories/constraints/default.py @@ -26,6 +26,8 @@ class Constraint(CategoryItem): """Single constraint item stored as ``lhs = rhs`` expression.""" + _category_code = 'constraint' + def __init__(self) -> None: super().__init__() @@ -38,8 +40,6 @@ def __init__(self) -> None: ), cif_handler=CifHandler(names=['_constraint.expression']), ) - - self._identity.category_code = 'constraint' self._identity.category_entry_name = lambda: self.lhs_alias # ------------------------------------------------------------------ diff --git a/src/easydiffraction/analysis/categories/fitting/default.py b/src/easydiffraction/analysis/categories/fitting/default.py index 925259cf..141e2236 100644 --- a/src/easydiffraction/analysis/categories/fitting/default.py +++ b/src/easydiffraction/analysis/categories/fitting/default.py @@ -31,6 +31,8 @@ class Fitting(CategoryItem): Holds the active minimizer backend tag. """ + _category_code = 'fitting' + type_info = TypeInfo( tag='default', description='Fitting configuration category', @@ -51,8 +53,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_fitting.minimizer_type']), ) - self._identity.category_code = 'fitting' - @property def minimizer_type(self) -> StringDescriptor: """Fitting minimizer backend type.""" diff --git a/src/easydiffraction/analysis/categories/joint_fit/default.py b/src/easydiffraction/analysis/categories/joint_fit/default.py index 62f9e7e3..2821f2e8 100644 --- a/src/easydiffraction/analysis/categories/joint_fit/default.py +++ b/src/easydiffraction/analysis/categories/joint_fit/default.py @@ -24,6 +24,9 @@ class JointFitItem(CategoryItem): """A single joint-fit entry.""" + _category_code = 'joint_fit' + _category_entry_name = 'experiment_id' + def __init__(self) -> None: super().__init__() @@ -46,9 +49,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_joint_fit.weight']), ) - self._identity.category_code = 'joint_fit' - self._identity.category_entry_name = lambda: str(self.experiment_id.value) - @property def experiment_id(self) -> StringDescriptor: """ diff --git a/src/easydiffraction/analysis/categories/sequential_fit/default.py b/src/easydiffraction/analysis/categories/sequential_fit/default.py index aad826f1..52519f56 100644 --- a/src/easydiffraction/analysis/categories/sequential_fit/default.py +++ b/src/easydiffraction/analysis/categories/sequential_fit/default.py @@ -22,6 +22,8 @@ class SequentialFit(CategoryItem): """Persisted settings for sequential fitting.""" + _category_code = 'sequential_fit' + type_info = TypeInfo( tag='default', description='Sequential fitting settings', @@ -67,8 +69,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_sequential_fit.reverse']), ) - self._identity.category_code = 'sequential_fit' - @property def data_dir(self) -> StringDescriptor: """Directory containing sequential-fit data files.""" diff --git a/src/easydiffraction/analysis/categories/sequential_fit_extract/default.py b/src/easydiffraction/analysis/categories/sequential_fit_extract/default.py index 84bbf70c..a724692c 100644 --- a/src/easydiffraction/analysis/categories/sequential_fit_extract/default.py +++ b/src/easydiffraction/analysis/categories/sequential_fit_extract/default.py @@ -58,6 +58,9 @@ def _validate_extract_pattern(value: str) -> None: class SequentialFitExtractItem(CategoryItem): """A single sequential-fit extract rule.""" + _category_code = 'sequential_fit_extract' + _category_entry_name = 'id' + def __init__(self) -> None: super().__init__() @@ -89,9 +92,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_sequential_fit_extract.required']), ) - self._identity.category_code = 'sequential_fit_extract' - self._identity.category_entry_name = lambda: str(self.id.value) - @property def id(self) -> StringDescriptor: """Identifier for this extract rule.""" diff --git a/src/easydiffraction/datablocks/experiment/categories/background/chebyshev.py b/src/easydiffraction/datablocks/experiment/categories/background/chebyshev.py index 5f90094d..77e109c8 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/chebyshev.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/chebyshev.py @@ -41,6 +41,9 @@ class PolynomialTerm(CategoryItem): not break immediately. Tests should migrate to the short names. """ + _category_code = 'background' + _category_entry_name = 'id' + def __init__(self) -> None: super().__init__() @@ -75,9 +78,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_pd_background.Chebyshev_coef']), ) - self._identity.category_code = 'background' - self._identity.category_entry_name = lambda: str(self._id.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py index 83e202b4..431abb31 100644 --- a/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py +++ b/src/easydiffraction/datablocks/experiment/categories/background/line_segment.py @@ -34,6 +34,9 @@ class LineSegment(CategoryItem): """Single background control point for interpolation.""" + _category_code = 'background' + _category_entry_name = 'id' + def __init__(self) -> None: super().__init__() @@ -78,9 +81,6 @@ def __init__(self) -> None: ), ) - self._identity.category_code = 'background' - self._identity.category_entry_name = lambda: str(self._id.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/calculation/default.py b/src/easydiffraction/datablocks/experiment/categories/calculation/default.py index 1574c58b..ae54663a 100644 --- a/src/easydiffraction/datablocks/experiment/categories/calculation/default.py +++ b/src/easydiffraction/datablocks/experiment/categories/calculation/default.py @@ -21,6 +21,8 @@ class Calculation(CategoryItem): """Calculator selection and access for an experiment.""" + _category_code = 'calculation' + type_info = TypeInfo( tag='default', description='Experiment calculation category', @@ -45,8 +47,6 @@ def __init__( cif_handler=CifHandler(names=['_calculation.calculator_type']), ) - self._identity.category_code = 'calculation' - @property def calculator_type(self) -> StringDescriptor: """Calculation backend type.""" diff --git a/src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py b/src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py index b1e30aa0..1d5c866b 100644 --- a/src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py +++ b/src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py @@ -285,10 +285,11 @@ class PdCwlDataPoint( ): """Powder diffraction data point for CWL experiments.""" + _category_code = 'pd_data' + _category_entry_name = 'point_id' + def __init__(self) -> None: super().__init__() - self._identity.category_code = 'pd_data' - self._identity.category_entry_name = lambda: str(self.point_id.value) class PdTofDataPoint( @@ -298,10 +299,11 @@ class PdTofDataPoint( ): """Powder diffraction data point for time-of-flight experiments.""" + _category_code = 'pd_data' + _category_entry_name = 'point_id' + def __init__(self) -> None: super().__init__() - self._identity.category_code = 'pd_data' - self._identity.category_entry_name = lambda: str(self.point_id.value) class PdDataBase(CategoryCollection): diff --git a/src/easydiffraction/datablocks/experiment/categories/data/total_pd.py b/src/easydiffraction/datablocks/experiment/categories/data/total_pd.py index 31ac92e7..58ef0c74 100644 --- a/src/easydiffraction/datablocks/experiment/categories/data/total_pd.py +++ b/src/easydiffraction/datablocks/experiment/categories/data/total_pd.py @@ -33,6 +33,9 @@ class TotalDataPoint(CategoryItem): original measurement was CWL or TOF. """ + _category_code = 'total_data' + _category_entry_name = 'point_id' + def __init__(self) -> None: super().__init__() @@ -114,9 +117,6 @@ def __init__(self) -> None: ), ) - self._identity.category_code = 'total_data' - self._identity.category_entry_name = lambda: str(self.point_id.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/diffrn/default.py b/src/easydiffraction/datablocks/experiment/categories/diffrn/default.py index 9635fc2e..92b4290d 100644 --- a/src/easydiffraction/datablocks/experiment/categories/diffrn/default.py +++ b/src/easydiffraction/datablocks/experiment/categories/diffrn/default.py @@ -17,6 +17,8 @@ class DefaultDiffrn(CategoryItem): """Ambient conditions recorded during diffraction measurement.""" + _category_code = 'diffrn' + type_info = TypeInfo( tag='default', description='Diffraction ambient conditions', @@ -73,8 +75,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_diffrn.ambient_electric_field']), ) - self._identity.category_code = 'diffrn' - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/excluded_regions/default.py b/src/easydiffraction/datablocks/experiment/categories/excluded_regions/default.py index 2f4170bb..36df8e46 100644 --- a/src/easydiffraction/datablocks/experiment/categories/excluded_regions/default.py +++ b/src/easydiffraction/datablocks/experiment/categories/excluded_regions/default.py @@ -27,6 +27,9 @@ class ExcludedRegion(CategoryItem): """Closed interval [start, end] to be excluded.""" + _category_code = 'excluded_regions' + _category_entry_name = 'id' + def __init__(self) -> None: super().__init__() @@ -61,8 +64,6 @@ def __init__(self) -> None: ), cif_handler=CifHandler(names=['_excluded_region.end']), ) - self._identity.category_code = 'excluded_regions' - self._identity.category_entry_name = lambda: str(self._id.value) # ------------------------------------------------------------------ # Public properties diff --git a/src/easydiffraction/datablocks/experiment/categories/experiment_type/default.py b/src/easydiffraction/datablocks/experiment/categories/experiment_type/default.py index 1b9d3811..83348086 100644 --- a/src/easydiffraction/datablocks/experiment/categories/experiment_type/default.py +++ b/src/easydiffraction/datablocks/experiment/categories/experiment_type/default.py @@ -29,6 +29,8 @@ class ExperimentType(CategoryItem): """Container of attributes defining the experiment type.""" + _category_code = 'expt_type' + type_info = TypeInfo( tag='default', description='Experiment type descriptor', @@ -79,8 +81,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_expt_type.scattering_type']), ) - self._identity.category_code = 'expt_type' - # ------------------------------------------------------------------ # Private setters (used by factories and loaders only) # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/extinction/becker_coppens.py b/src/easydiffraction/datablocks/experiment/categories/extinction/becker_coppens.py index 0555d7d0..d5ffa157 100644 --- a/src/easydiffraction/datablocks/experiment/categories/extinction/becker_coppens.py +++ b/src/easydiffraction/datablocks/experiment/categories/extinction/becker_coppens.py @@ -36,6 +36,8 @@ class BeckerCoppensExtinction(CategoryItem): (in arc-minutes, as expected by CrysPy). """ + _category_code = 'extinction' + type_info = TypeInfo( tag='becker-coppens', description='Becker-Coppens isotropic extinction correction', @@ -83,8 +85,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_extinction.radius']), ) - self._identity.category_code = 'extinction' - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/instrument/base.py b/src/easydiffraction/datablocks/experiment/categories/instrument/base.py index a2568884..c8e20974 100644 --- a/src/easydiffraction/datablocks/experiment/categories/instrument/base.py +++ b/src/easydiffraction/datablocks/experiment/categories/instrument/base.py @@ -20,7 +20,8 @@ class InstrumentBase(CategoryItem): for concrete CWL/TOF instrument definitions. """ + _category_code = 'instrument' + def __init__(self) -> None: - """Initialize instrument base and set category code.""" + """Initialize instrument base.""" super().__init__() - self._identity.category_code = 'instrument' diff --git a/src/easydiffraction/datablocks/experiment/categories/linked_crystal/default.py b/src/easydiffraction/datablocks/experiment/categories/linked_crystal/default.py index d441aa9c..b1cbf2d8 100644 --- a/src/easydiffraction/datablocks/experiment/categories/linked_crystal/default.py +++ b/src/easydiffraction/datablocks/experiment/categories/linked_crystal/default.py @@ -23,6 +23,8 @@ class LinkedCrystal(CategoryItem): """Linked crystal reference for single-crystal diffraction.""" + _category_code = 'linked_crystal' + type_info = TypeInfo( tag='default', description='Crystal reference with id and scale factor', @@ -53,8 +55,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_sc_crystal_block.scale']), ) - self._identity.category_code = 'linked_crystal' - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/linked_phases/default.py b/src/easydiffraction/datablocks/experiment/categories/linked_phases/default.py index eb6047a7..fca0fb3b 100644 --- a/src/easydiffraction/datablocks/experiment/categories/linked_phases/default.py +++ b/src/easydiffraction/datablocks/experiment/categories/linked_phases/default.py @@ -23,6 +23,9 @@ class LinkedPhase(CategoryItem): """Link to a phase by id with a scale factor.""" + _category_code = 'linked_phases' + _category_entry_name = 'id' + def __init__(self) -> None: super().__init__() @@ -45,9 +48,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_pd_phase_block.scale']), ) - self._identity.category_code = 'linked_phases' - self._identity.category_entry_name = lambda: str(self.id.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/experiment/categories/peak/base.py b/src/easydiffraction/datablocks/experiment/categories/peak/base.py index c8270ed1..330d3462 100644 --- a/src/easydiffraction/datablocks/experiment/categories/peak/base.py +++ b/src/easydiffraction/datablocks/experiment/categories/peak/base.py @@ -13,9 +13,10 @@ class PeakBase(CategoryItem): """Base class for peak profile categories.""" + _category_code = 'peak' + def __init__(self) -> None: super().__init__() - self._identity.category_code = 'peak' type_info = getattr(type(self), 'type_info', None) default_tag = type_info.tag if type_info is not None else '' diff --git a/src/easydiffraction/datablocks/experiment/categories/refln/bragg_sc.py b/src/easydiffraction/datablocks/experiment/categories/refln/bragg_sc.py index ada32c70..8b29900d 100644 --- a/src/easydiffraction/datablocks/experiment/categories/refln/bragg_sc.py +++ b/src/easydiffraction/datablocks/experiment/categories/refln/bragg_sc.py @@ -28,6 +28,9 @@ class Refln(CategoryItem): """Single reflection for single-crystal diffraction data.""" + _category_code = 'refln' + _category_entry_name = 'id' + def __init__(self) -> None: super().__init__() @@ -128,9 +131,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_refln.wavelength']), ) - self._identity.category_code = 'refln' - self._identity.category_entry_name = lambda: str(self.id.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/structure/categories/atom_site_aniso/default.py b/src/easydiffraction/datablocks/structure/categories/atom_site_aniso/default.py index fbb7dedf..acc422b0 100644 --- a/src/easydiffraction/datablocks/structure/categories/atom_site_aniso/default.py +++ b/src/easydiffraction/datablocks/structure/categories/atom_site_aniso/default.py @@ -32,6 +32,9 @@ class AtomSiteAniso(CategoryItem): ``atom_site.adp_type``. """ + _category_code = 'atom_site_aniso' + _category_entry_name = 'label' + def __init__(self) -> None: """Initialise with default zero-valued tensor components.""" super().__init__() @@ -134,9 +137,6 @@ def __init__(self) -> None: ), ) - self._identity.category_code = 'atom_site_aniso' - self._identity.category_entry_name = lambda: str(self.label.value) - # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/structure/categories/atom_sites/default.py b/src/easydiffraction/datablocks/structure/categories/atom_sites/default.py index 6c91a856..2ac4b1d3 100644 --- a/src/easydiffraction/datablocks/structure/categories/atom_sites/default.py +++ b/src/easydiffraction/datablocks/structure/categories/atom_sites/default.py @@ -36,6 +36,9 @@ class AtomSite(CategoryItem): CIF serialization. """ + _category_code = 'atom_site' + _category_entry_name = 'label' + def __init__(self) -> None: """Initialise the atom site with default descriptor values.""" super().__init__() @@ -139,9 +142,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_atom_site.adp_type']), ) - self._identity.category_code = 'atom_site' - self._identity.category_entry_name = lambda: str(self.label.value) - # ------------------------------------------------------------------ # Private helper methods # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/structure/categories/cell/default.py b/src/easydiffraction/datablocks/structure/categories/cell/default.py index 71262950..9b298008 100644 --- a/src/easydiffraction/datablocks/structure/categories/cell/default.py +++ b/src/easydiffraction/datablocks/structure/categories/cell/default.py @@ -23,6 +23,8 @@ class Cell(CategoryItem): descriptors supporting validation, fitting and CIF serialization. """ + _category_code = 'cell' + type_info = TypeInfo( tag='default', description='Unit cell parameters', @@ -93,8 +95,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_cell.angle_gamma']), ) - self._identity.category_code = 'cell' - # ------------------------------------------------------------------ # Private helper methods # ------------------------------------------------------------------ diff --git a/src/easydiffraction/datablocks/structure/categories/space_group/default.py b/src/easydiffraction/datablocks/structure/categories/space_group/default.py index e04015f2..bb727ac4 100644 --- a/src/easydiffraction/datablocks/structure/categories/space_group/default.py +++ b/src/easydiffraction/datablocks/structure/categories/space_group/default.py @@ -30,6 +30,8 @@ class SpaceGroup(CategoryItem): to the first allowed value for the new group. """ + _category_code = 'space_group' + type_info = TypeInfo( tag='default', description='Space group symmetry', @@ -77,8 +79,6 @@ def __init__(self) -> None: ), ) - self._identity.category_code = 'space_group' - # ------------------------------------------------------------------ # Private helper methods # ------------------------------------------------------------------ diff --git a/src/easydiffraction/project/categories/info/default.py b/src/easydiffraction/project/categories/info/default.py index 1d7b8e31..ff730dce 100644 --- a/src/easydiffraction/project/categories/info/default.py +++ b/src/easydiffraction/project/categories/info/default.py @@ -24,6 +24,8 @@ class ProjectInfo(CategoryItem): """Project metadata category.""" + _category_code = 'project' + type_info = TypeInfo( tag='default', description='Project metadata category', @@ -72,8 +74,6 @@ def __init__( ) self._path: pathlib.Path | None = None - self._identity.category_code = 'project' - @staticmethod def _parse_timestamp(value: str) -> datetime.datetime: """Parse project timestamp text from CIF storage format.""" diff --git a/src/easydiffraction/project/categories/rendering/default.py b/src/easydiffraction/project/categories/rendering/default.py index b55a6f92..d3e2fad3 100644 --- a/src/easydiffraction/project/categories/rendering/default.py +++ b/src/easydiffraction/project/categories/rendering/default.py @@ -24,6 +24,8 @@ class Rendering(CategoryItem): """Chart and table engine selection for a project.""" + _category_code = 'rendering' + type_info = TypeInfo( tag='default', description='Project rendering category', @@ -58,8 +60,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_rendering.table_engine']), ) - self._identity.category_code = 'rendering' - @property def chart_engine(self) -> StringDescriptor: """Chart renderer backend type.""" From b4a537ff9bdfadf139b0d410296b0e46eb270709 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 12:57:51 +0200 Subject: [PATCH 08/10] Persist explicit constraint identifiers --- docs/dev/plans/loop-category-key-identity.md | 14 ++++---- .../categories/constraints/default.py | 35 ++++++++++++++++--- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md index 123f2b69..7edc573d 100644 --- a/docs/dev/plans/loop-category-key-identity.md +++ b/docs/dev/plans/loop-category-key-identity.md @@ -28,14 +28,14 @@ Phase 1 - Implementation [x] Teach Identity to resolve declared category entry names. [x] Rebuild collection indexes after CIF loop loading. [x] Add _category_code to all current CategoryItem subclasses. -[ ] Add _category_entry_name to all current loop CategoryItem subclasses. +[x] Add _category_entry_name to all current loop CategoryItem subclasses. [x] Remove direct self._identity.category_code assignments. -[ ] Remove direct self._identity.category_entry_name lambda assignments. -[ ] Add Constraint.id descriptor serialized as _constraint.id. -[ ] Change constraints collection keys from lhs_alias to id. -[ ] Preserve default constraints.create(expression=...) behavior by using lhs_alias as the default id. -[ ] Add backward-compatible loading for old CIF loops without _constraint.id. -[ ] Update constraints display. +[x] Remove direct self._identity.category_entry_name lambda assignments. +[x] Add Constraint.id descriptor serialized as _constraint.id. +[x] Change constraints collection keys from lhs_alias to id. +[x] Preserve default constraints.create(expression=...) behavior by using lhs_alias as the default id. +[x] Add backward-compatible loading for old CIF loops without _constraint.id. +[x] Update constraints display. [ ] Update loop-category-key-identity.md if implementation details differ from the ADR. [ ] Phase 1 review gate: present diff for approval. diff --git a/src/easydiffraction/analysis/categories/constraints/default.py b/src/easydiffraction/analysis/categories/constraints/default.py index accb11a5..5fa3dc32 100644 --- a/src/easydiffraction/analysis/categories/constraints/default.py +++ b/src/easydiffraction/analysis/categories/constraints/default.py @@ -27,10 +27,20 @@ class Constraint(CategoryItem): """Single constraint item stored as ``lhs = rhs`` expression.""" _category_code = 'constraint' + _category_entry_name = 'id' def __init__(self) -> None: super().__init__() + self._id = StringDescriptor( + name='id', + description='Explicit identifier for this constraint row.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z0-9_]*$'), + ), + cif_handler=CifHandler(names=['_constraint.id']), + ) self._expression = StringDescriptor( name='expression', description='Constraint equation, e.g. "occ_Ba = 1 - occ_La".', @@ -40,12 +50,20 @@ def __init__(self) -> None: ), cif_handler=CifHandler(names=['_constraint.expression']), ) - self._identity.category_entry_name = lambda: self.lhs_alias # ------------------------------------------------------------------ # Public properties # ------------------------------------------------------------------ + @property + def id(self) -> StringDescriptor: + """Explicit identifier for this constraint row.""" + return self._id + + @id.setter + def id(self, value: str) -> None: + self._id.value = value + @property def expression(self) -> StringDescriptor: """ @@ -133,21 +151,30 @@ def create(self, *, expression: str) -> None: """ item = Constraint() item.expression = expression + if item.lhs_alias: + item.id = item.lhs_alias self.add(item) self._enabled = True + def _after_from_cif(self) -> None: + """Backfill explicit ids when loading older CIF constraint loops.""" + for item in self: + if item.id.value.strip() or not item.lhs_alias: + continue + item.id = item.lhs_alias + def show(self) -> None: """Print a table of all user-defined symbolic constraints.""" if not self._items: log.warning('No constraints defined.') return - rows = [[constraint.expression.value] for constraint in self] + rows = [[constraint.id.value, constraint.expression.value] for constraint in self] console.paragraph('User defined constraints') render_table( - columns_headers=['expression'], - columns_alignment=['left'], + columns_headers=['id', 'expression'], + columns_alignment=['left', 'left'], columns_data=rows, ) console.print(f'Constraints enabled: {self.enabled}') From a5e087d4fc424fa93a3d66b7af1f61ba1c77e87e Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 12:58:50 +0200 Subject: [PATCH 09/10] Update ADR for declarative category identity --- .../adrs/suggestions/loop-category-key-identity.md | 13 ++++++++----- docs/dev/plans/loop-category-key-identity.md | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/dev/adrs/suggestions/loop-category-key-identity.md b/docs/dev/adrs/suggestions/loop-category-key-identity.md index f9324416..f55ef4a8 100644 --- a/docs/dev/adrs/suggestions/loop-category-key-identity.md +++ b/docs/dev/adrs/suggestions/loop-category-key-identity.md @@ -124,9 +124,12 @@ Jupyter, CLI output, and saved CIF files. ## Implementation Notes The current `category_entry_name` mechanism can stay, but it should be -made easier to audit. A future implementation should add metadata that -identifies the key descriptor for each `CategoryCollection`, or at least -tests that the resolved key comes from a serialized descriptor. +made easier to audit. The implementation now uses class-level +`_category_code` and `_category_entry_name` declarations on concrete +`CategoryItem` subclasses. `CategoryItem` resolves the declared +`_category_entry_name` lazily from the named public attribute, and +`Identity` exposes the resolved value through +`item._identity.category_entry_name`. For constraints, add a descriptor-backed `id` property serialized as `_constraint.id`, and change `category_entry_name` to resolve from that @@ -134,8 +137,8 @@ descriptor. Keep `_constraint.expression` for the full equation. Keep `lhs_alias` and `rhs_expr` as derived convenience properties. When reading older CIF files that only contain `_constraint.expression`, -derive a deterministic fallback `id` from the old `lhs_alias` key, then -write `_constraint.id` on the next save. +derive a deterministic fallback `id` from the old `lhs_alias` key after +row values are loaded, then write `_constraint.id` on the next save. ## Consequences diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md index 7edc573d..7eb37cab 100644 --- a/docs/dev/plans/loop-category-key-identity.md +++ b/docs/dev/plans/loop-category-key-identity.md @@ -36,7 +36,7 @@ Phase 1 - Implementation [x] Preserve default constraints.create(expression=...) behavior by using lhs_alias as the default id. [x] Add backward-compatible loading for old CIF loops without _constraint.id. [x] Update constraints display. -[ ] Update loop-category-key-identity.md if implementation details differ from the ADR. +[x] Update loop-category-key-identity.md if implementation details differ from the ADR. [ ] Phase 1 review gate: present diff for approval. Phase 2 - Verification From bd14001a882c18ade6b71a67b6bc6608ee4bd7be Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 13:22:43 +0200 Subject: [PATCH 10/10] Verify declarative category identity behavior --- docs/dev/plans/loop-category-key-identity.md | 14 +- .../categories/constraints/default.py | 7 +- .../test_analysis_and_fit_category_support.py | 7 +- .../integration/fitting/test_project_load.py | 3 + .../analysis/categories/test_constraints.py | 36 ++++ .../analysis/test_analysis_coverage.py | 7 +- .../easydiffraction/core/test_category.py | 157 +++++++++++++++++- .../test_serialize_category_owner_baseline.py | 1 + .../project/test_project_load.py | 2 + 9 files changed, 221 insertions(+), 13 deletions(-) diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md index 7eb37cab..9917d1ba 100644 --- a/docs/dev/plans/loop-category-key-identity.md +++ b/docs/dev/plans/loop-category-key-identity.md @@ -40,13 +40,13 @@ Phase 1 - Implementation [ ] Phase 1 review gate: present diff for approval. Phase 2 - Verification -[ ] Add tests for the base declarative identity behavior. -[ ] Add parametrized tests for current loop category identity declarations. -[ ] Update constraints tests. -[ ] Update existing round-trip tests that compare constraints CIF. -[ ] Run formatting. -[ ] Run targeted unit tests. -[ ] Run broader checks. +[x] Add tests for the base declarative identity behavior. +[x] Add parametrized tests for current loop category identity declarations. +[x] Update constraints tests. +[x] Update existing round-trip tests that compare constraints CIF. +[x] Run formatting. +[x] Run targeted unit tests. +[x] Run broader checks. ``` ## Commit Discipline diff --git a/src/easydiffraction/analysis/categories/constraints/default.py b/src/easydiffraction/analysis/categories/constraints/default.py index 5fa3dc32..d6f02622 100644 --- a/src/easydiffraction/analysis/categories/constraints/default.py +++ b/src/easydiffraction/analysis/categories/constraints/default.py @@ -157,9 +157,12 @@ def create(self, *, expression: str) -> None: self._enabled = True def _after_from_cif(self) -> None: - """Backfill explicit ids when loading older CIF constraint loops.""" + """ + Backfill explicit ids when loading older CIF constraint loops. + """ for item in self: - if item.id.value.strip() or not item.lhs_alias: + constraint_id = item.id.value.strip() + if constraint_id not in {'', '_', '?'} or not item.lhs_alias: continue item.id = item.lhs_alias diff --git a/tests/integration/fitting/test_analysis_and_fit_category_support.py b/tests/integration/fitting/test_analysis_and_fit_category_support.py index 5633cb6a..89c0fd93 100644 --- a/tests/integration/fitting/test_analysis_and_fit_category_support.py +++ b/tests/integration/fitting/test_analysis_and_fit_category_support.py @@ -267,10 +267,14 @@ def test_analysis_display_as_cif_and_constraints(monkeypatch, capsys): analysis.display.constraints() assert 'No constraints' in capsys.readouterr().out + class FakeId: + value = 'constraint_1' + class FakeExpr: value = 'x = y + 1' class FakeConstraint: + id = FakeId() expression = FakeExpr() analysis.constraints._items = [FakeConstraint()] @@ -279,7 +283,8 @@ class FakeConstraint: analysis.display.constraints() out = capsys.readouterr().out assert 'User defined constraints' in out - assert captured['columns_data'][0][0] == 'x = y + 1' + assert captured['columns_headers'] == ['id', 'expression'] + assert captured['columns_data'][0] == ['constraint_1', 'x = y + 1'] def test_discover_helpers_and_snapshot_params(): diff --git a/tests/integration/fitting/test_project_load.py b/tests/integration/fitting/test_project_load.py index f1f81c80..f908789a 100644 --- a/tests/integration/fitting/test_project_load.py +++ b/tests/integration/fitting/test_project_load.py @@ -205,6 +205,9 @@ def test_save_load_round_trip_preserves_parameters(tmp_path) -> None: # Compare constraints assert len(loaded.analysis.constraints) == len(original.analysis.constraints) for i, orig_c in enumerate(original.analysis.constraints): + loaded_constraint = loaded.analysis.constraints[orig_c.id.value] + assert loaded_constraint.id.value == orig_c.id.value + assert loaded_constraint.expression.value == orig_c.expression.value assert loaded.analysis.constraints[i].expression.value == orig_c.expression.value assert loaded.analysis.constraints.enabled is True diff --git a/tests/unit/easydiffraction/analysis/categories/test_constraints.py b/tests/unit/easydiffraction/analysis/categories/test_constraints.py index 15dddc4f..b887408e 100644 --- a/tests/unit/easydiffraction/analysis/categories/test_constraints.py +++ b/tests/unit/easydiffraction/analysis/categories/test_constraints.py @@ -1,16 +1,52 @@ # SPDX-FileCopyrightText: 2025 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +import gemmi + from easydiffraction.analysis.categories.constraints import Constraint from easydiffraction.analysis.categories.constraints import Constraints def test_constraint_creation_and_collection(): c = Constraint() + c.id = 'constraint_1' c.expression = 'a = b + c' + assert c.id.value == 'constraint_1' assert c.lhs_alias == 'a' assert c.rhs_expr == 'b + c' + coll = Constraints() coll.create(expression='a = b + c') assert 'a' in coll.names + assert coll['a'].id.value == 'a' assert coll['a'].rhs_expr == 'b + c' + + +def test_constraints_from_cif_preserves_explicit_id_keys(): + doc = gemmi.cif.read_string( + 'data_constraints\n\n' + 'loop_\n' + '_constraint.id\n' + '_constraint.expression\n' + 'constraint_1 "a = b + c"\n', + ) + coll = Constraints() + + coll.from_cif(doc.sole_block()) + + assert coll.names == ['constraint_1'] + assert coll['constraint_1'].id.value == 'constraint_1' + assert coll['constraint_1'].lhs_alias == 'a' + + +def test_constraints_from_cif_backfills_missing_id_from_lhs_alias(): + doc = gemmi.cif.read_string( + 'data_constraints\n\nloop_\n_constraint.expression\n"a = b + c"\n', + ) + coll = Constraints() + + coll.from_cif(doc.sole_block()) + + assert coll.names == ['a'] + assert coll['a'].id.value == 'a' + assert coll['a'].expression.value == 'a = b + c' diff --git a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py index 42ef01ac..13072544 100644 --- a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py +++ b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py @@ -78,10 +78,14 @@ def test_constraints_with_items(self, capsys, monkeypatch): a = Analysis(project=_make_project()) # Create a fake constraint with expression + class FakeId: + value = 'constraint_1' + class FakeExpr: value = 'x = y + 1' class FakeConstraint: + id = FakeId() expression = FakeExpr() a.constraints._items = [FakeConstraint()] @@ -95,8 +99,9 @@ def fake_render_table(**kwargs): a.display.constraints() out = capsys.readouterr().out assert 'User defined constraints' in out + assert captured['columns_headers'] == ['id', 'expression'] assert 'columns_data' in captured - assert captured['columns_data'][0][0] == 'x = y + 1' + assert captured['columns_data'][0] == ['constraint_1', 'x = y + 1'] # ------------------------------------------------------------------ diff --git a/tests/unit/easydiffraction/core/test_category.py b/tests/unit/easydiffraction/core/test_category.py index 3d27b501..c1a4ffbe 100644 --- a/tests/unit/easydiffraction/core/test_category.py +++ b/tests/unit/easydiffraction/core/test_category.py @@ -1,6 +1,10 @@ # SPDX-FileCopyrightText: 2025 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +import importlib + +import pytest + from easydiffraction.core.category import CategoryCollection from easydiffraction.core.category import CategoryItem from easydiffraction.core.validation import AttributeSpec @@ -9,9 +13,11 @@ class SimpleItem(CategoryItem): + _category_code = 'simple' + _category_entry_name = 'a' + def __init__(self): super().__init__() - self._identity.category_code = 'simple' object.__setattr__( self, '_a', @@ -32,7 +38,6 @@ def __init__(self): cif_handler=CifHandler(names=['_simple.b']), ), ) - self._identity.category_entry_name = lambda: str(self._a.value) @property def a(self): @@ -56,6 +61,154 @@ def __init__(self): super().__init__(item_type=SimpleItem) +def test_category_item_uses_declared_identity_metadata(): + it = SimpleItem() + + assert type(it)._category_code == 'simple' + assert type(it)._category_entry_name == 'a' + assert it._identity.category_code == 'simple' + + it.a = 'name1' + + assert it._identity.category_entry_name == 'name1' + + +@pytest.mark.parametrize( + ('module_name', 'class_name', 'attr_name', 'value', 'category_code'), + [ + pytest.param( + 'easydiffraction.analysis.categories.aliases.default', + 'Alias', + 'label', + 'alias_1', + 'alias', + id='alias', + ), + pytest.param( + 'easydiffraction.analysis.categories.constraints.default', + 'Constraint', + 'id', + 'constraint_1', + 'constraint', + id='constraint', + ), + pytest.param( + 'easydiffraction.analysis.categories.joint_fit.default', + 'JointFitItem', + 'experiment_id', + 'exp_1', + 'joint_fit', + id='joint_fit', + ), + pytest.param( + 'easydiffraction.analysis.categories.sequential_fit_extract.default', + 'SequentialFitExtractItem', + 'id', + 'rule_1', + 'sequential_fit_extract', + id='sequential_fit_extract', + ), + pytest.param( + 'easydiffraction.datablocks.structure.categories.atom_sites.default', + 'AtomSite', + 'label', + 'Fe1', + 'atom_site', + id='atom_site', + ), + pytest.param( + 'easydiffraction.datablocks.structure.categories.atom_site_aniso.default', + 'AtomSiteAniso', + 'label', + 'Fe1', + 'atom_site_aniso', + id='atom_site_aniso', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.linked_phases.default', + 'LinkedPhase', + 'id', + 'phase_1', + 'linked_phases', + id='linked_phases', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.background.line_segment', + 'LineSegment', + 'id', + 'bg_1', + 'background', + id='background_line_segment', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.background.chebyshev', + 'PolynomialTerm', + 'id', + 'poly_1', + 'background', + id='background_chebyshev', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.excluded_regions.default', + 'ExcludedRegion', + 'id', + 'mask_1', + 'excluded_regions', + id='excluded_region', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.refln.bragg_sc', + 'Refln', + 'id', + 'refl_1', + 'refln', + id='refln', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.data.bragg_pd', + 'PdCwlDataPoint', + 'point_id', + '1', + 'pd_data', + id='pd_cwl_data', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.data.bragg_pd', + 'PdTofDataPoint', + 'point_id', + '2', + 'pd_data', + id='pd_tof_data', + ), + pytest.param( + 'easydiffraction.datablocks.experiment.categories.data.total_pd', + 'TotalDataPoint', + 'point_id', + '3', + 'total_data', + id='total_data', + ), + ], +) +def test_loop_items_resolve_declared_category_identity( + module_name, + class_name, + attr_name, + value, + category_code, +): + module = importlib.import_module(module_name) + item_cls = getattr(module, class_name) + item = item_cls() + + getattr(item, attr_name).value = value + + assert item_cls._category_code == category_code + assert item_cls._category_entry_name == attr_name + assert item._identity.category_code == category_code + assert item._identity.category_entry_name == value + + def test_category_item_str_and_properties(): it = SimpleItem() it.a = 'name1' diff --git a/tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py b/tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py index badd7241..7d45dfbd 100644 --- a/tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py +++ b/tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py @@ -52,6 +52,7 @@ def test_real_analysis_as_cif_includes_aliases_and_constraints_when_present() -> assert '_alias.label' in analysis_cif assert '_alias.param_unique_name' in analysis_cif + assert '_constraint.id' in analysis_cif assert '_constraint.expression' in analysis_cif assert 'a_param = a_param' in analysis_cif diff --git a/tests/unit/easydiffraction/project/test_project_load.py b/tests/unit/easydiffraction/project/test_project_load.py index 4f32ffde..e44c1260 100644 --- a/tests/unit/easydiffraction/project/test_project_load.py +++ b/tests/unit/easydiffraction/project/test_project_load.py @@ -120,6 +120,8 @@ def test_round_trips_constraints(self, tmp_path): assert loaded.analysis.aliases['b_param'].param is not None assert len(loaded.analysis.constraints) == 1 + assert loaded.analysis.constraints['b_param'].id.value == 'b_param' + assert loaded.analysis.constraints['b_param'].expression.value == 'b_param = a_param' assert loaded.analysis.constraints[0].expression.value == 'b_param = a_param' assert loaded.analysis.constraints.enabled is True