From 1a020908edfe60a7fa0d907ab3a81a586b4db1d7 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:06:03 +0200 Subject: [PATCH 01/72] Split persistence ADR and add fit output ADR --- docs/dev/adrs/index.md | 5 +- .../suggestions/analysis-cif-fit-state.md | 628 ++++++++++++++---- .../fit-output-files-and-data-exports.md | 147 ++++ .../parameter-posterior-summary.md | 418 +----------- 4 files changed, 689 insertions(+), 509 deletions(-) create mode 100644 docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index ed0940bf..2514794c 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -17,9 +17,10 @@ folders. | -------------------- | ---------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------- | | Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | | Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes a persisted scalar projection of fit state in `analysis.cif`. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes persisted fit-state categories, result projections, and Bayesian result manifests. 
| [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | | Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | -| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection and Bayesian Persistence | Proposes saved Bayesian summaries and canonical posterior storage. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | +| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | | Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | | Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | | Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. 
| [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | diff --git a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md index d471bb11..b66d60d3 100644 --- a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md +++ b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md @@ -1,61 +1,102 @@ # ADR: Analysis CIF Fit State -**Status:** Proposed +**Status:** Proposed **Date:** 2026-05-13 +**Updated:** 2026-05-18 ## Context `analysis/analysis.cif` currently persists analysis configuration such -as `_fit.minimizer_type`, `_fit.mode`, aliases, constraints, and -joint-fit weights. It does not persist analysis-owned fit state such as -fit bounds, bound provenance, pre-fit scalar snapshots, or latest -fit-status metadata. +as `_fitting.minimizer_type`, `_fitting.mode_type`, aliases, +constraints, and active mode-specific settings. It does not yet persist +the analysis-owned fit state needed to reopen a saved project and +continue the same command-line or plotting workflow. + +Parameter CIF serialization already carries the committed parameter +`value`, the current `free` state, and the current `uncertainty` via CIF +bracket notation. That data belongs to structure or experiment CIF +files. Analysis-owned fit state should not be duplicated there. 
+ +The missing analysis-owned state includes: + +- fit controls that apply to parameters during fitting but are not model + values +- fit bounds and bound provenance needed by deterministic and Bayesian + minimizers +- pre-fit scalar snapshots needed by fit recovery and undo workflows +- compact status metadata for the latest persisted fit projection +- Bayesian summary metadata and manifests for bulk array sidecars +- plot-ready Bayesian caches that make restored posterior displays + instant rather than recomputing after load + +The accepted `runtime-fit-results.md` ADR keeps full backend runtime +objects runtime-only unless a later ADR narrows the persisted +projection. This ADR defines that narrower persisted projection. It +persists stable metadata, summaries, and canonical/cached numerical +arrays, not backend driver objects. + +This ADR is the canonical storage contract for fit-state persistence. +The parameter-level posterior ADR defines only the `parameter.posterior` +API projection and depends on the saved state described here. -At the same time, parameter CIF serialization already carries the -committed parameter `value`, the current `free` state, and the current -`uncertainty` via CIF bracket notation. That data belongs to the model -and should remain in structure or experiment CIF files. +## Decision -The missing piece is analysis-owned fit state: +### 1. Every new persisted concept gets an explicit CIF category -- fit controls that apply to parameters during fitting but are not part - of the model itself -- latest fit-status metadata shown by `display.fit_results()` -- deterministic and Bayesian fit metadata that should survive project - reloads and command-line workflows +New analysis fit-state data must be represented by named CIF categories. +Do not add loose ad-hoc tags, JSON blobs, or overload existing model +parameter tags for analysis-owned fit state. 
-This separation matters because: +Existing categories remain responsible for existing configuration: -- Bayesian plotting after reload needs `fit_min`, `fit_max`, and bound - provenance even when raw posterior arrays are absent -- command-line users need a saved pre-fit starting state to recover from - a poor minimization run -- `analysis.fit_results` already changes by fit type, but its persisted - projection should have a stable analysis-owned home +- `_fitting` stores common fitting configuration. +- `_alias` and `_constraint` store symbolic analysis configuration. +- `_joint_fit`, `_sequential_fit`, and `_sequential_fit_extract` store + active fit-mode settings. -The accepted runtime-fit-results ADR describes fit results as -runtime-only. This ADR proposes a narrower persisted projection of the -latest fit state, not a direct dump of backend runtime objects. +New common fit-state categories are: -## Decision +- `_fit_state` +- `_fit_parameter` +- `_fit_result` +- `_fit_parameter_correlation` + +Deterministic-specific categories are: + +- `_deterministic_result` +- `_deterministic_parameter_result` + +Bayesian-specific categories are: -### 1. `analysis/analysis.cif` becomes the home of analysis-owned fit state +- `_bayesian_result` +- `_bayesian_sampler` +- `_bayesian_convergence` +- `_bayesian_parameter_posterior` +- `_bayesian_distribution_cache` +- `_bayesian_pair_cache` +- `_bayesian_predictive_dataset` -The analysis CIF file will persist: +Bulk arrays referenced by Bayesian categories live in +`analysis/results.h5`. -- fit configuration -- aliases and constraints -- joint-fit weights -- per-parameter fit controls owned by analysis -- latest fit-status metadata common to deterministic and Bayesian fits -- fit-type-specific extensions defined in separate ADRs +### 2. 
Add `_fit_state` for schema versioning + +`_fit_state` is a single-item category for the persisted fit-state +schema: + +```cif +_fit_state.schema_version 1 +``` -Committed parameter values remain in structure or experiment CIF files. -They are not duplicated into `analysis/analysis.cif`. +This version applies to the fit-state CIF categories and any HDF5 +sidecar manifests they reference. It is not the EasyDiffraction package +version. Individual result categories should not repeat +`schema_version` unless they later need independent evolution. -### 2. Add a real `_fit_parameter` loop +### 3. Add `_fit_parameter` for per-parameter fit controls -Introduce a new analysis-owned loop category: +`_fit_parameter` is an analysis-owned loop keyed by live parameter +unique name: ```cif loop_ @@ -65,8 +106,8 @@ _fit_parameter.fit_max _fit_parameter.fit_bounds_uncertainty_multiplier _fit_parameter.start_value _fit_parameter.start_uncertainty -cosio.atom_site.Co1.adp_iso 0.0000 0.1200 4.0 0.0312 0.0021 -cosio.atom_site.Co2.adp_iso 0.0000 0.1200 4.0 0.0312 0.0021 +lbco.cell.length_a 3.8895 3.8920 4.0 3.8909 0.0003 +hrpt.peak.broad_gauss_u 0.05 0.11 4.0 0.08 0.007 ``` Fields: @@ -78,22 +119,31 @@ Fields: - `start_value` - `start_uncertainty` -Rationale: +`fit_min` and `fit_max` are required so saved DREAM projects can be +rerun from the CLI without recreating bounds in Python. The +`fit_bounds_uncertainty_multiplier` field preserves how +uncertainty-derived bounds were created. `start_value` and +`start_uncertainty` capture the most recent pre-fit scalar state for +fit-result displays and undo workflows. -- `fit_min` and `fit_max` are required to restore deterministic and - Bayesian fit controls faithfully. -- `fit_bounds_uncertainty_multiplier` preserves the provenance of - uncertainty-derived bounds for restored Bayesian plot annotations. 
-- `start_value` and `start_uncertainty` capture the last committed - pre-fit scalar state and enable fit recovery workflows, especially in - command-line usage. -- `start_uncertainty` preserves a user-visible pre-fit uncertainty - instead of treating it as disposable fit residue. +The committed parameter value after a fit remains in structure or +experiment CIF. `_fit_parameter` does not duplicate active values. -### 3. Add a generic `_fit_result` single-item category +### 4. Add `_fit_result` for common fit status -Persist the latest fit-status metadata shared across fit types in a -single analysis-owned category: +`_fit_result` is a single-item category for fields shared across fit +types: + +```cif +_fit_result.result_kind bayesian +_fit_result.success true +_fit_result.message "Sampler completed" +_fit_result.iterations 3000 +_fit_result.fitting_time 82.4 +_fit_result.reduced_chi_square 1.031 +``` + +Fields: - `result_kind` - `success` @@ -102,75 +152,348 @@ single analysis-owned category: - `fitting_time` - `reduced_chi_square` -Suggested CIF fragment: +`result_kind` identifies the latest persisted projection, for example +`deterministic` or `bayesian`. Backend runtime objects, optimizer +instances, driver state, and arbitrary engine payloads are not stored in +this category. + +### 5. 
Add `_fit_parameter_correlation` for reusable correlations + +`_fit_parameter_correlation` stores compact pairwise correlation +summaries: ```cif -_fit_result.result_kind deterministic -_fit_result.success yes -_fit_result.message "Fit converged" -_fit_result.iterations 37 -_fit_result.fitting_time 1.82 -_fit_result.reduced_chi_square 1.031 +loop_ +_fit_parameter_correlation.source_kind +_fit_parameter_correlation.param_unique_name_i +_fit_parameter_correlation.param_unique_name_j +_fit_parameter_correlation.correlation +posterior lbco.cell.length_a hrpt.peak.broad_gauss_u 0.87 ``` -`result_kind` distinguishes the latest persisted fit projection, for -example `deterministic` or `bayesian`. +Fields: -### 4. Persist only stable fit-status fields here +- `source_kind` +- `param_unique_name_i` +- `param_unique_name_j` +- `correlation` -The `_fit_result` category is for generic status fields that are stable -across engines and already belong to the result model. +Only the upper triangle excluding the diagonal is stored. Correlation +heatmaps can be restored from this loop alone. Posterior pair plots +still use the Bayesian pair cache or posterior samples. -It should not persist backend runtime objects or arbitrary engine -payloads. +### 6. Store deterministic metadata in dedicated categories -Metrics such as R-factors shown by `display.fit_results()` are derived -from observed and calculated data and can be recomputed after load when -needed. They do not need to be part of the first persisted fit-state -schema. +Deterministic fits use the common `_fit_parameter`, `_fit_result`, and +`_fit_parameter_correlation` categories, plus deterministic-specific +categories for optimizer details and parameter-result display state. -### 5. Fit-type-specific extensions build on this ADR +`_deterministic_result` stores one saved deterministic result header: -This ADR defines the common `analysis.cif` contract for deterministic -and Bayesian fitting. 
+- `optimizer_name` +- `method_name` +- `objective_name` +- `objective_value` +- `n_data_points` +- `n_parameters` +- `n_free_parameters` +- `degrees_of_freedom` +- `covariance_available` +- `correlation_available` -Fit-type-specific extensions are layered on top: +`_deterministic_parameter_result` stores one row per parameter varied in +the latest deterministic fit: -- Bayesian persistence extends this with `_bayesian_*` categories and an - HDF5 sidecar, as described in `parameter-posterior-summary.md`. -- Future fit-specific summaries should follow the same pattern: generic - shared fields in `_fit_result`, specialized fields in separate - categories. +- `order_index` +- `param_unique_name` +- `final_value` +- `final_uncertainty` +- `at_lower_bound` +- `at_upper_bound` + +`final_value` and `final_uncertainty` are a result projection for +display and consistency checks. The calculation source of truth remains +the live parameter value and uncertainty restored from structure and +experiment CIF. If the deterministic result projection disagrees with +the live parameter state on load, loaders should warn and prefer the +live parameter state for calculations. + +Pre-fit values and uncertainties are not duplicated in +`_deterministic_parameter_result`; they come from `_fit_parameter`. +Parameter correlations, when available from covariance, are stored in +`_fit_parameter_correlation` with `source_kind deterministic`. + +### 7. Store Bayesian metadata in dedicated categories -### 6. Restore order is analysis-first, fit-type-second +Bayesian persistence extends the common categories with explicit +Bayesian categories in `analysis/analysis.cif`. 
+ +`_bayesian_result` stores one saved Bayesian result header: + +- `sampler_name` +- `point_estimate_name` +- `success` +- `sampler_completed` +- `best_log_posterior` +- `credible_interval_inner` +- `credible_interval_outer` +- `has_posterior_samples` +- `has_distribution_cache` +- `has_pair_cache` +- `has_posterior_predictive` +- `sidecar_file` + +`_bayesian_sampler` stores resolved sampler settings actually used: + +- `steps` +- `burn` +- `thin` +- `pop` +- `parallel` +- `init` +- `random_seed` + +`_bayesian_convergence` stores top-level diagnostics and shapes: + +- `converged` +- `max_r_hat` +- `min_ess_bulk` +- `n_draws` +- `n_chains` +- `n_parameters` + +`_bayesian_parameter_posterior` stores one posterior summary row per +sampled parameter: + +- `order_index` +- `unique_name` +- `display_name` +- `best_sample_value` +- `median` +- `uncertainty` +- `interval_68_lower` +- `interval_68_upper` +- `interval_95_lower` +- `interval_95_upper` +- `ess_bulk` +- `r_hat` + +`order_index` defines the parameter column order in posterior sample +arrays stored in the HDF5 sidecar. `parameter.posterior` is rebuilt from +this loop on load; posterior summary data is not duplicated in +structure or experiment CIF files. + +### 8. Store plot-ready Bayesian caches in explicit manifest categories + +Bayesian plotting should not require expensive post-load preparation +when the project was saved after a successful Bayesian fit. Plot-ready +caches therefore have their own manifest categories in +`analysis/analysis.cif`, with the actual arrays stored in HDF5. 
+`_bayesian_distribution_cache` supports +`project.display.posterior.distribution(...)`: + +- `param_unique_name` +- `x_path` +- `density_path` +- `n_grid` +- `n_draws_cached` + +`_bayesian_pair_cache` supports +`project.display.posterior.pairs(...)`: + +- `param_unique_name_x` +- `param_unique_name_y` +- `x_path` +- `y_path` +- `density_path` +- `contour_level_path` +- `n_grid_x` +- `n_grid_y` +- `n_draws_cached` + +`_bayesian_predictive_dataset` supports +`project.display.posterior.predictive(...)`: + +- `experiment_name` +- `x_axis_name` +- `x_path` +- `best_sample_prediction_path` +- `lower_95_path` +- `upper_95_path` +- `lower_68_path` +- `upper_68_path` +- `draws_path` +- `n_x` +- `n_draws_cached` + +The manifest rows are the source of truth for HDF5 paths. HDF5 group +naming conventions are implementation details and may change as long as +the manifest remains valid. + +### 9. Store bulk Bayesian arrays in `analysis/results.h5` + +`analysis/analysis.cif` remains the text metadata entry point. +Numerical arrays large enough to make CIF unwieldy are stored in: + +- `analysis/results.h5` + +Required canonical posterior arrays, when available: + +- `/posterior/parameter_samples` +- `/posterior/log_posterior` +- `/posterior/draw_index` + +Expected shapes: + +- `/posterior/parameter_samples`: `(n_draws, n_chains, n_parameters)` +- `/posterior/log_posterior`: `(n_draws, n_chains)` +- `/posterior/draw_index`: `(n_draws,)` + +Recommended plot-cache array layout: + +- `/posterior/distribution/<order_index>/x` +- `/posterior/distribution/<order_index>/density` +- `/posterior/pairs/<pair_key>/x` +- `/posterior/pairs/<pair_key>/y` +- `/posterior/pairs/<pair_key>/density` +- `/posterior/pairs/<pair_key>/contour_levels` +- `/predictive/<experiment_name>/x` +- `/predictive/<experiment_name>/best_sample_prediction` +- `/predictive/<experiment_name>/lower_95` +- `/predictive/<experiment_name>/upper_95` +- `/predictive/<experiment_name>/lower_68` +- `/predictive/<experiment_name>/upper_68` +- `/predictive/<experiment_name>/draws` + +The sidecar is optional for summary-only restore. 
If it is missing, +`_bayesian_parameter_posterior` can still restore parameter summaries +and fit-result tables, but posterior plots that require arrays or +plot-ready caches must warn clearly or offer recomputation. + +Do not persist backend-specific runtime objects such as DREAM driver +instances, raw engine result objects, or ArviZ `InferenceData`. + +### 10. Prepare Bayesian plot data immediately after sampling + +After DREAM sampling completes, the UX should include an explicit +post-processing step before the fit is considered fully saved: + +```text +Processing Bayesian results... +``` + +During this step, EasyDiffraction should prepare: + +- posterior parameter summaries +- convergence diagnostics +- parameter correlation summaries +- distribution density cache arrays +- pair density and contour cache arrays +- posterior predictive bands and cached draws for available experiments +- HDF5 sidecar datasets and CIF manifest rows + +For saved projects, `project.analysis.fit()` already triggers a save at +the end of fitting. In that case the post-processing step should run +before the automatic save writes `analysis/analysis.cif` and +`analysis/results.h5`. For unsaved projects, the same prepared +data remains in memory and is written on the next `project.save_as(...)` +or `project.save()`. + +The display methods should then prefer persisted plot caches: + +```python +project.display.posterior.distribution() +project.display.posterior.pairs() +project.display.posterior.predictive(expt_name='hrpt') +``` + +When valid caches are available, these calls should only load arrays and +render plots. They should not rerun posterior summarization, KDE, +contour preparation, or posterior predictive calculations. + +### 11. Restore order is configuration first, fit state second Load order should be: 1. standard analysis configuration 2. aliases and constraints -3. joint-fit weights -4. `_fit_parameter` -5. `_fit_result` -6. 
fit-type-specific extensions such as `_bayesian_*` +3. active mode-specific settings +4. `_fit_state` +5. `_fit_parameter` +6. `_fit_result` +7. `_fit_parameter_correlation` +8. deterministic metadata categories when `result_kind` is + `deterministic` +9. Bayesian metadata categories when `result_kind` is `bayesian` +10. Bayesian HDF5 sidecar arrays and plot caches + +This ensures bounds and live parameter references are available before +fit-specific summaries and cached plot data are attached. -This ensures that generic fit controls are available before restoring -specialized fit summaries. +### 12. Saved examples use current `_fitting.*` tags -### 7. Suggested full `analysis.cif` example +Suggested deterministic `analysis/analysis.cif` fragment: ```cif -_fit.minimizer_type "bumps (dream)" -_fit.mode single +_fitting.mode_type single +_fitting.minimizer_type "lmfit (leastsq)" + +_fit_state.schema_version 1 + +loop_ +_fit_parameter.param_unique_name +_fit_parameter.fit_min +_fit_parameter.fit_max +_fit_parameter.fit_bounds_uncertainty_multiplier +_fit_parameter.start_value +_fit_parameter.start_uncertainty +lbco.cell.length_a 3.8895 3.8920 4.0 3.8909 0.0003 +hrpt.peak.broad_gauss_u 0.05 0.11 4.0 0.08 0.007 + +_fit_result.result_kind deterministic +_fit_result.success true +_fit_result.message "Fit converged" +_fit_result.iterations 37 +_fit_result.fitting_time 1.82 +_fit_result.reduced_chi_square 1.031 + +_deterministic_result.optimizer_name lmfit +_deterministic_result.method_name leastsq +_deterministic_result.objective_name chi_square +_deterministic_result.objective_value 2568.4 +_deterministic_result.n_data_points 2500 +_deterministic_result.n_parameters 5 +_deterministic_result.n_free_parameters 2 +_deterministic_result.degrees_of_freedom 2498 +_deterministic_result.covariance_available true +_deterministic_result.correlation_available true loop_ -_alias.label -_alias.param_unique_name -biso_Co1 cosio.atom_site.Co1.adp_iso -biso_Co2 
cosio.atom_site.Co2.adp_iso +_deterministic_parameter_result.order_index +_deterministic_parameter_result.param_unique_name +_deterministic_parameter_result.final_value +_deterministic_parameter_result.final_uncertainty +_deterministic_parameter_result.at_lower_bound +_deterministic_parameter_result.at_upper_bound +0 lbco.cell.length_a 3.89091 0.0003 false false +1 hrpt.peak.broad_gauss_u 0.08 0.007 false false loop_ -_constraint.expression -"biso_Co2 = biso_Co1" +_fit_parameter_correlation.source_kind +_fit_parameter_correlation.param_unique_name_i +_fit_parameter_correlation.param_unique_name_j +_fit_parameter_correlation.correlation +deterministic lbco.cell.length_a hrpt.peak.broad_gauss_u 0.42 +``` + +Suggested Bayesian `analysis/analysis.cif` fragment: + +```cif +_fitting.mode_type single +_fitting.minimizer_type "bumps (dream)" + +_fit_state.schema_version 1 loop_ _fit_parameter.param_unique_name @@ -179,42 +502,113 @@ _fit_parameter.fit_max _fit_parameter.fit_bounds_uncertainty_multiplier _fit_parameter.start_value _fit_parameter.start_uncertainty -cosio.atom_site.Co1.adp_iso 0.0000 0.1200 4.0 0.0312 0.0021 -cosio.atom_site.Co2.adp_iso 0.0000 0.1200 4.0 0.0312 0.0021 +lbco.cell.length_a 3.8895 3.8920 4.0 3.8909 0.0003 +hrpt.peak.broad_gauss_u 0.05 0.11 4.0 0.08 0.007 _fit_result.result_kind bayesian -_fit_result.success yes -_fit_result.message "Sampler converged" +_fit_result.success true +_fit_result.message "Sampler completed" _fit_result.iterations 3000 _fit_result.fitting_time 82.4 _fit_result.reduced_chi_square 1.031 -# Bayesian-specific extension categories follow here. 
+_bayesian_result.sampler_name dream +_bayesian_result.point_estimate_name best_sample +_bayesian_result.success true +_bayesian_result.sampler_completed true +_bayesian_result.best_log_posterior -1542.77 +_bayesian_result.credible_interval_inner 0.68 +_bayesian_result.credible_interval_outer 0.95 +_bayesian_result.has_posterior_samples true +_bayesian_result.has_distribution_cache true +_bayesian_result.has_pair_cache true +_bayesian_result.has_posterior_predictive true +_bayesian_result.sidecar_file "results.h5" + +_bayesian_sampler.steps 3000 +_bayesian_sampler.burn 600 +_bayesian_sampler.thin 1 +_bayesian_sampler.pop 20 +_bayesian_sampler.parallel 0 +_bayesian_sampler.init lhs +_bayesian_sampler.random_seed 12345 + +_bayesian_convergence.converged true +_bayesian_convergence.max_r_hat 1.01 +_bayesian_convergence.min_ess_bulk 812.4 +_bayesian_convergence.n_draws 2400 +_bayesian_convergence.n_chains 20 +_bayesian_convergence.n_parameters 2 + +loop_ +_bayesian_parameter_posterior.order_index +_bayesian_parameter_posterior.unique_name +_bayesian_parameter_posterior.display_name +_bayesian_parameter_posterior.best_sample_value +_bayesian_parameter_posterior.median +_bayesian_parameter_posterior.uncertainty +_bayesian_parameter_posterior.interval_68_lower +_bayesian_parameter_posterior.interval_68_upper +_bayesian_parameter_posterior.interval_95_lower +_bayesian_parameter_posterior.interval_95_upper +_bayesian_parameter_posterior.ess_bulk +_bayesian_parameter_posterior.r_hat +0 lbco.cell.length_a "length_a" 3.89091 3.89090 0.0003 3.8906 3.8912 3.8903 3.8915 812.4 1.01 + +loop_ +_bayesian_distribution_cache.param_unique_name +_bayesian_distribution_cache.x_path +_bayesian_distribution_cache.density_path +_bayesian_distribution_cache.n_grid +_bayesian_distribution_cache.n_draws_cached +lbco.cell.length_a /posterior/distribution/0/x /posterior/distribution/0/density 256 48000 + +loop_ +_bayesian_predictive_dataset.experiment_name 
+_bayesian_predictive_dataset.x_axis_name +_bayesian_predictive_dataset.x_path +_bayesian_predictive_dataset.best_sample_prediction_path +_bayesian_predictive_dataset.lower_95_path +_bayesian_predictive_dataset.upper_95_path +_bayesian_predictive_dataset.lower_68_path +_bayesian_predictive_dataset.upper_68_path +_bayesian_predictive_dataset.draws_path +_bayesian_predictive_dataset.n_x +_bayesian_predictive_dataset.n_draws_cached +hrpt ttheta /predictive/hrpt/x /predictive/hrpt/best_sample_prediction /predictive/hrpt/lower_95 /predictive/hrpt/upper_95 /predictive/hrpt/lower_68 /predictive/hrpt/upper_68 /predictive/hrpt/draws 2500 200 ``` ## Consequences ### Positive -- `analysis.cif` becomes the single analysis-owned source of fit state. -- Deterministic and Bayesian persistence share one common base schema. -- Fit bounds, bound provenance, and start values survive project - reloads. -- Command-line workflows gain a persisted pre-fit starting state. +- `analysis/analysis.cif` becomes the single text manifest for + analysis-owned fit state. +- Saved DREAM projects have enough fit bounds to run again from the CLI. +- Bayesian save/load separates compact CIF metadata from large HDF5 + arrays. +- Restored posterior displays can render from cached arrays without + expensive recomputation. +- Parameter posterior summaries are rebuilt from analysis-level data + rather than duplicated in model CIF. ### Trade-offs -- The runtime fit-results ADR must be updated because fit state is no - longer entirely runtime-only. -- Analysis persistence becomes more stateful and must be kept in sync - with live parameter objects. -- Some existing serializer assumptions will need refactoring so that - `analysis.cif` owns fit metadata rather than individual parameters. +- The runtime fit-results ADR must be read as "runtime-only unless a + narrower persistence ADR defines a saved projection"; this ADR defines + that projection. 
+- Bayesian persistence now spans CIF and HDF5, so save/load must + validate consistency between manifest rows and sidecar datasets. +- Post-fit processing increases the time between sampler completion and + final saved project state, but makes later display calls much faster. +- Cached plot arrays are derived data and must be invalidated when a new + fit runs or when the project changes in ways that make the saved fit + result stale. ## Deferred Work -- Bayesian-specific categories and HDF5 sidecar details remain in - `parameter-posterior-summary.md`. -- Undo semantics for `start_value` and `start_uncertainty` are defined - in a separate ADR. -- Correlation-matrix persistence is defined in a separate ADR. +- Exact compression and chunking policy for HDF5 datasets. +- Multiple saved Bayesian runs per project. +- Optional covariance persistence beyond correlation summaries. +- Cache invalidation UX for manual edits after a saved fit. +- Persistence for posterior-capable minimizers beyond DREAM. 
diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md new file mode 100644 index 00000000..24375d69 --- /dev/null +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -0,0 +1,147 @@ +# ADR: Fit Output Files and Data Exports + +**Status:** Proposed +**Date:** 2026-05-18 + +## Context + +Different fit modes produce different kinds of reusable output: + +- sequential deterministic fits produce a rectangular parameter + evolution table, currently saved as `analysis/results.csv` +- Bayesian fits produce posterior samples, diagnostics, predictive + arrays, and plot caches, which are too large and structured for CIF + or CSV +- deterministic single and joint fits produce fitted model state, + calculated data, reflection tables, residuals, and optional + covariance/correlation summaries + +The project should keep naming consistent and avoid making users extract +ordinary plotting data from CIF when a clearer CSV export is possible. +At the same time, CIF remains the canonical model/configuration format, +and large numerical arrays should not be embedded in +`analysis/analysis.cif`. + +## Decision + +### 1. Separate results, data archives, and exports + +Use three file roles under `analysis/`: + +- `analysis/results.csv` for flat tabular fit results. +- `analysis/results.h5` for large or structured result arrays and + result-derived plot caches. +- `analysis/data.h5` for optional archived input or measured data. + +Use `analysis/exports/` for optional user-facing CSV files intended for +external plotting and inspection. + +This naming keeps the fit type out of the filename. The fit type and +schema are recorded in `analysis/analysis.cif` manifests such as +`_fit_result.result_kind` and `_fit_state.schema_version`. + +### 2. 
Sequential deterministic results stay CSV + +Sequential deterministic fitting should keep `analysis/results.csv` as +the canonical table for parameter evolution and extracted metadata. + +This file is intentionally CSV because: + +- each row naturally corresponds to one sequential fit step +- users often inspect, filter, and plot it outside EasyDiffraction +- it should remain easy to diff, copy, and load in spreadsheets + +Sequential measured input data may optionally be archived in +`analysis/data.h5`, but that archive is data, not results. It must not +replace `analysis/results.csv`. + +### 3. Bayesian arrays use `analysis/results.h5` + +Single Bayesian fits should store posterior samples, log posterior +arrays, predictive arrays, and prepared plot caches in +`analysis/results.h5`. + +The previous candidate name `analysis/bayesian_data.h5` is avoided +because it mixes fit type with file role and blurs result arrays with +input data. Bayesian-specific meaning belongs in the CIF manifest and +HDF5 groups, not the sidecar filename. + +### 4. Deterministic fits may gain CSV exports + +For single, joint, and sequential deterministic fits, EasyDiffraction +should consider optional CSV exports for ordinary plotting data: + +- measured data +- calculated data +- residuals +- reflection tables / `refln` categories + +These exports are not canonical persistence. They are convenience files +for users who want to plot or analyze results in external software +without parsing CIF. 
+ +Suggested first layout: + +```text +analysis/ + analysis.cif + results.csv # sequential deterministic only, when applicable + results.h5 # Bayesian and other structured result arrays + data.h5 # optional archived measured/input data + exports/ + <experiment_name>_measured.csv + <experiment_name>_calculated.csv + <experiment_name>_residual.csv + <experiment_name>_reflections.csv +``` + +## Fit-Type Mapping + +| Fit type | Canonical fit state | Tabular results | Large arrays / caches | Optional data archive | Optional exports | +| ------------------------ | ------------------------------------ | ------------------------------- | --------------------------- | --------------------- | ----------------------------- | +| single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | +| joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | +| sequential deterministic | `analysis/analysis.cif` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | +| single Bayesian | `analysis/analysis.cif` manifest | optional summary export only | `analysis/results.h5` | none initially | optional summary/predictive CSV | + +## Open Questions + +- Should single and joint deterministic fits write a one-row + `analysis/results.csv`, or is their result projection in + `analysis/analysis.cif` enough? +- Should CSV exports be written automatically after fit/save, or only + by an explicit export command? +- What exact CSV column schemas should be used for measured, + calculated, residual, and reflection exports? +- Should exported `refln` CSVs mirror CIF tag names exactly, or use + shorter user-facing column names? +- Should sequential measured data archival in `analysis/data.h5` be + opt-in, automatic below a size threshold, or always disabled unless + requested? +- What size threshold and compression policy should control + `analysis/data.h5` and `analysis/results.h5`? 
+- Should `analysis/results.h5` store only the latest fit, or eventually + support multiple saved runs? +- Should external CSV exports be regenerated from canonical CIF/HDF5 on + demand rather than stored persistently? + +## Consequences + +### Positive + +- Fit output filenames become role-based and consistent across fit + types. +- Sequential parameter evolution keeps its simple CSV workflow. +- Bayesian arrays get a generic result sidecar name that can also serve + future structured result types. +- External plotting data can be exposed as plain CSV without weakening + CIF as the canonical model format. + +### Trade-offs + +- The project gains more output-file roles under `analysis/`, so save + and cleanup behavior must be explicit. +- Export CSVs are derived data and must be invalidated or regenerated + when the project changes. +- HDF5 archives introduce size and compression choices that should be + resolved before implementation. diff --git a/docs/dev/adrs/suggestions/parameter-posterior-summary.md b/docs/dev/adrs/suggestions/parameter-posterior-summary.md index 3de39bf3..98466b57 100644 --- a/docs/dev/adrs/suggestions/parameter-posterior-summary.md +++ b/docs/dev/adrs/suggestions/parameter-posterior-summary.md @@ -1,6 +1,6 @@ -# ADR: Parameter-Level Posterior Projection and Bayesian Persistence +# ADR: Parameter-Level Posterior Projection -**Status:** Proposed +**Status:** Proposed **Date:** 2026-05-13 ## Context @@ -14,7 +14,7 @@ Bayesian DREAM currently keeps posterior state only on `posterior_samples`, `posterior_parameter_summaries`, `posterior_predictive`, diagnostics, and sampler settings. The accepted runtime-fit-results ADR describes this state as runtime-only and not -serialized. +serialized unless a narrower persistence ADR defines a saved projection. 
`analysis.fit_results` already changes by analysis type: deterministic fits use `FitResults`, while posterior-capable fits such as DREAM use @@ -59,7 +59,7 @@ Do not add separate flat parameter attributes such as `median`, Instead, the parameter-level projection reuses the existing `PosteriorParameterSummary` object already produced for `BayesianFitResults`. This keeps one grouped summary shape for display, -inspection, and later persistence. +inspection, and restore from persisted analysis state. The summary object currently provides the right level of detail: @@ -211,358 +211,24 @@ Asymmetric interval information is not squeezed into `parameter.uncertainty`; it remains available only via `parameter.posterior`. -### 10. Persist canonical Bayesian state at analysis level +### 10. Rebuild posterior from analysis-level state Canonical Bayesian state is owned by `analysis.fit_results`, not by -individual parameters. - -When the active result is a `BayesianFitResults` instance, persistence -must save enough data to restore two distinct capability levels: - -- summary-only restore for parameter inspection and tables -- full restore for posterior plots and predictive plots +individual parameters. The saved fit-state format and restore order are +defined in `analysis-cif-fit-state.md`. `parameter.posterior` is never serialized as a per-parameter property. -It is always rebuilt from the canonical analysis-level persisted data. - -### 11. Persist fit-control and Bayesian metadata in `analysis/analysis.cif` - -The existing `analysis/analysis.cif` file remains the text metadata -entry point for analysis persistence. - -The persisted fit-control and Bayesian metadata is split into explicit -CIF categories. - -#### 11.1 `_fit_parameter` loop - -Stores analysis-owned per-parameter fit metadata that is not currently -covered by parameter CIF serialization. 
- -This loop exists because the committed parameter CIF representation -already carries the active `value`, current `free` state, and current -`uncertainty`, but it does not carry fit bounds, bound provenance, or -the pre-fit uncertainty snapshot needed by undo. Those fields are -required for Bayesian plot ranges, pair-plot bound annotations, and -clean fit rollback after project reload. - -Fields: - -- `param_unique_name` -- `fit_min` -- `fit_max` -- `fit_bounds_uncertainty_multiplier` -- `start_value` -- `start_uncertainty` - -`fit_min` and `fit_max` are required for restored Bayesian plotting. -`fit_bounds_uncertainty_multiplier` is required if restored plots should -preserve the uncertainty-derived bound annotation exactly. `start_value` -and `start_uncertainty` are required for clean cross-session undo. If -omitted, restored fit reports may show `N/A` for `start` and `change`, -and undo may need to clear uncertainty as a compatibility fallback. - -#### 11.2 `_bayesian_result` single item - -Stores one saved Bayesian result header with these fields: - -- `schema_version` -- `sampler_name` -- `point_estimate_name` -- `success` -- `sampler_completed` -- `reduced_chi_square` -- `fitting_time` -- `best_log_posterior` -- `credible_interval_inner` -- `credible_interval_outer` -- `has_posterior_samples` -- `has_posterior_predictive` -- `sidecar_file` - -For the current design, `point_estimate_name` is always `best_sample`. - -#### 11.3 `_bayesian_sampler` single item - -Stores resolved sampler settings actually used for the run: - -- `steps` -- `burn` -- `thin` -- `pop` -- `parallel` -- `init` -- `random_seed` - -This persists the existing runtime `sampler_settings` in an explicit, -schema-driven form rather than as an open-ended key/value mapping. - -#### 11.4 `_bayesian_convergence` single item +It is rebuilt from the analysis-level saved result projection when that +projection is available. 
-Stores top-level convergence and shape metadata: +Two restore levels matter for the parameter API: -- `converged` -- `max_r_hat` -- `min_ess_bulk` -- `n_draws` -- `n_chains` -- `n_parameters` +- summary-only restore can populate `parameter.posterior` and + fit-result tables +- full restore can also support posterior plots and predictive plots -Per-parameter `r_hat` and `ess_bulk` remain in the parameter summary -loop described below. - -#### 11.5 `_bayesian_parameter_posterior` loop - -Stores one canonical posterior summary row per sampled parameter. These -rows are the source used to rebuild `parameter.posterior` on load. - -Fields: - -- `order_index` -- `unique_name` -- `display_name` -- `best_sample_value` -- `median` -- `uncertainty` -- `interval_68_lower` -- `interval_68_upper` -- `interval_95_lower` -- `interval_95_upper` -- `ess_bulk` -- `r_hat` - -`order_index` defines the parameter order used by posterior sample -columns in the sidecar arrays. - -#### 11.6 `_bayesian_predictive_dataset` loop - -Stores one manifest row per persisted posterior predictive summary. - -Fields: - -- `experiment_name` -- `x_axis_name` -- `x_path` -- `best_sample_prediction_path` -- `lower_95_path` -- `upper_95_path` -- `lower_68_path` -- `upper_68_path` -- `draws_path` -- `n_x` -- `n_draws_cached` - -This loop tells the loader which arrays to read from the sidecar file -for each experiment-level predictive summary. 
- -#### 11.7 Suggested CIF fragments - -The active parameter value remains in the structure or experiment CIF as -it does today, for example: - -```cif -_atom_site_U_iso_or_equiv 0.0319(21) -``` - -Analysis-owned fit-control and Bayesian metadata then lives in -`analysis/analysis.cif`, for example: - -```cif -_fit.minimizer_type "bumps (dream)" -_fit.mode single - -loop_ -_fit_parameter.param_unique_name -_fit_parameter.fit_min -_fit_parameter.fit_max -_fit_parameter.fit_bounds_uncertainty_multiplier -_fit_parameter.start_value -_fit_parameter.start_uncertainty -cosio.atom_site.Co1.adp_iso 0.0000 0.1200 4.0 0.0312 0.0021 -cosio.atom_site.Co2.adp_iso 0.0000 0.1200 4.0 0.0312 0.0021 - -_bayesian_result.schema_version 1 -_bayesian_result.sampler_name dream -_bayesian_result.point_estimate_name best_sample -_bayesian_result.success yes -_bayesian_result.sampler_completed yes -_bayesian_result.reduced_chi_square 1.031 -_bayesian_result.fitting_time 82.4 -_bayesian_result.best_log_posterior -1542.77 -_bayesian_result.credible_interval_inner 0.68 -_bayesian_result.credible_interval_outer 0.95 -_bayesian_result.has_posterior_samples yes -_bayesian_result.has_posterior_predictive yes -_bayesian_result.sidecar_file "bayesian_data.h5" - -_bayesian_sampler.steps 3000 -_bayesian_sampler.burn 600 -_bayesian_sampler.thin 1 -_bayesian_sampler.pop 20 -_bayesian_sampler.parallel 0 -_bayesian_sampler.init lhs -_bayesian_sampler.random_seed 12345 - -_bayesian_convergence.converged yes -_bayesian_convergence.max_r_hat 1.01 -_bayesian_convergence.min_ess_bulk 812.4 -_bayesian_convergence.n_draws 2400 -_bayesian_convergence.n_chains 20 -_bayesian_convergence.n_parameters 2 - -loop_ -_bayesian_parameter_posterior.order_index -_bayesian_parameter_posterior.unique_name -_bayesian_parameter_posterior.display_name -_bayesian_parameter_posterior.best_sample_value -_bayesian_parameter_posterior.median -_bayesian_parameter_posterior.uncertainty -_bayesian_parameter_posterior.interval_68_lower 
-_bayesian_parameter_posterior.interval_68_upper -_bayesian_parameter_posterior.interval_95_lower -_bayesian_parameter_posterior.interval_95_upper -_bayesian_parameter_posterior.ess_bulk -_bayesian_parameter_posterior.r_hat -0 cosio.atom_site.Co1.adp_iso "Co1 ADP" 0.0319 0.0317 0.0021 0.0298 0.0339 0.0278 0.0361 812.4 1.01 -1 cosio.atom_site.Co2.adp_iso "Co2 ADP" 0.0320 0.0318 0.0020 0.0300 0.0338 0.0281 0.0359 830.7 1.00 - -loop_ -_bayesian_predictive_dataset.experiment_name -_bayesian_predictive_dataset.x_axis_name -_bayesian_predictive_dataset.x_path -_bayesian_predictive_dataset.best_sample_prediction_path -_bayesian_predictive_dataset.lower_95_path -_bayesian_predictive_dataset.upper_95_path -_bayesian_predictive_dataset.lower_68_path -_bayesian_predictive_dataset.upper_68_path -_bayesian_predictive_dataset.draws_path -_bayesian_predictive_dataset.n_x -_bayesian_predictive_dataset.n_draws_cached -hrpt ttheta /predictive/hrpt/x /predictive/hrpt/best_sample_prediction /predictive/hrpt/lower_95 /predictive/hrpt/upper_95 /predictive/hrpt/lower_68 /predictive/hrpt/upper_68 /predictive/hrpt/draws 2500 200 -``` - -### 12. Persist bulk arrays in `analysis/bayesian_data.h5` - -Large numerical arrays are stored outside CIF text in a single sidecar -file: - -- `analysis/bayesian_data.h5` - -The sidecar is optional. Summary-only restore remains valid without it. - -HDF5 is selected instead of NPZ because the persisted Bayesian payload -is a structured collection of named datasets with heterogeneous shapes, -optional groups, and potentially large predictive arrays. HDF5 provides -explicit hierarchical storage, dataset metadata, selective reads, and a -better long-term path for compression or chunking. NPZ is simpler, but -it is flatter and less suitable once the saved Bayesian state grows -beyond a small set of arrays. 
- -#### 12.1 Required core HDF5 dataset paths when posterior samples are saved - -- `posterior_parameter_samples` -- `posterior_log_posterior` -- `posterior_draw_index` - -Expected array shapes: - -- `posterior_parameter_samples`: `(n_draws, n_chains, n_parameters)` -- `posterior_log_posterior`: `(n_draws, n_chains)` when available -- `posterior_draw_index`: `(n_draws,)` when available - -If `posterior_log_posterior` or `posterior_draw_index` are unavailable, -their corresponding header flags remain false and the arrays may be -omitted. - -#### 12.2 Predictive dataset keys - -Posterior predictive arrays are addressed through the -`_bayesian_predictive_dataset` manifest rather than inferred from file -ordering. - -Recommended HDF5 dataset naming is: - -- `predictive__<experiment_name>__x` -- `predictive__<experiment_name>__best_sample_prediction` -- `predictive__<experiment_name>__lower_95` -- `predictive__<experiment_name>__upper_95` -- `predictive__<experiment_name>__lower_68` -- `predictive__<experiment_name>__upper_68` -- `predictive__<experiment_name>__draws` - -The manifest, not the naming convention, is the source of truth. - -Recommended HDF5 group layout is: - -- `/posterior/parameter_samples` -- `/posterior/log_posterior` -- `/posterior/draw_index` -- `/predictive/<experiment_name>/x` -- `/predictive/<experiment_name>/best_sample_prediction` -- `/predictive/<experiment_name>/lower_95` -- `/predictive/<experiment_name>/upper_95` -- `/predictive/<experiment_name>/lower_68` -- `/predictive/<experiment_name>/upper_68` -- `/predictive/<experiment_name>/draws` - -The manifest remains the canonical mapping used by the loader, so this -layout is recommended rather than mandatory. - -#### 12.3 What is not persisted in the sidecar - -Do not persist backend-specific runtime objects such as `engine_result`, -the DREAM driver, or ArviZ `InferenceData`. - -Those objects can be rebuilt from canonical saved arrays when needed, or -left unavailable after load. - -### 13. Restore flow and partial-availability policy - -Persistence must support both full and partial restore. - -#### 13.1 Save flow - -When `Project.save()` sees `analysis.fit_results` as a -`BayesianFitResults` instance: - -1. 
It writes standard analysis configuration to `analysis/analysis.cif`. -2. It appends `_fit_parameter`, `_bayesian_result`, `_bayesian_sampler`, - `_bayesian_convergence`, and `_bayesian_parameter_posterior`. -3. If posterior predictive summaries are available, it also writes the - `_bayesian_predictive_dataset` manifest. -4. If posterior sample arrays or predictive arrays are available, it - writes `analysis/bayesian_data.h5`. - -#### 13.2 Load flow - -When `Project.load()` restores `analysis/analysis.cif`: - -1. Standard analysis configuration is restored first. -2. If `_fit_parameter` is present, fit bounds, bound provenance, and - optional pre-fit scalar snapshots are restored by matching - `param_unique_name` to live parameters. -3. If Bayesian categories are present, a lightweight - `BayesianFitResults` instance is rebuilt from persisted metadata and - parameter summary rows. -4. `parameter.posterior` is rebuilt by matching summary rows to live - parameters via `unique_name`. -5. `parameter.value` and `parameter.uncertainty` continue to come from - the normal project serialization path; Bayesian restore does not - overwrite them. -6. If `analysis/bayesian_data.h5` exists and matches the manifest, - `posterior_samples` and `posterior_predictive` are restored. -7. If the sidecar is missing or incomplete, the restore degrades to - summary-only mode without failing the whole project load. - -#### 13.3 Partial restore behavior - -The chosen partial-restore policy is: - -- `parameter.posterior` and `display.fit_results()` remain available - from saved metadata and summaries. -- Bayesian-only plots requiring canonical posterior arrays remain - unavailable unless those arrays were restored successfully. -- Missing sidecar data should produce a clear warning, not a hard load - failure. +If the saved project has no analysis-level posterior summary for a +parameter, `parameter.posterior` remains `None`. 
Example user access after load: @@ -574,28 +240,18 @@ posterior = param.posterior if posterior is not None: print(posterior.best_sample_value) - print(posterior.uncertainty) - print(posterior.interval_68) + print(posterior.uncertainty) + print(posterior.interval_68) project.analysis.display.fit_results() ``` -Example plotting behavior after load: +Posterior plot availability after load follows the fit-state restore +level defined in `analysis-cif-fit-state.md`. -```python -# Works with summary-only restore -project.analysis.display.fit_results() +### 11. Keep parameter posterior data rebuilt, not duplicated -# Requires canonical posterior arrays from analysis/bayesian_data.h5 -project.display.plotter.plot_posterior_pairs() -project.display.plotter.plot_param_distribution(param) -project.display.plotter.plot_posterior_predictive(expt_name='hrpt') -``` - -### 14. Keep parameter posterior data rebuilt, not duplicated - -`parameter.posterior` is always rebuilt from the canonical -`_bayesian_parameter_posterior` loop in `analysis/analysis.cif`. +`parameter.posterior` is always rebuilt from analysis-level fit state. Do not serialize posterior summaries again inside each parameter's own CIF representation. Duplicating the same posterior summary data across @@ -610,15 +266,10 @@ structure and experiment files would create multiple sources of truth. into one optional object rather than many flat attributes. - `uncertainty` and posterior metadata become clearly fit-owned rather than mixed user-editable and fit-editable state. -- `analysis.fit_results` remains the canonical source for full Bayesian - state. -- `analysis/analysis.cif` becomes the home for fit-control metadata that - does not belong in structure or experiment CIF files. -- Bayesian save/load gains a clear split between text metadata in - `analysis/analysis.cif` and bulk numerical arrays in - `analysis/bayesian_data.h5`. -- Partial restore works for summaries even when full posterior arrays - are absent. 
+- `analysis.fit_results` remains the canonical runtime source for full + Bayesian state. +- Partial restore can still expose parameter summaries when + analysis-level saved state contains them. - The design matches the current rule that `value` is the only active scalar used for calculations. @@ -632,8 +283,6 @@ structure and experiment files would create multiple sources of truth. application code must be updated to use dedicated internal helpers rather than a mix of `_set_value_from_minimizer(...)` and public uncertainty assignment. -- Bayesian persistence now spans both CIF metadata and a binary sidecar, - so save/load code must validate consistency between the two. ## Layering and Ownership @@ -647,14 +296,8 @@ type-only import is acceptable. ## Deferred Work -This ADR now defines persistence for one canonical saved Bayesian run. - -It still defers: +This ADR defines the parameter-level posterior projection. It defers: -- support for multiple saved Bayesian runs per project -- plot-ready cache layers beyond canonical posterior and predictive data -- persistence-time compression or chunking strategies beyond the first - HDF5 sidecar implementation - persistence for future posterior-capable minimizers beyond DREAM - enabling currently unsupported single-crystal predictive draw plots @@ -665,18 +308,13 @@ It still defers: - Existing `PosteriorParameterSummary` instances should be reused rather than copied into a second summary type unless implementation reveals a concrete layering problem that cannot be resolved cleanly. -- `parameter.posterior` should always be rebuilt from restored canonical +- `parameter.posterior` should always be rebuilt from analysis-level Bayesian data rather than serialized redundantly at parameter level. -- The sidecar reader and writer should be isolated behind explicit - serializer helpers instead of being implemented inline in - `Project.save()` and `Project.load()`. 
## Chosen Defaults - `parameter.value` remains committed to the best posterior sample after posterior fits. -- If a project is loaded without full posterior arrays, restoring only +- If a project is loaded with only posterior summaries, restoring `parameter.posterior` is acceptable for table display and parameter inspection. -- Posterior plotting remains unavailable unless the canonical Bayesian - containers needed by those plots are also restored. From 394158bc2e1ef3d5492f5c51b2a06eced773050a Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:32:42 +0200 Subject: [PATCH 02/72] Add artifact root for reproducible tutorials --- docs/dev/adrs/index.md | 64 ++++++------- .../suggestions/analysis-cif-fit-state.md | 25 +++-- .../fit-output-files-and-data-exports.md | 27 +++--- .../parameter-posterior-summary.md | 7 +- pixi.toml | 10 +- src/easydiffraction/io/ascii.py | 21 +++-- src/easydiffraction/project/project.py | 3 + src/easydiffraction/utils/environment.py | 94 +++++++++++++++++++ src/easydiffraction/utils/utils.py | 7 +- .../easydiffraction/utils/test_environment.py | 41 ++++++++ .../utils/test_utils_coverage.py | 36 +++++++ 11 files changed, 256 insertions(+), 79 deletions(-) diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 2514794c..f5273257 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -13,35 +13,35 @@ folders. ## ADR Index -| Group | Status | Title | Short description | Link | -| -------------------- | ---------- | ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------- | -| Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. 
| [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | -| Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes persisted fit-state categories, result projections, and Bayesian result manifests. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | -| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | -| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | -| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | -| Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | -| Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | -| Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. 
| [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | -| Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | -| Core model | Accepted | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. | [`category-parameter-access.md`](accepted/category-parameter-access.md) | -| Documentation | Accepted | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | -| Documentation | Accepted | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | -| Documentation | Accepted | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | -| Documentation | Accepted | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | -| Experiment model | Accepted | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | -| Factories | Accepted | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | -| Naming | Accepted | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. 
| [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | -| Persistence | Accepted | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | -| Persistence | Accepted | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. | [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | -| Persistence | Suggestion | Loop Category Keys and Identity Naming | Documents current loop collection keys and proposes naming rules aligned with CIF category keys. | [`loop-category-key-identity.md`](suggestions/loop-category-key-identity.md) | -| Persistence | Suggestion | Python and CIF Category Correspondence | Compares current Python paths and CIF tags, then proposes scoped one-to-one mapping for project-level categories. | [`python-cif-category-correspondence.md`](suggestions/python-cif-category-correspondence.md) | -| Quality | Accepted | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | -| Quality | Accepted | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | -| Structure model | Accepted | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. | [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | -| User-facing API | Accepted | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | -| User-facing API | Accepted | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. 
| [`selector-families.md`](accepted/selector-families.md) | -| User-facing API | Accepted | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | -| User-facing API | Accepted | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | -| Workspace model | Suggestion | Workspace Root and Project Information Category | Proposes renaming the top-level facade from `Project` to `Workspace` and reserving `project` for project metadata. | [`workspace-root-project-category.md`](suggestions/workspace-root-project-category.md) | +| Group | Status | Title | Short description | Link | +| -------------------- | ---------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------- | +| Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | +| Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | +| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes persisted fit-state categories, result projections, and Bayesian result manifests. 
| [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | +| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | +| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | +| Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | +| Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | +| Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | +| Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | +| Core model | Accepted | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. 
| [`category-parameter-access.md`](accepted/category-parameter-access.md) | +| Documentation | Accepted | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | +| Documentation | Accepted | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | +| Documentation | Accepted | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | +| Documentation | Accepted | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | +| Experiment model | Accepted | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | +| Factories | Accepted | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | +| Naming | Accepted | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | +| Persistence | Accepted | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | +| Persistence | Accepted | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. 
| [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | +| Persistence | Suggestion | Loop Category Keys and Identity Naming | Documents current loop collection keys and proposes naming rules aligned with CIF category keys. | [`loop-category-key-identity.md`](suggestions/loop-category-key-identity.md) | +| Persistence | Suggestion | Python and CIF Category Correspondence | Compares current Python paths and CIF tags, then proposes scoped one-to-one mapping for project-level categories. | [`python-cif-category-correspondence.md`](suggestions/python-cif-category-correspondence.md) | +| Quality | Accepted | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | +| Quality | Accepted | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | +| Structure model | Accepted | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. | [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | +| User-facing API | Accepted | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | +| User-facing API | Accepted | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | +| User-facing API | Accepted | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. 
| [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | +| User-facing API | Accepted | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | +| Workspace model | Suggestion | Workspace Root and Project Information Category | Proposes renaming the top-level facade from `Project` to `Workspace` and reserving `project` for project metadata. | [`workspace-root-project-category.md`](suggestions/workspace-root-project-category.md) | diff --git a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md index b66d60d3..200645f3 100644 --- a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md +++ b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md @@ -1,8 +1,6 @@ # ADR: Analysis CIF Fit State -**Status:** Proposed -**Date:** 2026-05-13 -**Updated:** 2026-05-18 +**Status:** Proposed **Date:** 2026-05-13 **Updated:** 2026-05-18 ## Context @@ -90,8 +88,8 @@ _fit_state.schema_version 1 This version applies to the fit-state CIF categories and any HDF5 sidecar manifests they reference. It is not the EasyDiffraction package -version. Individual result categories should not repeat -`schema_version` unless they later need independent evolution. +version. Individual result categories should not repeat `schema_version` +unless they later need independent evolution. ### 3. Add `_fit_parameter` for per-parameter fit controls @@ -280,8 +278,8 @@ sampled parameter: `order_index` defines the parameter column order in posterior sample arrays stored in the HDF5 sidecar. `parameter.posterior` is rebuilt from -this loop on load; posterior summary data is not duplicated in -structure or experiment CIF files. +this loop on load; posterior summary data is not duplicated in structure +or experiment CIF files. ### 8. 
Store plot-ready Bayesian caches in explicit manifest categories @@ -299,8 +297,7 @@ caches therefore have their own manifest categories in - `n_grid` - `n_draws_cached` -`_bayesian_pair_cache` supports -`project.display.posterior.pairs(...)`: +`_bayesian_pair_cache` supports `project.display.posterior.pairs(...)`: - `param_unique_name_x` - `param_unique_name_y` @@ -333,8 +330,8 @@ the manifest remains valid. ### 9. Store bulk Bayesian arrays in `analysis/results.h5` -`analysis/analysis.cif` remains the text metadata entry point. -Numerical arrays large enough to make CIF unwieldy are stored in: +`analysis/analysis.cif` remains the text metadata entry point. Numerical +arrays large enough to make CIF unwieldy are stored in: - `analysis/results.h5` @@ -396,9 +393,9 @@ During this step, EasyDiffraction should prepare: For saved projects, `project.analysis.fit()` already triggers a save at the end of fitting. In that case the post-processing step should run before the automatic save writes `analysis/analysis.cif` and -`analysis/results.h5`. For unsaved projects, the same prepared -data remains in memory and is written on the next `project.save_as(...)` -or `project.save()`. +`analysis/results.h5`. For unsaved projects, the same prepared data +remains in memory and is written on the next `project.save_as(...)` or +`project.save()`. 
The display methods should then prefer persisted plot caches: diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md index 24375d69..c1106cd4 100644 --- a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -1,7 +1,6 @@ # ADR: Fit Output Files and Data Exports -**Status:** Proposed -**Date:** 2026-05-18 +**Status:** Proposed **Date:** 2026-05-18 ## Context @@ -10,8 +9,8 @@ Different fit modes produce different kinds of reusable output: - sequential deterministic fits produce a rectangular parameter evolution table, currently saved as `analysis/results.csv` - Bayesian fits produce posterior samples, diagnostics, predictive - arrays, and plot caches, which are too large and structured for CIF - or CSV + arrays, and plot caches, which are too large and structured for CIF or + CSV - deterministic single and joint fits produce fitted model state, calculated data, reflection tables, residuals, and optional covariance/correlation summaries @@ -97,22 +96,22 @@ analysis/ ## Fit-Type Mapping -| Fit type | Canonical fit state | Tabular results | Large arrays / caches | Optional data archive | Optional exports | -| ------------------------ | ------------------------------------ | ------------------------------- | --------------------------- | --------------------- | ----------------------------- | -| single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | -| joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | -| sequential deterministic | `analysis/analysis.cif` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | -| single Bayesian | `analysis/analysis.cif` manifest | optional summary export only | `analysis/results.h5` | none 
initially | optional summary/predictive CSV | +| Fit type | Canonical fit state | Tabular results | Large arrays / caches | Optional data archive | Optional exports | +| ------------------------ | -------------------------------- | ---------------------------- | --------------------- | --------------------- | ------------------------------- | +| single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | +| joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | +| sequential deterministic | `analysis/analysis.cif` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | +| single Bayesian | `analysis/analysis.cif` manifest | optional summary export only | `analysis/results.h5` | none initially | optional summary/predictive CSV | ## Open Questions - Should single and joint deterministic fits write a one-row `analysis/results.csv`, or is their result projection in `analysis/analysis.cif` enough? -- Should CSV exports be written automatically after fit/save, or only - by an explicit export command? -- What exact CSV column schemas should be used for measured, - calculated, residual, and reflection exports? +- Should CSV exports be written automatically after fit/save, or only by + an explicit export command? +- What exact CSV column schemas should be used for measured, calculated, + residual, and reflection exports? - Should exported `refln` CSVs mirror CIF tag names exactly, or use shorter user-facing column names? 
- Should sequential measured data archival in `analysis/data.h5` be diff --git a/docs/dev/adrs/suggestions/parameter-posterior-summary.md b/docs/dev/adrs/suggestions/parameter-posterior-summary.md index 98466b57..ccfd4a98 100644 --- a/docs/dev/adrs/suggestions/parameter-posterior-summary.md +++ b/docs/dev/adrs/suggestions/parameter-posterior-summary.md @@ -1,7 +1,6 @@ # ADR: Parameter-Level Posterior Projection -**Status:** Proposed -**Date:** 2026-05-13 +**Status:** Proposed **Date:** 2026-05-13 ## Context @@ -223,8 +222,8 @@ projection is available. Two restore levels matter for the parameter API: -- summary-only restore can populate `parameter.posterior` and - fit-result tables +- summary-only restore can populate `parameter.posterior` and fit-result + tables - full restore can also support posterior plots and predictive plots If the saved project has no analysis-level posterior summary for a diff --git a/pixi.toml b/pixi.toml index 958851cf..71ac80fd 100644 --- a/pixi.toml +++ b/pixi.toml @@ -102,8 +102,8 @@ user = { features = ['py-max', 'user'] } unit-tests = 'python -m pytest tests/unit/ --color=yes -v' functional-tests = 'python -m pytest tests/functional/ --color=yes -v' integration-tests = 'python -m pytest tests/integration/ --color=yes -n auto -v' -script-tests = 'python -m pytest tools/test_scripts.py --color=yes -n auto -v' -notebook-tests = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --color=yes -n auto -v' +script-tests = { cmd = 'python -m pytest tools/test_scripts.py --color=yes -n auto -v', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } +notebook-tests = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --color=yes -n auto -v', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } test = { depends-on = ['unit-tests', 'functional-tests'] } test-all = { depends-on = [ @@ -184,10 +184,14 @@ cov = { depends-on = [ # 📓 Notebook Management ######################## +python = { cmd 
= 'python', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } +tutorial = { cmd = 'python', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } +jupyter = { cmd = 'jupyter', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } + notebook-convert = 'jupytext docs/docs/tutorials/*.py --from py:percent --to ipynb' notebook-strip = 'nbstripout docs/docs/tutorials/*.ipynb' notebook-tweak = 'python tools/tweak_notebooks.py docs/docs/tutorials/' -notebook-exec = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v' +notebook-exec = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } notebook-prepare = { depends-on = [ 'notebook-convert', diff --git a/src/easydiffraction/io/ascii.py b/src/easydiffraction/io/ascii.py index faab7682..dcbea91b 100644 --- a/src/easydiffraction/io/ascii.py +++ b/src/easydiffraction/io/ascii.py @@ -5,20 +5,22 @@ from __future__ import annotations import re -import tempfile import zipfile from io import StringIO from pathlib import Path import numpy as np +from easydiffraction.utils.environment import create_artifact_temp_dir +from easydiffraction.utils.environment import resolve_artifact_path + def _resolve_extraction_destination(destination: str | Path | None) -> Path: """Return an extraction directory for ZIP contents.""" if destination is None: - return Path(tempfile.mkdtemp(prefix='ed_zip_')) + return create_artifact_temp_dir(prefix='ed_zip_') - extract_dir = Path(destination) + extract_dir = resolve_artifact_path(destination) if not extract_dir.is_absolute(): extract_dir = Path.cwd() / extract_dir @@ -45,7 +47,9 @@ def extract_project_from_zip( Path to the ZIP archive containing the project. destination : str | Path | None, default=None Directory to extract into. When ``None``, a temporary directory - is created. + is created. 
Relative destinations are resolved against the + configured artifact root when ``EASYDIFFRACTION_ARTIFACT_ROOT`` + is set. Returns ------- @@ -65,11 +69,7 @@ def extract_project_from_zip( msg = f'ZIP file not found: {zip_path}' raise FileNotFoundError(msg) - if destination is not None: - extract_dir = Path(destination) - extract_dir.mkdir(parents=True, exist_ok=True) - else: - extract_dir = Path(tempfile.mkdtemp(prefix='ed_zip_')) + extract_dir = _resolve_extraction_destination(destination) with zipfile.ZipFile(zip_path, 'r') as zf: # Determine the project directory from the archive contents @@ -107,7 +107,8 @@ def extract_data_paths_from_zip( destination : str | Path | None, default=None Directory to extract files into. When ``None``, a temporary directory is created. Relative destinations are resolved against - the current working directory. + the current working directory, or against the configured + artifact root when ``EASYDIFFRACTION_ARTIFACT_ROOT`` is set. Returns ------- diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 5bbe8acb..76715267 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -22,6 +22,7 @@ from easydiffraction.project.project_config import ProjectConfig from easydiffraction.summary.summary import Summary from easydiffraction.utils.enums import VerbosityEnum +from easydiffraction.utils.environment import resolve_artifact_path from easydiffraction.utils.logging import console from easydiffraction.utils.logging import log @@ -467,6 +468,8 @@ def save_as( if temporary: tmp: str = tempfile.gettempdir() dir_path = pathlib.Path(tmp) / dir_path + else: + dir_path = resolve_artifact_path(dir_path) self.info.path = dir_path self.save() diff --git a/src/easydiffraction/utils/environment.py b/src/easydiffraction/utils/environment.py index e2df97d4..0fd35ce0 100644 --- a/src/easydiffraction/utils/environment.py +++ b/src/easydiffraction/utils/environment.py @@ 
-5,7 +5,55 @@ import os import sys +import tempfile from importlib.util import find_spec +from pathlib import Path + +_ARTIFACT_ROOT_ENV_VAR = 'EASYDIFFRACTION_ARTIFACT_ROOT' +_PIXI_PROJECT_ROOT_ENV_VAR = 'PIXI_PROJECT_ROOT' +_TUTORIALS_DIR = Path('docs') / 'docs' / 'tutorials' +_TUTORIAL_ARTIFACT_ROOT = Path('tmp') / 'tutorials' + + +def _repo_root() -> Path | None: + project_root = os.environ.get(_PIXI_PROJECT_ROOT_ENV_VAR) + if project_root: + return Path(project_root).resolve() + + for parent in Path(__file__).resolve().parents: + if (parent / 'pixi.toml').is_file() and (parent / _TUTORIALS_DIR).is_dir(): + return parent + + return None + + +def _tutorial_artifact_root() -> Path | None: + repo_root = _repo_root() + if repo_root is None: + return None + + tutorials_dir = (repo_root / _TUTORIALS_DIR).resolve() + cwd = Path.cwd().resolve() + if not cwd.is_relative_to(tutorials_dir): + return None + + return (repo_root / _TUTORIAL_ARTIFACT_ROOT).resolve() + + +def _artifact_root() -> Path | None: + artifact_root = os.environ.get(_ARTIFACT_ROOT_ENV_VAR) + if not artifact_root: + return _tutorial_artifact_root() + + root = Path(artifact_root) + if root.is_absolute(): + return root.resolve() + + project_root = os.environ.get(_PIXI_PROJECT_ROOT_ENV_VAR) + if project_root: + return (Path(project_root) / root).resolve() + + return (Path.cwd() / root).resolve() def in_pytest() -> bool: @@ -107,6 +155,52 @@ def in_github_ci() -> bool: return os.environ.get('GITHUB_ACTIONS') is not None +def resolve_artifact_path(path: str | Path) -> Path: + """ + Resolve a path against the configured artifact root. + + Parameters + ---------- + path : str | Path + Path to resolve. + + Returns + ------- + Path + The original path when no artifact root is configured or when + *path* is absolute. Otherwise, the absolute path under the + configured artifact root. 
+ """ + resolved_path = Path(path) + artifact_root = _artifact_root() + if artifact_root is None or resolved_path.is_absolute(): + return resolved_path + + return (artifact_root / resolved_path).resolve() + + +def create_artifact_temp_dir(prefix: str) -> Path: + """ + Create a temporary directory under the artifact root when set. + + Parameters + ---------- + prefix : str + Prefix for the temporary directory name. + + Returns + ------- + Path + Path to the created temporary directory. + """ + artifact_root = _artifact_root() + if artifact_root is None: + return Path(tempfile.mkdtemp(prefix=prefix)) + + artifact_root.mkdir(parents=True, exist_ok=True) + return Path(tempfile.mkdtemp(prefix=prefix, dir=artifact_root)).resolve() + + # ---------------------------------------------------------------------- # IPython/Jupyter helpers # ---------------------------------------------------------------------- diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index cf461aee..874c362f 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -20,6 +20,7 @@ from uncertainties import ufloat_fromstr from easydiffraction.display.tables import TableRenderer +from easydiffraction.utils.environment import resolve_artifact_path from easydiffraction.utils.logging import console from easydiffraction.utils.logging import log @@ -160,7 +161,9 @@ def download_data( id : int | str Numeric dataset id (e.g. 12). destination : str, default='data' - Directory to save the file into (created if missing). + Directory to save the file into (created if missing). Relative + destinations are resolved against the configured artifact root + when ``EASYDIFFRACTION_ARTIFACT_ROOT`` is set. overwrite : bool, default=False Whether to overwrite the file if it already exists. 
@@ -191,7 +194,7 @@ def download_data( _validate_url(url) fname = _filename_for_id_from_path(id, record_path) - dest_path = pathlib.Path(destination) + dest_path = resolve_artifact_path(destination) dest_path.mkdir(parents=True, exist_ok=True) file_path = dest_path / fname diff --git a/tests/unit/easydiffraction/utils/test_environment.py b/tests/unit/easydiffraction/utils/test_environment.py index 691b0a9c..fab45e72 100644 --- a/tests/unit/easydiffraction/utils/test_environment.py +++ b/tests/unit/easydiffraction/utils/test_environment.py @@ -84,3 +84,44 @@ def test_can_use_ipython_display_with_none(self): from easydiffraction.utils.environment import can_use_ipython_display assert can_use_ipython_display(None) is False + + +class TestArtifactPaths: + def test_resolve_artifact_path_uses_env_root(self, monkeypatch, tmp_path): + from easydiffraction.utils.environment import resolve_artifact_path + + monkeypatch.setenv('EASYDIFFRACTION_ARTIFACT_ROOT', 'tmp/tutorials') + monkeypatch.setenv('PIXI_PROJECT_ROOT', str(tmp_path)) + + assert resolve_artifact_path('data') == tmp_path / 'tmp' / 'tutorials' / 'data' + + def test_resolve_artifact_path_uses_tutorial_fallback(self, monkeypatch, tmp_path): + import easydiffraction.utils.environment as env + + repo_root = tmp_path / 'repo' + tutorials_dir = repo_root / 'docs' / 'docs' / 'tutorials' + tutorials_dir.mkdir(parents=True) + + monkeypatch.delenv('EASYDIFFRACTION_ARTIFACT_ROOT', raising=False) + monkeypatch.delenv('PIXI_PROJECT_ROOT', raising=False) + monkeypatch.chdir(tutorials_dir) + monkeypatch.setattr(env, '_repo_root', lambda: repo_root) + + assert env.resolve_artifact_path('data') == repo_root / 'tmp' / 'tutorials' / 'data' + + def test_create_artifact_temp_dir_uses_tutorial_fallback(self, monkeypatch, tmp_path): + import easydiffraction.utils.environment as env + + repo_root = tmp_path / 'repo' + tutorials_dir = repo_root / 'docs' / 'docs' / 'tutorials' + tutorials_dir.mkdir(parents=True) + + 
monkeypatch.delenv('EASYDIFFRACTION_ARTIFACT_ROOT', raising=False) + monkeypatch.delenv('PIXI_PROJECT_ROOT', raising=False) + monkeypatch.chdir(tutorials_dir) + monkeypatch.setattr(env, '_repo_root', lambda: repo_root) + + created_dir = env.create_artifact_temp_dir('ed_zip_') + + assert created_dir.is_dir() + assert created_dir.parent == repo_root / 'tmp' / 'tutorials' diff --git a/tests/unit/easydiffraction/utils/test_utils_coverage.py b/tests/unit/easydiffraction/utils/test_utils_coverage.py index 9357e272..d2d07159 100644 --- a/tests/unit/easydiffraction/utils/test_utils_coverage.py +++ b/tests/unit/easydiffraction/utils/test_utils_coverage.py @@ -433,6 +433,42 @@ def test_download_data_no_description(monkeypatch, tmp_path, capsys): assert 'Data #1' in out +def test_download_data_uses_tutorial_artifact_root_fallback(monkeypatch, tmp_path): + import easydiffraction.utils.environment as env + import easydiffraction.utils.utils as MUT + + repo_root = tmp_path / 'repo' + tutorials_dir = repo_root / 'docs' / 'docs' / 'tutorials' + tutorials_dir.mkdir(parents=True) + + fake_index = { + '1': { + 'path': 'data.xye', + 'hash': None, + 'description': 'Test data', + } + } + monkeypatch.setattr(MUT, '_fetch_data_index', lambda: fake_index) + monkeypatch.setattr(env, '_repo_root', lambda: repo_root) + monkeypatch.delenv('EASYDIFFRACTION_ARTIFACT_ROOT', raising=False) + monkeypatch.delenv('PIXI_PROJECT_ROOT', raising=False) + monkeypatch.chdir(tutorials_dir) + + def fake_retrieve(url, known_hash, fname, path): + import pathlib + + pathlib.Path(path, fname).write_text('x y e') + return str(pathlib.Path(path, fname)) + + monkeypatch.setattr(MUT.pooch, 'retrieve', fake_retrieve) + + result = MUT.download_data(id=1, destination='data') + + expected_path = repo_root / 'tmp' / 'tutorials' / 'data' / 'ed-1.xye' + assert result == str(expected_path) + assert expected_path.exists() + + # --- download_tutorial with overwrite=True ------------------------------------ From 
c40b015f11a3b5c05747a7760fc7b43da0146f8a Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:33:42 +0200 Subject: [PATCH 03/72] Finalize Project facade decision --- .../project-facade-and-persistence.md | 45 +- docs/dev/adrs/index.md | 1 - .../python-cif-category-correspondence.md | 39 +- .../workspace-root-project-category.md | 380 ----- .../plans/workspace-root-project-category.md | 1229 ----------------- 5 files changed, 70 insertions(+), 1624 deletions(-) delete mode 100644 docs/dev/adrs/suggestions/workspace-root-project-category.md delete mode 100644 docs/dev/plans/workspace-root-project-category.md diff --git a/docs/dev/adrs/accepted/project-facade-and-persistence.md b/docs/dev/adrs/accepted/project-facade-and-persistence.md index 67492d8d..3b922f42 100644 --- a/docs/dev/adrs/accepted/project-facade-and-persistence.md +++ b/docs/dev/adrs/accepted/project-facade-and-persistence.md @@ -14,16 +14,22 @@ Persistence. ## Context -`Project` is the current top-level user facade. It owns project -metadata, structures, experiments, rendering preferences, display -helpers, analysis, summaries, verbosity, and save/load behavior. +`Project` is the top-level user facade. It owns project metadata, +structures, experiments, rendering preferences, display helpers, +analysis, summaries, verbosity, and save/load behavior. + +A later proposal considered renaming this facade to `Workspace` so that +`project` could be reserved for the scientific project information +category. The final naming decision is to keep `Project` as the public +root because it matches scientific user language and the saved project +container. The persisted project directory needs to separate real CIF datablocks from singleton project sections. 
## Decision -Use `Project` as the current top-level facade and persist projects as a +Use `Project` as the top-level facade and persist projects as a directory of CIF files: ```text @@ -40,8 +46,35 @@ Real structures and experiments serialize as `data_` datablocks. Singleton sections such as project configuration, analysis, and summary serialize without fake `data_` headers. +Keep project information available as `project.info`. The Python name +avoids a confusing `project.project` access path, while the persisted +CIF category remains the semantic `_project.*` category: + +```cif +_project.id +_project.title +_project.description +_project.created +_project.last_modified +``` + +Do not introduce `_meta.*` tags for project information. The category is +scientific project information, not generic metadata. Any future change +from `_project.*` to another category name must be a separate explicit +persistence decision. + +Keep `project.cif` as the primary singleton project configuration file +while `Project` remains the root facade. Do not rename it to +`workspace.cif` as a side effect of category cleanup. + +The saved project directory path is runtime file-I/O state, not a +serialized project-information field. If the path is exposed in Python, +it must not emit a `_project.path` CIF item. + ## Consequences The saved layout mirrors the current object graph while preserving the -semantic difference between real datablocks and singleton sections. A -proposed `Workspace` rename is tracked separately as an ADR suggestion. +semantic difference between real datablocks and singleton sections. The +`Workspace` rename proposal is rejected; ADR examples should continue to +use `Project`, `project.info`, and `project.cif` unless a later accepted +ADR changes a narrower part of this design. diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index f5273257..02ec5c8c 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -44,4 +44,3 @@ folders. 
| User-facing API | Accepted | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | | User-facing API | Accepted | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | | User-facing API | Accepted | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | -| Workspace model | Suggestion | Workspace Root and Project Information Category | Proposes renaming the top-level facade from `Project` to `Workspace` and reserving `project` for project metadata. | [`workspace-root-project-category.md`](suggestions/workspace-root-project-category.md) | diff --git a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md index 6f970a0e..b9a90ece 100644 --- a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md +++ b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md @@ -31,6 +31,13 @@ The design question is whether this rule should be applied only to project-level configuration, or more broadly across analysis, experiments, structures, and calculated data. +The accepted project-facade decision keeps `Project` as the public root +and keeps `project.cif` as the singleton project configuration file. It +also keeps `_project.*` as the semantic CIF category for scientific +project information and rejects `_meta.*` for that purpose. This ADR +therefore must not reintroduce the rejected `Workspace` rename, +`workspace.cif`, or `_meta.project_*` tags as incidental cleanup. + ## Scope Of Comparison The comparison below is category-level and public-API oriented. 
It lists @@ -218,7 +225,19 @@ This ADR does not propose renaming the public root object. The current root object is already `Project`; the proposal is about category and tag correspondence inside project-owned singleton configuration. -Target project-level mappings if the current Python names are kept: +The accepted baseline is: + +```text +project.info. -> project.cif: _project. +``` + +Future one-to-one correspondence work may still discuss whether the +public identity field should be `name` or `id`, whether verbosity should +persist as `_verbosity.level`, and whether rendering should keep +separate chart and table engine fields. + +Possible strict-correspondence target if a future ADR explicitly changes +the accepted `_project.*` baseline: | Python path | Target CIF path | Current state | | -------------------------------- | ------------------------- | ------------------------------------------ | @@ -259,9 +278,17 @@ scope tells the reader this is project-level verbosity. ### The Current `Project` Root Already Matches User Language The current public root object is already `Project`. Keeping it avoids a -user-facing `workspace.project.*` nesting and aligns with scientific -workflows where a project is the container for structures, experiments, -analysis, and saved files. +broad user-facing root rename and aligns with scientific workflows where +a project is the container for structures, experiments, analysis, and +saved files. + +### `_project.*` Is More Semantic Than `_meta.*` + +The project-information category stores the scientific project identity, +title, description, and timestamps. `_project.id` and `_project.title` +say that directly, while `_meta.project_id` and `_meta.project_title` +make the CIF less domain-oriented and repeat the concept in every item +name. ### Scientific CIF/Domain Categories Should Stay Domain-Oriented @@ -302,13 +329,9 @@ unless a separate ADR changes the underlying API pattern. 
- Should the project identity remain `project.info.name`, or should it become `project.info.id` to mirror the saved identifier field? -- Should project metadata move from `_project.*` to `_info.*`, or is - `_project.*` clearer even inside `project.cif`? - Should `project.rendering.chart_engine` and `project.rendering.table_engine` remain separate, or should the public API and CIF collapse to one `engine` field? -- Should `_project.*` be accepted as a read-only legacy fallback when - loading older saved projects? - Should `project.verbosity = 'short'` remain as a convenience alias for `project.verbosity.level = 'short'`, or should strict correspondence remove the alias? diff --git a/docs/dev/adrs/suggestions/workspace-root-project-category.md b/docs/dev/adrs/suggestions/workspace-root-project-category.md deleted file mode 100644 index a6c94cba..00000000 --- a/docs/dev/adrs/suggestions/workspace-root-project-category.md +++ /dev/null @@ -1,380 +0,0 @@ -# ADR: Workspace Root and Project Information Category - -**Status:** Proposed -**Date:** 2026-05-17 - -## Context - -The current public root object is `Project`. It acts as the top-level -facade for an EasyDiffraction working session: - -```python -project = ed.Project(name='lbco_hrpt') -project.structures -project.experiments -project.analysis -project.display -project.summary -project.save() -``` - -The same word, "project", is also the natural CIF category name for -information about the scientific project: - -```cif -_project.id -_project.title -_project.description -_project.created -_project.last_modified -``` - -This creates a naming conflict. The root object is a broad runtime -facade, while the `_project.*` category is only information about the -scientific project. 
Using the same name for both makes category naming -awkward: - -```python -project.project_info.title -project.config.project_info.title -project.project.title -``` - -At the same time, replacing `_project.*` with a generic `_meta.*` -category would weaken the CIF model: - -```cif -_meta.project_id -_meta.project_title -``` - -The category name `meta` is too generic. It forces each item name to -repeat what the category should already communicate. The existing -`_project.id` and `_project.title` names are more semantic and better -aligned with the repository rule to follow CIF naming unless there is a -clearly better API. - -The design question is therefore: - -- should the top-level runtime object remain `Project`, and project - information move to a different category such as `meta`; -- or should the top-level runtime object be renamed to `Workspace`, so - `project` can be used cleanly for project information? - -## Decision - -Rename the top-level runtime facade from `Project` to `Workspace`. - -Use `project` as the public project-information category under the -workspace: - -```python -workspace = ed.Workspace(project_id='lbco_hrpt') -workspace.project.id -workspace.project.title = 'La0.5Ba0.5CoO3 at HRPT@PSI' -workspace.rendering.table_engine = 'rich' -workspace.verbosity = 'short' -workspace.structures -workspace.experiments -workspace.analysis -``` - -Persist workspace-level singleton categories in `workspace.cif`: - -```cif -_project.id -_project.title -_project.description -_project.created -_project.last_modified - -_rendering.chart_engine -_rendering.table_engine - -_verbosity.level -``` - -The saved directory is a workspace directory whose filesystem name is -chosen by the user. The canonical layout is: - -```text -/ -|-- workspace.cif -|-- structures/ -| `-- cosio.cif -|-- experiments/ -| `-- d20.cif -|-- analysis/ -| `-- analysis.cif -`-- summary/ - `-- summary.cif -``` - -Do not introduce `_meta.*` CIF tags. 
Do not use `project.cif`, -`config.cif`, or `meta.cif` as the primary singleton configuration file -in the target layout. - -The intended naming split is: - -```text -Workspace -|-- project # information about the scientific project -|-- rendering # rendering preferences -|-- verbosity # console/output verbosity preference -|-- structures # real structure datablocks -|-- experiments # real experiment datablocks -|-- analysis # analysis section -|-- display # display facade -`-- summary # summary/report facade -``` - -## Rationale - -### `Workspace` better describes the top-level facade - -The top-level object is more than project metadata. It owns active -collections, analysis state, display helpers, save/load behavior, and -runtime orchestration. `Workspace` describes that broader role without -consuming the domain word `project`. - -The name is also familiar in scientific software. It commonly means an -active analysis environment, a data/model container, or a working area. -That is close to the role of the current EasyDiffraction root object. - -### `project` is the right category name for project information - -Project information is not generic metadata. It is specifically the -identity, title, description, and timestamps of the scientific project. - -This reads cleanly: - -```python -workspace.project.title -``` - -and maps directly to clean CIF: - -```cif -_project.title -``` - -### `_meta.project_title` is weaker than `_project.title` - -The `_meta` category would make the CIF less domain-oriented. 
It also -creates longer and more repetitive item names: - -```cif -_meta.project_id -_meta.project_title -_meta.project_description -``` - -The existing `_project.*` tags are clearer: - -```cif -_project.id -_project.title -_project.description -``` - -### This keeps layer-specific consistency - -After this decision, each layer has a clear rule: - -| Layer | Rule | Example | -| --------------- | ------------------------------ | ------------------- | -| Runtime root | working-session facade | `Workspace` | -| Public category | semantic category name | `workspace.project` | -| CIF category | semantic CIF category | `_project.*` | -| Config file | workspace singleton categories | `workspace.cif` | - -This avoids one-off aliases such as `project.info` while preserving -semantic CIF names. - -### `workspace.cif` is clearer than `project.cif`, `config.cif`, or `meta.cif` - -The file stores singleton settings owned by the workspace: scientific -project information, rendering preferences, and verbosity. `project.cif` -overloads the project name again, while `config.cif` and `meta.cif` are -generic. `workspace.cif` names the owning layer and lets each category -inside the file keep its domain-specific name. - -## Consequences - -### Positive - -- The root object and project-information category no longer share the - same conceptual name. -- Public access becomes uniform: `workspace.project`, - `workspace.rendering`, `workspace.verbosity`, `workspace.analysis`. -- CIF stays semantic and does not introduce `_meta.*`. -- Workspace-level preferences such as rendering and verbosity are saved - with the workspace instead of being hidden runtime-only state. -- Project information can use short item names such as `id`, `title`, - and `description`. -- The top-level facade name better reflects active runtime - orchestration. - -### Negative - -- This is a breaking public API change. 
-- Tutorials, scripts, tests, docs, type hints, and imports must be - updated from `Project` to `Workspace`. -- Existing saved directories using `project.cif` must be migrated to - `workspace.cif` if no compatibility loader is kept. -- Existing code that expected verbosity to be runtime-only must account - for it round-tripping through `workspace.cif`. -- Users familiar with `Project` must learn the new root name. -- `Workspace` can be confused with a filesystem workspace in some - ecosystems, so documentation must define it clearly as the active - EasyDiffraction working object. - -## Compatibility Policy - -EasyDiffraction is in beta, and repository instructions say not to keep -legacy shims by default. - -Therefore the target implementation should not add a permanent -`Project = Workspace` alias unless explicitly approved before -implementation. - -The migration plan still includes a review gate before removing the old -`Project` public symbol, because this is a user-facing breaking change. - -## Alternatives Considered - -### Keep `Project` root and rename project information to `meta` - -Rejected. - -Example: - -```python -project.meta.project_title -``` - -```cif -_meta.project_title -``` - -This keeps the root class stable, but it weakens the category model. -`meta` is too broad, and the CIF item names become repetitive. - -### Keep `Project` root and use `project.config.project` - -Rejected for now. - -Example: - -```python -project.config.project.title -``` - -This is technically consistent, but it still repeats `project` at -different semantic layers. It also adds depth to common user workflows. -It is a reasonable fallback if the public root rename is rejected. - -### Keep `Project` root and use `project.info` - -Rejected for the target design. - -Example: - -```python -project.info.title -``` - -This is readable, but it preserves a special-case category alias. 
The -current goal is stronger consistency between public categories and CIF -category concepts. - -### Use `project.cif`, `config.cif`, or `meta.cif` for singleton settings - -Rejected for the target layout. - -`project.cif` repeats the overloaded term that this migration removes. -`config.cif` and `meta.cif` are too generic and do not say which layer -owns the settings. `workspace.cif` is more explicit while still allowing -semantic categories such as `_project`, `_rendering`, and `_verbosity` -inside the file. - -### Rename only internal files and keep public API unchanged - -Rejected for the target design. - -This improves implementation clarity but does not solve the public -naming inconsistency. - -### Use `Study` instead of `Workspace` - -Rejected. - -`Study` is a plausible scientific term, but it is less established for -an active computational container. It also does not map as naturally to -save/load, display, and analysis orchestration. - -## Implementation Notes - -The implementation should follow: - -```text -docs/dev/plans/workspace-root-project-category.md -``` - -The high-level migration is: - -1. Rename the root facade `Project` to `Workspace`. -2. Rename the project package/module surface from `project` to - `workspace`. -3. Rename `ProjectConfig` to `WorkspaceConfig`. -4. Rename project-level category access from `info` to `project`. -5. Rename project-information `name` access to `id`, matching - `_project.id`. -6. Move the storage path to `Workspace.path`, because it describes the - saved workspace directory rather than project information. -7. Keep the information category class named `ProjectInfo` unless a - later decision chooses `ProjectMetadata`. -8. Keep CIF tags `_project.*` and `_rendering.*`. -9. Rename saved singleton config file from `project.cif` to - `workspace.cif`. -10. 
Persist workspace verbosity in `workspace.cif` as - `_verbosity.level`, owned by a first-class `Verbosity` category - under `WorkspaceConfig` (parallel to `Rendering`). -11. Update code, tests, scripts, tutorials, docs, and ADR references. - -## Post-Implementation ADR Update - -This ADR must be updated after the migration plan is implemented. - -When implementation is complete: - -1. Change status from `Proposed` to `Accepted and implemented`. -2. Record the final public API and saved file layout. -3. Record whether a temporary or permanent `Project` compatibility alias - was approved. -4. Record any deviations from the migration plan. -5. Move this file from `docs/dev/adrs/suggestions/` to - `docs/dev/adrs/accepted/` if the decision is accepted. -6. Update `docs/dev/adrs/index.md` and related accepted ADRs if the ADR - map changes. -7. Update or close related items in `docs/dev/issues/open.md`. - -## Acceptance Criteria - -This ADR is satisfied when: - -- `ed.Workspace` is the public top-level facade. -- `ed.Project` is removed unless explicitly approved as an alias. -- the public project-information category is `workspace.project`. -- project identity is exposed as `workspace.project.id`. -- the saved directory path is exposed as `workspace.path`. -- the public rendering category is `workspace.rendering`. -- saved singleton configuration lives in `workspace.cif`. -- `workspace.cif` uses `_project.*`, `_rendering.*`, and - `_verbosity.level` tags. -- workspace verbosity is owned by a registered `Verbosity` category - alongside `Rendering`. -- `ProjectInfo.path` is removed; the saved directory path is exposed - only as `workspace.path`. -- no `_meta.*` tags are introduced for project information. -- tutorials and accepted ADRs use `Workspace`. 
diff --git a/docs/dev/plans/workspace-root-project-category.md b/docs/dev/plans/workspace-root-project-category.md deleted file mode 100644 index 778fd758..00000000 --- a/docs/dev/plans/workspace-root-project-category.md +++ /dev/null @@ -1,1229 +0,0 @@ -# Workspace Root and Project Category Migration Plan - -## Status - -Branch: `feature/workspace-root-project-category` - -ADR suggestion: - -```text -docs/dev/adrs/suggestions/workspace-root-project-category.md -``` - -Two-phase workflow from `.github/copilot-instructions.md`: - -- Phase 1 - Implementation. Code and docs updates only. Do not create or - run tests unless the user explicitly asks. -- Phase 2 - Verification. Add/update tests, then run the verification - commands listed near the end of this plan. - -Stop after Phase 1 and request review before starting Phase 2. - -Status checklist. Mark `[x]` only while implementing: - -```text -Phase 1 - Implementation -[ ] Phase 0: Confirm breaking-change approval. -[ ] Phase 1: Rename root package and public facade to Workspace. -[ ] Phase 2: Rename project-info access from info to project. -[ ] Phase 3: Align project information fields with _project.* tags. -[ ] Phase 4: Rename project config file to workspace.cif. -[ ] Phase 5: Update root-object references across runtime code. -[ ] Phase 6: Update docs, tutorials, and ADR references. -[ ] Phase 7: Remove old public Project surface unless approved. -[ ] Phase 1 review gate: present diff for approval. - -Phase 2 - Verification -[ ] Move/update unit tests to workspace paths. -[ ] Add workspace naming and CIF layout tests. 
-[ ] pixi run test-structure-check -[ ] pixi run fix -[ ] pixi run check -[ ] pixi run unit-tests -[ ] pixi run integration-tests -[ ] pixi run script-tests -[ ] pixi run notebook-prepare -[ ] pixi run notebook-tests -``` - -## Commit Discipline - -When an AI agent follows this plan, every completed Phase 1 -implementation step must be staged with explicit paths and committed -locally before moving to the next implementation step or to the Phase 1 -review gate. - -Follow the **Commits** section of `.github/copilot-instructions.md`. - -Rules: - -- One commit per phase. -- Keep each commit atomic and single-purpose. -- Stage explicit paths only. Do not use `git add .`. -- Use `git mv` for file and directory moves. -- Do not stage unrelated user changes. -- Do not stage generated artifacts unless the user explicitly asks. -- If a serious uncovered design issue appears, stop and ask before - continuing. - -Suggested commit messages: - -```text -Rename Project facade to Workspace -Expose project information as workspace.project -Align project metadata fields with CIF names -Rename project config file to workspace.cif -Update runtime references to Workspace -Update docs for Workspace root API -Remove old Project public API surface -``` - -## Goal - -Split the name "project" into two distinct concepts: - -1. `Workspace` - the top-level runtime facade, currently named - `Project`. -2. `workspace.project` - the category that stores information about the - scientific project. 
- -Target public API: - -```python -import easydiffraction as ed - -workspace = ed.Workspace(project_id='lbco_hrpt') -workspace.project.id -workspace.project.title = 'La0.5Ba0.5CoO3 at HRPT@PSI' -workspace.rendering.table_engine = 'rich' -workspace.verbosity = 'short' -workspace.structures -workspace.experiments -workspace.analysis -workspace.display -workspace.summary -workspace.save_as('lbco_hrpt') -``` - -Target workspace-level config file: - -```text -workspace.cif -``` - -Target CIF tags inside `workspace.cif`: - -```cif -_project.id -_project.title -_project.description -_project.created -_project.last_modified - -_rendering.chart_engine -_rendering.table_engine - -_verbosity.level -``` - -Do not introduce `_meta.*` tags. - -Target saved layout: - -```text -/ -|-- workspace.cif -|-- structures/ -| `-- cosio.cif -|-- experiments/ -| `-- d20.cif -|-- analysis/ -| `-- analysis.cif -`-- summary/ - `-- summary.cif -``` - -## Decisions Already Made - -Use these decisions unless the user explicitly changes the ADR before -implementation: - -- The public root class becomes `Workspace`. -- The public root import becomes - `from easydiffraction import Workspace`. -- The public project-information category becomes `workspace.project`. -- The public rendering category remains `workspace.rendering`. -- The project-information category keeps semantic CIF tags `_project.*`. -- The rendering category keeps semantic CIF tags `_rendering.*`. -- The verbosity preference is serialized as `_verbosity.level`. -- The saved singleton config file becomes `workspace.cif`. -- The saved root is a workspace directory with a user-chosen filesystem - name; do not use `project` as the conceptual root name in new docs. -- Do not use `project.cif`, `config.cif`, or `meta.cif` as the primary - singleton config file in the target layout. -- The storage directory path belongs to `Workspace.path`, not - `workspace.project.path`. 
-- The old `Project` public API is removed unless the user explicitly - approves an alias before implementation. -- The category class name `ProjectInfo` is kept; only the public - attribute name (`info` → `project`) changes. -- `ProjectInfo.path` is removed; the saved directory path lives on - `Workspace.path` only. -- A first-class `Verbosity` category under `WorkspaceConfig` is required - (not optional) so that `_verbosity.level` round-trips through - `workspace.cif` like other singleton categories. -- Source-tree imports may be temporarily inconsistent between phases - (for example, Phase 1 leaves `project_config_to_cif` imports until - Phase 4 renames the serializer functions). This is acceptable because - tests are not run in Phase 1. Each phase must still leave the source - importable at the end of the phase. - -## Current Shape - -The current implementation already has a category-owner based project -configuration layer: - -```text -src/easydiffraction/project/ -|-- project.py # class Project -|-- project_config.py # class ProjectConfig -|-- project_info.py # ProjectInfo export -|-- display.py # class ProjectDisplay -`-- categories/ - |-- info/ # ProjectInfo category - `-- rendering/ # Rendering category -``` - -Current public API: - -```python -project = ed.Project(name='my_project') -project.info.title -project.rendering.table_engine -``` - -Current saved config file: - -```text -project.cif -``` - -## Target Shape - -Target implementation: - -```text -src/easydiffraction/workspace/ -|-- workspace.py # class Workspace -|-- workspace_config.py # class WorkspaceConfig -|-- project_info.py # ProjectInfo export -|-- display.py # class WorkspaceDisplay -`-- categories/ - |-- project/ # ProjectInfo category - |-- rendering/ # Rendering category - `-- verbosity/ # Verbosity category -``` - -Target public API: - -```python -workspace = ed.Workspace(project_id='my_project') -workspace.project.title -workspace.rendering.table_engine -workspace.verbosity = 'short' 
-``` - -## Out Of Scope - -Do not do these in this migration: - -- Do not add `_meta.*` CIF tags. -- Do not use `project.cif`, `config.cif`, or `meta.cif` as the target - singleton settings file. -- Do not redesign structure or experiment datablocks. -- Do not change analysis fit-mode semantics. -- Do not change calculator behavior. -- Do not edit generated package-structure docs by hand. -- Do not edit generated notebooks directly. Edit tutorial `.py` sources - and regenerate notebooks during Phase 2. -- Do not keep a `Project = Workspace` compatibility alias unless the - user explicitly approves it. - -## Phase 0: Confirm Breaking-Change Approval - -This migration removes or replaces the public `Project` API unless the -user approves a compatibility alias. - -Before changing code, ask the user to confirm: - -```text -This migration removes ed.Project and replaces it with ed.Workspace. -Should implementation proceed without a Project compatibility alias? -``` - -If the user asks for a compatibility alias, record that decision in this -plan and in the ADR before implementation. - -Do not implement code before this approval gate. - -Commit: no commit required for this phase unless the plan or ADR is -updated. - -## Phase 1: Rename Root Package And Public Facade - -### Objective - -Rename the top-level runtime facade and package from `project` to -`workspace`. - -### Files Likely To Change - -- `src/easydiffraction/project/` -- `src/easydiffraction/__init__.py` -- `src/easydiffraction/__main__.py` -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/analysis/sequential.py` -- `src/easydiffraction/display/plotting.py` -- `src/easydiffraction/summary/summary.py` -- any source file importing `easydiffraction.project.*` - -### Steps - -1. Move the source package with `git mv` so history is preserved: - - ```text - src/easydiffraction/project/ - -> src/easydiffraction/workspace/ - ``` - -2. 
Rename files: - - ```text - workspace/project.py - -> workspace/workspace.py - - workspace/project_config.py - -> workspace/workspace_config.py - ``` - -3. Rename classes: - - ```text - Project -> Workspace - ProjectConfig -> WorkspaceConfig - ProjectDisplay -> WorkspaceDisplay - ``` - -4. Rename class-level state inside the facade: - - ```text - Project._current_project -> Workspace._current_workspace - Project._loading -> Workspace._loading # name kept - Project.current_project_path() -> Workspace.current_workspace_path() - ``` - - Update the `ClassVar` annotation accordingly and update all internal - references (`type(self)._current_project = self`, - `cls._current_project`). - -5. Update the `varname()` fallback inside `__init__` so the default - variable name becomes `'workspace'` instead of `'project'`: - - ```python - self._varname = 'workspace' if type(self)._loading else varname() - ``` - -6. Update top-level import: - - ```python - from easydiffraction.workspace.workspace import Workspace - ``` - -7. Remove the old top-level `Project` import unless the user approved an - alias. - -8. Update type-checking imports: - - ```python - from easydiffraction.workspace.workspace import Workspace - ``` - -9. Update docstrings from "Project facade" to "Workspace facade" where - they describe the root object. Keep wording that talks about the - scientific project (titles, descriptions, identity) unchanged. - -10. Rename `io/ascii.py::extract_project_from_zip` to - `extract_workspace_from_zip` and update its re-exports in - `src/easydiffraction/io/__init__.py` and - `src/easydiffraction/__init__.py`. The function extracts a saved - workspace directory, not scientific project information. - -11. Note: `src/easydiffraction/io/cif/serialize.py` still defines - `project_config_to_cif`, `project_config_from_cif`, and - `project_to_cif` at this point. Leave those imports as-is in - `workspace.py`; they are renamed in Phase 4. 
The function - `project_info_to_cif` keeps its name. - -12. Run a source-only grep. Do not run tests in Phase 1: - -```shell -rg -n "easydiffraction\\.project|\\bProject\\b|ProjectDisplay|ProjectConfig" src -``` - -For every match, decide whether it refers to: - -- the old root object, which should become `Workspace`; -- the project-information category, which should remain project; -- historical text that should be updated in docs later. - -### Stop Conditions - -Stop and ask if: - -- another public class named `Workspace` already exists; -- package moves break imports in a way that would require compatibility - shims; -- a file has both root-object `project` and category `project` meanings - that cannot be separated clearly. - -### Commit - -Stage explicit moved and edited files, then commit: - -```text -Rename Project facade to Workspace -``` - -## Phase 2: Rename Project-Info Access From `info` To `project` - -### Objective - -Make the project-information category public as `workspace.project` -instead of `workspace.info`. - -### Files Likely To Change - -- `src/easydiffraction/workspace/workspace_config.py` -- `src/easydiffraction/workspace/workspace.py` -- `src/easydiffraction/workspace/categories/info/` -- `src/easydiffraction/workspace/project_info.py` -- `src/easydiffraction/io/cif/serialize.py` -- all code using `.info` for project information - -### Steps - -1. Rename category package: - - ```text - src/easydiffraction/workspace/categories/info/ - -> src/easydiffraction/workspace/categories/project/ - ``` - -2. Keep the category class name `ProjectInfo` for now. The class name is - explicit and avoids a confusing `Project` class after the root class - is renamed to `Workspace`. - -3. Rename imports: - - ```python - from easydiffraction.workspace.categories.project import ProjectInfo - from easydiffraction.workspace.categories.project import ProjectInfoFactory - ``` - -4. 
In `WorkspaceConfig`, rename: - - ```text - _info -> _project - info -> project - ``` - -5. In `Workspace`, rename: - - ```text - _info -> _project - info -> project - ``` - -6. Remove the public `.info` property unless the user approved a - compatibility alias. - -7. Update all runtime references: - - ```text - workspace.info.title -> workspace.project.title - workspace.info.description -> workspace.project.description - workspace.info.update_last_modified() -> workspace.project.update_last_modified() - ``` - -8. Run grep: - - ```shell - rg -n "\\.info\\b|categories/info|categories\\.info" src - ``` - -9. For every match, update it if it refers to project information. Leave - unrelated uses of the word "info" alone. - -### Stop Conditions - -Stop and ask if: - -- `info` appears as a different public concept unrelated to project - information; -- removing `.info` would break a user-requested compatibility alias. - -### Commit - -```text -Expose project information as workspace.project -``` - -## Phase 3: Align Project Information Fields With `_project.*` - -### Objective - -Expose project identity as `workspace.project.id`, matching CIF -`_project.id`. - -Move the saved directory path to `workspace.path`, because it describes -the workspace location and is not serialized project information. - -### Files Likely To Change - -- `src/easydiffraction/workspace/categories/project/default.py` -- `src/easydiffraction/workspace/workspace.py` -- `src/easydiffraction/io/cif/serialize.py` -- `src/easydiffraction/summary/summary.py` -- `src/easydiffraction/display/plotting.py` -- any code using `.name` for project identity or `.project.path` - -### Steps - -1. In `ProjectInfo`, rename the public identity property and its setter: - - ```text - name (getter) -> id (getter) - name (setter) -> id (setter) - ``` - - Do not also rename the internal descriptor attribute - `self._project_id`; it already matches the new public name. - -2. 
Keep the underlying CIF tag unchanged: - - ```python - CifHandler(names=['_project.id']) - ``` - -3. Update `ProjectInfo.unique_name` to return `self.id`. - -4. Keep `ProjectInfo._identity.category_code = 'project'` as-is. - -5. Update `project_info_to_cif()` and CIF loading helpers to use - `info.id`. - -6. Rename constructor arguments: - - ```text - name -> project_id - ``` - - Apply this to: - - `Workspace.__init__` - - `WorkspaceConfig.__init__` - - `ProjectInfo.__init__` - - `ProjectInfoFactory.create(...)` call sites - - Default value: `'untitled_project'` (unchanged value, just the - parameter name changes) - -7. Add `Workspace.path` as the runtime storage path. Initialize - `self._path: pathlib.Path | None = None` in `Workspace.__init__`. - - Suggested shape (match the surrounding `GuardedBase` pattern; do not - add `@typechecked` here because the setter accepts both `str` and - `pathlib.Path`): - - ```python - @property - def path(self) -> pathlib.Path | None: - """Saved workspace directory.""" - return self._path - - @path.setter - def path(self, value: object) -> None: - self._path = pathlib.Path(value) if value is not None else None - ``` - -8. Remove `ProjectInfo.path` (property, setter, and the `self._path` - attribute inside `ProjectInfo.__init__`) unless explicitly approved - as a compatibility alias. - -9. Update save/load logic across the codebase: - - ```text - workspace.path - ``` - - should replace: - - ```text - project.info.path # old - workspace.project.path # never used; do not introduce - ``` - - Concrete call sites include `Workspace.save`, `Workspace.load`, - `Workspace.current_workspace_path`, and any consumers in `analysis/`, - `display/`, `summary/`, and `io/`. - -10. Update messages and string representations: - -```text -Workspace '' (...) -Saving workspace '' to ... -``` - -11. 
Run grep: - -```shell -rg -n "\\.name\\b|\\.path\\b|project_id|Project identifier" src/easydiffraction/workspace src/easydiffraction/io src/easydiffraction/display src/easydiffraction/summary -``` - -Inspect each match manually. Do not blindly replace every `.name`; -structures and experiments still use `.name`. - -### Stop Conditions - -Stop and ask if: - -- a caller depends on `workspace.name` as a root-object property; -- moving `path` out of `ProjectInfo` makes save/load unclear; -- external saved fixtures require an approved compatibility path. - -### Commit - -```text -Align project metadata fields with CIF names -``` - -## Phase 4: Rename Project Config File To `workspace.cif` - -### Objective - -Rename the saved singleton configuration file from `project.cif` to -`workspace.cif`. - -### Files Likely To Change - -- `src/easydiffraction/workspace/workspace.py` -- `src/easydiffraction/io/cif/serialize.py` -- `src/easydiffraction/workspace/workspace_config.py` -- `src/easydiffraction/workspace/categories/verbosity/` -- CLI entry points in `src/easydiffraction/__main__.py` -- docs that describe saved project directories -- test fixtures in Phase 2 - -### Steps - -1. Rename serializer functions in - `src/easydiffraction/io/cif/serialize.py` and every call site: - - ```text - project_config_to_cif -> workspace_config_to_cif - project_config_from_cif -> workspace_config_from_cif - project_to_cif -> workspace_to_cif - ``` - - Call sites include `workspace.py` (formerly `project.py`), - `workspace_config.py`, and the serializer itself (the - `project_to_cif` body calls `project_config_to_cif`). After this step - the imports that were intentionally left stale in Phase 1 must - compile cleanly. - - Do not rename `project_info_to_cif`; it serializes the `_project` - category and that name remains correct. - -2. Update `Workspace.save()` to write: - - ```text - workspace.cif - ``` - -3. Update `Workspace.load()` to read: - - ```text - workspace.cif - ``` - -4. 
Do not add `project.cif`, `config.cif`, or `meta.cif` fallbacks - unless the user approved a compatibility loader. - -5. Add a `Verbosity` category under - `src/easydiffraction/workspace/categories/verbosity/` following the - same shape as `rendering/` (a `default.py` with a `Verbosity` - `CategoryItem`, a `factory.py` with `VerbosityFactory`, and an - `__init__.py` that imports both to trigger registration). - - The category owns one descriptor: - - ```python - CifHandler(names=['_verbosity.level']) - ``` - - Bind it in `WorkspaceConfig.__init__` next to `_rendering`, and - expose it from `Workspace` so that the public access path remains: - - ```python - workspace.verbosity = 'short' - workspace.verbosity # -> 'short' - ``` - - The public `verbosity` getter/setter on `Workspace` reads and writes - the category's `level` descriptor (validated against `VerbosityEnum`) - and replaces the current `self._verbosity: VerbosityEnum` - runtime-only attribute. Remove that attribute. - - Serialize it as: - - ```cif - _verbosity.level short - ``` - -6. Keep the contents semantic: - - ```cif - _project.id - _project.title - _rendering.table_engine - _verbosity.level - ``` - -7. Update logging and console output from `project.cif` to - `workspace.cif`. - -8. Run grep: - - ```shell - rg -n "project\\.cif|config\\.cif|meta\\.cif|project_config_to_cif|project_config_from_cif|project_to_cif|verbosity" src docs tests - ``` - - Update source and docs only. Test files are handled in Phase 2 unless - the user explicitly asks otherwise. - -9. Saved on-disk fixtures under `data/` and `projects/` still contain - `project.cif` files (for example `data/lbco_project/project.cif`, - `projects/cosio/project.cif`). Do **not** edit or regenerate them in - Phase 1. They are inputs to integration/script tests and will either - be regenerated in Phase 2 or the relevant tests will be updated to - write fresh workspace directories. Flag any that block Phase 2 in the - review gate. 
- -### Stop Conditions - -Stop and ask if: - -- repository fixtures or tutorials contain saved directories that must - remain loadable without conversion; -- the user wants a one-release compatibility loader. -- the verbosity setting cannot be represented as a category without - weakening the public `workspace.verbosity` API. - -### Commit - -```text -Rename project config file to workspace.cif -``` - -## Phase 5: Update Root-Object References Across Runtime Code - -### Objective - -Replace root-object variables and attributes named `project` with -`workspace` where they refer to the top-level facade. - -Keep the word `project` where it refers to the project-information -category or the scientific project itself. - -### Files Likely To Change - -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/analysis/sequential.py` -- `src/easydiffraction/display/plotting.py` -- `src/easydiffraction/project/display.py` after it has moved to - `workspace/display.py` -- `src/easydiffraction/summary/summary.py` -- `src/easydiffraction/__main__.py` -- `src/easydiffraction/io/*` - -### Steps - -1. Rename root references in `Analysis`: - - ```text - self.project -> self.workspace - analysis.project -> analysis.workspace - ``` - - This includes the constructor argument, the stored attribute, and any - public read-only property exposing the parent workspace. - -2. Rename display internals in `WorkspaceDisplay` (formerly - `ProjectDisplay`) and in any other class that stores a back- - reference to the root facade: - - ```text - self._project -> self._workspace - _set_project(...) -> _set_workspace(...) - ``` - - Only do this when the stored object is the top-level runtime facade. - Do not touch `self._project_id` inside `ProjectInfo` or `_project.*` - CIF tags. - -3. Rename local variables in runtime code: - - ```text - project = Workspace(...) - -> workspace = Workspace(...) - ``` - -4. 
Keep scientific-project wording where appropriate: - - ```text - workspace.project.title - project_id - _project.id - ``` - -5. Update user-facing messages carefully. Good examples: - - ```text - "Workspace directory not found" - "Saving workspace" - "Project title" - ``` - -6. Run grep: - - ```shell - rg -n "\\bproject\\b|\\bProject\\b|_project|ProjectDisplay" src/easydiffraction - ``` - -7. Inspect each match. Do not replace `_project` CIF tags. - -### Stop Conditions - -Stop and ask if: - -- a name has both root-workspace and project-information meanings in the - same function and cannot be made clear; -- renaming a method such as `_set_project` would require updating public - plugin or user code. - -### Commit - -```text -Update runtime references to Workspace -``` - -## Phase 6: Update Docs, Tutorials, And ADR References - -### Objective - -Update user-facing and developer-facing documentation to describe the -new root object and project-information category. - -### Files Likely To Change - -- `docs/dev/adrs/index.md` -- `docs/dev/issues/open.md` -- `docs/dev/adrs/accepted/*.md` -- `docs/dev/adrs/suggestions/*.md` -- `docs/docs/tutorials/*.py` -- `README.md` -- `CONTRIBUTING.md` only if it contains API examples - -Do not edit these by hand: - -- `docs/dev/package-structure/full.md` -- `docs/dev/package-structure/short.md` -- generated tutorial notebooks -- generated `docs/site/` files - -### Steps - -1. Update the relevant accepted ADRs: - - ```text - Project Facade and Persistence Layout - -> Workspace Facade and Persistence Layout - ``` - -2. Update the affected ADR examples to use: - - ```text - workspace.project ProjectInfo - workspace.rendering Rendering - workspace.verbosity str - workspace.display WorkspaceDisplay - ``` - -3. Update saved layout examples: - - ```text - workspace.cif - structures/ - experiments/ - analysis/ - summary.cif - ``` - -4. 
Update public examples: - - ```python - workspace = ed.Workspace(project_id='lbco_hrpt') - workspace.project.title = '...' - ``` - -5. Update ADRs that describe current API. Historical reasoning can keep - old names only if it is clearly historical and not presented as - current usage. - -6. Update tutorial `.py` files, not notebooks. Phase 2 will run - `pixi run notebook-prepare`. - -7. Run grep: - - ```shell - rg -n "ed\\.Project|from easydiffraction import Project|project\\.info|project\\.rendering|ProjectDisplay|project\\.cif" docs README.md CONTRIBUTING.md - ``` - -8. Inspect each match manually. - -### Stop Conditions - -Stop and ask if: - -- a tutorial title uses "Project" as ordinary English rather than API - naming; -- historical ADRs would become misleading if edited mechanically. - -### Commit - -```text -Update docs for Workspace root API -``` - -## Phase 7: Remove Old Public `Project` Surface Unless Approved - -### Objective - -Finish the breaking rename by removing old public imports and module -paths unless the user approved compatibility. - -### Steps Without Compatibility Alias - -1. Ensure top-level `easydiffraction.__init__` exports `Workspace`, not - `Project`. - -2. Ensure no source imports from: - - ```text - easydiffraction.project - ``` - -3. Ensure no public source package remains at: - - ```text - src/easydiffraction/project - ``` - -4. Run grep: - - ```shell - rg -n "from easydiffraction import Project|ed\\.Project|easydiffraction\\.project|\\bProject\\(" src docs tests tools README.md CONTRIBUTING.md - ``` - -5. Any remaining match must be: - - historical text that intentionally names the old API; or - - a test that will be updated in Phase 2; or - - a generated artifact that should not be edited manually. - -### Steps With Approved Compatibility Alias - -Only do this if the user explicitly approved it. - -1. Add a temporary alias in `src/easydiffraction/__init__.py`: - - ```python - Project = Workspace - ``` - -2. 
Keep the alias undocumented unless the user asks for a migration - note. - -3. Add tests in Phase 2 proving both `Workspace` and `Project` construct - the same root object. - -### Commit - -Without alias: - -```text -Remove old Project public API surface -``` - -With alias: - -```text -Add Project alias for Workspace migration -``` - -## Phase 1 Review Gate - -After Phase 1 commits are complete: - -1. Run `git status --short`. -2. Confirm only intended files are changed. -3. Summarize: - - whether `Project` was removed or aliased; - - whether `workspace.cif` replaced `project.cif`; - - any files intentionally left for Phase 2 test updates; - - any unresolved questions. - -4. Stop and ask the user to review before starting Phase 2. - -Do not run the full verification suite until the user approves moving to -Phase 2. - -## Phase 2: Verification And Tests - -Only start this phase after the user approves the Phase 1 -implementation. - -### Test Updates - -Move or update tests to mirror the new source tree: - -```text -tests/unit/easydiffraction/project/ --> tests/unit/easydiffraction/workspace/ -``` - -Update imports: - -```python -from easydiffraction.workspace.workspace import Workspace -from easydiffraction.workspace.display import WorkspaceDisplay -``` - -Update functional and integration tests: - -```python -from easydiffraction import Workspace -workspace = Workspace(project_id='...') -``` - -### New Tests To Add - -Add focused tests for: - -1. `from easydiffraction import Workspace`. -2. `Workspace(project_id='p1').project.id == 'p1'`. -3. `workspace.project.title` round-trips through `workspace.cif`. -4. `workspace.rendering.table_engine` round-trips through - `workspace.cif`. -5. `workspace.verbosity` round-trips through `workspace.cif`. -6. `Workspace.save()` writes `workspace.cif`. -7. `Workspace.load()` reads `workspace.cif`. -8. `workspace.cif` contains `_project.id`, not `_meta.project_id`. -9. `workspace.cif` contains `_rendering.table_engine`. -10. 
`workspace.cif` contains `_verbosity.level`. -11. `workspace.path` is set after `save_as()` and `load()`. -12. `workspace.project` has no serialized path field. -13. `project.cif`, `config.cif`, and `meta.cif` are not written unless - compatibility was approved. -14. `ed.Project` is absent unless compatibility was approved. -15. `Verbosity` category is registered via its factory and reachable - through `WorkspaceConfig` (parallel to `Rendering`). -16. `ed.extract_workspace_from_zip` is importable and - `ed.extract_project_from_zip` is not (unless compatibility - approved). - -If compatibility alias was approved, add tests for: - -1. `from easydiffraction import Project`. -2. `Project is Workspace` or equivalent behavior. -3. Any approved `project.cif` fallback behavior. - -### Verification Commands - -Run in this order: - -```shell -pixi run test-structure-check -pixi run fix -pixi run check -pixi run unit-tests -pixi run integration-tests -pixi run script-tests -pixi run notebook-prepare -pixi run notebook-tests -``` - -If `pixi run fix` regenerates package-structure docs, accept those -generated changes and do not hand-edit them. - -### Phase 2 Commit Suggestions - -Use one or more commits, depending on size: - -```text -Update workspace unit tests -Update tutorials for Workspace API -Regenerate tutorial notebooks for Workspace API -``` - -## Grep Checklist - -Use this checklist before final review. - -Runtime root object should use `Workspace`: - -```shell -rg -n "\\bProject\\b|ed\\.Project|from easydiffraction import Project" src tests docs tools README.md CONTRIBUTING.md -``` - -Project-information category should use `workspace.project`: - -```shell -rg -n "\\.info\\b|workspace\\.project|project\\.info" src tests docs tools README.md CONTRIBUTING.md -``` - -CIF project category should stay `_project`: - -```shell -rg -n "_meta\\.|_project\\." 
src tests docs tools README.md CONTRIBUTING.md -``` - -Saved config file should be `workspace.cif`: - -```shell -rg -n "project\\.cif|config\\.cif|meta\\.cif|workspace\\.cif" src tests docs tools README.md CONTRIBUTING.md -``` - -Workspace verbosity should serialize as a workspace-level category: - -```shell -rg -n "_verbosity|verbosity" src tests docs tools README.md CONTRIBUTING.md -``` - -Generated docs should not be manually edited: - -```shell -git diff -- docs/site docs/dev/package-structure/full.md docs/dev/package-structure/short.md -``` - -If package-structure docs changed because of `pixi run fix`, that is -expected. If `docs/site` changed, ask before staging. - -## Common Mistakes - -### Mistake: Renaming `_project.*` To `_workspace.*` - -Do not do this. The CIF category describes scientific project -information, not the runtime facade. - -Correct: - -```cif -_project.id -_project.title -``` - -Incorrect: - -```cif -_workspace.project_id -_workspace.title -``` - -### Mistake: Introducing `_meta.*` - -Do not replace `_project.*` with `_meta.*`. - -Correct: - -```cif -_project.title -``` - -Incorrect: - -```cif -_meta.project_title -``` - -### Mistake: Keeping `project.cif` Or Switching To Generic File Names - -Do not use `project.cif`, `config.cif`, or `meta.cif` as the target -singleton settings file. The file belongs to the workspace layer. - -Correct: - -```text -workspace.cif -``` - -### Mistake: Blindly Replacing Every `project` - -Some uses of `project` should remain: - -- CIF tags such as `_project.id` -- `workspace.project` -- scientific project wording in prose -- `ProjectInfo` class name, unless a later ADR changes it - -Only root-facade uses should become `workspace` or `Workspace`. - -### Mistake: Leaving Path On Project Information - -The saved directory path belongs to the workspace runtime state. It -should be `workspace.path`, not `workspace.project.path`. 
- -### Mistake: Forgetting Facade Class-Level State - -When renaming `Project` to `Workspace`, the `ClassVar` -`_current_project`, the `current_project_path()` classmethod, and the -`varname()` fallback string `'project'` all live on the class itself and -are easy to miss with a single search-and-replace. Rename them to -`_current_workspace`, `current_workspace_path()`, and `'workspace'` -respectively. - -### Mistake: Renaming `ProjectInfo._project_id` - -The internal descriptor attribute `self._project_id` inside -`ProjectInfo` already matches the new public name `id` and stays as is. -Only the public `name` property/setter becomes `id`. - -### Mistake: Editing Generated Notebooks Directly - -Tutorial notebooks are generated artifacts. Edit tutorial `.py` files, -then run `pixi run notebook-prepare` in Phase 2. - -## Suggested Pull Request - -Title: - -```text -Rename Project root object to Workspace -``` - -Description: - -```text -This change separates the working EasyDiffraction workspace from the -scientific project information stored inside it. Users now create a -Workspace, while project title and description live under -workspace.project and continue to serialize with clear _project.* CIF -names. 
-``` From e2d64b703b4ac7fd4d9929a11895b33a8241db64 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:42:09 +0200 Subject: [PATCH 04/72] Store auto rendering defaults in project config --- .../project/categories/rendering/default.py | 60 ++++++++++++++----- src/easydiffraction/project/display.py | 7 ++- .../categories/rendering/test_default.py | 18 +++++- .../project/test_project_config.py | 20 +++++++ 4 files changed, 86 insertions(+), 19 deletions(-) diff --git a/src/easydiffraction/project/categories/rendering/default.py b/src/easydiffraction/project/categories/rendering/default.py index d3e2fad3..81297960 100644 --- a/src/easydiffraction/project/categories/rendering/default.py +++ b/src/easydiffraction/project/categories/rendering/default.py @@ -20,6 +20,11 @@ from easydiffraction.utils.utils import render_table +AUTO_ENGINE = 'auto' +CHART_ENGINE_OPTIONS = [AUTO_ENGINE, *[member.value for member in PlotterEngineEnum]] +TABLE_ENGINE_OPTIONS = [AUTO_ENGINE, *[member.value for member in TableEngineEnum]] + + @RenderingFactory.register class Rendering(CategoryItem): """Chart and table engine selection for a project.""" @@ -37,13 +42,14 @@ def __init__(self) -> None: self._plotter = Plotter() self._tabler = TableRenderer.get() + # Persist symbolic "auto" so project.cif stays portable across environments. 
self._chart_engine = StringDescriptor( name='chart_engine', description='Chart renderer backend type', value_spec=AttributeSpec( - default=self._plotter.engine, + default=AUTO_ENGINE, validator=MembershipValidator( - allowed=[member.value for member in PlotterEngineEnum], + allowed=CHART_ENGINE_OPTIONS, ), ), cif_handler=CifHandler(names=['_rendering.chart_engine']), @@ -52,14 +58,44 @@ def __init__(self) -> None: name='table_engine', description='Table renderer backend type', value_spec=AttributeSpec( - default=self._tabler.engine, + default=AUTO_ENGINE, validator=MembershipValidator( - allowed=[member.value for member in TableEngineEnum], + allowed=TABLE_ENGINE_OPTIONS, ), ), cif_handler=CifHandler(names=['_rendering.table_engine']), ) + def _resolved_chart_engine(self, value: str) -> str: + if value == AUTO_ENGINE: + return PlotterEngineEnum.default().value + return value + + def _resolved_table_engine(self, value: str) -> str: + if value == AUTO_ENGINE: + return TableEngineEnum.default().value + return value + + def _set_chart_engine(self, value: str) -> None: + if value not in CHART_ENGINE_OPTIONS: + self._plotter.engine = value + return + + resolved_engine = self._resolved_chart_engine(value) + if self._plotter.engine != resolved_engine: + self._plotter.engine = resolved_engine + self._chart_engine.value = value + + def _set_table_engine(self, value: str) -> None: + if value not in TABLE_ENGINE_OPTIONS: + self._tabler.engine = value + return + + resolved_engine = self._resolved_table_engine(value) + if self._tabler.engine != resolved_engine: + self._tabler.engine = resolved_engine + self._table_engine.value = value + @property def chart_engine(self) -> StringDescriptor: """Chart renderer backend type.""" @@ -67,8 +103,7 @@ def chart_engine(self) -> StringDescriptor: @chart_engine.setter def chart_engine(self, value: str) -> None: - self._plotter.engine = value - self._chart_engine.value = self._plotter.engine + self._set_chart_engine(value) @property def 
table_engine(self) -> StringDescriptor: @@ -77,8 +112,7 @@ def table_engine(self) -> StringDescriptor: @table_engine.setter def table_engine(self, value: str) -> None: - self._tabler.engine = value - self._table_engine.value = self._tabler.engine + self._set_table_engine(value) @property def plotter(self) -> Plotter: @@ -123,14 +157,8 @@ def from_cif(self, block: object, idx: int = 0) -> None: del idx chart_engine = read_cif_str(block, '_rendering.chart_engine') if chart_engine is not None: - if chart_engine == self._plotter.engine: - self._chart_engine.value = chart_engine - else: - self.chart_engine = chart_engine + self._set_chart_engine(chart_engine) table_engine = read_cif_str(block, '_rendering.table_engine') if table_engine is not None: - if table_engine == self._tabler.engine: - self._table_engine.value = table_engine - else: - self.table_engine = table_engine + self._set_table_engine(table_engine) diff --git a/src/easydiffraction/project/display.py b/src/easydiffraction/project/display.py index 83a9e40f..1ab26941 100644 --- a/src/easydiffraction/project/display.py +++ b/src/easydiffraction/project/display.py @@ -744,7 +744,12 @@ def _uncertainty_status( if posterior_samples is None or posterior_predictive is None: return False, 'Posterior predictive data is unavailable.' - if self._project.rendering.chart_engine.value != PlotterEngineEnum.PLOTLY.value: + active_chart_engine = getattr(self._project.rendering.plotter, 'engine', None) + if active_chart_engine is None: + chart_engine = getattr(self._project.rendering, 'chart_engine', None) + active_chart_engine = getattr(chart_engine, 'value', None) + + if active_chart_engine != PlotterEngineEnum.PLOTLY.value: return False, 'Uncertainty bands currently require the Plotly chart engine.' 
return True, '' diff --git a/tests/unit/easydiffraction/project/categories/rendering/test_default.py b/tests/unit/easydiffraction/project/categories/rendering/test_default.py index edb88427..07a168ae 100644 --- a/tests/unit/easydiffraction/project/categories/rendering/test_default.py +++ b/tests/unit/easydiffraction/project/categories/rendering/test_default.py @@ -5,14 +5,18 @@ def test_rendering_defaults(): + from easydiffraction.display.plotting import PlotterEngineEnum + from easydiffraction.display.tables import TableEngineEnum from easydiffraction.project.categories.rendering.default import Rendering rendering = Rendering() assert rendering.type_info.tag == 'default' assert rendering._identity.category_code == 'rendering' - assert rendering.chart_engine.value == rendering.plotter.engine - assert rendering.table_engine.value == rendering.tabler.engine + assert rendering.chart_engine.value == 'auto' + assert rendering.table_engine.value == 'auto' + assert rendering.plotter.engine in [member.value for member in PlotterEngineEnum] + assert rendering.tabler.engine in [member.value for member in TableEngineEnum] def test_rendering_plotter_binds_parent(): @@ -28,6 +32,8 @@ def test_rendering_plotter_binds_parent(): def test_rendering_setters_update_engines(): + from easydiffraction.display.plotting import PlotterEngineEnum + from easydiffraction.display.tables import TableEngineEnum from easydiffraction.project.categories.rendering.default import Rendering rendering = Rendering() @@ -40,6 +46,14 @@ def test_rendering_setters_update_engines(): assert rendering.table_engine.value == 'rich' assert rendering.tabler.engine == 'rich' + rendering.chart_engine = 'auto' + rendering.table_engine = 'auto' + + assert rendering.chart_engine.value == 'auto' + assert rendering.table_engine.value == 'auto' + assert rendering.plotter.engine == PlotterEngineEnum.default().value + assert rendering.tabler.engine == TableEngineEnum.default().value + def 
test_rendering_from_cif_restores_types(): from easydiffraction.project.categories.rendering.default import Rendering diff --git a/tests/unit/easydiffraction/project/test_project_config.py b/tests/unit/easydiffraction/project/test_project_config.py index 3ba20eea..528c99d7 100644 --- a/tests/unit/easydiffraction/project/test_project_config.py +++ b/tests/unit/easydiffraction/project/test_project_config.py @@ -42,6 +42,26 @@ def test_project_config_as_cif_has_project_and_rendering_sections_without_data_h assert '_project.last_modified' in cif_text assert '_rendering.chart_engine' in cif_text assert '_rendering.table_engine' in cif_text + assert '_rendering.chart_engine auto' in cif_text + assert '_rendering.table_engine auto' in cif_text + + +def test_project_save_and_load_use_auto_rendering_defaults_when_unset(tmp_path): + from easydiffraction.project.project import Project + + project = Project(name='beer', title='Beer title', description='Some description') + project.save_as(str(tmp_path / 'proj')) + + project_cif = (tmp_path / 'proj' / 'project.cif').read_text() + + assert not project_cif.startswith('data_') + assert '_rendering.chart_engine auto' in project_cif + assert '_rendering.table_engine auto' in project_cif + + loaded = Project.load(str(tmp_path / 'proj')) + + assert loaded.rendering.chart_engine.value == 'auto' + assert loaded.rendering.table_engine.value == 'auto' def test_project_save_and_load_keep_project_config_section_format(tmp_path): From 1dabc4404684f20dec7023bba55b2a7564cc8098 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:45:32 +0200 Subject: [PATCH 05/72] Accept loop category key identity ADR --- .../loop-category-key-identity.md | 95 +-- docs/dev/adrs/index.md | 62 +- docs/dev/plans/loop-category-key-identity.md | 620 ------------------ .../categories/constraints/default.py | 9 +- .../analysis/categories/test_constraints.py | 11 + 5 files changed, 99 insertions(+), 698 deletions(-) rename docs/dev/adrs/{suggestions 
=> accepted}/loop-category-key-identity.md (78%) delete mode 100644 docs/dev/plans/loop-category-key-identity.md diff --git a/docs/dev/adrs/suggestions/loop-category-key-identity.md b/docs/dev/adrs/accepted/loop-category-key-identity.md similarity index 78% rename from docs/dev/adrs/suggestions/loop-category-key-identity.md rename to docs/dev/adrs/accepted/loop-category-key-identity.md index f55ef4a8..4334b4e1 100644 --- a/docs/dev/adrs/suggestions/loop-category-key-identity.md +++ b/docs/dev/adrs/accepted/loop-category-key-identity.md @@ -1,6 +1,6 @@ # ADR: Loop Category Keys and Identity Naming -**Status:** Proposed +**Status:** Accepted **Date:** 2026-05-18 ## Context @@ -10,7 +10,7 @@ CIF dictionaries can declare the key column for a loop category through use domain-specific identity tags such as `_atom_site.label`, while other loop categories may use an explicit `id` tag. -EasyDiffraction currently models the same runtime concept with +EasyDiffraction models the same runtime concept with `item._identity.category_entry_name`. `CategoryCollection` uses this value as the collection key, and category items use it in their `unique_name` path: @@ -19,23 +19,24 @@ value as the collection key, and category items use it in their .. ``` -The design question is whether the current `category_entry_name` -approach is enough, and how closely Python-facing identity names should -follow CIF key tags. +The design question was whether the `category_entry_name` approach was +enough, and how closely Python-facing identity names should follow CIF +key tags. ## Assessment -The current approach is directionally good. It gives every loop item a -stable collection key without hard-coding the key field into +The `category_entry_name` approach is directionally good. It gives every +loop item a stable collection key without hard-coding the key field into `CategoryCollection`, and most current loop categories derive that key from the same field that is serialized into CIF. 
-It is not explicit enough yet. The key field is encoded as a lambda on -each item, not as declarative metadata on the category or descriptor. -Nothing validates that the runtime key corresponds to a serialized CIF -field. The main visible example is `Constraint`: the current collection -key is derived from the left-hand side of `_constraint.expression`, but -no separate `_constraint.id` field is persisted. +Before this decision it was not explicit enough. The key field was +encoded as a lambda on each item, not as declarative metadata on the +category or descriptor. Nothing validated that the runtime key +corresponded to a serialized CIF field. The main visible example was +`Constraint`: the collection key was derived from the left-hand side of +`_constraint.expression`, but no separate `_constraint.id` field was +persisted. ## Decision @@ -52,14 +53,19 @@ Keep `category_entry_name` as the runtime analogue of CIF identity. Use `label` or `*_id` when the value has clearer domain meaning. -The `constraint` category should add an explicit `id` field and use it -as the collection key: +The `constraint` category has an explicit `id` field and uses it as the +collection key: ```text analysis.constraints[id].id -> _constraint.id ``` -The existing `lhs_alias` and `rhs_expr` properties should remain derived +`Constraints.create()` accepts an optional `id` argument. A +user-provided `id` is used whenever it is not `None`; otherwise the +method derives the default `id` from `lhs_alias`, preserving the old +`create(expression=...)` behavior. + +The existing `lhs_alias` and `rhs_expr` properties remain derived helpers from `_constraint.expression`, not the row identity. This argues against a blanket Python API change to use `id` everywhere. @@ -74,22 +80,22 @@ Rows are sorted by the chosen Python key style: `id`, then `*_id`, then identifier-like fields that are not collection keys are listed in the next section. 
-| Python key | Area | Collection class | Category code | CIF key tag | Source | Decision | -| --------------- | ---------- | ------------------------------------------- | ------------------------ | ---------------------------- | --------------------------- | ---------------------------------------------------------------------------------------------------- | -| `id` | Analysis | `Constraints` | `constraint` | `_constraint.id` | Custom category | Add this key. Current implementation derives the key from the left side of `_constraint.expression`. | -| `id` | Analysis | `SequentialFitExtractCollection` | `sequential_fit_extract` | `_sequential_fit_extract.id` | Custom category | Keep. It is an explicit row identifier for extraction rules. | -| `id` | Experiment | `LineSegmentBackground` | `background` | `_pd_background.id` | Powder CIF-style category | Keep. The row identity is opaque and already serialized. | -| `id` | Experiment | `ChebyshevPolynomialBackground` | `background` | `_pd_background.id` | Powder CIF-style category | Keep. The row identity is opaque and shared with other background variants. | -| `id` | Experiment | `LinkedPhases` | `linked_phases` | `_pd_phase_block.id` | Powder CIF-style category | Keep. Consider `phase_id` only if the public API later standardizes foreign-key names. | -| `id` | Experiment | `ExcludedRegions` | `excluded_regions` | `_excluded_region.id` | Custom category | Keep. It is a simple custom loop row identifier. | -| `id` | Experiment | `ReflnData` | `refln` | `_refln.id` | CIF-style category | Keep for the current reflection table shape. | -| `id` | Experiment | `PowderCwlReflnData` / `PowderTofReflnData` | `refln` | `_refln.id` | CIF-style category | Keep; `phase_id` remains a separate field, not the row key. | -| `experiment_id` | Analysis | `JointFitCollection` | `joint_fit` | `_joint_fit.experiment_id` | Custom category | Keep. The key is a reference to an experiment, so `id` alone would lose context. 
| -| `point_id` | Experiment | `PdCwlData` / `PdTofData` | `pd_data` | `_pd_data.point_id` | Powder CIF-style category | Keep. It is clearer than `id` for dense measured/calculated data points. | -| `point_id` | Experiment | `TotalData` | `total_data` | `_pd_data.point_id` | Current powder-data mapping | Keep. Revisit the CIF tag only when total-scattering-specific CIF tags are introduced. | -| `label` | Analysis | `Aliases` | `alias` | `_alias.label` | Custom category | Keep. It is the user-visible symbol referenced by expressions, not an opaque row id. | -| `label` | Structure | `AtomSites` | `atom_site` | `_atom_site.label` | CIF core category | Keep. This is a well-known crystallographic identity field. | -| `label` | Structure | `AtomSiteAnisoCollection` | `atom_site_aniso` | `_atom_site_aniso.label` | CIF core category | Keep. It intentionally matches and references the atom-site label. | +| Python key | Area | Collection class | Category code | CIF key tag | Source | Decision | +| --------------- | ---------- | ------------------------------------------- | ------------------------ | ---------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------ | +| `id` | Analysis | `Constraints` | `constraint` | `_constraint.id` | Custom category | Add this key. Previous implementations derived the key from the left side of `_constraint.expression`. | +| `id` | Analysis | `SequentialFitExtractCollection` | `sequential_fit_extract` | `_sequential_fit_extract.id` | Custom category | Keep. It is an explicit row identifier for extraction rules. | +| `id` | Experiment | `LineSegmentBackground` | `background` | `_pd_background.id` | Powder CIF-style category | Keep. The row identity is opaque and already serialized. | +| `id` | Experiment | `ChebyshevPolynomialBackground` | `background` | `_pd_background.id` | Powder CIF-style category | Keep. 
The row identity is opaque and shared with other background variants. | +| `id` | Experiment | `LinkedPhases` | `linked_phases` | `_pd_phase_block.id` | Powder CIF-style category | Keep. Consider `phase_id` only if the public API later standardizes foreign-key names. | +| `id` | Experiment | `ExcludedRegions` | `excluded_regions` | `_excluded_region.id` | Custom category | Keep. It is a simple custom loop row identifier. | +| `id` | Experiment | `ReflnData` | `refln` | `_refln.id` | CIF-style category | Keep for the current reflection table shape. | +| `id` | Experiment | `PowderCwlReflnData` / `PowderTofReflnData` | `refln` | `_refln.id` | CIF-style category | Keep; `phase_id` remains a separate field, not the row key. | +| `experiment_id` | Analysis | `JointFitCollection` | `joint_fit` | `_joint_fit.experiment_id` | Custom category | Keep. The key is a reference to an experiment, so `id` alone would lose context. | +| `point_id` | Experiment | `PdCwlData` / `PdTofData` | `pd_data` | `_pd_data.point_id` | Powder CIF-style category | Keep. It is clearer than `id` for dense measured/calculated data points. | +| `point_id` | Experiment | `TotalData` | `total_data` | `_pd_data.point_id` | Current powder-data mapping | Keep. Revisit the CIF tag only when total-scattering-specific CIF tags are introduced. | +| `label` | Analysis | `Aliases` | `alias` | `_alias.label` | Custom category | Keep. It is the user-visible symbol referenced by expressions, not an opaque row id. | +| `label` | Structure | `AtomSites` | `atom_site` | `_atom_site.label` | CIF core category | Keep. This is a well-known crystallographic identity field. | +| `label` | Structure | `AtomSiteAnisoCollection` | `atom_site_aniso` | `_atom_site_aniso.label` | CIF core category | Keep. It intentionally matches and references the atom-site label. | ## Non-Key Identity And Reference Fields @@ -123,22 +129,21 @@ Jupyter, CLI output, and saved CIF files. 
## Implementation Notes -The current `category_entry_name` mechanism can stay, but it should be -made easier to audit. The implementation now uses class-level -`_category_code` and `_category_entry_name` declarations on concrete -`CategoryItem` subclasses. `CategoryItem` resolves the declared -`_category_entry_name` lazily from the named public attribute, and -`Identity` exposes the resolved value through -`item._identity.category_entry_name`. +The implementation uses class-level `_category_code` and +`_category_entry_name` declarations on concrete `CategoryItem` +subclasses. `CategoryItem` resolves the declared `_category_entry_name` +lazily from the named public attribute, and `Identity` exposes the +resolved value through `item._identity.category_entry_name`. -For constraints, add a descriptor-backed `id` property serialized as -`_constraint.id`, and change `category_entry_name` to resolve from that -descriptor. Keep `_constraint.expression` for the full equation. Keep -`lhs_alias` and `rhs_expr` as derived convenience properties. +For constraints, a descriptor-backed `id` property is serialized as +`_constraint.id`, and `category_entry_name` resolves from that +descriptor. `_constraint.expression` stores the full equation. +`lhs_alias` and `rhs_expr` remain derived convenience properties. When reading older CIF files that only contain `_constraint.expression`, -derive a deterministic fallback `id` from the old `lhs_alias` key after -row values are loaded, then write `_constraint.id` on the next save. +the loader derives a deterministic fallback `id` from the old +`lhs_alias` key after row values are loaded, then writes +`_constraint.id` on the next save. ## Consequences diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 02ec5c8c..1ad2c0ae 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -13,34 +13,34 @@ folders. 
## ADR Index -| Group | Status | Title | Short description | Link | -| -------------------- | ---------- | ----------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------- | -| Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | -| Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes persisted fit-state categories, result projections, and Bayesian result manifests. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | -| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | -| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | -| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | -| Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. 
| [`undo-fit.md`](suggestions/undo-fit.md) | -| Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | -| Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | -| Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | -| Core model | Accepted | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. | [`category-parameter-access.md`](accepted/category-parameter-access.md) | -| Documentation | Accepted | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | -| Documentation | Accepted | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | -| Documentation | Accepted | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | -| Documentation | Accepted | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | -| Experiment model | Accepted | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. 
| [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | -| Factories | Accepted | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | -| Naming | Accepted | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | -| Persistence | Accepted | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | -| Persistence | Accepted | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. | [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | -| Persistence | Suggestion | Loop Category Keys and Identity Naming | Documents current loop collection keys and proposes naming rules aligned with CIF category keys. | [`loop-category-key-identity.md`](suggestions/loop-category-key-identity.md) | -| Persistence | Suggestion | Python and CIF Category Correspondence | Compares current Python paths and CIF tags, then proposes scoped one-to-one mapping for project-level categories. | [`python-cif-category-correspondence.md`](suggestions/python-cif-category-correspondence.md) | -| Quality | Accepted | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | -| Quality | Accepted | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | -| Structure model | Accepted | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. 
| [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | -| User-facing API | Accepted | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | -| User-facing API | Accepted | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | -| User-facing API | Accepted | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | -| User-facing API | Accepted | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. | [`switchable-category-api.md`](accepted/switchable-category-api.md) | +| Group | Status | Title | Short description | Link | +| -------------------- | ---------- | ----------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | +| Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | +| Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | +| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes persisted fit-state categories, result projections, and Bayesian result manifests. 
| [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | +| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | +| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | +| Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | +| Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | +| Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | +| Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | +| Core model | Accepted | Two-Level Category Parameter Access | Keeps parameter access to `datablock.category.parameter` or `datablock.collection[id].parameter`. 
| [`category-parameter-access.md`](accepted/category-parameter-access.md) | +| Documentation | Accepted | Descriptor Property Docstring Template | Makes descriptor metadata the source of truth for public property docstrings and annotations. | [`property-docstring-template.md`](accepted/property-docstring-template.md) | +| Documentation | Accepted | Development Documentation Structure | Defines the `docs/dev` layout for ADRs, issues, plans, package structure, and roadmap. | [`development-docs-structure.md`](accepted/development-docs-structure.md) | +| Documentation | Accepted | Help Method Discoverability | Requires primary public objects and facades to expose consistent `help()` output. | [`help-discoverability.md`](accepted/help-discoverability.md) | +| Documentation | Accepted | Notebook Generation Source of Truth | Treats tutorial `.py` files as editable sources and notebooks as generated artifacts. | [`notebook-generation.md`](accepted/notebook-generation.md) | +| Experiment model | Accepted | Immutable Experiment Type | Makes experiment type axes creation-time state rather than mutable runtime state. | [`immutable-experiment-type.md`](accepted/immutable-experiment-type.md) | +| Factories | Accepted | Factory Contracts and Metadata | Standardizes factory construction, metadata, compatibility, and registration behavior. | [`factory-contracts.md`](accepted/factory-contracts.md) | +| Naming | Accepted | Factory Tag Naming | Defines canonical factory tag style and standard abbreviations. | [`factory-tag-naming.md`](accepted/factory-tag-naming.md) | +| Persistence | Accepted | Free-Flag CIF Encoding | Encodes fit free/fixed state through CIF uncertainty syntax instead of a separate free list. | [`free-flag-cif-encoding.md`](accepted/free-flag-cif-encoding.md) | +| Persistence | Accepted | Loop Category Keys and Identity Naming | Documents loop collection keys and naming rules aligned with CIF category keys. 
| [`loop-category-key-identity.md`](accepted/loop-category-key-identity.md) | +| Persistence | Accepted | Project Facade and Persistence Layout | Documents the current `Project` facade and saved directory layout. | [`project-facade-and-persistence.md`](accepted/project-facade-and-persistence.md) | +| Persistence | Suggestion | Python and CIF Category Correspondence | Compares current Python paths and CIF tags, then proposes scoped one-to-one mapping for project-level categories. | [`python-cif-category-correspondence.md`](suggestions/python-cif-category-correspondence.md) | +| Quality | Accepted | Lint Complexity Thresholds | Treats ruff PLR complexity limits as design guardrails that should not be bypassed. | [`lint-complexity-thresholds.md`](accepted/lint-complexity-thresholds.md) | +| Quality | Accepted | Test Strategy | Defines layered unit, functional, integration, script, and notebook testing. | [`test-strategy.md`](accepted/test-strategy.md) | +| Structure model | Accepted | Type-Neutral ADP Parameters | Keeps ADP parameter object identities stable across B/U and iso/ani switches. | [`type-neutral-adp-parameters.md`](accepted/type-neutral-adp-parameters.md) | +| User-facing API | Accepted | Display UX Facade | Defines `project.display` and `project.rendering` responsibilities and display method names. | [`display-ux.md`](accepted/display-ux.md) | +| User-facing API | Accepted | Selector Families | Distinguishes backend selectors, switchable-category selectors, and active-sibling selectors. | [`selector-families.md`](accepted/selector-families.md) | +| User-facing API | Accepted | String Paths and Live Descriptors | Separates persisted field selectors from references to live model parameters. | [`string-paths-and-live-descriptors.md`](accepted/string-paths-and-live-descriptors.md) | +| User-facing API | Accepted | Switchable Category API | Places multi-type category selectors on the owner and omits public selectors for fixed or single-type categories. 
| [`switchable-category-api.md`](accepted/switchable-category-api.md) | diff --git a/docs/dev/plans/loop-category-key-identity.md b/docs/dev/plans/loop-category-key-identity.md deleted file mode 100644 index 9917d1ba..00000000 --- a/docs/dev/plans/loop-category-key-identity.md +++ /dev/null @@ -1,620 +0,0 @@ -# Loop Category Key Identity Implementation Plan - -## Status - -Workflow instructions: - -```text -.github/copilot-instructions.md -``` - -Related ADR suggestion: - -```text -docs/dev/adrs/suggestions/loop-category-key-identity.md -``` - -This plan implements two related changes: - -1. Move category identity declarations from per-instance assignments to - class-level declarations. -2. Add an explicit persisted `id` field to the constraints loop. - -Status checklist. Mark `[x]` only while implementing: - -```text -Phase 1 - Implementation -[x] Add class-level identity declarations to CategoryItem. -[x] Teach Identity to resolve declared category entry names. -[x] Rebuild collection indexes after CIF loop loading. -[x] Add _category_code to all current CategoryItem subclasses. -[x] Add _category_entry_name to all current loop CategoryItem subclasses. -[x] Remove direct self._identity.category_code assignments. -[x] Remove direct self._identity.category_entry_name lambda assignments. -[x] Add Constraint.id descriptor serialized as _constraint.id. -[x] Change constraints collection keys from lhs_alias to id. -[x] Preserve default constraints.create(expression=...) behavior by using lhs_alias as the default id. -[x] Add backward-compatible loading for old CIF loops without _constraint.id. -[x] Update constraints display. -[x] Update loop-category-key-identity.md if implementation details differ from the ADR. -[ ] Phase 1 review gate: present diff for approval. - -Phase 2 - Verification -[x] Add tests for the base declarative identity behavior. -[x] Add parametrized tests for current loop category identity declarations. -[x] Update constraints tests. 
-[x] Update existing round-trip tests that compare constraints CIF. -[x] Run formatting. -[x] Run targeted unit tests. -[x] Run broader checks. -``` - -## Commit Discipline - -When an AI agent follows this plan, every completed Phase 1 -implementation step must be staged with explicit paths and committed -locally before moving to the next implementation step or to the Phase 1 -review gate. - -Follow the **Commits** section of `.github/copilot-instructions.md`. - -Rules: - -- One commit per implementation step. -- Keep each commit atomic and single-purpose. -- Stage explicit paths only. Do not use `git add .`. -- Do not stage unrelated user changes. -- Do not stage generated artifacts unless the user explicitly asks. -- If a serious uncovered design issue appears, stop and ask before - continuing. - -Suggested branch: - -```text -feature/loop-category-key-identity -``` - -Suggested commit messages: - -```text -Add declarative category identity resolution -Declare category identities on current items -Persist explicit constraint identifiers -Update ADR for declarative category identity -Add declarative category identity tests -``` - -## Goal - -Replace repeated constructor code like this: - -```python -self._identity.category_code = 'atom_site' -self._identity.category_entry_name = lambda: str(self.label.value) -``` - -with class-level declarations: - -```python -class AtomSite(CategoryItem): - _category_code = 'atom_site' - _category_entry_name = 'label' -``` - -The name `_category_entry_name` is intentionally kept because this is -the preferred project terminology. In this plan it means "the name of -the item attribute used to resolve the entry name". The resolved entry -value is still obtained through: - -```python -item._identity.category_entry_name -``` - -For example, `AtomSite._category_entry_name == 'label'`, while -`atom_site._identity.category_entry_name == 'Ba1'`. 
- -## Non-Goals - -Do not change these in this migration: - -- Do not rename `Identity.category_entry_name`. -- Do not rename `CategoryCollection`. -- Do not change public collection access syntax. -- Do not change CIF tags except adding `_constraint.id`. -- Do not rename `label` to `id` for atom sites or aliases. -- Do not make `phase_id` the row key for powder reflection loops. - -## Design - -This plan intentionally uses a narrow metadata lookup based on -`_category_entry_name`. That is an allowed exception to the general "no -string-based dispatch" rule because the attribute name is a class-level -declaration, not user input, and resolution is centralized in -`CategoryItem`. - -### CategoryItem Declarations - -Add these class attributes to `CategoryItem` in -`src/easydiffraction/core/category.py`: - -```python -class CategoryItem(GuardedBase): - _category_code: str | None = None - _category_entry_name: str | None = None -``` - -Update `CategoryItem.__init__()` so it assigns `_category_code` once: - -```python -def __init__(self) -> None: - super().__init__() - if self._category_code is not None: - self._identity.category_code = self._category_code -``` - -Do not try to resolve `_category_entry_name` in `CategoryItem.__init__`. -Many item classes create descriptors after `super().__init__()` returns, -and some mixin-based classes run `CategoryItem.__init__()` before their -descriptors are created. - -Add a resolver method to `CategoryItem`: - -```python -def _resolve_category_entry_name(self) -> str | None: - attr_name = self._category_entry_name - if attr_name is None: - return None - - value = getattr(self, attr_name) - if isinstance(value, GenericDescriptorBase): - value = value.value - return str(value) -``` - -Import `GenericDescriptorBase` from `easydiffraction.core.variable` in -`category.py` if it is not already in scope. 
- -### Identity Resolution - -Update `Identity._resolve_up()` in -`src/easydiffraction/core/identity.py` so that category entries can be -resolved from the owning object before walking to the parent. - -Add this logic after checking direct callable/string values and before -climbing to the parent: - -```python -if attr == 'category_entry': - resolver = getattr(self._owner, '_resolve_category_entry_name', None) - if callable(resolver): - resolved = resolver() - if resolved is not None: - return resolved -``` - -Keep the existing `category_entry_name` setter. It remains useful as an -escape hatch and keeps old code compatible during migration. - -### Collection Index Rebuild After CIF Loading - -Update `category_collection_from_cif()` in -`src/easydiffraction/io/cif/serialize.py`. - -Currently `_adopt_items()` rebuilds the index before loop values are -loaded into each item. After declarative keys are resolved from -descriptor values, the index must be rebuilt after parameters are -loaded. - -After the row population loop, run any collection hook and rebuild: - -```python -after_from_cif = getattr(self, '_after_from_cif', None) -if callable(after_from_cif): - after_from_cif() - -self._rebuild_index() -``` - -The hook is needed for constraints to backfill missing ids from old CIF -files. - -## Category Migration Table - -Add `_category_code` to each listed class. Add `_category_entry_name` -only when the class is a loop item and currently has a collection key. -Then remove matching constructor assignments. - -| File | Class | `_category_code` | `_category_entry_name` | Notes | -| ----------------------------------------------------------------------------------- | -------------------------- | ------------------------ | ---------------------- | -------------------------------------------- | -| `src/easydiffraction/project/categories/info/default.py` | `ProjectInfo` | `project` | none | Singleton category. 
| -| `src/easydiffraction/project/categories/rendering/default.py` | `Rendering` | `rendering` | none | Singleton category. | -| `src/easydiffraction/analysis/categories/fitting/default.py` | `Fitting` | `fitting` | none | Singleton category. | -| `src/easydiffraction/analysis/categories/sequential_fit/default.py` | `SequentialFit` | `sequential_fit` | none | Singleton category. | -| `src/easydiffraction/analysis/categories/aliases/default.py` | `Alias` | `alias` | `label` | Loop key stays `_alias.label`. | -| `src/easydiffraction/analysis/categories/constraints/default.py` | `Constraint` | `constraint` | `id` | Add the id descriptor first. | -| `src/easydiffraction/analysis/categories/joint_fit/default.py` | `JointFitItem` | `joint_fit` | `experiment_id` | Loop key stays `_joint_fit.experiment_id`. | -| `src/easydiffraction/analysis/categories/sequential_fit_extract/default.py` | `SequentialFitExtractItem` | `sequential_fit_extract` | `id` | Loop key stays `_sequential_fit_extract.id`. | -| `src/easydiffraction/datablocks/structure/categories/cell/default.py` | `Cell` | `cell` | none | Singleton category. | -| `src/easydiffraction/datablocks/structure/categories/space_group/default.py` | `SpaceGroup` | `space_group` | none | Singleton category. | -| `src/easydiffraction/datablocks/structure/categories/atom_sites/default.py` | `AtomSite` | `atom_site` | `label` | Loop key stays `_atom_site.label`. | -| `src/easydiffraction/datablocks/structure/categories/atom_site_aniso/default.py` | `AtomSiteAniso` | `atom_site_aniso` | `label` | Loop key stays `_atom_site_aniso.label`. | -| `src/easydiffraction/datablocks/experiment/categories/experiment_type/default.py` | `ExperimentType` | `expt_type` | none | Singleton category. | -| `src/easydiffraction/datablocks/experiment/categories/calculation/default.py` | `Calculation` | `calculation` | none | Singleton category. 
| -| `src/easydiffraction/datablocks/experiment/categories/diffrn/default.py` | `DefaultDiffrn` | `diffrn` | none | Singleton category. | -| `src/easydiffraction/datablocks/experiment/categories/instrument/base.py` | `InstrumentBase` | `instrument` | none | Subclasses inherit this code. | -| `src/easydiffraction/datablocks/experiment/categories/peak/base.py` | `PeakBase` | `peak` | none | Subclasses inherit this code. | -| `src/easydiffraction/datablocks/experiment/categories/extinction/becker_coppens.py` | `BeckerCoppensExtinction` | `extinction` | none | Singleton category. | -| `src/easydiffraction/datablocks/experiment/categories/linked_crystal/default.py` | `LinkedCrystal` | `linked_crystal` | none | Singleton category. | -| `src/easydiffraction/datablocks/experiment/categories/linked_phases/default.py` | `LinkedPhase` | `linked_phases` | `id` | Loop key stays `_pd_phase_block.id`. | -| `src/easydiffraction/datablocks/experiment/categories/background/line_segment.py` | `LineSegment` | `background` | `id` | Loop key stays `_pd_background.id`. | -| `src/easydiffraction/datablocks/experiment/categories/background/chebyshev.py` | `PolynomialTerm` | `background` | `id` | Loop key stays `_pd_background.id`. | -| `src/easydiffraction/datablocks/experiment/categories/excluded_regions/default.py` | `ExcludedRegion` | `excluded_regions` | `id` | Loop key stays `_excluded_region.id`. | -| `src/easydiffraction/datablocks/experiment/categories/refln/bragg_sc.py` | `Refln` | `refln` | `id` | Powder reflection rows inherit this key. | -| `src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py` | `PdCwlDataPoint` | `pd_data` | `point_id` | CWL powder data row. | -| `src/easydiffraction/datablocks/experiment/categories/data/bragg_pd.py` | `PdTofDataPoint` | `pd_data` | `point_id` | TOF powder data row. | -| `src/easydiffraction/datablocks/experiment/categories/data/total_pd.py` | `TotalDataPoint` | `total_data` | `point_id` | Total-scattering data row. 
| - -Do not add `_category_code` or `_category_entry_name` to -`PowderReflnBase`, `PowderCwlRefln`, or `PowderTofRefln`; they inherit -the `refln` identity declarations from `Refln`. - -## Constraints Migration - -### Target Shape - -`Constraint` should become: - -```python -class Constraint(CategoryItem): - _category_code = 'constraint' - _category_entry_name = 'id' - - def __init__(self) -> None: - super().__init__() - - self._id = StringDescriptor( - name='id', - description='Identifier for this constraint.', - value_spec=AttributeSpec( - default='_', - validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_]*$'), - ), - cif_handler=CifHandler(names=['_constraint.id']), - ) - self._expression = StringDescriptor(...) -``` - -Define the public property: - -```python -@property -def id(self) -> StringDescriptor: - """Identifier for this constraint.""" - return self._id - -@id.setter -def id(self, value: str) -> None: - self._id.value = value -``` - -Keep `lhs_alias` and `rhs_expr` as derived read-only properties. 
- -### Create API - -Change `Constraints.create()` from: - -```python -def create(self, *, expression: str) -> None: -``` - -to: - -```python -def create(self, *, expression: str, id: str | None = None) -> None: -``` - -Implementation order: - -```python -item = Constraint() -item.expression = expression -item.id = id if id is not None else item.lhs_alias -self.add(item) -self._enabled = True -``` - -This preserves the current default user experience: - -```python -analysis.constraints.create(expression='biso_Ba = biso_La') -analysis.constraints['biso_Ba'] -``` - -It also allows explicit ids: - -```python -analysis.constraints.create( - id='constraint_1', - expression='biso_Ba = biso_La', -) -analysis.constraints['constraint_1'] -``` - -### Backward-Compatible CIF Loading - -Old CIF files only contain: - -```cif -loop_ -_constraint.expression -biso_Ba = biso_La -``` - -New CIF files should contain: - -```cif -loop_ -_constraint.id -_constraint.expression -biso_Ba "biso_Ba = biso_La" -``` - -Add a hook on `Constraints`: - -```python -def _after_from_cif(self) -> None: - for index, item in enumerate(self._items, start=1): - if item.id.value in {'', '_'}: - fallback = item.lhs_alias or f'constraint_{index}' - item.id = fallback -``` - -The generic `category_collection_from_cif()` hook described above must -call this before rebuilding the index. - -### Constraints Display - -Update `Constraints.show()` to include the id: - -```text -id | expression -``` - -Keep the existing empty warning behavior. 
- -## Required Code Searches - -After migration, these searches should return no category-item -constructor assignments: - -```shell -git grep -n -E "self\\._identity\\.category_code =" -- src/easydiffraction -git grep -n -E "self\\._identity\\.category_entry_name =" -- src/easydiffraction -git grep -n -E "category_entry_name = lambda" -- src/easydiffraction -``` - -The following reads are expected to remain: - -```shell -git grep -n "_identity\\.category_entry_name" -- src/easydiffraction -``` - -Those reads are used by collections, display, reporting, and parameter -unique names. - -Do not use `rg` in this plan; it is not available in every contributor -environment. Use `git grep` for repository searches. - -## Tests To Add Or Update - -### Base Identity Tests - -Add or update tests under `tests/unit/easydiffraction/core/`. - -Test a fake category item: - -```python -class FakeItem(CategoryItem): - _category_code = 'fake' - _category_entry_name = 'id' - - def __init__(self): - super().__init__() - self._id = StringDescriptor(...) - - @property - def id(self): - return self._id -``` - -Assert: - -```python -item._identity.category_code == 'fake' -item._identity.category_entry_name == item.id.value -item.id.unique_name.endswith('.fake..id') -``` - -Also test that a singleton-like item with `_category_code` and no -`_category_entry_name` resolves category code but returns no entry name. 
- -### Current Category Parametrized Tests - -Add a parametrized test that instantiates representative loop item -classes and verifies category code plus entry: - -```text -Alias -> alias, label -JointFitItem -> joint_fit, experiment_id -SequentialFitExtractItem -> sequential_fit_extract, id -AtomSite -> atom_site, label -AtomSiteAniso -> atom_site_aniso, label -LinkedPhase -> linked_phases, id -LineSegment -> background, id -PolynomialTerm -> background, id -ExcludedRegion -> excluded_regions, id -Refln -> refln, id -PdCwlDataPoint -> pd_data, point_id -PdTofDataPoint -> pd_data, point_id -TotalDataPoint -> total_data, point_id -``` - -Do not instantiate heavy calculator-backed owner objects for this test; -instantiate item classes directly. - -### Constraints Tests - -Update -`tests/unit/easydiffraction/analysis/categories/test_constraints.py`. - -Required assertions: - -```python -c = Constraint() -c.expression = 'a = b + c' -c.id = 'constraint_a' -assert c.id.value == 'constraint_a' -assert c.lhs_alias == 'a' -assert c.rhs_expr == 'b + c' -assert c._identity.category_entry_name == 'constraint_a' -``` - -Default id behavior: - -```python -coll = Constraints() -coll.create(expression='a = b + c') -assert 'a' in coll.names -assert coll['a'].id.value == 'a' -assert coll['a'].rhs_expr == 'b + c' -``` - -Explicit id behavior: - -```python -coll = Constraints() -coll.create(id='c1', expression='a = b + c') -assert 'c1' in coll.names -assert coll['c1'].lhs_alias == 'a' -``` - -CIF serialization: - -```python -cif = coll.as_cif -assert '_constraint.id' in cif -assert '_constraint.expression' in cif -``` - -Backward-compatible CIF loading: - -```python -cif = ''' -data_analysis - -loop_ -_constraint.expression -"a = b + c" -''' -``` - -Load through the existing analysis/constraints loader and assert the -loaded collection has key `a`. 
- -### Existing Round-Trip Tests - -Update tests that compare constraint CIF or constraint collection keys: - -- `tests/unit/easydiffraction/project/test_project_load.py` -- `tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py` -- `tests/integration/fitting/test_project_load.py` - -Expected changes: - -- New saved analysis CIF includes `_constraint.id`. -- Old expression-only fixtures, if any, still load. -- Existing constraint workflows still work when no explicit id is - supplied. - -## Verification Commands - -Run the smallest useful checks first: - -```shell -pixi run python -m pytest tests/unit/easydiffraction/analysis/categories/test_constraints.py -pixi run python -m pytest tests/unit/easydiffraction/core/ -pixi run python -m pytest tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py -``` - -Then run broader checks: - -```shell -pixi run unit-tests -pixi run integration-tests -pixi run check -``` - -If the modified-file Prettier helper is still missing, format changed -Markdown directly: - -```shell -npx prettier --write --config=prettierrc.toml docs/dev/plans/loop-category-key-identity.md docs/dev/adrs/suggestions/loop-category-key-identity.md -``` - -## Local Availability Check - -The plan was checked against the current repository state: - -- `.github/copilot-instructions.md` exists. -- `src/easydiffraction/core/category.py`, - `src/easydiffraction/core/identity.py`, and - `src/easydiffraction/io/cif/serialize.py` exist. -- `tests/unit/easydiffraction/analysis/categories/test_constraints.py` - exists. -- `tests/unit/easydiffraction/core/` exists. -- `tests/unit/easydiffraction/io/cif/test_serialize_category_owner_baseline.py` - exists. -- `pixi.toml` defines `fix`, `check`, `unit-tests`, `integration-tests`, - and `test-structure-check`. -- `prettierrc.toml` and local Prettier are available. 
-- `tools/nonpy_prettier_modified.py` is not present, so use the direct - `npx prettier --write --config=prettierrc.toml ...` fallback for - touched Markdown files. - -## Acceptance Criteria - -The implementation is done when all of these are true: - -- All current category codes are declared as `_category_code` on item - classes. -- All current loop collection keys are declared as - `_category_entry_name` on item classes. -- No current category item sets `self._identity.category_code` in its - constructor. -- No current category item sets `self._identity.category_entry_name` in - its constructor. -- `item._identity.category_entry_name` still works for all loop rows. -- Descriptor `unique_name` values still include datablock, category, - entry, and descriptor name. -- Constraints persist `_constraint.id`. -- Constraints created without an explicit id still default to the left - hand alias. -- Old constraints CIF without `_constraint.id` still loads and receives - deterministic ids. -- Collection indexes are correct after CIF loading. - -## Suggested Pull Request - -Title: - -```text -Use declarative category identity metadata -``` - -Description: - -```text -This change makes category and loop-row identities easier to audit and -keeps saved CIF identifiers explicit. Constraint rows gain a stable -identifier while existing constraint expressions continue to work. -``` diff --git a/src/easydiffraction/analysis/categories/constraints/default.py b/src/easydiffraction/analysis/categories/constraints/default.py index d6f02622..dd7bb9d6 100644 --- a/src/easydiffraction/analysis/categories/constraints/default.py +++ b/src/easydiffraction/analysis/categories/constraints/default.py @@ -137,7 +137,7 @@ def disable(self) -> None: """Deactivate constraints without deleting them.""" self._enabled = False - def create(self, *, expression: str) -> None: + def create(self, *, expression: str, id: str | None = None) -> None: """ Create a constraint from an expression string. 
@@ -148,10 +148,15 @@ def create(self, *, expression: str) -> None: expression : str Constraint equation, e.g. ``'biso_Co2 = biso_Co1'`` or ``'occ_Ba = 1 - occ_La'``. + id : str | None, optional + Explicit row identifier. When not ``None``, this value is + used as the collection key instead of the left-hand alias. """ item = Constraint() item.expression = expression - if item.lhs_alias: + if id is not None: + item.id = id + elif item.lhs_alias: item.id = item.lhs_alias self.add(item) self._enabled = True diff --git a/tests/unit/easydiffraction/analysis/categories/test_constraints.py b/tests/unit/easydiffraction/analysis/categories/test_constraints.py index b887408e..07016b29 100644 --- a/tests/unit/easydiffraction/analysis/categories/test_constraints.py +++ b/tests/unit/easydiffraction/analysis/categories/test_constraints.py @@ -22,6 +22,17 @@ def test_constraint_creation_and_collection(): assert coll['a'].rhs_expr == 'b + c' +def test_constraints_create_uses_explicit_id(): + coll = Constraints() + + coll.create(id='constraint_1', expression='a = b + c') + + assert coll.names == ['constraint_1'] + assert coll['constraint_1'].id.value == 'constraint_1' + assert coll['constraint_1'].lhs_alias == 'a' + assert coll['constraint_1'].rhs_expr == 'b + c' + + def test_constraints_from_cif_preserves_explicit_id_keys(): doc = gemmi.cif.read_string( 'data_constraints\n\n' From 68a8057e353652d67668d5bfa9ce3da825fbe8fc Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:47:16 +0200 Subject: [PATCH 06/72] Add project save to ed-2 tutorial --- docs/docs/tutorials/ed-2.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/docs/tutorials/ed-2.py b/docs/docs/tutorials/ed-2.py index a8e9e3d2..220ebae9 100644 --- a/docs/docs/tutorials/ed-2.py +++ b/docs/docs/tutorials/ed-2.py @@ -227,3 +227,9 @@ # %% project.display.pattern(expt_name='hrpt') + +# %% [markdown] +# ## Step 7: Save Project + +# %% +project.save_as('projects/lbco_hrpt') From 
d039633640a199ef0cc5770ae11f1966939d80e6 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:50:04 +0200 Subject: [PATCH 07/72] Remove extraneous blank line --- src/easydiffraction/project/categories/rendering/default.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/easydiffraction/project/categories/rendering/default.py b/src/easydiffraction/project/categories/rendering/default.py index 81297960..431da789 100644 --- a/src/easydiffraction/project/categories/rendering/default.py +++ b/src/easydiffraction/project/categories/rendering/default.py @@ -19,7 +19,6 @@ from easydiffraction.utils.logging import console from easydiffraction.utils.utils import render_table - AUTO_ENGINE = 'auto' CHART_ENGINE_OPTIONS = [AUTO_ENGINE, *[member.value for member in PlotterEngineEnum]] TABLE_ENGINE_OPTIONS = [AUTO_ENGINE, *[member.value for member in TableEngineEnum]] From d5770f0d6e129decf260c5c626e45a0190789519 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 14:56:56 +0200 Subject: [PATCH 08/72] Add project fit verbosity category --- .../python-cif-category-correspondence.md | 25 ++++----- src/easydiffraction/analysis/analysis.py | 2 +- .../categories/constraints/default.py | 2 +- src/easydiffraction/analysis/sequential.py | 8 +-- src/easydiffraction/io/cif/serialize.py | 4 ++ .../project/categories/verbosity/__init__.py | 6 +++ .../project/categories/verbosity/default.py | 53 +++++++++++++++++++ .../project/categories/verbosity/factory.py | 15 ++++++ src/easydiffraction/project/display.py | 8 +-- src/easydiffraction/project/project.py | 20 +++---- src/easydiffraction/project/project_config.py | 8 +++ 11 files changed, 116 insertions(+), 35 deletions(-) create mode 100644 src/easydiffraction/project/categories/verbosity/__init__.py create mode 100644 src/easydiffraction/project/categories/verbosity/default.py create mode 100644 src/easydiffraction/project/categories/verbosity/factory.py diff --git 
a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md index b9a90ece..32bcd13f 100644 --- a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md +++ b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md @@ -24,7 +24,7 @@ one-to-one correspondence for project-owned singleton categories: ```text project.info.title -> project.cif: _info.title project.rendering.engine -> project.cif: _rendering.engine -project.verbosity.level -> project.cif: _verbosity.level +project.verbosity.fit -> project.cif: _verbosity.fit ``` The design question is whether this rule should be applied only to @@ -55,7 +55,7 @@ to objects reached from the current `Project` root, for example | Current Python surface | Current saved location | Current CIF block form | Notes | | ----------------------------------- | ------------------------ | ---------------------- | ----------------------------------------------------------------------------------- | | `project.info`, `project.rendering` | `project.cif` | bare categories | Project-level singleton config. | -| `project.verbosity` | not persisted | none | Runtime-only string property backed by `VerbosityEnum`; no `_verbosity` category. | +| `project.verbosity` | `project.cif` | bare category | Project-owned fit-output verbosity category backed by `VerbosityEnum`. | | `project.structures[name]` | `structures/.cif` | `data_` | Each structure is one CIF data block. | | `project.experiments[name]` | `experiments/.cif` | `data_` | Each experiment is one CIF data block. | | `project.analysis` | `analysis/analysis.cif` | bare categories | Loader also accepts legacy root-level `analysis.cif`. | @@ -75,7 +75,7 @@ to objects reached from the current `Project` root, for example | `project.info.path` | none | No | Runtime storage path, not a CIF field. | | `project.rendering.chart_engine` | `_rendering.chart_engine` | Yes | Direct category and field mapping. 
| | `project.rendering.table_engine` | `_rendering.table_engine` | Yes | Direct category and field mapping. | -| `project.verbosity` | none | No | Runtime-only string convenience property; current code has no `project.verbosity.level` category and no `_verbosity.level` tag. | +| `project.verbosity.fit` | `_verbosity.fit` | Yes | Direct category and field mapping for fitting process output verbosity. | ### Analysis Configuration @@ -233,8 +233,8 @@ project.info. -> project.cif: _project. Future one-to-one correspondence work may still discuss whether the public identity field should be `name` or `id`, whether verbosity should -persist as `_verbosity.level`, and whether rendering should keep -separate chart and table engine fields. +gain additional coverage-specific fields, and whether rendering should +keep separate chart and table engine fields. Possible strict-correspondence target if a future ADR explicitly changes the accepted `_project.*` baseline: @@ -248,7 +248,7 @@ the accepted `_project.*` baseline: | `project.info.last_modified` | `_info.last_modified` | Currently `_project.last_modified`. | | `project.rendering.chart_engine` | `_rendering.chart_engine` | Already matches. | | `project.rendering.table_engine` | `_rendering.table_engine` | Already matches. | -| `project.verbosity.level` | `_verbosity.level` | Currently no persisted verbosity category. | +| `project.verbosity.fit` | `_verbosity.fit` | Implemented direct fit-output verbosity mapping. | Alternative target if the project identity field should be called `id` rather than `name`: @@ -272,8 +272,9 @@ repository can optimize them for API/persistence symmetry. ### `project.cif` Scopes Generic Categories `_info.title` is generic in isolation, but inside `project.cif` it reads -as project information. This is similar to `_verbosity.level`: the file -scope tells the reader this is project-level verbosity. +as project information. 
This is similar to `_verbosity.fit`: the file +scope tells the reader this is project-level verbosity, and the field +name identifies the fitting-process coverage. ### The Current `Project` Root Already Matches User Language @@ -318,9 +319,9 @@ unless a separate ADR changes the underlying API pattern. - `_info.*` is less self-describing if copied out of `project.cif`. - Existing `_project.*` project files would need migration or a deliberate compatibility decision. -- If verbosity is persisted, `project.verbosity` would either need to - become a category object or remain as a convenience alias for a new - `project.verbosity.level` field. +- Persisted verbosity is now a category object. The initial field is + `project.verbosity.fit`, leaving room for future coverage-specific + verbosity fields. - Collapsing rendering to `project.rendering.engine` would simplify the API, but only if chart and table renderers are intended to share one backend choice. @@ -333,5 +334,5 @@ unless a separate ADR changes the underlying API pattern. `project.rendering.table_engine` remain separate, or should the public API and CIF collapse to one `engine` field? - Should `project.verbosity = 'short'` remain as a convenience alias for - `project.verbosity.level = 'short'`, or should strict correspondence + `project.verbosity.fit = 'short'`, or should strict correspondence remove the alias? 
diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 08303f65..a8d4e257 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -704,7 +704,7 @@ def _resolve_sequential_data_dir(self) -> Path: def _prepare_fit_run(self) -> tuple[VerbosityEnum, object, object] | None: """Resolve common inputs for single and joint fitting.""" - verb = VerbosityEnum(self.project.verbosity) + verb = VerbosityEnum(self.project.verbosity.fit.value) structures = self.project.structures if not structures: log.warning('No structures found in the project. Cannot run fit.') diff --git a/src/easydiffraction/analysis/categories/constraints/default.py b/src/easydiffraction/analysis/categories/constraints/default.py index dd7bb9d6..0b622e18 100644 --- a/src/easydiffraction/analysis/categories/constraints/default.py +++ b/src/easydiffraction/analysis/categories/constraints/default.py @@ -148,7 +148,7 @@ def create(self, *, expression: str, id: str | None = None) -> None: expression : str Constraint equation, e.g. ``'biso_Co2 = biso_Co1'`` or ``'occ_Ba = 1 - occ_La'``. - id : str | None, optional + id : str | None, default=None Explicit row identifier. When not ``None``, this value is used as the collection key instead of the left-hand alias. """ diff --git a/src/easydiffraction/analysis/sequential.py b/src/easydiffraction/analysis/sequential.py index 029486c7..5889aaa7 100644 --- a/src/easydiffraction/analysis/sequential.py +++ b/src/easydiffraction/analysis/sequential.py @@ -144,12 +144,12 @@ def _fit_worker( project.analysis.fitter = Fitter(template.minimizer_tag) # 10. Fit - original_verbosity = project.verbosity - project.verbosity = 'silent' + original_verbosity = project.verbosity.fit.value + project.verbosity.fit = 'silent' try: project.analysis.fit() finally: - project.verbosity = original_verbosity + project.verbosity.fit = original_verbosity # 11. 
Collect results result.update(_collect_results(project, template)) @@ -910,7 +910,7 @@ def _prepare_sequential_run( reverse: bool, ) -> SequentialRunPlan | None: """Resolve inputs and bookkeeping for one sequential-fit run.""" - verbosity = VerbosityEnum(analysis.project.verbosity) + verbosity = VerbosityEnum(analysis.project.verbosity.fit.value) _check_seq_preconditions(analysis.project) diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index 5c3a29c1..a7fa8f98 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -521,6 +521,10 @@ def project_config_from_cif(project: object, cif_text: str) -> None: if rendering is not None: rendering.from_cif(block) + verbosity = getattr(project, 'verbosity', None) + if verbosity is not None: + verbosity.from_cif(block) + def analysis_from_cif(analysis: object, cif_text: str) -> None: """ diff --git a/src/easydiffraction/project/categories/verbosity/__init__.py b/src/easydiffraction/project/categories/verbosity/__init__.py new file mode 100644 index 00000000..be1abd47 --- /dev/null +++ b/src/easydiffraction/project/categories/verbosity/__init__.py @@ -0,0 +1,6 @@ +"""Project verbosity category exports.""" + +from __future__ import annotations + +from easydiffraction.project.categories.verbosity.default import Verbosity +from easydiffraction.project.categories.verbosity.factory import VerbosityFactory diff --git a/src/easydiffraction/project/categories/verbosity/default.py b/src/easydiffraction/project/categories/verbosity/default.py new file mode 100644 index 00000000..1121bd3a --- /dev/null +++ b/src/easydiffraction/project/categories/verbosity/default.py @@ -0,0 +1,53 @@ +"""Project fit-output verbosity category.""" + +from __future__ import annotations + +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from 
easydiffraction.core.validation import MembershipValidator +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler +from easydiffraction.project.categories.verbosity.factory import VerbosityFactory +from easydiffraction.utils.enums import VerbosityEnum + + +@VerbosityFactory.register +class Verbosity(CategoryItem): + """Fit-output verbosity selection for a project.""" + + _category_code = 'verbosity' + + type_info = TypeInfo( + tag='default', + description='Project verbosity category', + ) + + def __init__(self) -> None: + super().__init__() + + self._fit = StringDescriptor( + name='fit', + description='Fitting process output verbosity', + value_spec=AttributeSpec( + default=VerbosityEnum.default().value, + validator=MembershipValidator( + allowed=[member.value for member in VerbosityEnum], + ), + ), + cif_handler=CifHandler(names=['_verbosity.fit']), + ) + + @property + def fit(self) -> StringDescriptor: + """Fitting process output verbosity.""" + return self._fit + + @fit.setter + def fit(self, value: str) -> None: + self._fit.value = VerbosityEnum(value).value + + @property + def as_cif(self) -> str: + """Return CIF representation of this verbosity category.""" + return super().as_cif diff --git a/src/easydiffraction/project/categories/verbosity/factory.py b/src/easydiffraction/project/categories/verbosity/factory.py new file mode 100644 index 00000000..bf6e54c4 --- /dev/null +++ b/src/easydiffraction/project/categories/verbosity/factory.py @@ -0,0 +1,15 @@ +"""Factory for project verbosity categories.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class VerbosityFactory(FactoryBase): + """Create project verbosity category instances.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } diff --git a/src/easydiffraction/project/display.py b/src/easydiffraction/project/display.py index 1ab26941..e47c8ef7 
100644 --- a/src/easydiffraction/project/display.py +++ b/src/easydiffraction/project/display.py @@ -145,7 +145,7 @@ def pairs( """Plot posterior pair relationships for sampled parameters.""" with activity_indicator( ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity), + verbosity=VerbosityEnum(self._project.verbosity.fit.value), ): self._project.rendering.plotter.plot_posterior_pairs( parameters=parameters, @@ -184,7 +184,7 @@ def predictive( """Plot posterior predictive summaries for one experiment.""" with activity_indicator( ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity), + verbosity=VerbosityEnum(self._project.verbosity.fit.value), ): self._project.rendering.plotter.plot_posterior_predictive( expt_name=expt_name, @@ -251,7 +251,7 @@ def pattern( if 'uncertainty' in auto_include: with activity_indicator( ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity), + verbosity=VerbosityEnum(self._project.verbosity.fit.value), ): self._project.rendering.plotter._plot_posterior_predictive_request( expt_name=expt_name, @@ -285,7 +285,7 @@ def pattern( if 'uncertainty' in normalized_include: with activity_indicator( ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity), + verbosity=VerbosityEnum(self._project.verbosity.fit.value), ): self._project.rendering.plotter._plot_posterior_predictive_request( expt_name=expt_name, diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 76715267..adf10904 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -28,6 +28,7 @@ if TYPE_CHECKING: from easydiffraction.project.categories.rendering import Rendering + from easydiffraction.project.categories.verbosity import Verbosity from easydiffraction.project.project_info import ProjectInfo @@ -136,12 +137,12 @@ def __init__( self._structures = Structures() self._experiments = Experiments() 
object.__setattr__(self, '_rendering', self._config.rendering) + object.__setattr__(self, '_verbosity', self._config.verbosity) self._display = ProjectDisplay(self) self._analysis = Analysis(self) self._summary = Summary(self) self._saved = False self._varname = 'project' if type(self)._loading else varname() - self._verbosity: VerbosityEnum = VerbosityEnum.FULL type(self)._current_project = self @classmethod @@ -250,21 +251,14 @@ def as_cif(self) -> str: return project_to_cif(self) @property - def verbosity(self) -> str: - """ - Project-wide console output verbosity. - - Returns - ------- - str - One of ``'full'``, ``'short'``, or ``'silent'``. - """ - return self._verbosity.value + def verbosity(self) -> Verbosity: + """Verbosity configuration bound to the project.""" + return self._verbosity @verbosity.setter def verbosity(self, value: str) -> None: """ - Set project-wide console output verbosity. + Set fitting process output verbosity. Parameters ---------- @@ -272,7 +266,7 @@ def verbosity(self, value: str) -> None: ``'full'`` for multi-line output, ``'short'`` for one-line status messages, or ``'silent'`` for no output. 
""" - self._verbosity = VerbosityEnum(value) + self._verbosity.fit = VerbosityEnum(value).value # ------------------------------------------ # Project File I/O diff --git a/src/easydiffraction/project/project_config.py b/src/easydiffraction/project/project_config.py index 32147b60..dd92137a 100644 --- a/src/easydiffraction/project/project_config.py +++ b/src/easydiffraction/project/project_config.py @@ -9,6 +9,8 @@ from easydiffraction.project.categories.info import ProjectInfoFactory from easydiffraction.project.categories.rendering import Rendering from easydiffraction.project.categories.rendering import RenderingFactory +from easydiffraction.project.categories.verbosity import Verbosity +from easydiffraction.project.categories.verbosity import VerbosityFactory class ProjectConfig(CategoryOwner): @@ -28,6 +30,7 @@ def __init__( description=description, ) self._rendering = RenderingFactory.create(RenderingFactory.default_tag()) + self._verbosity = VerbosityFactory.create(VerbosityFactory.default_tag()) @property def info(self) -> ProjectInfo: @@ -39,6 +42,11 @@ def rendering(self) -> Rendering: """Rendering configuration category.""" return self._rendering + @property + def verbosity(self) -> Verbosity: + """Verbosity configuration category.""" + return self._verbosity + @property def as_cif(self) -> str: """Serialize singleton project categories to CIF.""" From c2d36f06cea75cd29be39cc899834cd0f03ce632 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:02:51 +0200 Subject: [PATCH 09/72] Add analysis fit state implementation plan --- docs/dev/plans/analysis-cif-fit-state.md | 687 +++++++++++++++++++++++ 1 file changed, 687 insertions(+) create mode 100644 docs/dev/plans/analysis-cif-fit-state.md diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md new file mode 100644 index 00000000..9e9ac979 --- /dev/null +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -0,0 +1,687 @@ +# Analysis CIF Fit State 
Implementation Plan + +This plan follows `.github/copilot-instructions.md`. Deliberate +exceptions: none. + +Source ADR: `docs/dev/adrs/suggestions/analysis-cif-fit-state.md`. +Related decisions read while preparing this plan: + +- `docs/dev/adrs/accepted/runtime-fit-results.md` +- `docs/dev/adrs/accepted/project-facade-and-persistence.md` +- `docs/dev/adrs/accepted/category-owner-sections.md` +- `docs/dev/adrs/accepted/free-flag-cif-encoding.md` +- `docs/dev/adrs/accepted/loop-category-key-identity.md` +- `docs/dev/adrs/accepted/fit-mode-categories.md` +- `docs/dev/adrs/accepted/test-strategy.md` +- `docs/dev/adrs/suggestions/parameter-correlation-persistence.md` +- `docs/dev/adrs/suggestions/parameter-posterior-summary.md` + +## Goal + +Persist analysis-owned fit state in `analysis/analysis.cif` and, for +large Bayesian arrays, `analysis/results.h5`. Saved projects should be +able to restore fit bounds, pre-fit snapshots, deterministic result +summaries, Bayesian summaries, posterior manifests, and plot-ready cache +metadata without duplicating committed model parameter values in +structure or experiment CIF files. + +## Status Checklist + +- [x] Gather planning context from ADRs, source files, and tests. +- [x] Confirm ADR status: implement from the suggestion for now. +- [x] Confirm HDF5 strategy: add `h5py` as a direct dependency. +- [x] Confirm composite-key loop strategy: add persisted `id` columns. +- [x] Confirm public surface: expose read-only `Analysis` properties. +- [x] Confirm predictive cache identity: key by `experiment_name`. +- [ ] Phase 1 step 1: update the ADR suggestion with clarifications. +- [ ] Phase 1 step 2: add common fit-state category models. +- [ ] Phase 1 step 3: add deterministic result category models. +- [ ] Phase 1 step 4: add Bayesian metadata category models. +- [ ] Phase 1 step 5: add Bayesian cache manifest category models. +- [ ] Phase 1 step 6: wire analysis CIF save/load for fit state. 
+- [ ] Phase 1 step 7: capture fit projections after fitting. +- [ ] Phase 1 step 8: add HDF5 sidecar save/load. +- [ ] Phase 1 step 9: restore result objects and display cache inputs. +- [ ] Phase 1 review gate: stop for human review. +- [ ] Phase 2 step 1: add unit tests for new categories. +- [ ] Phase 2 step 2: add CIF and project save/load tests. +- [ ] Phase 2 step 3: add display and sidecar behavior tests. +- [ ] Phase 2 step 4: run the verification commands. + +## Clarified Decisions + +These questions were answered on 2026-05-18. + +1. Implement from `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` + for now. Do not move the ADR to `accepted/` as part of this plan. +2. Add `h5py` as a direct dependency for `analysis/results.h5`. +3. Add persisted `id` columns for composite-key fit-state loops instead + of using computed runtime-only keys. +4. Expose all new fit-state categories as public read-only properties on + `Analysis`. +5. Key `_bayesian_predictive_dataset` rows by `experiment_name`, keeping + one cached predictive dataset per experiment. + +No remaining required gates are known. If implementation uncovers a new +schema conflict, dependency concern, or public API ambiguity, stop and +ask before changing this plan. + +## Agent Safety Rules + +This plan is written for a less advanced agent. Follow it literally. + +- Work on one numbered Phase 1 step at a time. +- Run `git status --short` before each step. If unrelated dirty files + overlap with the files for that step, stop and ask for guidance. +- Use `apply_patch` for manual edits. Do not write files with shell + redirection, Python scripts, or ad hoc generated output. +- Do not create or run tests during Phase 1 unless the user explicitly + asks. Phase 1 is implementation and documentation only. +- After each completed Phase 1 step, inspect the diff for only that + step, stage explicit paths, and commit locally before moving on. +- Use explicit paths with `git add`. Never stage the whole tree. 
+- Keep each commit atomic and single-purpose. +- If implementation uncovers a missing requirement, dependency problem, + schema conflict, or public API ambiguity, stop and ask. + +Before each implementation step, write down the exact files you expect +to edit. If a file is not listed in the current step and the edit is not +obviously mechanical, stop and ask before changing it. + +For every new category package in Phase 1: + +1. Create `factory.py` with a factory class following the neighboring + analysis category packages. +2. Create `default.py` with concrete `CategoryItem` or + `CategoryCollection` classes. +3. Decorate every concrete top-level category class with + `@Factory.register`. Do not decorate row item classes unless a + neighboring package already does that for the same shape. +4. Create `__init__.py` with explicit imports for the factory and + concrete classes. +5. Update `src/easydiffraction/analysis/categories/__init__.py` and + `src/easydiffraction/analysis/__init__.py` with explicit imports. +6. Keep public descriptors read-only unless the category is meant to be + user-editable. Use private `_set_` helpers for internal restore + when a public setter would create the wrong user-facing contract. +7. If a collection row has read-only public fields, do not rely on the + generic `CategoryCollection.create(**kwargs)` path. Add an explicit + `create(...)` method that builds the row and uses private helpers. + +Required commit discipline for any AI agent following this plan: + +```text +Every completed Phase 1 implementation step must be staged with +explicit paths and committed locally before moving to the next +implementation step or the Phase 1 review gate. +``` + +Suggested branch name: + +```text +feature/analysis-cif-fit-state +``` + +## Verified Repository Facts + +- `Analysis` inherits `CategoryOwner` in + `src/easydiffraction/analysis/analysis.py`. 
+- `Analysis._serializable_categories()` currently emits fitting, + aliases, constraints, and active fit-mode categories. +- `analysis.as_cif` delegates to `analysis_to_cif()` in + `src/easydiffraction/io/cif/serialize.py`. +- `category_owner_to_cif()` serializes explicit `CategoryItem` and + `CategoryCollection` instances in the order returned by + `_serializable_categories()`. +- `analysis_from_cif()` restores fitting configuration, active + fit-mode sections, aliases, and constraints. +- `Project.save()` writes `analysis/analysis.cif` from + `self.analysis.as_cif` and lists all files already present under the + `analysis/` directory. +- `Project.load()` loads structures and experiments before analysis, + then resolves alias references. +- `GenericParameter` already has `fit_min`, `fit_max`, and + `fit_bounds_uncertainty_multiplier` runtime state. +- `Fitter.fit()` currently captures only `param._fit_start_value` before + fitting. It does not capture pre-fit uncertainty. +- `FitResults` and `BayesianFitResults` already contain most scalar + result information needed by the ADR. +- `project.display.posterior.*` currently reads from runtime + `analysis.fit_results`, not persisted caches. +- Existing verification tasks include `pixi run fix`, `pixi run check`, + `pixi run test-structure-check`, `pixi run unit-tests`, + `pixi run integration-tests`, and `pixi run script-tests`. + +## Naming Decisions For Implementation + +Use exact CIF category codes from the ADR. 
For Python attributes on +`Analysis`, use singular names for single-item categories and plural +names for collections: + +| Python attribute | CIF category | Shape | +| --- | --- | --- | +| `fit_state` | `_fit_state` | single item | +| `fit_parameters` | `_fit_parameter` | collection | +| `fit_result` | `_fit_result` | single item | +| `fit_parameter_correlations` | `_fit_parameter_correlation` | collection | +| `deterministic_result` | `_deterministic_result` | single item | +| `deterministic_parameter_results` | `_deterministic_parameter_result` | collection | +| `bayesian_result` | `_bayesian_result` | single item | +| `bayesian_sampler` | `_bayesian_sampler` | single item | +| `bayesian_convergence` | `_bayesian_convergence` | single item | +| `bayesian_parameter_posteriors` | `_bayesian_parameter_posterior` | collection | +| `bayesian_distribution_caches` | `_bayesian_distribution_cache` | collection | +| `bayesian_pair_caches` | `_bayesian_pair_cache` | collection | +| `bayesian_predictive_datasets` | `_bayesian_predictive_dataset` | collection | + +If this public surface feels too noisy during implementation, stop and +ask before hiding these properties from `Analysis.help()`. Do not move +the categories under another category; the ADR requires flat analysis +siblings. + +## Phase 1: Implementation + +Phase 1 is code and documentation only. Do not add or run tests here +unless explicitly instructed by the user. + +### Step 1: Update The ADR Suggestion With Clarifications + +Files likely to change: + +- `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Keep the ADR in `suggestions/`; do not move it to `accepted/`. +2. Amend the ADR suggestion so composite-key loops have persisted `id` + columns. At minimum this applies to `_fit_parameter_correlation` and + `_bayesian_pair_cache`. +3. Document that `_bayesian_predictive_dataset` remains keyed by + `experiment_name`. +4. 
Document that `analysis/results.h5` uses `h5py` as a direct + dependency. +5. Update this plan checklist for Step 1. + +Suggested commit message: + +```text +Clarify analysis fit-state ADR schema +``` + +### Step 2: Add Common Fit-State Categories + +Files likely to change: + +- `src/easydiffraction/analysis/categories/fit_state/` +- `src/easydiffraction/analysis/categories/fit_parameters/` +- `src/easydiffraction/analysis/categories/fit_result/` +- `src/easydiffraction/analysis/categories/fit_parameter_correlations/` +- `src/easydiffraction/analysis/categories/__init__.py` +- `src/easydiffraction/analysis/__init__.py` +- `src/easydiffraction/analysis/enums.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Add `(str, Enum)` classes for closed values: + `FitResultKindEnum` with `deterministic` and `bayesian`, and + `FitCorrelationSourceEnum` with `deterministic` and `posterior`. +2. Add category modules following existing analysis category patterns: + `default.py`, `factory.py`, and `__init__.py` with explicit imports. +3. Add `FitState` as a `CategoryItem` with `_category_code = + 'fit_state'` and numeric `schema_version` default `1`. +4. Add `FitParameterItem` and `FitParameters` for `_fit_parameter`. + Use `_category_entry_name = 'param_unique_name'`. +5. Add `FitResult` for `_fit_result` with `result_kind`, `success`, + `message`, `iterations`, `fitting_time`, and `reduced_chi_square`. +6. Add `FitParameterCorrelationItem` and collection for + `_fit_parameter_correlation`. Include persisted + `_fit_parameter_correlation.id` and use `_category_entry_name = + 'id'`. Generate a stable default id from the normalized source and + parameter pair when callers do not provide one. +7. Normalize correlation pairs so only upper-triangle rows are stored. +8. Use `StringDescriptor`, `NumericDescriptor`, and `BoolDescriptor` as + appropriate. Avoid raw Python attributes for persisted fields. +9. Do not add JSON fields or loose tags. +10. 
Update imports in the package `__init__.py` files so concrete + classes are registered and importable. +11. Update this plan checklist for Step 2. + +Implementation notes: + +- The collection `add()` path assumes one key. For categories with a + persisted `id`, set `_category_entry_name = 'id'` on the item and + generate a stable default `id` before adding the item to the + collection. +- Keep CIF tag names exactly as in the ADR, for example + `_fit_parameter.param_unique_name`. +- If an enum value from CIF is invalid, warn clearly and keep the + default. Do not fail silently. + +Suggested commit message: + +```text +Add common analysis fit-state categories +``` + +### Step 3: Add Deterministic Result Categories + +Files likely to change: + +- `src/easydiffraction/analysis/categories/deterministic_result/` +- `src/easydiffraction/analysis/categories/deterministic_parameter_results/` +- `src/easydiffraction/analysis/categories/__init__.py` +- `src/easydiffraction/analysis/__init__.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Add `DeterministicResult` as a single-item category with the ADR + fields: `optimizer_name`, `method_name`, `objective_name`, + `objective_value`, `n_data_points`, `n_parameters`, + `n_free_parameters`, `degrees_of_freedom`, `covariance_available`, + and `correlation_available`. +2. Add `DeterministicParameterResultItem` and collection for + `_deterministic_parameter_result` with `order_index`, + `param_unique_name`, `final_value`, `final_uncertainty`, + `at_lower_bound`, and `at_upper_bound`. +3. Use `_category_entry_name = 'param_unique_name'` for deterministic + parameter result rows. Keep `order_index` as display and array order. +4. Do not duplicate pre-fit values here; those belong to + `_fit_parameter`. +5. Add explicit package imports. +6. Update this plan checklist for Step 3. 
+ +Suggested commit message: + +```text +Add deterministic fit-result categories +``` + +### Step 4: Add Bayesian Metadata Categories + +Files likely to change: + +- `src/easydiffraction/analysis/categories/bayesian_result/` +- `src/easydiffraction/analysis/categories/bayesian_sampler/` +- `src/easydiffraction/analysis/categories/bayesian_convergence/` +- `src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/` +- `src/easydiffraction/analysis/categories/__init__.py` +- `src/easydiffraction/analysis/__init__.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Add `BayesianResult` as a single-item category with all ADR fields. +2. Add `BayesianSampler` as a single-item category with resolved DREAM + sampler settings: `steps`, `burn`, `thin`, `pop`, `parallel`, + `init`, and `random_seed`. +3. Add `BayesianConvergence` as a single-item category with + `converged`, `max_r_hat`, `min_ess_bulk`, `n_draws`, `n_chains`, and + `n_parameters`. +4. Add `BayesianParameterPosteriorItem` and collection with all ADR + posterior summary fields. Use `_category_entry_name = 'unique_name'`. +5. Preserve the repo naming rule from prior Bayesian work: + `best_sample` and `Best posterior sample` refer to the committed + sampled point, not a continuous MAP estimate. +6. Add explicit package imports. +7. Update this plan checklist for Step 4. + +Suggested commit message: + +```text +Add Bayesian fit-result metadata categories +``` + +### Step 5: Add Bayesian Cache Manifest Categories + +Files likely to change: + +- `src/easydiffraction/analysis/categories/bayesian_distribution_caches/` +- `src/easydiffraction/analysis/categories/bayesian_pair_caches/` +- `src/easydiffraction/analysis/categories/bayesian_predictive_datasets/` +- `src/easydiffraction/analysis/categories/__init__.py` +- `src/easydiffraction/analysis/__init__.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Add distribution cache manifest rows keyed by + `param_unique_name`. 
+2. Add pair cache manifest rows with persisted `_bayesian_pair_cache.id` + and `_category_entry_name = 'id'`. Generate a stable default id from + the normalized parameter pair when callers do not provide one. +3. Add predictive dataset manifest rows keyed by `experiment_name`. + If multiple predictive datasets per experiment become necessary, + stop and ask before changing the ADR schema. +4. Store only HDF5 dataset paths and shape/count metadata in CIF. +5. Do not write numerical arrays into CIF loops. +6. Add explicit package imports. +7. Update this plan checklist for Step 5. + +Suggested commit message: + +```text +Add Bayesian fit-cache manifest categories +``` + +### Step 6: Wire Analysis CIF Save And Load + +Files likely to change: + +- `src/easydiffraction/analysis/analysis.py` +- `src/easydiffraction/io/cif/serialize.py` +- `src/easydiffraction/project/project.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Instantiate the new fit-state categories in `Analysis.__init__`. +2. Add read-only properties using the names in this plan. +3. Add `Analysis._has_persisted_fit_state()` or an equivalent helper. +4. Update `Analysis._serializable_categories()` so fit-state categories + are appended only when a fit-state projection exists. +5. Keep the order from the ADR: normal analysis configuration first, + then `_fit_state`, `_fit_parameter`, `_fit_result`, correlations, + deterministic categories, Bayesian categories, and cache manifests. +6. Update `analysis_from_cif()` to restore the new categories after + existing fitting, aliases, constraints, and active mode-specific + configuration. +7. Make missing fit-state categories a no-op for older saved projects. +8. Add clear warnings for unsupported `_fit_state.schema_version`. +9. Add a project-level helper to build a `{unique_name: parameter}` map + from structures and experiments. Reuse it for alias and fit-state + reference restoration if practical. +10. Update this plan checklist for Step 6. 
+ +Suggested commit message: + +```text +Wire analysis fit-state CIF restore +``` + +### Step 7: Capture Fit Projections After Fitting + +Files likely to change: + +- `src/easydiffraction/analysis/analysis.py` +- `src/easydiffraction/analysis/fitting.py` +- `src/easydiffraction/analysis/minimizers/base.py` +- `src/easydiffraction/analysis/minimizers/bumps.py` +- `src/easydiffraction/analysis/minimizers/bumps_dream.py` +- `src/easydiffraction/analysis/minimizers/lmfit.py` +- `src/easydiffraction/analysis/fit_helpers/reporting.py` +- `src/easydiffraction/analysis/fit_helpers/bayesian.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Add an analysis-owned method such as + `_capture_fit_parameter_state(parameters)` that records + `param_unique_name`, `fit_min`, `fit_max`, + `fit_bounds_uncertainty_multiplier`, `start_value`, and + `start_uncertainty` before the minimizer mutates parameters. +2. Do not rely on `GenericParameter._start_value`; it exists but is not + currently the value used by fit result reporting. +3. Continue supporting existing `_fit_start_value` until a separate + approved refactor replaces it. +4. Add `_store_fit_result_projection(results)` or equivalent on + `Analysis` to fill common, deterministic, and Bayesian categories + from `FitResults` or `BayesianFitResults`. +5. For deterministic fits, prefer live parameter values for + calculations and store final values only as display projections. +6. If deterministic projection values disagree with live parameter + state on load, warn and keep the live parameter state. +7. For Bayesian fits, keep `point_estimate_name = 'best_sample'` unless + the result object says otherwise. +8. Store upper-triangle parameter correlations only. +9. Clear stale fit-state categories at the start of a new fit so old + cache manifests cannot survive a new result. +10. Update this plan checklist for Step 7. 
+ +Suggested commit message: + +```text +Capture persisted fit-state projections +``` + +### Step 8: Add HDF5 Sidecar Save And Load + +The HDF5 dependency decision is approved: add `h5py` directly. + +Files likely to change: + +- `pyproject.toml` +- `src/easydiffraction/analysis/analysis.py` +- `src/easydiffraction/io/` +- `src/easydiffraction/project/project.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. Add `h5py` as a direct dependency. +2. Add a small sidecar module for `analysis/results.h5`; keep imports + local if the package is heavy. +3. Write canonical posterior arrays when available: + `/posterior/parameter_samples`, `/posterior/log_posterior`, and + `/posterior/draw_index`. +4. Write cache arrays only when the corresponding manifest rows are + present. +5. Validate that the HDF5 dataset shape matches manifest metadata. +6. Make the sidecar optional for summary-only restore. If it is missing, + warn clearly and keep available CIF summaries. +7. Call the sidecar writer from `Project.save()` after `analysis.cif` + data has been prepared and before analysis directory contents are + listed. +8. Call the sidecar reader from `Project.load()` after `analysis_from_cif()` + and before restored display state is used. +9. Do not persist backend runtime objects, DREAM drivers, raw engine + results, or ArviZ `InferenceData`. +10. Update this plan checklist for Step 8. + +Suggested commit message: + +```text +Persist Bayesian fit arrays in results sidecar +``` + +### Step 9: Restore Result Objects And Display Cache Inputs + +Files likely to change: + +- `src/easydiffraction/analysis/analysis.py` +- `src/easydiffraction/analysis/fit_helpers/reporting.py` +- `src/easydiffraction/analysis/fit_helpers/bayesian.py` +- `src/easydiffraction/project/display.py` +- `src/easydiffraction/display/plotting.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Actions: + +1. 
Rebuild a lightweight `FitResults` or `BayesianFitResults` from the + persisted categories after project load. +2. Attach restored live parameter objects where their unique names are + still present. +3. Keep backend runtime fields such as `engine_result` as `None`. +4. Make `analysis.display.fit_results()` work from the restored result + projection. +5. Update correlation plotting so it can use + `_fit_parameter_correlation` when raw covariance or posterior samples + are not available. +6. Keep correlation heatmaps compact. Do not replace the heatmap path + with many per-cell Plotly traces. +7. Make posterior distribution, pair, and predictive display methods + prefer valid persisted cache arrays when available. +8. If a requested cache is unavailable or invalid, warn clearly and use + the existing recomputation path only when enough runtime data exists. +9. Do not make display methods recompute KDE, contours, or predictive + bands when valid cache arrays were restored. +10. Update this plan checklist for Step 9. + +Suggested commit message: + +```text +Restore fit results from saved analysis state +``` + +### Phase 1 Review Gate + +After Step 9, stop. Present the implementation for human review before +creating or running tests. Mention any deviations from this plan and any +open design questions that appeared during implementation. + +Suggested commit message if only the plan checklist changes at the gate: + +```text +Update analysis fit-state plan progress +``` + +## Phase 2: Verification + +Only start Phase 2 after the user approves the Phase 1 implementation. + +### Step 1: Add Category Unit Tests + +Files likely to change: + +- `tests/unit/easydiffraction/analysis/categories/` +- `tests/unit/easydiffraction/analysis/test_enums.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Tests to add: + +1. Each new category has the expected CIF tags. +2. Each descriptor validates basic type and enum constraints. +3. Empty collections serialize to an empty string. 
+4. Collections rebuild indexes after `from_cif()`. +5. Persisted-id collections reject duplicate ids and normalize duplicate + pair rows. +6. Correlation rows store only the upper triangle excluding the + diagonal. + +### Step 2: Add CIF And Project Save/Load Tests + +Files likely to change: + +- `tests/unit/easydiffraction/io/cif/` +- `tests/unit/easydiffraction/project/test_project_save.py` +- `tests/unit/easydiffraction/project/test_project_load.py` +- `tests/functional/test_fitting_workflow.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Tests to add: + +1. A project with no fit state does not emit empty fit-state loops. +2. Deterministic fit-state categories round-trip through + `analysis/analysis.cif`. +3. Fit bounds, bound provenance, start value, and start uncertainty + round-trip by parameter unique name. +4. Live structure or experiment parameter values remain the calculation + source of truth after load. +5. Mismatched deterministic result projections warn and keep live + parameter values. +6. Unknown fit-state schema versions warn clearly. +7. Older projects without fit-state categories still load. + +### Step 3: Add Bayesian Sidecar And Display Tests + +Files likely to change: + +- `tests/unit/easydiffraction/analysis/fit_helpers/` +- `tests/unit/easydiffraction/display/test_plotting.py` +- `tests/unit/easydiffraction/project/test_display.py` +- `tests/unit/easydiffraction/project/test_project_load.py` +- `docs/dev/plans/analysis-cif-fit-state.md` + +Tests to add: + +1. Bayesian summary-only restore works when `analysis/results.h5` is + missing and emits a clear warning. +2. Posterior sample arrays round-trip through `analysis/results.h5`. +3. Manifest rows and HDF5 dataset shapes are validated. +4. Posterior distributions use cache arrays when valid. +5. Posterior pair plots use cache arrays when valid and preserve sample + pairing semantics for contours. +6. Posterior predictive displays use saved predictive arrays when valid. +7. 
Recompute paths remain available when runtime posterior samples are + present but caches are absent. + +### Step 4: Run Verification Commands + +Run in this order from the repository root: + +```text +pixi run test-structure-check +pixi run fix +pixi run check +pixi run unit-tests +pixi run integration-tests +pixi run script-tests +``` + +Notes: + +- `pixi run fix` may regenerate + `docs/dev/package-structure/full.md` and + `docs/dev/package-structure/short.md`. Accept those generated changes + if the command produced them. +- If a command fails for an unrelated existing problem, do not fix + unrelated code. Record the failure and ask for guidance. + +## Files Most Likely To Change + +Implementation files: + +- `pyproject.toml` +- `src/easydiffraction/analysis/analysis.py` +- `src/easydiffraction/analysis/enums.py` +- `src/easydiffraction/analysis/fitting.py` +- `src/easydiffraction/analysis/categories/` +- `src/easydiffraction/analysis/fit_helpers/reporting.py` +- `src/easydiffraction/analysis/fit_helpers/bayesian.py` +- `src/easydiffraction/analysis/minimizers/base.py` +- `src/easydiffraction/analysis/minimizers/bumps.py` +- `src/easydiffraction/analysis/minimizers/bumps_dream.py` +- `src/easydiffraction/analysis/minimizers/lmfit.py` +- `src/easydiffraction/io/cif/serialize.py` +- `src/easydiffraction/project/project.py` +- `src/easydiffraction/project/display.py` +- `src/easydiffraction/display/plotting.py` + +Test files: + +- `tests/unit/easydiffraction/analysis/` +- `tests/unit/easydiffraction/io/cif/` +- `tests/unit/easydiffraction/project/` +- `tests/unit/easydiffraction/display/` +- `tests/functional/test_fitting_workflow.py` +- `tests/integration/fitting/` + +Documentation files: + +- `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` +- `docs/dev/plans/analysis-cif-fit-state.md` + +## Do Not Change Without Approval + +- Do not serialize posterior summaries inside structure or experiment + CIF files. 
+- Do not rename `Project`, `project.cif`, or the existing saved project + layout. +- Do not remove the legacy `analysis.cif` root fallback in + `Project.load()`. +- Do not add a generic posterior-minimizer capability abstraction until + there is a second concrete posterior-capable minimizer. +- Do not change tutorial notebooks directly. Edit tutorial `.py` files + and run notebook preparation only if the user asks for tutorial work. +- Do not persist raw backend result objects, optimizer instances, DREAM + drivers, or ArviZ objects. + +## Suggested Pull Request + +Title: Persist analysis fit state in saved projects + +Description: Save fit bounds, result summaries, and Bayesian result +manifests with projects so users can reopen fitted analyses with the +same fit-state and posterior display context available. \ No newline at end of file From 527e018fcf155778ef29ba73e1ac3b182e547119 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:07:16 +0200 Subject: [PATCH 10/72] Reformat documentation tables and line breaks --- .../python-cif-category-correspondence.md | 40 ++--- docs/dev/plans/analysis-cif-fit-state.md | 143 ++++++++++-------- 2 files changed, 102 insertions(+), 81 deletions(-) diff --git a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md index 32bcd13f..191f18e5 100644 --- a/docs/dev/adrs/suggestions/python-cif-category-correspondence.md +++ b/docs/dev/adrs/suggestions/python-cif-category-correspondence.md @@ -65,17 +65,17 @@ to objects reached from the current `Project` root, for example ### Project-Level Configuration -| Current Python path | Current CIF path | Match? 
| Notes | -| -------------------------------- | ------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------- | -| `project.info.name` | `_project.id` | No | Python uses user-facing `name`; CIF uses `id`; category is `info` in Python but `_project` in CIF. | -| `project.info.title` | `_project.title` | Partly | Field name matches, category name does not. | -| `project.info.description` | `_project.description` | Partly | Field name matches, category name does not. | -| `project.info.created` | `_project.created` | Partly | Field name matches, category name does not. | -| `project.info.last_modified` | `_project.last_modified` | Partly | Field name matches, category name does not. | -| `project.info.path` | none | No | Runtime storage path, not a CIF field. | -| `project.rendering.chart_engine` | `_rendering.chart_engine` | Yes | Direct category and field mapping. | -| `project.rendering.table_engine` | `_rendering.table_engine` | Yes | Direct category and field mapping. | -| `project.verbosity.fit` | `_verbosity.fit` | Yes | Direct category and field mapping for fitting process output verbosity. | +| Current Python path | Current CIF path | Match? | Notes | +| -------------------------------- | ------------------------- | ------ | -------------------------------------------------------------------------------------------------- | +| `project.info.name` | `_project.id` | No | Python uses user-facing `name`; CIF uses `id`; category is `info` in Python but `_project` in CIF. | +| `project.info.title` | `_project.title` | Partly | Field name matches, category name does not. | +| `project.info.description` | `_project.description` | Partly | Field name matches, category name does not. | +| `project.info.created` | `_project.created` | Partly | Field name matches, category name does not. 
| +| `project.info.last_modified` | `_project.last_modified` | Partly | Field name matches, category name does not. | +| `project.info.path` | none | No | Runtime storage path, not a CIF field. | +| `project.rendering.chart_engine` | `_rendering.chart_engine` | Yes | Direct category and field mapping. | +| `project.rendering.table_engine` | `_rendering.table_engine` | Yes | Direct category and field mapping. | +| `project.verbosity.fit` | `_verbosity.fit` | Yes | Direct category and field mapping for fitting process output verbosity. | ### Analysis Configuration @@ -239,15 +239,15 @@ keep separate chart and table engine fields. Possible strict-correspondence target if a future ADR explicitly changes the accepted `_project.*` baseline: -| Python path | Target CIF path | Current state | -| -------------------------------- | ------------------------- | ------------------------------------------ | -| `project.info.name` | `_info.name` | Currently `_project.id`. | -| `project.info.title` | `_info.title` | Currently `_project.title`. | -| `project.info.description` | `_info.description` | Currently `_project.description`. | -| `project.info.created` | `_info.created` | Currently `_project.created`. | -| `project.info.last_modified` | `_info.last_modified` | Currently `_project.last_modified`. | -| `project.rendering.chart_engine` | `_rendering.chart_engine` | Already matches. | -| `project.rendering.table_engine` | `_rendering.table_engine` | Already matches. | +| Python path | Target CIF path | Current state | +| -------------------------------- | ------------------------- | ------------------------------------------------ | +| `project.info.name` | `_info.name` | Currently `_project.id`. | +| `project.info.title` | `_info.title` | Currently `_project.title`. | +| `project.info.description` | `_info.description` | Currently `_project.description`. | +| `project.info.created` | `_info.created` | Currently `_project.created`. 
| +| `project.info.last_modified` | `_info.last_modified` | Currently `_project.last_modified`. | +| `project.rendering.chart_engine` | `_rendering.chart_engine` | Already matches. | +| `project.rendering.table_engine` | `_rendering.table_engine` | Already matches. | | `project.verbosity.fit` | `_verbosity.fit` | Implemented direct fit-output verbosity mapping. | Alternative target if the project identity field should be called `id` diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 9e9ac979..51a67c0a 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -108,6 +108,19 @@ For every new category package in Phase 1: generic `CategoryCollection.create(**kwargs)` path. Add an explicit `create(...)` method that builds the row and uses private helpers. +Complexity guardrails: + +- Steps 7 and 9 are broad. Start with the smallest central hook, then + edit individual minimizers or display helpers only when the required + data is not available through that central hook. +- If one step needs more than six source files, more than one new public + class family beyond the planned categories, or a public API change not + named in this plan, stop and ask to split the step. +- When auditing usages or renaming symbols, search code, tests, + tutorials, and docs with `git grep -n` before editing. +- Do not fix unrelated lint, formatting, typing, or test failures while + implementing this plan. Mention them at the review gate instead. + Required commit discipline for any AI agent following this plan: ```text @@ -133,8 +146,8 @@ feature/analysis-cif-fit-state - `category_owner_to_cif()` serializes explicit `CategoryItem` and `CategoryCollection` instances in the order returned by `_serializable_categories()`. -- `analysis_from_cif()` restores fitting configuration, active - fit-mode sections, aliases, and constraints. 
+- `analysis_from_cif()` restores fitting configuration, active fit-mode + sections, aliases, and constraints. - `Project.save()` writes `analysis/analysis.cif` from `self.analysis.as_cif` and lists all files already present under the `analysis/` directory. @@ -158,21 +171,21 @@ Use exact CIF category codes from the ADR. For Python attributes on `Analysis`, use singular names for single-item categories and plural names for collections: -| Python attribute | CIF category | Shape | -| --- | --- | --- | -| `fit_state` | `_fit_state` | single item | -| `fit_parameters` | `_fit_parameter` | collection | -| `fit_result` | `_fit_result` | single item | -| `fit_parameter_correlations` | `_fit_parameter_correlation` | collection | -| `deterministic_result` | `_deterministic_result` | single item | -| `deterministic_parameter_results` | `_deterministic_parameter_result` | collection | -| `bayesian_result` | `_bayesian_result` | single item | -| `bayesian_sampler` | `_bayesian_sampler` | single item | -| `bayesian_convergence` | `_bayesian_convergence` | single item | -| `bayesian_parameter_posteriors` | `_bayesian_parameter_posterior` | collection | -| `bayesian_distribution_caches` | `_bayesian_distribution_cache` | collection | -| `bayesian_pair_caches` | `_bayesian_pair_cache` | collection | -| `bayesian_predictive_datasets` | `_bayesian_predictive_dataset` | collection | +| Python attribute | CIF category | Shape | +| --------------------------------- | --------------------------------- | ----------- | +| `fit_state` | `_fit_state` | single item | +| `fit_parameters` | `_fit_parameter` | collection | +| `fit_result` | `_fit_result` | single item | +| `fit_parameter_correlations` | `_fit_parameter_correlation` | collection | +| `deterministic_result` | `_deterministic_result` | single item | +| `deterministic_parameter_results` | `_deterministic_parameter_result` | collection | +| `bayesian_result` | `_bayesian_result` | single item | +| `bayesian_sampler` | 
`_bayesian_sampler` | single item | +| `bayesian_convergence` | `_bayesian_convergence` | single item | +| `bayesian_parameter_posteriors` | `_bayesian_parameter_posterior` | collection | +| `bayesian_distribution_caches` | `_bayesian_distribution_cache` | collection | +| `bayesian_pair_caches` | `_bayesian_pair_cache` | collection | +| `bayesian_predictive_datasets` | `_bayesian_predictive_dataset` | collection | If this public surface feels too noisy during implementation, stop and ask before hiding these properties from `Analysis.help()`. Do not move @@ -224,22 +237,23 @@ Files likely to change: Actions: -1. Add `(str, Enum)` classes for closed values: - `FitResultKindEnum` with `deterministic` and `bayesian`, and - `FitCorrelationSourceEnum` with `deterministic` and `posterior`. +1. Add `(str, Enum)` classes for closed values: `FitResultKindEnum` with + `deterministic` and `bayesian`, and `FitCorrelationSourceEnum` with + `deterministic` and `posterior`. 2. Add category modules following existing analysis category patterns: `default.py`, `factory.py`, and `__init__.py` with explicit imports. -3. Add `FitState` as a `CategoryItem` with `_category_code = - 'fit_state'` and numeric `schema_version` default `1`. -4. Add `FitParameterItem` and `FitParameters` for `_fit_parameter`. - Use `_category_entry_name = 'param_unique_name'`. +3. Add `FitState` as a `CategoryItem` with + `_category_code = 'fit_state'` and numeric `schema_version` default + `1`. +4. Add `FitParameterItem` and `FitParameters` for `_fit_parameter`. Use + `_category_entry_name = 'param_unique_name'`. 5. Add `FitResult` for `_fit_result` with `result_kind`, `success`, `message`, `iterations`, `fitting_time`, and `reduced_chi_square`. 6. Add `FitParameterCorrelationItem` and collection for `_fit_parameter_correlation`. Include persisted - `_fit_parameter_correlation.id` and use `_category_entry_name = - 'id'`. 
Generate a stable default id from the normalized source and - parameter pair when callers do not provide one. + `_fit_parameter_correlation.id` and use + `_category_entry_name = 'id'`. Generate a stable default id from the + normalized source and parameter pair when callers do not provide one. 7. Normalize correlation pairs so only upper-triangle rows are stored. 8. Use `StringDescriptor`, `NumericDescriptor`, and `BoolDescriptor` as appropriate. Avoid raw Python attributes for persisted fields. @@ -251,9 +265,9 @@ Actions: Implementation notes: - The collection `add()` path assumes one key. For categories with a - persisted `id`, set `_category_entry_name = 'id'` on the item and - generate a stable default `id` before adding the item to the - collection. + persisted `id`, set `_category_entry_name = 'id'` on the item and + generate a stable default `id` before adding the item to the + collection. - Keep CIF tag names exactly as in the ADR, for example `_fit_parameter.param_unique_name`. - If an enum value from CIF is invalid, warn clearly and keep the @@ -315,16 +329,16 @@ Actions: 1. Add `BayesianResult` as a single-item category with all ADR fields. 2. Add `BayesianSampler` as a single-item category with resolved DREAM - sampler settings: `steps`, `burn`, `thin`, `pop`, `parallel`, - `init`, and `random_seed`. -3. Add `BayesianConvergence` as a single-item category with - `converged`, `max_r_hat`, `min_ess_bulk`, `n_draws`, `n_chains`, and + sampler settings: `steps`, `burn`, `thin`, `pop`, `parallel`, `init`, + and `random_seed`. +3. Add `BayesianConvergence` as a single-item category with `converged`, + `max_r_hat`, `min_ess_bulk`, `n_draws`, `n_chains`, and `n_parameters`. 4. Add `BayesianParameterPosteriorItem` and collection with all ADR posterior summary fields. Use `_category_entry_name = 'unique_name'`. -5. 
Preserve the repo naming rule from prior Bayesian work: - `best_sample` and `Best posterior sample` refer to the committed - sampled point, not a continuous MAP estimate. +5. Preserve the repo naming rule from prior Bayesian work: `best_sample` + and `Best posterior sample` refer to the committed sampled point, not + a continuous MAP estimate. 6. Add explicit package imports. 7. Update this plan checklist for Step 4. @@ -347,14 +361,13 @@ Files likely to change: Actions: -1. Add distribution cache manifest rows keyed by - `param_unique_name`. +1. Add distribution cache manifest rows keyed by `param_unique_name`. 2. Add pair cache manifest rows with persisted `_bayesian_pair_cache.id` and `_category_entry_name = 'id'`. Generate a stable default id from the normalized parameter pair when callers do not provide one. -3. Add predictive dataset manifest rows keyed by `experiment_name`. - If multiple predictive datasets per experiment become necessary, - stop and ask before changing the ADR schema. +3. Add predictive dataset manifest rows keyed by `experiment_name`. If + multiple predictive datasets per experiment become necessary, stop + and ask before changing the ADR schema. 4. Store only HDF5 dataset paths and shape/count metadata in CIF. 5. Do not write numerical arrays into CIF loops. 6. Add explicit package imports. @@ -429,16 +442,20 @@ Actions: 4. Add `_store_fit_result_projection(results)` or equivalent on `Analysis` to fill common, deterministic, and Bayesian categories from `FitResults` or `BayesianFitResults`. -5. For deterministic fits, prefer live parameter values for - calculations and store final values only as display projections. -6. If deterministic projection values disagree with live parameter - state on load, warn and keep the live parameter state. -7. For Bayesian fits, keep `point_estimate_name = 'best_sample'` unless +5. 
Prefer calling the analysis-owned capture and projection methods from + `Fitter.fit()` or the existing `Analysis._fit_*` methods. Only edit + individual minimizer classes when a required result field is missing + from `FitResults` or `BayesianFitResults`. +6. For deterministic fits, prefer live parameter values for calculations + and store final values only as display projections. +7. If deterministic projection values disagree with live parameter state + on load, warn and keep the live parameter state. +8. For Bayesian fits, keep `point_estimate_name = 'best_sample'` unless the result object says otherwise. -8. Store upper-triangle parameter correlations only. -9. Clear stale fit-state categories at the start of a new fit so old +9. Store upper-triangle parameter correlations only. +10. Clear stale fit-state categories at the start of a new fit so old cache manifests cannot survive a new result. -10. Update this plan checklist for Step 7. +11. Update this plan checklist for Step 7. Suggested commit message: @@ -474,8 +491,8 @@ Actions: 7. Call the sidecar writer from `Project.save()` after `analysis.cif` data has been prepared and before analysis directory contents are listed. -8. Call the sidecar reader from `Project.load()` after `analysis_from_cif()` - and before restored display state is used. +8. Call the sidecar reader from `Project.load()` after + `analysis_from_cif()` and before restored display state is used. 9. Do not persist backend runtime objects, DREAM drivers, raw engine results, or ArviZ `InferenceData`. 10. Update this plan checklist for Step 8. @@ -506,18 +523,23 @@ Actions: 3. Keep backend runtime fields such as `engine_result` as `None`. 4. Make `analysis.display.fit_results()` work from the restored result projection. -5. Update correlation plotting so it can use +5. First restore non-plotting result behavior and correlation summaries. + Only then add cache-aware posterior distribution, pair, and predictive + plotting. +6. 
Update correlation plotting so it can use `_fit_parameter_correlation` when raw covariance or posterior samples are not available. -6. Keep correlation heatmaps compact. Do not replace the heatmap path +7. Keep correlation heatmaps compact. Do not replace the heatmap path with many per-cell Plotly traces. -7. Make posterior distribution, pair, and predictive display methods +8. Make posterior distribution, pair, and predictive display methods prefer valid persisted cache arrays when available. -8. If a requested cache is unavailable or invalid, warn clearly and use +9. If a requested cache is unavailable or invalid, warn clearly and use the existing recomputation path only when enough runtime data exists. -9. Do not make display methods recompute KDE, contours, or predictive +10. Do not make display methods recompute KDE, contours, or predictive bands when valid cache arrays were restored. -10. Update this plan checklist for Step 9. +11. If cache-aware display requires a new helper object or cache API not + named in this plan, stop and ask before adding it. +12. Update this plan checklist for Step 9. Suggested commit message: @@ -622,8 +644,7 @@ pixi run script-tests Notes: -- `pixi run fix` may regenerate - `docs/dev/package-structure/full.md` and +- `pixi run fix` may regenerate `docs/dev/package-structure/full.md` and `docs/dev/package-structure/short.md`. Accept those generated changes if the command produced them. - If a command fails for an unrelated existing problem, do not fix @@ -684,4 +705,4 @@ Title: Persist analysis fit state in saved projects Description: Save fit bounds, result summaries, and Bayesian result manifests with projects so users can reopen fitted analyses with the -same fit-state and posterior display context available. \ No newline at end of file +same fit-state and posterior display context available. 
From daa31f1f0107baf3db3ce045f96d04a809aa5cea Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:25:59 +0200 Subject: [PATCH 11/72] Add project fit verbosity verification --- docs/dev/package-structure/full.md | 6 +++ docs/dev/package-structure/short.md | 4 ++ docs/dev/plans/analysis-cif-fit-state.md | 20 ++++----- .../datablocks/experiment/collection.py | 2 +- .../project/categories/rendering/default.py | 8 ++-- .../project/categories/verbosity/__init__.py | 2 + .../project/categories/verbosity/default.py | 2 + .../project/categories/verbosity/factory.py | 2 + tests/functional/test_project_lifecycle.py | 6 +-- .../analysis/test_sequential.py | 4 +- .../categories/verbosity/test_default.py | 44 +++++++++++++++++++ .../categories/verbosity/test_factory.py | 25 +++++++++++ .../easydiffraction/project/test_display.py | 2 +- .../easydiffraction/project/test_project.py | 8 ++-- .../project/test_project_config.py | 13 +++++- 15 files changed, 122 insertions(+), 26 deletions(-) create mode 100644 tests/unit/easydiffraction/project/categories/verbosity/test_default.py create mode 100644 tests/unit/easydiffraction/project/categories/verbosity/test_factory.py diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index 720505ba..1e786df4 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -439,6 +439,12 @@ │ │ │ │ └── 🏷️ class Rendering │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class RenderingFactory +│ │ ├── 📁 verbosity +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class Verbosity +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class VerbosityFactory │ │ └── 📄 __init__.py │ ├── 📄 __init__.py │ ├── 📄 display.py diff --git a/docs/dev/package-structure/short.md b/docs/dev/package-structure/short.md index da42f7a5..b60e1bf9 100644 --- a/docs/dev/package-structure/short.md +++ b/docs/dev/package-structure/short.md @@ -216,6 +216,10 @@ │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py │ │ │ 
└── 📄 factory.py +│ │ ├── 📁 verbosity +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py │ │ └── 📄 __init__.py │ ├── 📄 __init__.py │ ├── 📄 display.py diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 51a67c0a..0066cf3e 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -111,15 +111,15 @@ For every new category package in Phase 1: Complexity guardrails: - Steps 7 and 9 are broad. Start with the smallest central hook, then - edit individual minimizers or display helpers only when the required - data is not available through that central hook. + edit individual minimizers or display helpers only when the required + data is not available through that central hook. - If one step needs more than six source files, more than one new public - class family beyond the planned categories, or a public API change not - named in this plan, stop and ask to split the step. + class family beyond the planned categories, or a public API change not + named in this plan, stop and ask to split the step. - When auditing usages or renaming symbols, search code, tests, - tutorials, and docs with `git grep -n` before editing. + tutorials, and docs with `git grep -n` before editing. - Do not fix unrelated lint, formatting, typing, or test failures while - implementing this plan. Mention them at the review gate instead. + implementing this plan. Mention them at the review gate instead. Required commit discipline for any AI agent following this plan: @@ -454,7 +454,7 @@ Actions: the result object says otherwise. 9. Store upper-triangle parameter correlations only. 10. Clear stale fit-state categories at the start of a new fit so old - cache manifests cannot survive a new result. + cache manifests cannot survive a new result. 11. Update this plan checklist for Step 7. Suggested commit message: @@ -524,8 +524,8 @@ Actions: 4. 
Make `analysis.display.fit_results()` work from the restored result projection. 5. First restore non-plotting result behavior and correlation summaries. - Only then add cache-aware posterior distribution, pair, and predictive - plotting. + Only then add cache-aware posterior distribution, pair, and + predictive plotting. 6. Update correlation plotting so it can use `_fit_parameter_correlation` when raw covariance or posterior samples are not available. @@ -536,7 +536,7 @@ Actions: 9. If a requested cache is unavailable or invalid, warn clearly and use the existing recomputation path only when enough runtime data exists. 10. Do not make display methods recompute KDE, contours, or predictive - bands when valid cache arrays were restored. + bands when valid cache arrays were restored. 11. If cache-aware display requires a new helper object or cache API not named in this plan, stop and ask before adding it. 12. Update this plan checklist for Step 9. diff --git a/src/easydiffraction/datablocks/experiment/collection.py b/src/easydiffraction/datablocks/experiment/collection.py index 16bfe5a4..65646d50 100644 --- a/src/easydiffraction/datablocks/experiment/collection.py +++ b/src/easydiffraction/datablocks/experiment/collection.py @@ -125,7 +125,7 @@ def add_from_data_path( scattering_type : str | None, default=None Scattering type (e.g. ``'bragg'``). 
""" - verbosity = self._parent.verbosity if self._parent is not None else None + verbosity = self._parent.verbosity.fit.value if self._parent is not None else None verb = VerbosityEnum(verbosity) if verbosity is not None else VerbosityEnum.FULL experiment = ExperimentFactory.from_scratch( name=name, diff --git a/src/easydiffraction/project/categories/rendering/default.py b/src/easydiffraction/project/categories/rendering/default.py index 431da789..179162a0 100644 --- a/src/easydiffraction/project/categories/rendering/default.py +++ b/src/easydiffraction/project/categories/rendering/default.py @@ -41,7 +41,7 @@ def __init__(self) -> None: self._plotter = Plotter() self._tabler = TableRenderer.get() - # Persist symbolic "auto" so project.cif stays portable across environments. + # Persist symbolic "auto" so project.cif stays portable. self._chart_engine = StringDescriptor( name='chart_engine', description='Chart renderer backend type', @@ -65,12 +65,14 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_rendering.table_engine']), ) - def _resolved_chart_engine(self, value: str) -> str: + @staticmethod + def _resolved_chart_engine(value: str) -> str: if value == AUTO_ENGINE: return PlotterEngineEnum.default().value return value - def _resolved_table_engine(self, value: str) -> str: + @staticmethod + def _resolved_table_engine(value: str) -> str: if value == AUTO_ENGINE: return TableEngineEnum.default().value return value diff --git a/src/easydiffraction/project/categories/verbosity/__init__.py b/src/easydiffraction/project/categories/verbosity/__init__.py index be1abd47..a7bfce5b 100644 --- a/src/easydiffraction/project/categories/verbosity/__init__.py +++ b/src/easydiffraction/project/categories/verbosity/__init__.py @@ -1,3 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause """Project verbosity category exports.""" from __future__ import annotations diff --git 
a/src/easydiffraction/project/categories/verbosity/default.py b/src/easydiffraction/project/categories/verbosity/default.py index 1121bd3a..09b3821c 100644 --- a/src/easydiffraction/project/categories/verbosity/default.py +++ b/src/easydiffraction/project/categories/verbosity/default.py @@ -1,3 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause """Project fit-output verbosity category.""" from __future__ import annotations diff --git a/src/easydiffraction/project/categories/verbosity/factory.py b/src/easydiffraction/project/categories/verbosity/factory.py index bf6e54c4..28b26bb9 100644 --- a/src/easydiffraction/project/categories/verbosity/factory.py +++ b/src/easydiffraction/project/categories/verbosity/factory.py @@ -1,3 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause """Factory for project verbosity categories.""" from __future__ import annotations diff --git a/tests/functional/test_project_lifecycle.py b/tests/functional/test_project_lifecycle.py index 7d9c5a19..850f040e 100644 --- a/tests/functional/test_project_lifecycle.py +++ b/tests/functional/test_project_lifecycle.py @@ -88,7 +88,7 @@ def test_default_verbosity_is_full(self): project = Project() finally: Project._loading = False - assert project.verbosity == 'full' + assert project.verbosity.fit.value == 'full' def test_set_verbosity_short(self): Project._loading = True @@ -97,7 +97,7 @@ def test_set_verbosity_short(self): finally: Project._loading = False project.verbosity = 'short' - assert project.verbosity == 'short' + assert project.verbosity.fit.value == 'short' def test_set_verbosity_silent(self): Project._loading = True @@ -106,7 +106,7 @@ def test_set_verbosity_silent(self): finally: Project._loading = False project.verbosity = 'silent' - assert project.verbosity == 'silent' + assert project.verbosity.fit.value == 'silent' def test_invalid_verbosity_raises(self): Project._loading = 
True diff --git a/tests/unit/easydiffraction/analysis/test_sequential.py b/tests/unit/easydiffraction/analysis/test_sequential.py index ab2327c9..a8294ec5 100644 --- a/tests/unit/easydiffraction/analysis/test_sequential.py +++ b/tests/unit/easydiffraction/analysis/test_sequential.py @@ -137,7 +137,7 @@ def _run_non_silent_fit(monkeypatch, tmp_path, *, verbosity, is_jupyter): del is_jupyter # legacy parameter, no longer affects behavior analysis = SimpleNamespace( - project=SimpleNamespace(verbosity=verbosity), + project=SimpleNamespace(verbosity=SimpleNamespace(fit=SimpleNamespace(value=verbosity))), fitter=SimpleNamespace(selection='lmfit'), ) @@ -728,7 +728,7 @@ def fake_run_fit_loop( monkeypatch.setattr(sequential_mod, '_restore_main_state', lambda *args: None) analysis = SimpleNamespace( - project=SimpleNamespace(verbosity='silent'), + project=SimpleNamespace(verbosity=SimpleNamespace(fit=SimpleNamespace(value='silent'))), fitter=SimpleNamespace(selection='lmfit'), ) diff --git a/tests/unit/easydiffraction/project/categories/verbosity/test_default.py b/tests/unit/easydiffraction/project/categories/verbosity/test_default.py new file mode 100644 index 00000000..782bdbbf --- /dev/null +++ b/tests/unit/easydiffraction/project/categories/verbosity/test_default.py @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import gemmi +import pytest + + +def test_verbosity_defaults_and_cif_output(): + from easydiffraction.project.categories.verbosity.default import Verbosity + + verbosity = Verbosity() + + assert verbosity.type_info.tag == 'default' + assert verbosity._identity.category_code == 'verbosity' + assert verbosity.fit.value == 'full' + assert '_verbosity.fit full' in verbosity.as_cif + + +def test_verbosity_setter_validates_enum_values(): + from easydiffraction.project.categories.verbosity.default import Verbosity + + verbosity = Verbosity() + + verbosity.fit = 
'short' + assert verbosity.fit.value == 'short' + + verbosity.fit = 'silent' + assert verbosity.fit.value == 'silent' + + with pytest.raises(ValueError, match="'verbose' is not a valid VerbosityEnum"): + verbosity.fit = 'verbose' + + +def test_verbosity_from_cif_restores_fit_value(): + from easydiffraction.project.categories.verbosity.default import Verbosity + + verbosity = Verbosity() + block = gemmi.cif.read_string('data_test\n_verbosity.fit short\n').sole_block() + + verbosity.from_cif(block) + + assert verbosity.fit.value == 'short' diff --git a/tests/unit/easydiffraction/project/categories/verbosity/test_factory.py b/tests/unit/easydiffraction/project/categories/verbosity/test_factory.py new file mode 100644 index 00000000..3d3d6941 --- /dev/null +++ b/tests/unit/easydiffraction/project/categories/verbosity/test_factory.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import pytest + + +def test_verbosity_factory_default_and_create(): + from easydiffraction.project.categories.verbosity.default import Verbosity + from easydiffraction.project.categories.verbosity.factory import VerbosityFactory + + assert VerbosityFactory.default_tag() == 'default' + assert 'default' in VerbosityFactory.supported_tags() + + verbosity = VerbosityFactory.create('default') + + assert isinstance(verbosity, Verbosity) + + +def test_verbosity_factory_rejects_unknown_tag(): + from easydiffraction.project.categories.verbosity.factory import VerbosityFactory + + with pytest.raises(ValueError, match=r"Unsupported type: 'missing'"): + VerbosityFactory.create('missing') diff --git a/tests/unit/easydiffraction/project/test_display.py b/tests/unit/easydiffraction/project/test_display.py index 1a892bca..f7e11e37 100644 --- a/tests/unit/easydiffraction/project/test_display.py +++ b/tests/unit/easydiffraction/project/test_display.py @@ -52,7 +52,7 @@ def _recorder(*args, **kwargs): 
analysis=SimpleNamespace(display=analysis_display), rendering=SimpleNamespace(plotter=plotter), free_parameters=[], - verbosity='full', + verbosity=SimpleNamespace(fit=SimpleNamespace(value='full')), ) return project, calls diff --git a/tests/unit/easydiffraction/project/test_project.py b/tests/unit/easydiffraction/project/test_project.py index e3dcf7a1..3afbd363 100644 --- a/tests/unit/easydiffraction/project/test_project.py +++ b/tests/unit/easydiffraction/project/test_project.py @@ -30,7 +30,7 @@ def test_project_verbosity_default(): from easydiffraction.project.project import Project p = Project() - assert p.verbosity == 'full' + assert p.verbosity.fit.value == 'full' def test_project_verbosity_setter(): @@ -38,11 +38,11 @@ def test_project_verbosity_setter(): p = Project() p.verbosity = 'short' - assert p.verbosity == 'short' + assert p.verbosity.fit.value == 'short' p.verbosity = 'silent' - assert p.verbosity == 'silent' + assert p.verbosity.fit.value == 'silent' p.verbosity = 'full' - assert p.verbosity == 'full' + assert p.verbosity.fit.value == 'full' def test_project_verbosity_invalid(): diff --git a/tests/unit/easydiffraction/project/test_project_config.py b/tests/unit/easydiffraction/project/test_project_config.py index 528c99d7..0efd49b3 100644 --- a/tests/unit/easydiffraction/project/test_project_config.py +++ b/tests/unit/easydiffraction/project/test_project_config.py @@ -23,8 +23,12 @@ def test_project_config_exposes_project_info_and_rendering_categories(): assert config.info.path is None assert isinstance(config.info.created, datetime.datetime) assert isinstance(config.info.last_modified, datetime.datetime) - assert config.categories == [config.info, config.rendering] - assert config.parameters == config.info.parameters + config.rendering.parameters + assert config.verbosity._parent is config + assert config.verbosity.fit.value == 'full' + assert config.categories == [config.info, config.rendering, config.verbosity] + assert config.parameters == ( 
+ config.info.parameters + config.rendering.parameters + config.verbosity.parameters + ) def test_project_config_as_cif_has_project_and_rendering_sections_without_data_header(): @@ -44,6 +48,7 @@ def test_project_config_as_cif_has_project_and_rendering_sections_without_data_h assert '_rendering.table_engine' in cif_text assert '_rendering.chart_engine auto' in cif_text assert '_rendering.table_engine auto' in cif_text + assert '_verbosity.fit full' in cif_text def test_project_save_and_load_use_auto_rendering_defaults_when_unset(tmp_path): @@ -57,11 +62,13 @@ def test_project_save_and_load_use_auto_rendering_defaults_when_unset(tmp_path): assert not project_cif.startswith('data_') assert '_rendering.chart_engine auto' in project_cif assert '_rendering.table_engine auto' in project_cif + assert '_verbosity.fit full' in project_cif loaded = Project.load(str(tmp_path / 'proj')) assert loaded.rendering.chart_engine.value == 'auto' assert loaded.rendering.table_engine.value == 'auto' + assert loaded.verbosity.fit.value == 'full' def test_project_save_and_load_keep_project_config_section_format(tmp_path): @@ -77,6 +84,7 @@ def test_project_save_and_load_keep_project_config_section_format(tmp_path): assert '_project.id beer' in project_cif assert '_rendering.chart_engine asciichartpy' in project_cif assert '_rendering.table_engine rich' in project_cif + assert '_verbosity.fit full' in project_cif loaded = Project.load(str(tmp_path / 'proj')) assert loaded.info.name == 'beer' @@ -86,3 +94,4 @@ def test_project_save_and_load_keep_project_config_section_format(tmp_path): assert isinstance(loaded.info.last_modified, datetime.datetime) assert loaded.rendering.chart_engine.value == 'asciichartpy' assert loaded.rendering.table_engine.value == 'rich' + assert loaded.verbosity.fit.value == 'full' From fa1fac41047e9f2ed8353e753a611d0d15f33769 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:30:37 +0200 Subject: [PATCH 12/72] Clarify analysis fit-state ADR 
schema --- .../suggestions/analysis-cif-fit-state.md | 25 +++++++++++++++++-- docs/dev/plans/analysis-cif-fit-state.md | 2 +- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md index 200645f3..a3a0df9e 100644 --- a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md +++ b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md @@ -158,24 +158,32 @@ this category. ### 5. Add `_fit_parameter_correlation` for reusable correlations `_fit_parameter_correlation` stores compact pairwise correlation -summaries: +summaries keyed by a persisted `id`: ```cif loop_ +_fit_parameter_correlation.id _fit_parameter_correlation.source_kind _fit_parameter_correlation.param_unique_name_i _fit_parameter_correlation.param_unique_name_j _fit_parameter_correlation.correlation -posterior lbco.cell.length_a hrpt.peak.broad_gauss_u 0.87 +"posterior:lbco.cell.length_a:hrpt.peak.broad_gauss_u" posterior lbco.cell.length_a hrpt.peak.broad_gauss_u 0.87 ``` Fields: +- `id` - `source_kind` - `param_unique_name_i` - `param_unique_name_j` - `correlation` +Rows are keyed by the persisted `id` field so each correlation pair has +stable collection identity in both Python and CIF. When a caller does +not provide an explicit `id`, implementations should derive one from +the normalized `source_kind`, `param_unique_name_i`, and +`param_unique_name_j` values. + Only the upper triangle excluding the diagonal is stored. Correlation heatmaps can be restored from this loop alone. Posterior pair plots still use the Bayesian pair cache or posterior samples. 
@@ -301,6 +309,7 @@ caches therefore have their own manifest categories in - `param_unique_name_x` - `param_unique_name_y` +- `id` - `x_path` - `y_path` - `density_path` @@ -309,6 +318,12 @@ caches therefore have their own manifest categories in - `n_grid_y` - `n_draws_cached` +`_bayesian_pair_cache` rows are keyed by the persisted `id` field so +each cached parameter pair has stable identity in both Python and CIF. +When a caller does not provide an explicit `id`, implementations should +derive one from the normalized `param_unique_name_x` and +`param_unique_name_y` values. + `_bayesian_predictive_dataset` supports `project.display.posterior.predictive(...)`: @@ -324,6 +339,9 @@ caches therefore have their own manifest categories in - `n_x` - `n_draws_cached` +`_bayesian_predictive_dataset` is keyed by `experiment_name` in this +schema, with at most one cached predictive dataset per experiment. + The manifest rows are the source of truth for HDF5 paths. HDF5 group naming conventions are implementation details and may change as long as the manifest remains valid. @@ -335,6 +353,9 @@ arrays large enough to make CIF unwieldy are stored in: - `analysis/results.h5` +The reference implementation uses a direct `h5py` dependency to read +and write this sidecar. + Required canonical posterior arrays, when available: - `/posterior/parameter_samples` diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 0066cf3e..fe7280b4 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -33,7 +33,7 @@ structure or experiment CIF files. - [x] Confirm composite-key loop strategy: add persisted `id` columns. - [x] Confirm public surface: expose read-only `Analysis` properties. - [x] Confirm predictive cache identity: key by `experiment_name`. -- [ ] Phase 1 step 1: update the ADR suggestion with clarifications. +- [x] Phase 1 step 1: update the ADR suggestion with clarifications. 
- [ ] Phase 1 step 2: add common fit-state category models. - [ ] Phase 1 step 3: add deterministic result category models. - [ ] Phase 1 step 4: add Bayesian metadata category models. From 703a387b7cdaa67209db1dc44102b76c739691a4 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:37:33 +0200 Subject: [PATCH 13/72] Add common analysis fit-state categories --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/__init__.py | 18 ++ .../analysis/categories/__init__.py | 10 + .../fit_parameter_correlations/__init__.py | 12 ++ .../fit_parameter_correlations/default.py | 202 ++++++++++++++++++ .../fit_parameter_correlations/factory.py | 17 ++ .../categories/fit_parameters/__init__.py | 6 + .../categories/fit_parameters/default.py | 175 +++++++++++++++ .../categories/fit_parameters/factory.py | 17 ++ .../categories/fit_result/__init__.py | 5 + .../analysis/categories/fit_result/default.py | 126 +++++++++++ .../analysis/categories/fit_result/factory.py | 17 ++ .../analysis/categories/fit_state/__init__.py | 5 + .../analysis/categories/fit_state/default.py | 42 ++++ .../analysis/categories/fit_state/factory.py | 17 ++ src/easydiffraction/analysis/enums.py | 25 +++ 16 files changed, 695 insertions(+), 1 deletion(-) create mode 100644 src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py create mode 100644 src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py create mode 100644 src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py create mode 100644 src/easydiffraction/analysis/categories/fit_parameters/__init__.py create mode 100644 src/easydiffraction/analysis/categories/fit_parameters/default.py create mode 100644 src/easydiffraction/analysis/categories/fit_parameters/factory.py create mode 100644 src/easydiffraction/analysis/categories/fit_result/__init__.py create mode 100644 src/easydiffraction/analysis/categories/fit_result/default.py create mode 
100644 src/easydiffraction/analysis/categories/fit_result/factory.py create mode 100644 src/easydiffraction/analysis/categories/fit_state/__init__.py create mode 100644 src/easydiffraction/analysis/categories/fit_state/default.py create mode 100644 src/easydiffraction/analysis/categories/fit_state/factory.py diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index fe7280b4..26bac6f7 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -34,7 +34,7 @@ structure or experiment CIF files. - [x] Confirm public surface: expose read-only `Analysis` properties. - [x] Confirm predictive cache identity: key by `experiment_name`. - [x] Phase 1 step 1: update the ADR suggestion with clarifications. -- [ ] Phase 1 step 2: add common fit-state category models. +- [x] Phase 1 step 2: add common fit-state category models. - [ ] Phase 1 step 3: add deterministic result category models. - [ ] Phase 1 step 4: add Bayesian metadata category models. - [ ] Phase 1 step 5: add Bayesian cache manifest category models. 
diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index 0fe4386c..21874792 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -3,6 +3,22 @@ from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.fitting import FittingFactory +from easydiffraction.analysis.categories.fit_parameter_correlations import ( + FitParameterCorrelationItem, +) +from easydiffraction.analysis.categories.fit_parameter_correlations import ( + FitParameterCorrelations, +) +from easydiffraction.analysis.categories.fit_parameter_correlations import ( + FitParameterCorrelationsFactory, +) +from easydiffraction.analysis.categories.fit_parameters import FitParameterItem +from easydiffraction.analysis.categories.fit_parameters import FitParameters +from easydiffraction.analysis.categories.fit_parameters import FitParametersFactory +from easydiffraction.analysis.categories.fit_result import FitResult +from easydiffraction.analysis.categories.fit_result import FitResultFactory +from easydiffraction.analysis.categories.fit_state import FitState +from easydiffraction.analysis.categories.fit_state import FitStateFactory from easydiffraction.analysis.categories.joint_fit import JointFitCollection from easydiffraction.analysis.categories.joint_fit import JointFitFactory from easydiffraction.analysis.categories.joint_fit import JointFitItem @@ -13,4 +29,6 @@ ) from easydiffraction.analysis.categories.sequential_fit_extract import SequentialFitExtractFactory from easydiffraction.analysis.categories.sequential_fit_extract import SequentialFitExtractItem +from easydiffraction.analysis.enums import FitCorrelationSourceEnum from easydiffraction.analysis.enums import FitModeEnum +from easydiffraction.analysis.enums import FitResultKindEnum diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index 
6c070cf1..11ed8493 100644 --- a/src/easydiffraction/analysis/categories/__init__.py +++ b/src/easydiffraction/analysis/categories/__init__.py @@ -5,6 +5,16 @@ from easydiffraction.analysis.categories.aliases import Aliases from easydiffraction.analysis.categories.constraints import Constraint from easydiffraction.analysis.categories.constraints import Constraints +from easydiffraction.analysis.categories.fit_parameter_correlations import ( + FitParameterCorrelationItem, +) +from easydiffraction.analysis.categories.fit_parameter_correlations import ( + FitParameterCorrelations, +) +from easydiffraction.analysis.categories.fit_parameters import FitParameterItem +from easydiffraction.analysis.categories.fit_parameters import FitParameters +from easydiffraction.analysis.categories.fit_result import FitResult +from easydiffraction.analysis.categories.fit_state import FitState from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.joint_fit import JointFitCollection from easydiffraction.analysis.categories.joint_fit import JointFitItem diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py new file mode 100644 index 00000000..5c6f22b8 --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.fit_parameter_correlations.default import ( + FitParameterCorrelationItem, +) +from easydiffraction.analysis.categories.fit_parameter_correlations.default import ( + FitParameterCorrelations, +) +from easydiffraction.analysis.categories.fit_parameter_correlations.factory import ( + FitParameterCorrelationsFactory, +) \ No newline at end of file diff --git 
a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py new file mode 100644 index 00000000..5d678fde --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py @@ -0,0 +1,202 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Persisted fit-parameter correlation summaries.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.fit_parameter_correlations.factory import ( + FitParameterCorrelationsFactory, +) +from easydiffraction.analysis.enums import FitCorrelationSourceEnum +from easydiffraction.core.category import CategoryCollection +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.validation import MembershipValidator +from easydiffraction.core.validation import RangeValidator +from easydiffraction.core.validation import RegexValidator +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +def _normalized_parameter_pair( + param_unique_name_i: str, + param_unique_name_j: str, +) -> tuple[str, str]: + """Return a stable ordering for a parameter pair.""" + if param_unique_name_i <= param_unique_name_j: + return param_unique_name_i, param_unique_name_j + return param_unique_name_j, param_unique_name_i + + +def _default_correlation_id( + *, + source_kind: str, + param_unique_name_i: str, + param_unique_name_j: str, +) -> str: + """Return the default persisted id for a correlation row.""" + normalized_i, normalized_j = _normalized_parameter_pair( + param_unique_name_i, + param_unique_name_j, + ) + return f'{source_kind}:{normalized_i}:{normalized_j}' + + +class 
FitParameterCorrelationItem(CategoryItem): + """Single persisted fit-parameter correlation row.""" + + _category_code = 'fit_parameter_correlation' + _category_entry_name = 'id' + + def __init__(self) -> None: + super().__init__() + self._id = StringDescriptor( + name='id', + description='Stable identifier for the persisted correlation row.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z0-9_.:-]+$'), + ), + cif_handler=CifHandler(names=['_fit_parameter_correlation.id']), + ) + self._source_kind = StringDescriptor( + name='source_kind', + description='Origin of the persisted correlation summary.', + value_spec=AttributeSpec( + default=FitCorrelationSourceEnum.default().value, + validator=MembershipValidator( + allowed=[member.value for member in FitCorrelationSourceEnum] + ), + ), + cif_handler=CifHandler(names=['_fit_parameter_correlation.source_kind']), + ) + self._param_unique_name_i = StringDescriptor( + name='param_unique_name_i', + description='First unique parameter name in the persisted pair.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), + ), + cif_handler=CifHandler(names=['_fit_parameter_correlation.param_unique_name_i']), + ) + self._param_unique_name_j = StringDescriptor( + name='param_unique_name_j', + description='Second unique parameter name in the persisted pair.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), + ), + cif_handler=CifHandler(names=['_fit_parameter_correlation.param_unique_name_j']), + ) + self._correlation = NumericDescriptor( + name='correlation', + description='Persisted correlation coefficient for the parameter pair.', + value_spec=AttributeSpec( + default=0.0, + validator=RangeValidator(ge=-1.0, le=1.0), + ), + cif_handler=CifHandler(names=['_fit_parameter_correlation.correlation']), + ) + + @property + def id(self) -> StringDescriptor: + """Stable identifier for 
the persisted correlation row.""" + return self._id + + def _set_id(self, value: str) -> None: + """Set the persisted correlation-row id for internal callers.""" + self._id.value = value + + @property + def source_kind(self) -> StringDescriptor: + """Origin of the persisted correlation summary.""" + return self._source_kind + + def _set_source_kind(self, value: str) -> None: + """Set the correlation source kind for internal callers.""" + self._source_kind.value = value + + @property + def param_unique_name_i(self) -> StringDescriptor: + """First unique parameter name in the persisted pair.""" + return self._param_unique_name_i + + def _set_param_unique_name_i(self, value: str) -> None: + """Set the first parameter name for internal callers.""" + self._param_unique_name_i.value = value + + @property + def param_unique_name_j(self) -> StringDescriptor: + """Second unique parameter name in the persisted pair.""" + return self._param_unique_name_j + + def _set_param_unique_name_j(self, value: str) -> None: + """Set the second parameter name for internal callers.""" + self._param_unique_name_j.value = value + + @property + def correlation(self) -> NumericDescriptor: + """Persisted correlation coefficient for the parameter pair.""" + return self._correlation + + def _set_correlation(self, value: int | float) -> None: + """Set the correlation coefficient for internal callers.""" + self._correlation.value = value + + +@FitParameterCorrelationsFactory.register +class FitParameterCorrelations(CategoryCollection): + """Collection of persisted fit-parameter correlation summaries.""" + + type_info = TypeInfo( + tag='default', + description='Persisted fit-parameter correlation summaries', + ) + + def __init__(self) -> None: + super().__init__(item_type=FitParameterCorrelationItem) + + def create( + self, + *, + source_kind: str, + param_unique_name_i: str, + param_unique_name_j: str, + correlation: int | float, + id: str | None = None, + ) -> None: + """ + Create a persisted 
fit-parameter correlation row. + + Parameters + ---------- + source_kind : str + Origin of the persisted correlation summary. + param_unique_name_i : str + First unique parameter name in the pair. + param_unique_name_j : str + Second unique parameter name in the pair. + correlation : int | float + Correlation coefficient for the parameter pair. + id : str | None, default=None + Explicit persisted row identifier. When omitted, a stable id + is derived from the normalized parameter pair. + """ + normalized_i, normalized_j = _normalized_parameter_pair( + param_unique_name_i, + param_unique_name_j, + ) + item = FitParameterCorrelationItem() + item._set_source_kind(source_kind) + item._set_param_unique_name_i(normalized_i) + item._set_param_unique_name_j(normalized_j) + item._set_correlation(correlation) + resolved_id = id or _default_correlation_id( + source_kind=source_kind, + param_unique_name_i=normalized_i, + param_unique_name_j=normalized_j, + ) + item._set_id(resolved_id) + self.add(item) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py new file mode 100644 index 00000000..8a6f77a0 --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Fit-parameter-correlation factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class FitParameterCorrelationsFactory(FactoryBase): + """Create fit-parameter correlation collections by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_parameters/__init__.py b/src/easydiffraction/analysis/categories/fit_parameters/__init__.py new 
file mode 100644 index 00000000..ad06e0df --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_parameters/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.fit_parameters.default import FitParameterItem +from easydiffraction.analysis.categories.fit_parameters.default import FitParameters +from easydiffraction.analysis.categories.fit_parameters.factory import FitParametersFactory \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_parameters/default.py b/src/easydiffraction/analysis/categories/fit_parameters/default.py new file mode 100644 index 00000000..31572fcf --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_parameters/default.py @@ -0,0 +1,175 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Fit-parameter control snapshots.""" + +from __future__ import annotations + +import numpy as np + +from easydiffraction.analysis.categories.fit_parameters.factory import FitParametersFactory +from easydiffraction.core.category import CategoryCollection +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.validation import RegexValidator +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +class FitParameterItem(CategoryItem): + """Single persisted fit-parameter control row.""" + + _category_code = 'fit_parameter' + _category_entry_name = 'param_unique_name' + + def __init__(self) -> None: + super().__init__() + self._param_unique_name = StringDescriptor( + name='param_unique_name', + description='Unique name of the referenced live parameter.', + value_spec=AttributeSpec( + 
default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), + ), + cif_handler=CifHandler(names=['_fit_parameter.param_unique_name']), + ) + self._fit_min = NumericDescriptor( + name='fit_min', + description='Persisted lower fit bound.', + value_spec=AttributeSpec(default=-np.inf), + cif_handler=CifHandler(names=['_fit_parameter.fit_min']), + ) + self._fit_max = NumericDescriptor( + name='fit_max', + description='Persisted upper fit bound.', + value_spec=AttributeSpec(default=np.inf), + cif_handler=CifHandler(names=['_fit_parameter.fit_max']), + ) + self._fit_bounds_uncertainty_multiplier = NumericDescriptor( + name='fit_bounds_uncertainty_multiplier', + description='Multiplier used to derive fit bounds from uncertainty.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_fit_parameter.fit_bounds_uncertainty_multiplier'] + ), + ) + self._start_value = NumericDescriptor( + name='start_value', + description='Persisted pre-fit value snapshot.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_fit_parameter.start_value']), + ) + self._start_uncertainty = NumericDescriptor( + name='start_uncertainty', + description='Persisted pre-fit uncertainty snapshot.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_fit_parameter.start_uncertainty']), + ) + + @property + def param_unique_name(self) -> StringDescriptor: + """Unique name of the referenced live parameter.""" + return self._param_unique_name + + def _set_param_unique_name(self, value: str) -> None: + """Set the referenced parameter unique name for internal callers.""" + self._param_unique_name.value = value + + @property + def fit_min(self) -> NumericDescriptor: + """Persisted lower fit bound.""" + return self._fit_min + + def _set_fit_min(self, value: int | float) -> None: + """Set the persisted lower fit bound for internal callers.""" + self._fit_min.value = value + 
+ @property + def fit_max(self) -> NumericDescriptor: + """Persisted upper fit bound.""" + return self._fit_max + + def _set_fit_max(self, value: int | float) -> None: + """Set the persisted upper fit bound for internal callers.""" + self._fit_max.value = value + + @property + def fit_bounds_uncertainty_multiplier(self) -> NumericDescriptor: + """Multiplier used to derive fit bounds from uncertainty.""" + return self._fit_bounds_uncertainty_multiplier + + def _set_fit_bounds_uncertainty_multiplier( + self, + value: int | float | None, + ) -> None: + """Set the fit-bounds uncertainty multiplier for internal callers.""" + self._fit_bounds_uncertainty_multiplier.value = value + + @property + def start_value(self) -> NumericDescriptor: + """Persisted pre-fit value snapshot.""" + return self._start_value + + def _set_start_value(self, value: int | float | None) -> None: + """Set the pre-fit value snapshot for internal callers.""" + self._start_value.value = value + + @property + def start_uncertainty(self) -> NumericDescriptor: + """Persisted pre-fit uncertainty snapshot.""" + return self._start_uncertainty + + def _set_start_uncertainty(self, value: int | float | None) -> None: + """Set the pre-fit uncertainty snapshot for internal callers.""" + self._start_uncertainty.value = value + + +@FitParametersFactory.register +class FitParameters(CategoryCollection): + """Collection of persisted fit-parameter control snapshots.""" + + type_info = TypeInfo( + tag='default', + description='Persisted fit-parameter control snapshots', + ) + + def __init__(self) -> None: + super().__init__(item_type=FitParameterItem) + + def create( + self, + *, + param_unique_name: str, + fit_min: int | float, + fit_max: int | float, + fit_bounds_uncertainty_multiplier: int | float | None = None, + start_value: int | float | None = None, + start_uncertainty: int | float | None = None, + ) -> None: + """ + Create a persisted fit-parameter control snapshot row. 
+ + Parameters + ---------- + param_unique_name : str + Unique name of the referenced live parameter. + fit_min : int | float + Persisted lower fit bound. + fit_max : int | float + Persisted upper fit bound. + fit_bounds_uncertainty_multiplier : int | float | None, default=None + Multiplier used to derive fit bounds from uncertainty. + start_value : int | float | None, default=None + Persisted pre-fit value snapshot. + start_uncertainty : int | float | None, default=None + Persisted pre-fit uncertainty snapshot. + """ + item = FitParameterItem() + item._set_param_unique_name(param_unique_name) + item._set_fit_min(fit_min) + item._set_fit_max(fit_max) + item._set_fit_bounds_uncertainty_multiplier(fit_bounds_uncertainty_multiplier) + item._set_start_value(start_value) + item._set_start_uncertainty(start_uncertainty) + self.add(item) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_parameters/factory.py b/src/easydiffraction/analysis/categories/fit_parameters/factory.py new file mode 100644 index 00000000..a8ae8c34 --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_parameters/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Fit-parameter factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class FitParametersFactory(FactoryBase): + """Create fit-parameter collections by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_result/__init__.py b/src/easydiffraction/analysis/categories/fit_result/__init__.py new file mode 100644 index 00000000..22163bc0 --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_result/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + 
+from easydiffraction.analysis.categories.fit_result.default import FitResult +from easydiffraction.analysis.categories.fit_result.factory import FitResultFactory \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_result/default.py b/src/easydiffraction/analysis/categories/fit_result/default.py new file mode 100644 index 00000000..b3c04844 --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_result/default.py @@ -0,0 +1,126 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Common fit-result status category.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.fit_result.factory import FitResultFactory +from easydiffraction.analysis.enums import FitResultKindEnum +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.validation import MembershipValidator +from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +@FitResultFactory.register +class FitResult(CategoryItem): + """Common persisted fit-result status metadata.""" + + _category_code = 'fit_result' + + type_info = TypeInfo( + tag='default', + description='Common persisted fit-result status metadata', + ) + + def __init__(self) -> None: + super().__init__() + self._result_kind = StringDescriptor( + name='result_kind', + description='Kind of the latest persisted fit-result projection.', + value_spec=AttributeSpec( + default=FitResultKindEnum.default().value, + validator=MembershipValidator( + allowed=[member.value for member in FitResultKindEnum] + ), + ), + cif_handler=CifHandler(names=['_fit_result.result_kind']), + ) + self._success = BoolDescriptor( + 
name='success', + description='Whether the latest persisted fit-result projection succeeded.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_fit_result.success']), + ) + self._message = StringDescriptor( + name='message', + description='Status message for the latest persisted fit-result projection.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_fit_result.message']), + ) + self._iterations = NumericDescriptor( + name='iterations', + description='Iteration count for the latest persisted fit-result projection.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_fit_result.iterations']), + ) + self._fitting_time = NumericDescriptor( + name='fitting_time', + description='Fitting time in seconds for the latest persisted projection.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_fit_result.fitting_time']), + ) + self._reduced_chi_square = NumericDescriptor( + name='reduced_chi_square', + description='Reduced chi-square for the latest persisted projection.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_fit_result.reduced_chi_square']), + ) + + @property + def result_kind(self) -> StringDescriptor: + """Kind of the latest persisted fit-result projection.""" + return self._result_kind + + def _set_result_kind(self, value: str) -> None: + """Set the result kind for internal callers.""" + self._result_kind.value = value + + @property + def success(self) -> BoolDescriptor: + """Whether the latest persisted fit-result projection succeeded.""" + return self._success + + def _set_success(self, value: bool) -> None: + """Set the success flag for internal callers.""" + self._success.value = value + + @property + def message(self) -> StringDescriptor: + """Status message for the latest persisted fit-result projection.""" + return self._message + + def _set_message(self, value: str) -> None: + """Set the 
fit-result message for internal callers.""" + self._message.value = value + + @property + def iterations(self) -> NumericDescriptor: + """Iteration count for the latest persisted fit-result projection.""" + return self._iterations + + def _set_iterations(self, value: int | float) -> None: + """Set the iteration count for internal callers.""" + self._iterations.value = value + + @property + def fitting_time(self) -> NumericDescriptor: + """Fitting time in seconds for the latest persisted projection.""" + return self._fitting_time + + def _set_fitting_time(self, value: int | float | None) -> None: + """Set the fitting time for internal callers.""" + self._fitting_time.value = value + + @property + def reduced_chi_square(self) -> NumericDescriptor: + """Reduced chi-square for the latest persisted projection.""" + return self._reduced_chi_square + + def _set_reduced_chi_square(self, value: int | float | None) -> None: + """Set the reduced chi-square for internal callers.""" + self._reduced_chi_square.value = value \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_result/factory.py b/src/easydiffraction/analysis/categories/fit_result/factory.py new file mode 100644 index 00000000..8637e65a --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_result/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Fit-result factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class FitResultFactory(FactoryBase): + """Create fit-result categories by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_state/__init__.py b/src/easydiffraction/analysis/categories/fit_state/__init__.py new file mode 100644 index 00000000..6036277b --- /dev/null +++ 
b/src/easydiffraction/analysis/categories/fit_state/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.fit_state.default import FitState +from easydiffraction.analysis.categories.fit_state.factory import FitStateFactory \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_state/default.py b/src/easydiffraction/analysis/categories/fit_state/default.py new file mode 100644 index 00000000..ac8e955b --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_state/default.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Fit-state schema metadata category.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.fit_state.factory import FitStateFactory +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +@FitStateFactory.register +class FitState(CategoryItem): + """Persisted fit-state schema metadata.""" + + _category_code = 'fit_state' + + type_info = TypeInfo( + tag='default', + description='Persisted fit-state schema metadata', + ) + + def __init__(self) -> None: + super().__init__() + self._schema_version = NumericDescriptor( + name='schema_version', + description='Persisted fit-state schema version.', + value_spec=AttributeSpec(default=1), + cif_handler=CifHandler(names=['_fit_state.schema_version']), + ) + + @property + def schema_version(self) -> NumericDescriptor: + """Persisted fit-state schema version.""" + return self._schema_version + + def _set_schema_version(self, value: int | float) -> None: + """Set the fit-state schema version for internal callers.""" + 
self._schema_version.value = value \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/fit_state/factory.py b/src/easydiffraction/analysis/categories/fit_state/factory.py new file mode 100644 index 00000000..1d394c5a --- /dev/null +++ b/src/easydiffraction/analysis/categories/fit_state/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Fit-state factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class FitStateFactory(FactoryBase): + """Create fit-state categories by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/enums.py b/src/easydiffraction/analysis/enums.py index c1ef93f7..c70c81b8 100644 --- a/src/easydiffraction/analysis/enums.py +++ b/src/easydiffraction/analysis/enums.py @@ -4,6 +4,7 @@ from __future__ import annotations +from enum import Enum from enum import StrEnum @@ -28,3 +29,27 @@ def description(self) -> str: if self is FitModeEnum.SEQUENTIAL: return 'Fit one experiment against a series of data files.' 
return '' + + +class FitResultKindEnum(str, Enum): + """Persisted kind of the latest fit-result projection.""" + + DETERMINISTIC = 'deterministic' + BAYESIAN = 'bayesian' + + @classmethod + def default(cls) -> FitResultKindEnum: + """Return the default persisted fit-result kind.""" + return cls.DETERMINISTIC + + +class FitCorrelationSourceEnum(str, Enum): + """Source of a persisted fit-parameter correlation summary.""" + + DETERMINISTIC = 'deterministic' + POSTERIOR = 'posterior' + + @classmethod + def default(cls) -> FitCorrelationSourceEnum: + """Return the default persisted correlation source.""" + return cls.DETERMINISTIC From 63686875fdcc5565a7367f950b5f306604681576 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:40:47 +0200 Subject: [PATCH 14/72] Add deterministic fit-result categories --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/__init__.py | 13 ++ .../analysis/categories/__init__.py | 7 + .../__init__.py | 12 ++ .../default.py | 179 +++++++++++++++++ .../factory.py | 17 ++ .../deterministic_result/__init__.py | 9 + .../deterministic_result/default.py | 185 ++++++++++++++++++ .../deterministic_result/factory.py | 17 ++ 9 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py create mode 100644 src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py create mode 100644 src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py create mode 100644 src/easydiffraction/analysis/categories/deterministic_result/__init__.py create mode 100644 src/easydiffraction/analysis/categories/deterministic_result/default.py create mode 100644 src/easydiffraction/analysis/categories/deterministic_result/factory.py diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 26bac6f7..0c263be9 100644 --- 
a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -35,7 +35,7 @@ structure or experiment CIF files. - [x] Confirm predictive cache identity: key by `experiment_name`. - [x] Phase 1 step 1: update the ADR suggestion with clarifications. - [x] Phase 1 step 2: add common fit-state category models. -- [ ] Phase 1 step 3: add deterministic result category models. +- [x] Phase 1 step 3: add deterministic result category models. - [ ] Phase 1 step 4: add Bayesian metadata category models. - [ ] Phase 1 step 5: add Bayesian cache manifest category models. - [ ] Phase 1 step 6: wire analysis CIF save/load for fit state. diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index 21874792..7ea4fc02 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -1,6 +1,19 @@ # SPDX-FileCopyrightText: 2025 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +from easydiffraction.analysis.categories.deterministic_parameter_results import ( + DeterministicParameterResultItem, +) +from easydiffraction.analysis.categories.deterministic_parameter_results import ( + DeterministicParameterResults, +) +from easydiffraction.analysis.categories.deterministic_parameter_results import ( + DeterministicParameterResultsFactory, +) +from easydiffraction.analysis.categories.deterministic_result import DeterministicResult +from easydiffraction.analysis.categories.deterministic_result import ( + DeterministicResultFactory, +) from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.fitting import FittingFactory from easydiffraction.analysis.categories.fit_parameter_correlations import ( diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index 11ed8493..5aa7c2cf 100644 --- a/src/easydiffraction/analysis/categories/__init__.py +++ 
b/src/easydiffraction/analysis/categories/__init__.py @@ -5,6 +5,13 @@ from easydiffraction.analysis.categories.aliases import Aliases from easydiffraction.analysis.categories.constraints import Constraint from easydiffraction.analysis.categories.constraints import Constraints +from easydiffraction.analysis.categories.deterministic_parameter_results import ( + DeterministicParameterResultItem, +) +from easydiffraction.analysis.categories.deterministic_parameter_results import ( + DeterministicParameterResults, +) +from easydiffraction.analysis.categories.deterministic_result import DeterministicResult from easydiffraction.analysis.categories.fit_parameter_correlations import ( FitParameterCorrelationItem, ) diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py new file mode 100644 index 00000000..8e00501e --- /dev/null +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.deterministic_parameter_results.default import ( + DeterministicParameterResultItem, +) +from easydiffraction.analysis.categories.deterministic_parameter_results.default import ( + DeterministicParameterResults, +) +from easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( + DeterministicParameterResultsFactory, +) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py new file mode 100644 index 00000000..002d13dc --- /dev/null +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py @@ -0,0 +1,179 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# 
SPDX-License-Identifier: BSD-3-Clause +"""Deterministic fit parameter-result rows.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( + DeterministicParameterResultsFactory, +) +from easydiffraction.core.category import CategoryCollection +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.validation import RegexValidator +from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +class DeterministicParameterResultItem(CategoryItem): + """Single persisted deterministic parameter-result row.""" + + _category_code = 'deterministic_parameter_result' + _category_entry_name = 'param_unique_name' + + def __init__(self) -> None: + super().__init__() + self._order_index = NumericDescriptor( + name='order_index', + description='Display and array order for the persisted parameter result.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_deterministic_parameter_result.order_index']), + ) + self._param_unique_name = StringDescriptor( + name='param_unique_name', + description='Unique name of the persisted parameter result row.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), + ), + cif_handler=CifHandler( + names=['_deterministic_parameter_result.param_unique_name'] + ), + ) + self._final_value = NumericDescriptor( + name='final_value', + description='Final fitted value for the persisted parameter result.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_deterministic_parameter_result.final_value']), + ) + self._final_uncertainty = NumericDescriptor( + 
name='final_uncertainty', + description='Final uncertainty for the persisted parameter result.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_deterministic_parameter_result.final_uncertainty'] + ), + ) + self._at_lower_bound = BoolDescriptor( + name='at_lower_bound', + description='Whether the parameter finished at the lower fit bound.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler( + names=['_deterministic_parameter_result.at_lower_bound'] + ), + ) + self._at_upper_bound = BoolDescriptor( + name='at_upper_bound', + description='Whether the parameter finished at the upper fit bound.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler( + names=['_deterministic_parameter_result.at_upper_bound'] + ), + ) + + @property + def order_index(self) -> NumericDescriptor: + """Display and array order for the persisted parameter result.""" + return self._order_index + + def _set_order_index(self, value: int | float) -> None: + """Set the order index for internal callers.""" + self._order_index.value = value + + @property + def param_unique_name(self) -> StringDescriptor: + """Unique name of the persisted parameter result row.""" + return self._param_unique_name + + def _set_param_unique_name(self, value: str) -> None: + """Set the parameter unique name for internal callers.""" + self._param_unique_name.value = value + + @property + def final_value(self) -> NumericDescriptor: + """Final fitted value for the persisted parameter result.""" + return self._final_value + + def _set_final_value(self, value: int | float | None) -> None: + """Set the final fitted value for internal callers.""" + self._final_value.value = value + + @property + def final_uncertainty(self) -> NumericDescriptor: + """Final uncertainty for the persisted parameter result.""" + return self._final_uncertainty + + def _set_final_uncertainty(self, value: int | float | None) -> None: + """Set the final uncertainty for 
internal callers.""" + self._final_uncertainty.value = value + + @property + def at_lower_bound(self) -> BoolDescriptor: + """Whether the parameter finished at the lower fit bound.""" + return self._at_lower_bound + + def _set_at_lower_bound(self, value: bool) -> None: + """Set the lower-bound flag for internal callers.""" + self._at_lower_bound.value = value + + @property + def at_upper_bound(self) -> BoolDescriptor: + """Whether the parameter finished at the upper fit bound.""" + return self._at_upper_bound + + def _set_at_upper_bound(self, value: bool) -> None: + """Set the upper-bound flag for internal callers.""" + self._at_upper_bound.value = value + + +@DeterministicParameterResultsFactory.register +class DeterministicParameterResults(CategoryCollection): + """Collection of persisted deterministic parameter-result rows.""" + + type_info = TypeInfo( + tag='default', + description='Persisted deterministic parameter-result rows', + ) + + def __init__(self) -> None: + super().__init__(item_type=DeterministicParameterResultItem) + + def create( + self, + *, + order_index: int | float, + param_unique_name: str, + final_value: int | float | None = None, + final_uncertainty: int | float | None = None, + at_lower_bound: bool = False, + at_upper_bound: bool = False, + ) -> None: + """ + Create a persisted deterministic parameter-result row. + + Parameters + ---------- + order_index : int | float + Display and array order for the persisted parameter result. + param_unique_name : str + Unique name of the persisted parameter result row. + final_value : int | float | None, default=None + Final fitted value for the persisted parameter result. + final_uncertainty : int | float | None, default=None + Final uncertainty for the persisted parameter result. + at_lower_bound : bool, default=False + Whether the parameter finished at the lower fit bound. + at_upper_bound : bool, default=False + Whether the parameter finished at the upper fit bound. 
+ """ + item = DeterministicParameterResultItem() + item._set_order_index(order_index) + item._set_param_unique_name(param_unique_name) + item._set_final_value(final_value) + item._set_final_uncertainty(final_uncertainty) + item._set_at_lower_bound(at_lower_bound) + item._set_at_upper_bound(at_upper_bound) + self.add(item) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py new file mode 100644 index 00000000..47b0db4c --- /dev/null +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Deterministic-parameter-results factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class DeterministicParameterResultsFactory(FactoryBase): + """Create deterministic-parameter-result collections by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/deterministic_result/__init__.py b/src/easydiffraction/analysis/categories/deterministic_result/__init__.py new file mode 100644 index 00000000..851d2a1c --- /dev/null +++ b/src/easydiffraction/analysis/categories/deterministic_result/__init__.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.deterministic_result.default import ( + DeterministicResult, +) +from easydiffraction.analysis.categories.deterministic_result.factory import ( + DeterministicResultFactory, +) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/deterministic_result/default.py 
b/src/easydiffraction/analysis/categories/deterministic_result/default.py new file mode 100644 index 00000000..51f51984 --- /dev/null +++ b/src/easydiffraction/analysis/categories/deterministic_result/default.py @@ -0,0 +1,185 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Deterministic fit-result metadata category.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.deterministic_result.factory import ( + DeterministicResultFactory, +) +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +@DeterministicResultFactory.register +class DeterministicResult(CategoryItem): + """Persisted deterministic fit-result metadata.""" + + _category_code = 'deterministic_result' + + type_info = TypeInfo( + tag='default', + description='Persisted deterministic fit-result metadata', + ) + + def __init__(self) -> None: + super().__init__() + self._optimizer_name = StringDescriptor( + name='optimizer_name', + description='Name of the persisted deterministic optimizer.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_deterministic_result.optimizer_name']), + ) + self._method_name = StringDescriptor( + name='method_name', + description='Method name of the persisted deterministic optimizer.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_deterministic_result.method_name']), + ) + self._objective_name = StringDescriptor( + name='objective_name', + description='Objective function name for the persisted deterministic fit.', + value_spec=AttributeSpec(default=''), + 
cif_handler=CifHandler(names=['_deterministic_result.objective_name']), + ) + self._objective_value = NumericDescriptor( + name='objective_value', + description='Objective value for the persisted deterministic fit.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_deterministic_result.objective_value']), + ) + self._n_data_points = NumericDescriptor( + name='n_data_points', + description='Number of data points used in the persisted deterministic fit.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_deterministic_result.n_data_points']), + ) + self._n_parameters = NumericDescriptor( + name='n_parameters', + description='Number of parameters considered in the persisted deterministic fit.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_deterministic_result.n_parameters']), + ) + self._n_free_parameters = NumericDescriptor( + name='n_free_parameters', + description='Number of free parameters in the persisted deterministic fit.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_deterministic_result.n_free_parameters']), + ) + self._degrees_of_freedom = NumericDescriptor( + name='degrees_of_freedom', + description='Degrees of freedom for the persisted deterministic fit.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_deterministic_result.degrees_of_freedom']), + ) + self._covariance_available = BoolDescriptor( + name='covariance_available', + description='Whether covariance was available for the persisted deterministic fit.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler( + names=['_deterministic_result.covariance_available'] + ), + ) + self._correlation_available = BoolDescriptor( + name='correlation_available', + description='Whether correlations were available for the persisted deterministic fit.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler( + 
names=['_deterministic_result.correlation_available'] + ), + ) + + @property + def optimizer_name(self) -> StringDescriptor: + """Name of the persisted deterministic optimizer.""" + return self._optimizer_name + + def _set_optimizer_name(self, value: str) -> None: + """Set the optimizer name for internal callers.""" + self._optimizer_name.value = value + + @property + def method_name(self) -> StringDescriptor: + """Method name of the persisted deterministic optimizer.""" + return self._method_name + + def _set_method_name(self, value: str) -> None: + """Set the method name for internal callers.""" + self._method_name.value = value + + @property + def objective_name(self) -> StringDescriptor: + """Objective function name for the persisted deterministic fit.""" + return self._objective_name + + def _set_objective_name(self, value: str) -> None: + """Set the objective name for internal callers.""" + self._objective_name.value = value + + @property + def objective_value(self) -> NumericDescriptor: + """Objective value for the persisted deterministic fit.""" + return self._objective_value + + def _set_objective_value(self, value: int | float | None) -> None: + """Set the objective value for internal callers.""" + self._objective_value.value = value + + @property + def n_data_points(self) -> NumericDescriptor: + """Number of data points used in the persisted deterministic fit.""" + return self._n_data_points + + def _set_n_data_points(self, value: int | float) -> None: + """Set the data-point count for internal callers.""" + self._n_data_points.value = value + + @property + def n_parameters(self) -> NumericDescriptor: + """Number of parameters considered in the persisted deterministic fit.""" + return self._n_parameters + + def _set_n_parameters(self, value: int | float) -> None: + """Set the parameter count for internal callers.""" + self._n_parameters.value = value + + @property + def n_free_parameters(self) -> NumericDescriptor: + """Number of free parameters in the 
persisted deterministic fit.""" + return self._n_free_parameters + + def _set_n_free_parameters(self, value: int | float) -> None: + """Set the free-parameter count for internal callers.""" + self._n_free_parameters.value = value + + @property + def degrees_of_freedom(self) -> NumericDescriptor: + """Degrees of freedom for the persisted deterministic fit.""" + return self._degrees_of_freedom + + def _set_degrees_of_freedom(self, value: int | float) -> None: + """Set the degrees of freedom for internal callers.""" + self._degrees_of_freedom.value = value + + @property + def covariance_available(self) -> BoolDescriptor: + """Whether covariance was available for the persisted deterministic fit.""" + return self._covariance_available + + def _set_covariance_available(self, value: bool) -> None: + """Set the covariance-available flag for internal callers.""" + self._covariance_available.value = value + + @property + def correlation_available(self) -> BoolDescriptor: + """Whether correlations were available for the persisted deterministic fit.""" + return self._correlation_available + + def _set_correlation_available(self, value: bool) -> None: + """Set the correlation-available flag for internal callers.""" + self._correlation_available.value = value \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/deterministic_result/factory.py b/src/easydiffraction/analysis/categories/deterministic_result/factory.py new file mode 100644 index 00000000..44416cee --- /dev/null +++ b/src/easydiffraction/analysis/categories/deterministic_result/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Deterministic-result factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class DeterministicResultFactory(FactoryBase): + """Create deterministic-result categories by tag.""" + + _default_rules: 
ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file From 4b656b88d623aeebb24fda4f0d92c779f32f8734 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:43:34 +0200 Subject: [PATCH 15/72] Add Bayesian fit-result metadata categories --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/__init__.py | 17 + .../analysis/categories/__init__.py | 9 + .../bayesian_convergence/__init__.py | 9 + .../bayesian_convergence/default.py | 120 +++++++ .../bayesian_convergence/factory.py | 17 + .../bayesian_parameter_posteriors/__init__.py | 12 + .../bayesian_parameter_posteriors/default.py | 294 ++++++++++++++++++ .../bayesian_parameter_posteriors/factory.py | 17 + .../categories/bayesian_result/__init__.py | 5 + .../categories/bayesian_result/default.py | 209 +++++++++++++ .../categories/bayesian_result/factory.py | 17 + .../categories/bayesian_sampler/__init__.py | 5 + .../categories/bayesian_sampler/default.py | 134 ++++++++ .../categories/bayesian_sampler/factory.py | 17 + 15 files changed, 883 insertions(+), 1 deletion(-) create mode 100644 src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_convergence/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_convergence/factory.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_result/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_result/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_result/factory.py create mode 100644 
src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_sampler/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_sampler/factory.py diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 0c263be9..3e972789 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -36,7 +36,7 @@ structure or experiment CIF files. - [x] Phase 1 step 1: update the ADR suggestion with clarifications. - [x] Phase 1 step 2: add common fit-state category models. - [x] Phase 1 step 3: add deterministic result category models. -- [ ] Phase 1 step 4: add Bayesian metadata category models. +- [x] Phase 1 step 4: add Bayesian metadata category models. - [ ] Phase 1 step 5: add Bayesian cache manifest category models. - [ ] Phase 1 step 6: wire analysis CIF save/load for fit state. - [ ] Phase 1 step 7: capture fit projections after fitting. 
diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index 7ea4fc02..8b9c9777 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -1,6 +1,23 @@ # SPDX-FileCopyrightText: 2025 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +from easydiffraction.analysis.categories.bayesian_convergence import BayesianConvergence +from easydiffraction.analysis.categories.bayesian_convergence import ( + BayesianConvergenceFactory, +) +from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( + BayesianParameterPosteriorItem, +) +from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( + BayesianParameterPosteriors, +) +from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( + BayesianParameterPosteriorsFactory, +) +from easydiffraction.analysis.categories.bayesian_result import BayesianResult +from easydiffraction.analysis.categories.bayesian_result import BayesianResultFactory +from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler +from easydiffraction.analysis.categories.bayesian_sampler import BayesianSamplerFactory from easydiffraction.analysis.categories.deterministic_parameter_results import ( DeterministicParameterResultItem, ) diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index 5aa7c2cf..2659c3aa 100644 --- a/src/easydiffraction/analysis/categories/__init__.py +++ b/src/easydiffraction/analysis/categories/__init__.py @@ -3,6 +3,15 @@ from easydiffraction.analysis.categories.aliases import Alias from easydiffraction.analysis.categories.aliases import Aliases +from easydiffraction.analysis.categories.bayesian_convergence import BayesianConvergence +from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( + BayesianParameterPosteriorItem, +) +from 
easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( + BayesianParameterPosteriors, +) +from easydiffraction.analysis.categories.bayesian_result import BayesianResult +from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.constraints import Constraint from easydiffraction.analysis.categories.constraints import Constraints from easydiffraction.analysis.categories.deterministic_parameter_results import ( diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py b/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py new file mode 100644 index 00000000..c77527f4 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.bayesian_convergence.default import ( + BayesianConvergence, +) +from easydiffraction.analysis.categories.bayesian_convergence.factory import ( + BayesianConvergenceFactory, +) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py new file mode 100644 index 00000000..e6a76945 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py @@ -0,0 +1,120 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian convergence diagnostics category.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.bayesian_convergence.factory import ( + BayesianConvergenceFactory, +) +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.variable import BoolDescriptor 
+from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +@BayesianConvergenceFactory.register +class BayesianConvergence(CategoryItem): + """Persisted Bayesian convergence diagnostics.""" + + _category_code = 'bayesian_convergence' + + type_info = TypeInfo( + tag='default', + description='Persisted Bayesian convergence diagnostics', + ) + + def __init__(self) -> None: + super().__init__() + self._converged = BoolDescriptor( + name='converged', + description='Whether the Bayesian fit met convergence criteria.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_bayesian_convergence.converged']), + ) + self._max_r_hat = NumericDescriptor( + name='max_r_hat', + description='Maximum rank-normalized split-R-hat across parameters.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_convergence.max_r_hat']), + ) + self._min_ess_bulk = NumericDescriptor( + name='min_ess_bulk', + description='Minimum bulk effective sample size across parameters.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_convergence.min_ess_bulk']), + ) + self._n_draws = NumericDescriptor( + name='n_draws', + description='Number of stored posterior draws.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_convergence.n_draws']), + ) + self._n_chains = NumericDescriptor( + name='n_chains', + description='Number of stored posterior chains.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_convergence.n_chains']), + ) + self._n_parameters = NumericDescriptor( + name='n_parameters', + description='Number of sampled parameters.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_convergence.n_parameters']), + ) + + @property + def converged(self) -> BoolDescriptor: + """Whether the Bayesian fit met convergence 
criteria.""" + return self._converged + + def _set_converged(self, value: bool) -> None: + """Set the convergence flag for internal callers.""" + self._converged.value = value + + @property + def max_r_hat(self) -> NumericDescriptor: + """Maximum rank-normalized split-R-hat across parameters.""" + return self._max_r_hat + + def _set_max_r_hat(self, value: int | float | None) -> None: + """Set the maximum R-hat for internal callers.""" + self._max_r_hat.value = value + + @property + def min_ess_bulk(self) -> NumericDescriptor: + """Minimum bulk effective sample size across parameters.""" + return self._min_ess_bulk + + def _set_min_ess_bulk(self, value: int | float | None) -> None: + """Set the minimum ESS bulk for internal callers.""" + self._min_ess_bulk.value = value + + @property + def n_draws(self) -> NumericDescriptor: + """Number of stored posterior draws.""" + return self._n_draws + + def _set_n_draws(self, value: int | float) -> None: + """Set the draw count for internal callers.""" + self._n_draws.value = value + + @property + def n_chains(self) -> NumericDescriptor: + """Number of stored posterior chains.""" + return self._n_chains + + def _set_n_chains(self, value: int | float) -> None: + """Set the chain count for internal callers.""" + self._n_chains.value = value + + @property + def n_parameters(self) -> NumericDescriptor: + """Number of sampled parameters.""" + return self._n_parameters + + def _set_n_parameters(self, value: int | float) -> None: + """Set the sampled-parameter count for internal callers.""" + self._n_parameters.value = value \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py b/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py new file mode 100644 index 00000000..208982d5 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# 
SPDX-License-Identifier: BSD-3-Clause +"""Bayesian-convergence factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class BayesianConvergenceFactory(FactoryBase): + """Create Bayesian-convergence categories by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py new file mode 100644 index 00000000..b8e4f251 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.bayesian_parameter_posteriors.default import ( + BayesianParameterPosteriorItem, +) +from easydiffraction.analysis.categories.bayesian_parameter_posteriors.default import ( + BayesianParameterPosteriors, +) +from easydiffraction.analysis.categories.bayesian_parameter_posteriors.factory import ( + BayesianParameterPosteriorsFactory, +) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py new file mode 100644 index 00000000..872d9d8c --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py @@ -0,0 +1,294 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian parameter posterior summary rows.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.bayesian_parameter_posteriors.factory import ( + BayesianParameterPosteriorsFactory, +) +from easydiffraction.core.category import CategoryCollection +from 
easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.validation import RegexValidator +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +class BayesianParameterPosteriorItem(CategoryItem): + """Single persisted Bayesian parameter posterior summary row.""" + + _category_code = 'bayesian_parameter_posterior' + _category_entry_name = 'unique_name' + + def __init__(self) -> None: + super().__init__() + self._order_index = NumericDescriptor( + name='order_index', + description='Parameter column order in posterior sample arrays.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.order_index']), + ) + self._unique_name = StringDescriptor( + name='unique_name', + description='Unique EasyDiffraction parameter name.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), + ), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.unique_name']), + ) + self._display_name = StringDescriptor( + name='display_name', + description='Human-readable parameter label.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.display_name']), + ) + self._best_sample_value = NumericDescriptor( + name='best_sample_value', + description='Committed sampled parameter value.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_bayesian_parameter_posterior.best_sample_value'] + ), + ) + self._median = NumericDescriptor( + name='median', + description='Posterior median value.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.median']), + ) + 
self._uncertainty = NumericDescriptor( + name='uncertainty', + description='Posterior standard deviation.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.uncertainty']), + ) + self._interval_68_lower = NumericDescriptor( + name='interval_68_lower', + description='Lower bound of the 68% credible interval.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_bayesian_parameter_posterior.interval_68_lower'] + ), + ) + self._interval_68_upper = NumericDescriptor( + name='interval_68_upper', + description='Upper bound of the 68% credible interval.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_bayesian_parameter_posterior.interval_68_upper'] + ), + ) + self._interval_95_lower = NumericDescriptor( + name='interval_95_lower', + description='Lower bound of the 95% credible interval.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_bayesian_parameter_posterior.interval_95_lower'] + ), + ) + self._interval_95_upper = NumericDescriptor( + name='interval_95_upper', + description='Upper bound of the 95% credible interval.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler( + names=['_bayesian_parameter_posterior.interval_95_upper'] + ), + ) + self._ess_bulk = NumericDescriptor( + name='ess_bulk', + description='Bulk effective sample size when available.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.ess_bulk']), + ) + self._r_hat = NumericDescriptor( + name='r_hat', + description='Rank-normalized split-R-hat when available.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.r_hat']), + ) + + @property + def order_index(self) -> NumericDescriptor: + """Parameter column order in 
posterior sample arrays.""" + return self._order_index + + def _set_order_index(self, value: int | float) -> None: + """Set the order index for internal callers.""" + self._order_index.value = value + + @property + def unique_name(self) -> StringDescriptor: + """Unique EasyDiffraction parameter name.""" + return self._unique_name + + def _set_unique_name(self, value: str) -> None: + """Set the unique parameter name for internal callers.""" + self._unique_name.value = value + + @property + def display_name(self) -> StringDescriptor: + """Human-readable parameter label.""" + return self._display_name + + def _set_display_name(self, value: str) -> None: + """Set the display name for internal callers.""" + self._display_name.value = value + + @property + def best_sample_value(self) -> NumericDescriptor: + """Committed sampled parameter value.""" + return self._best_sample_value + + def _set_best_sample_value(self, value: int | float | None) -> None: + """Set the best sampled parameter value for internal callers.""" + self._best_sample_value.value = value + + @property + def median(self) -> NumericDescriptor: + """Posterior median value.""" + return self._median + + def _set_median(self, value: int | float | None) -> None: + """Set the posterior median for internal callers.""" + self._median.value = value + + @property + def uncertainty(self) -> NumericDescriptor: + """Posterior standard deviation.""" + return self._uncertainty + + def _set_uncertainty(self, value: int | float | None) -> None: + """Set the posterior uncertainty for internal callers.""" + self._uncertainty.value = value + + @property + def interval_68_lower(self) -> NumericDescriptor: + """Lower bound of the 68% credible interval.""" + return self._interval_68_lower + + def _set_interval_68_lower(self, value: int | float | None) -> None: + """Set the 68% interval lower bound for internal callers.""" + self._interval_68_lower.value = value + + @property + def interval_68_upper(self) -> NumericDescriptor: 
+ """Upper bound of the 68% credible interval.""" + return self._interval_68_upper + + def _set_interval_68_upper(self, value: int | float | None) -> None: + """Set the 68% interval upper bound for internal callers.""" + self._interval_68_upper.value = value + + @property + def interval_95_lower(self) -> NumericDescriptor: + """Lower bound of the 95% credible interval.""" + return self._interval_95_lower + + def _set_interval_95_lower(self, value: int | float | None) -> None: + """Set the 95% interval lower bound for internal callers.""" + self._interval_95_lower.value = value + + @property + def interval_95_upper(self) -> NumericDescriptor: + """Upper bound of the 95% credible interval.""" + return self._interval_95_upper + + def _set_interval_95_upper(self, value: int | float | None) -> None: + """Set the 95% interval upper bound for internal callers.""" + self._interval_95_upper.value = value + + @property + def ess_bulk(self) -> NumericDescriptor: + """Bulk effective sample size when available.""" + return self._ess_bulk + + def _set_ess_bulk(self, value: int | float | None) -> None: + """Set the ESS bulk value for internal callers.""" + self._ess_bulk.value = value + + @property + def r_hat(self) -> NumericDescriptor: + """Rank-normalized split-R-hat when available.""" + return self._r_hat + + def _set_r_hat(self, value: int | float | None) -> None: + """Set the R-hat value for internal callers.""" + self._r_hat.value = value + + +@BayesianParameterPosteriorsFactory.register +class BayesianParameterPosteriors(CategoryCollection): + """Collection of persisted Bayesian parameter posterior summaries.""" + + type_info = TypeInfo( + tag='default', + description='Persisted Bayesian parameter posterior summaries', + ) + + def __init__(self) -> None: + super().__init__(item_type=BayesianParameterPosteriorItem) + + def create( + self, + *, + order_index: int | float, + unique_name: str, + display_name: str, + best_sample_value: int | float | None = None, + median: int 
| float | None = None, + uncertainty: int | float | None = None, + interval_68_lower: int | float | None = None, + interval_68_upper: int | float | None = None, + interval_95_lower: int | float | None = None, + interval_95_upper: int | float | None = None, + ess_bulk: int | float | None = None, + r_hat: int | float | None = None, + ) -> None: + """ + Create a persisted Bayesian parameter posterior summary row. + + Parameters + ---------- + order_index : int | float + Parameter column order in posterior sample arrays. + unique_name : str + Unique EasyDiffraction parameter name. + display_name : str + Human-readable parameter label. + best_sample_value : int | float | None, default=None + Committed sampled parameter value. + median : int | float | None, default=None + Posterior median value. + uncertainty : int | float | None, default=None + Posterior standard deviation. + interval_68_lower : int | float | None, default=None + Lower bound of the 68% credible interval. + interval_68_upper : int | float | None, default=None + Upper bound of the 68% credible interval. + interval_95_lower : int | float | None, default=None + Lower bound of the 95% credible interval. + interval_95_upper : int | float | None, default=None + Upper bound of the 95% credible interval. + ess_bulk : int | float | None, default=None + Bulk effective sample size when available. + r_hat : int | float | None, default=None + Rank-normalized split-R-hat when available. 
+ """ + item = BayesianParameterPosteriorItem() + item._set_order_index(order_index) + item._set_unique_name(unique_name) + item._set_display_name(display_name) + item._set_best_sample_value(best_sample_value) + item._set_median(median) + item._set_uncertainty(uncertainty) + item._set_interval_68_lower(interval_68_lower) + item._set_interval_68_upper(interval_68_upper) + item._set_interval_95_lower(interval_95_lower) + item._set_interval_95_upper(interval_95_upper) + item._set_ess_bulk(ess_bulk) + item._set_r_hat(r_hat) + self.add(item) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py new file mode 100644 index 00000000..2069feec --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian-parameter-posteriors factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class BayesianParameterPosteriorsFactory(FactoryBase): + """Create Bayesian-parameter-posterior collections by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_result/__init__.py b/src/easydiffraction/analysis/categories/bayesian_result/__init__.py new file mode 100644 index 00000000..e5f58ab4 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_result/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.bayesian_result.default import BayesianResult +from easydiffraction.analysis.categories.bayesian_result.factory import BayesianResultFactory \ No newline 
at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_result/default.py b/src/easydiffraction/analysis/categories/bayesian_result/default.py new file mode 100644 index 00000000..d86dd687 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_result/default.py @@ -0,0 +1,209 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian fit-result metadata category.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.bayesian_result.factory import BayesianResultFactory +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +@BayesianResultFactory.register +class BayesianResult(CategoryItem): + """Persisted Bayesian fit-result metadata.""" + + _category_code = 'bayesian_result' + + type_info = TypeInfo( + tag='default', + description='Persisted Bayesian fit-result metadata', + ) + + def __init__(self) -> None: + super().__init__() + self._sampler_name = StringDescriptor( + name='sampler_name', + description='Name of the persisted Bayesian sampler.', + value_spec=AttributeSpec(default='dream'), + cif_handler=CifHandler(names=['_bayesian_result.sampler_name']), + ) + self._point_estimate_name = StringDescriptor( + name='point_estimate_name', + description='Committed sampled point estimate name.', + value_spec=AttributeSpec(default='best_sample'), + cif_handler=CifHandler(names=['_bayesian_result.point_estimate_name']), + ) + self._success = BoolDescriptor( + name='success', + description='Whether the persisted Bayesian fit produced usable results.', + value_spec=AttributeSpec(default=False), + 
cif_handler=CifHandler(names=['_bayesian_result.success']), + ) + self._sampler_completed = BoolDescriptor( + name='sampler_completed', + description='Whether the sampler completed and returned posterior data.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_bayesian_result.sampler_completed']), + ) + self._best_log_posterior = NumericDescriptor( + name='best_log_posterior', + description='Best log-posterior value reported by the sampler.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_result.best_log_posterior']), + ) + self._credible_interval_inner = NumericDescriptor( + name='credible_interval_inner', + description='Inner credible-interval level used in summaries.', + value_spec=AttributeSpec(default=0.68), + cif_handler=CifHandler(names=['_bayesian_result.credible_interval_inner']), + ) + self._credible_interval_outer = NumericDescriptor( + name='credible_interval_outer', + description='Outer credible-interval level used in summaries.', + value_spec=AttributeSpec(default=0.95), + cif_handler=CifHandler(names=['_bayesian_result.credible_interval_outer']), + ) + self._has_posterior_samples = BoolDescriptor( + name='has_posterior_samples', + description='Whether posterior samples were persisted.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_bayesian_result.has_posterior_samples']), + ) + self._has_distribution_cache = BoolDescriptor( + name='has_distribution_cache', + description='Whether distribution-cache manifests were persisted.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_bayesian_result.has_distribution_cache']), + ) + self._has_pair_cache = BoolDescriptor( + name='has_pair_cache', + description='Whether pair-cache manifests were persisted.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_bayesian_result.has_pair_cache']), + ) + self._has_posterior_predictive = BoolDescriptor( + 
name='has_posterior_predictive', + description='Whether posterior predictive manifests were persisted.', + value_spec=AttributeSpec(default=False), + cif_handler=CifHandler(names=['_bayesian_result.has_posterior_predictive']), + ) + self._sidecar_file = StringDescriptor( + name='sidecar_file', + description='Relative path to the persisted Bayesian HDF5 sidecar.', + value_spec=AttributeSpec(default='results.h5'), + cif_handler=CifHandler(names=['_bayesian_result.sidecar_file']), + ) + + @property + def sampler_name(self) -> StringDescriptor: + """Name of the persisted Bayesian sampler.""" + return self._sampler_name + + def _set_sampler_name(self, value: str) -> None: + """Set the sampler name for internal callers.""" + self._sampler_name.value = value + + @property + def point_estimate_name(self) -> StringDescriptor: + """Committed sampled point estimate name.""" + return self._point_estimate_name + + def _set_point_estimate_name(self, value: str) -> None: + """Set the point-estimate name for internal callers.""" + self._point_estimate_name.value = value + + @property + def success(self) -> BoolDescriptor: + """Whether the persisted Bayesian fit produced usable results.""" + return self._success + + def _set_success(self, value: bool) -> None: + """Set the success flag for internal callers.""" + self._success.value = value + + @property + def sampler_completed(self) -> BoolDescriptor: + """Whether the sampler completed and returned posterior data.""" + return self._sampler_completed + + def _set_sampler_completed(self, value: bool) -> None: + """Set the sampler-completed flag for internal callers.""" + self._sampler_completed.value = value + + @property + def best_log_posterior(self) -> NumericDescriptor: + """Best log-posterior value reported by the sampler.""" + return self._best_log_posterior + + def _set_best_log_posterior(self, value: int | float | None) -> None: + """Set the best log-posterior for internal callers.""" + self._best_log_posterior.value = value 
+ + @property + def credible_interval_inner(self) -> NumericDescriptor: + """Inner credible-interval level used in summaries.""" + return self._credible_interval_inner + + def _set_credible_interval_inner(self, value: int | float) -> None: + """Set the inner credible-interval level for internal callers.""" + self._credible_interval_inner.value = value + + @property + def credible_interval_outer(self) -> NumericDescriptor: + """Outer credible-interval level used in summaries.""" + return self._credible_interval_outer + + def _set_credible_interval_outer(self, value: int | float) -> None: + """Set the outer credible-interval level for internal callers.""" + self._credible_interval_outer.value = value + + @property + def has_posterior_samples(self) -> BoolDescriptor: + """Whether posterior samples were persisted.""" + return self._has_posterior_samples + + def _set_has_posterior_samples(self, value: bool) -> None: + """Set the posterior-samples flag for internal callers.""" + self._has_posterior_samples.value = value + + @property + def has_distribution_cache(self) -> BoolDescriptor: + """Whether distribution-cache manifests were persisted.""" + return self._has_distribution_cache + + def _set_has_distribution_cache(self, value: bool) -> None: + """Set the distribution-cache flag for internal callers.""" + self._has_distribution_cache.value = value + + @property + def has_pair_cache(self) -> BoolDescriptor: + """Whether pair-cache manifests were persisted.""" + return self._has_pair_cache + + def _set_has_pair_cache(self, value: bool) -> None: + """Set the pair-cache flag for internal callers.""" + self._has_pair_cache.value = value + + @property + def has_posterior_predictive(self) -> BoolDescriptor: + """Whether posterior predictive manifests were persisted.""" + return self._has_posterior_predictive + + def _set_has_posterior_predictive(self, value: bool) -> None: + """Set the posterior-predictive flag for internal callers.""" + self._has_posterior_predictive.value 
= value + + @property + def sidecar_file(self) -> StringDescriptor: + """Relative path to the persisted Bayesian HDF5 sidecar.""" + return self._sidecar_file + + def _set_sidecar_file(self, value: str) -> None: + """Set the sidecar-file path for internal callers.""" + self._sidecar_file.value = value \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_result/factory.py b/src/easydiffraction/analysis/categories/bayesian_result/factory.py new file mode 100644 index 00000000..2d7decc3 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_result/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian-result factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class BayesianResultFactory(FactoryBase): + """Create Bayesian-result categories by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py b/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py new file mode 100644 index 00000000..efead92d --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.bayesian_sampler.default import BayesianSampler +from easydiffraction.analysis.categories.bayesian_sampler.factory import BayesianSamplerFactory \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py new file mode 100644 index 00000000..27b42512 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py @@ -0,0 +1,134 @@ 
+# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Resolved Bayesian sampler settings category.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.bayesian_sampler.factory import BayesianSamplerFactory +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +@BayesianSamplerFactory.register +class BayesianSampler(CategoryItem): + """Persisted resolved Bayesian sampler settings.""" + + _category_code = 'bayesian_sampler' + + type_info = TypeInfo( + tag='default', + description='Persisted resolved Bayesian sampler settings', + ) + + def __init__(self) -> None: + super().__init__() + self._steps = NumericDescriptor( + name='steps', + description='Resolved number of sampler steps.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_sampler.steps']), + ) + self._burn = NumericDescriptor( + name='burn', + description='Resolved burn-in count.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_sampler.burn']), + ) + self._thin = NumericDescriptor( + name='thin', + description='Resolved thinning interval.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_sampler.thin']), + ) + self._pop = NumericDescriptor( + name='pop', + description='Resolved population size.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_sampler.pop']), + ) + self._parallel = BoolDescriptor( + name='parallel', + description='Whether sampling ran in parallel.', + value_spec=AttributeSpec(default=False), + 
cif_handler=CifHandler(names=['_bayesian_sampler.parallel']), + ) + self._init = StringDescriptor( + name='init', + description='Resolved DREAM initialization mode.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_bayesian_sampler.init']), + ) + self._random_seed = NumericDescriptor( + name='random_seed', + description='Resolved random seed used by the sampler.', + value_spec=AttributeSpec(default=None, allow_none=True), + cif_handler=CifHandler(names=['_bayesian_sampler.random_seed']), + ) + + @property + def steps(self) -> NumericDescriptor: + """Resolved number of sampler steps.""" + return self._steps + + def _set_steps(self, value: int | float) -> None: + """Set the step count for internal callers.""" + self._steps.value = value + + @property + def burn(self) -> NumericDescriptor: + """Resolved burn-in count.""" + return self._burn + + def _set_burn(self, value: int | float) -> None: + """Set the burn-in count for internal callers.""" + self._burn.value = value + + @property + def thin(self) -> NumericDescriptor: + """Resolved thinning interval.""" + return self._thin + + def _set_thin(self, value: int | float) -> None: + """Set the thinning interval for internal callers.""" + self._thin.value = value + + @property + def pop(self) -> NumericDescriptor: + """Resolved population size.""" + return self._pop + + def _set_pop(self, value: int | float) -> None: + """Set the population size for internal callers.""" + self._pop.value = value + + @property + def parallel(self) -> BoolDescriptor: + """Whether sampling ran in parallel.""" + return self._parallel + + def _set_parallel(self, value: bool) -> None: + """Set the parallel flag for internal callers.""" + self._parallel.value = value + + @property + def init(self) -> StringDescriptor: + """Resolved DREAM initialization mode.""" + return self._init + + def _set_init(self, value: str) -> None: + """Set the initialization mode for internal callers.""" + self._init.value = value + + 
@property + def random_seed(self) -> NumericDescriptor: + """Resolved random seed used by the sampler.""" + return self._random_seed + + def _set_random_seed(self, value: int | float | None) -> None: + """Set the random seed for internal callers.""" + self._random_seed.value = value \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py b/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py new file mode 100644 index 00000000..66858e6d --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian-sampler factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class BayesianSamplerFactory(FactoryBase): + """Create Bayesian-sampler categories by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file From b28197c02af04e4a48a975a83957c00a7fe61754 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:48:15 +0200 Subject: [PATCH 16/72] Add Bayesian fit-cache manifest categories --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/__init__.py | 25 ++ .../analysis/categories/__init__.py | 16 + .../bayesian_distribution_caches/__init__.py | 12 + .../bayesian_distribution_caches/default.py | 151 ++++++++++ .../bayesian_distribution_caches/factory.py | 17 ++ .../bayesian_pair_caches/__init__.py | 12 + .../bayesian_pair_caches/default.py | 284 ++++++++++++++++++ .../bayesian_pair_caches/factory.py | 17 ++ .../bayesian_predictive_datasets/__init__.py | 12 + .../bayesian_predictive_datasets/default.py | 263 ++++++++++++++++ .../bayesian_predictive_datasets/factory.py | 17 ++ 12 files changed, 827 insertions(+), 1 deletion(-) create mode 100644 
src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_pair_caches/factory.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_predictive_datasets/__init__.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py create mode 100644 src/easydiffraction/analysis/categories/bayesian_predictive_datasets/factory.py diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 3e972789..bb9a1492 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -37,7 +37,7 @@ structure or experiment CIF files. - [x] Phase 1 step 2: add common fit-state category models. - [x] Phase 1 step 3: add deterministic result category models. - [x] Phase 1 step 4: add Bayesian metadata category models. -- [ ] Phase 1 step 5: add Bayesian cache manifest category models. +- [x] Phase 1 step 5: add Bayesian cache manifest category models. - [ ] Phase 1 step 6: wire analysis CIF save/load for fit state. - [ ] Phase 1 step 7: capture fit projections after fitting. - [ ] Phase 1 step 8: add HDF5 sidecar save/load. 
diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index 8b9c9777..c08c60c8 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -5,6 +5,15 @@ from easydiffraction.analysis.categories.bayesian_convergence import ( BayesianConvergenceFactory, ) +from easydiffraction.analysis.categories.bayesian_distribution_caches import ( + BayesianDistributionCacheItem, +) +from easydiffraction.analysis.categories.bayesian_distribution_caches import ( + BayesianDistributionCaches, +) +from easydiffraction.analysis.categories.bayesian_distribution_caches import ( + BayesianDistributionCachesFactory, +) from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriorItem, ) @@ -14,6 +23,22 @@ from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriorsFactory, ) +from easydiffraction.analysis.categories.bayesian_pair_caches import ( + BayesianPairCacheItem, +) +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches +from easydiffraction.analysis.categories.bayesian_pair_caches import ( + BayesianPairCachesFactory, +) +from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( + BayesianPredictiveDatasetItem, +) +from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( + BayesianPredictiveDatasets, +) +from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( + BayesianPredictiveDatasetsFactory, +) from easydiffraction.analysis.categories.bayesian_result import BayesianResult from easydiffraction.analysis.categories.bayesian_result import BayesianResultFactory from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index 2659c3aa..f6abbd51 100644 --- 
a/src/easydiffraction/analysis/categories/__init__.py +++ b/src/easydiffraction/analysis/categories/__init__.py @@ -4,12 +4,28 @@ from easydiffraction.analysis.categories.aliases import Alias from easydiffraction.analysis.categories.aliases import Aliases from easydiffraction.analysis.categories.bayesian_convergence import BayesianConvergence +from easydiffraction.analysis.categories.bayesian_distribution_caches import ( + BayesianDistributionCacheItem, +) +from easydiffraction.analysis.categories.bayesian_distribution_caches import ( + BayesianDistributionCaches, +) from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriorItem, ) from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriors, ) +from easydiffraction.analysis.categories.bayesian_pair_caches import ( + BayesianPairCacheItem, +) +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches +from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( + BayesianPredictiveDatasetItem, +) +from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( + BayesianPredictiveDatasets, +) from easydiffraction.analysis.categories.bayesian_result import BayesianResult from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.constraints import Constraint diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py new file mode 100644 index 00000000..590e1f6b --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.bayesian_distribution_caches.default import ( + 
BayesianDistributionCacheItem, +) +from easydiffraction.analysis.categories.bayesian_distribution_caches.default import ( + BayesianDistributionCaches, +) +from easydiffraction.analysis.categories.bayesian_distribution_caches.factory import ( + BayesianDistributionCachesFactory, +) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py new file mode 100644 index 00000000..d067f667 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py @@ -0,0 +1,151 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian distribution-cache manifest rows.""" + +from __future__ import annotations + +from easydiffraction.analysis.categories.bayesian_distribution_caches.factory import ( + BayesianDistributionCachesFactory, +) +from easydiffraction.core.category import CategoryCollection +from easydiffraction.core.category import CategoryItem +from easydiffraction.core.metadata import TypeInfo +from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.validation import RegexValidator +from easydiffraction.core.variable import NumericDescriptor +from easydiffraction.core.variable import StringDescriptor +from easydiffraction.io.cif.handler import CifHandler + + +class BayesianDistributionCacheItem(CategoryItem): + """Single persisted Bayesian distribution-cache manifest row.""" + + _category_code = 'bayesian_distribution_cache' + _category_entry_name = 'param_unique_name' + + def __init__(self) -> None: + super().__init__() + self._param_unique_name = StringDescriptor( + name='param_unique_name', + description='Unique parameter name for the cached distribution.', + value_spec=AttributeSpec( + default='_', + validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), + ), + 
cif_handler=CifHandler(names=['_bayesian_distribution_cache.param_unique_name']), + ) + self._x_path = StringDescriptor( + name='x_path', + description='HDF5 dataset path for the distribution x-grid.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_bayesian_distribution_cache.x_path']), + ) + self._density_path = StringDescriptor( + name='density_path', + description='HDF5 dataset path for the cached density values.', + value_spec=AttributeSpec(default=''), + cif_handler=CifHandler(names=['_bayesian_distribution_cache.density_path']), + ) + self._n_grid = NumericDescriptor( + name='n_grid', + description='Number of grid points in the cached distribution.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_distribution_cache.n_grid']), + ) + self._n_draws_cached = NumericDescriptor( + name='n_draws_cached', + description='Number of draws summarized into the cached distribution.', + value_spec=AttributeSpec(default=0), + cif_handler=CifHandler(names=['_bayesian_distribution_cache.n_draws_cached']), + ) + + @property + def param_unique_name(self) -> StringDescriptor: + """Unique parameter name for the cached distribution.""" + return self._param_unique_name + + def _set_param_unique_name(self, value: str) -> None: + """Set the unique parameter name for internal callers.""" + self._param_unique_name.value = value + + @property + def x_path(self) -> StringDescriptor: + """HDF5 dataset path for the distribution x-grid.""" + return self._x_path + + def _set_x_path(self, value: str) -> None: + """Set the x-grid dataset path for internal callers.""" + self._x_path.value = value + + @property + def density_path(self) -> StringDescriptor: + """HDF5 dataset path for the cached density values.""" + return self._density_path + + def _set_density_path(self, value: str) -> None: + """Set the density dataset path for internal callers.""" + self._density_path.value = value + + @property + def n_grid(self) -> 
NumericDescriptor: + """Number of grid points in the cached distribution.""" + return self._n_grid + + def _set_n_grid(self, value: int | float) -> None: + """Set the grid-size count for internal callers.""" + self._n_grid.value = value + + @property + def n_draws_cached(self) -> NumericDescriptor: + """Number of draws summarized into the cached distribution.""" + return self._n_draws_cached + + def _set_n_draws_cached(self, value: int | float) -> None: + """Set the cached-draw count for internal callers.""" + self._n_draws_cached.value = value + + +@BayesianDistributionCachesFactory.register +class BayesianDistributionCaches(CategoryCollection): + """Collection of persisted Bayesian distribution-cache manifests.""" + + type_info = TypeInfo( + tag='default', + description='Persisted Bayesian distribution-cache manifests', + ) + + def __init__(self) -> None: + super().__init__(item_type=BayesianDistributionCacheItem) + + def create( + self, + *, + param_unique_name: str, + x_path: str, + density_path: str, + n_grid: int | float, + n_draws_cached: int | float, + ) -> None: + """ + Create a persisted Bayesian distribution-cache manifest row. + + Parameters + ---------- + param_unique_name : str + Unique parameter name for the cached distribution. + x_path : str + HDF5 dataset path for the distribution x-grid. + density_path : str + HDF5 dataset path for the cached density values. + n_grid : int | float + Number of grid points in the cached distribution. + n_draws_cached : int | float + Number of draws summarized into the cached distribution. 
+ """ + item = BayesianDistributionCacheItem() + item._set_param_unique_name(param_unique_name) + item._set_x_path(x_path) + item._set_density_path(density_path) + item._set_n_grid(n_grid) + item._set_n_draws_cached(n_draws_cached) + self.add(item) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py new file mode 100644 index 00000000..30f1d4e9 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian-distribution-caches factory.""" + +from __future__ import annotations + +from typing import ClassVar + +from easydiffraction.core.factory import FactoryBase + + +class BayesianDistributionCachesFactory(FactoryBase): + """Create Bayesian-distribution-cache collections by tag.""" + + _default_rules: ClassVar[dict] = { + frozenset(): 'default', + } \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py b/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py new file mode 100644 index 00000000..7bf4be16 --- /dev/null +++ b/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause + +from easydiffraction.analysis.categories.bayesian_pair_caches.default import ( + BayesianPairCacheItem, +) +from easydiffraction.analysis.categories.bayesian_pair_caches.default import ( + BayesianPairCaches, +) +from easydiffraction.analysis.categories.bayesian_pair_caches.factory import ( + BayesianPairCachesFactory, +) \ No newline at end of file diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py 
def _normalized_parameter_pair(
    param_unique_name_x: str,
    param_unique_name_y: str,
) -> tuple[str, str]:
    """
    Put two parameter names into one canonical order.

    Parameters
    ----------
    param_unique_name_x : str
        One unique parameter name of the pair.
    param_unique_name_y : str
        The other unique parameter name of the pair.

    Returns
    -------
    tuple[str, str]
        Both names with the lower-comparing one first, so swapping
        the arguments yields the same tuple.
    """
    lower, upper = sorted((param_unique_name_x, param_unique_name_y))
    return lower, upper


def _default_pair_cache_id(
    *,
    param_unique_name_x: str,
    param_unique_name_y: str,
) -> str:
    """
    Build the id used for a pair-cache row when none is supplied.

    Parameters
    ----------
    param_unique_name_x : str
        One unique parameter name of the pair.
    param_unique_name_y : str
        The other unique parameter name of the pair.

    Returns
    -------
    str
        The two names in canonical order, joined by ``:``.
    """
    ordered_names = _normalized_parameter_pair(param_unique_name_x, param_unique_name_y)
    return ':'.join(ordered_names)


class BayesianPairCacheItem(CategoryItem):
    """One row of the persisted Bayesian pair-cache manifest."""

    _category_code = 'bayesian_pair_cache'
    _category_entry_name = 'id'

    def __init__(self) -> None:
        super().__init__()

        def cif_tag(field: str) -> CifHandler:
            # All columns of this category share one CIF prefix.
            return CifHandler(names=[f'_bayesian_pair_cache.{field}'])

        def parameter_name(field: str, description: str) -> StringDescriptor:
            # Parameter names start with a letter or underscore; the
            # remaining characters may also be digits and dots.
            return StringDescriptor(
                name=field,
                description=description,
                value_spec=AttributeSpec(
                    default='_',
                    validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'),
                ),
                cif_handler=cif_tag(field),
            )

        def dataset_path(field: str, description: str) -> StringDescriptor:
            # Path columns default to the empty string.
            return StringDescriptor(
                name=field,
                description=description,
                value_spec=AttributeSpec(default=''),
                cif_handler=cif_tag(field),
            )

        def counter(field: str, description: str) -> NumericDescriptor:
            # Size and count columns default to zero.
            return NumericDescriptor(
                name=field,
                description=description,
                value_spec=AttributeSpec(default=0),
                cif_handler=cif_tag(field),
            )

        # Attributes are assigned in the same order as the columns were
        # originally declared.
        self._param_unique_name_x = parameter_name(
            'param_unique_name_x',
            'First unique parameter name in the cached pair.',
        )
        self._param_unique_name_y = parameter_name(
            'param_unique_name_y',
            'Second unique parameter name in the cached pair.',
        )
        # The id additionally allows ':' and '-', so the default
        # "<x>:<y>" form validates.
        self._id = StringDescriptor(
            name='id',
            description='Stable identifier for the cached parameter pair.',
            value_spec=AttributeSpec(
                default='_',
                validator=RegexValidator(pattern=r'^[A-Za-z0-9_.:-]+$'),
            ),
            cif_handler=cif_tag('id'),
        )
        self._x_path = dataset_path(
            'x_path',
            'HDF5 dataset path for the pair-cache x-grid.',
        )
        self._y_path = dataset_path(
            'y_path',
            'HDF5 dataset path for the pair-cache y-grid.',
        )
        self._density_path = dataset_path(
            'density_path',
            'HDF5 dataset path for the pair-cache density grid.',
        )
        self._contour_level_path = dataset_path(
            'contour_level_path',
            'HDF5 dataset path for cached contour levels.',
        )
        self._n_grid_x = counter(
            'n_grid_x',
            'Number of x-grid points in the cached pair.',
        )
        self._n_grid_y = counter(
            'n_grid_y',
            'Number of y-grid points in the cached pair.',
        )
        self._n_draws_cached = counter(
            'n_draws_cached',
            'Number of draws summarized into the cached pair.',
        )

    @property
    def param_unique_name_x(self) -> StringDescriptor:
        """Descriptor holding the first parameter name of the pair."""
        return self._param_unique_name_x

    def _set_param_unique_name_x(self, value: str) -> None:
        """Internal setter for the first parameter name."""
        self._param_unique_name_x.value = value

    @property
    def param_unique_name_y(self) -> StringDescriptor:
        """Descriptor holding the second parameter name of the pair."""
        return self._param_unique_name_y

    def _set_param_unique_name_y(self, value: str) -> None:
        """Internal setter for the second parameter name."""
        self._param_unique_name_y.value = value

    @property
    def id(self) -> StringDescriptor:
        """Descriptor holding the row identifier."""
        return self._id

    def _set_id(self, value: str) -> None:
        """Internal setter for the row identifier."""
        self._id.value = value

    @property
    def x_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the x-grid."""
        return self._x_path

    def _set_x_path(self, value: str) -> None:
        """Internal setter for the x-grid path."""
        self._x_path.value = value

    @property
    def y_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the y-grid."""
        return self._y_path

    def _set_y_path(self, value: str) -> None:
        """Internal setter for the y-grid path."""
        self._y_path.value = value

    @property
    def density_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the density grid."""
        return self._density_path

    def _set_density_path(self, value: str) -> None:
        """Internal setter for the density-grid path."""
        self._density_path.value = value

    @property
    def contour_level_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the contour levels."""
        return self._contour_level_path

    def _set_contour_level_path(self, value: str) -> None:
        """Internal setter for the contour-level path."""
        self._contour_level_path.value = value

    @property
    def n_grid_x(self) -> NumericDescriptor:
        """Descriptor holding the number of x-grid points."""
        return self._n_grid_x

    def _set_n_grid_x(self, value: int | float) -> None:
        """Internal setter for the number of x-grid points."""
        self._n_grid_x.value = value

    @property
    def n_grid_y(self) -> NumericDescriptor:
        """Descriptor holding the number of y-grid points."""
        return self._n_grid_y

    def _set_n_grid_y(self, value: int | float) -> None:
        """Internal setter for the number of y-grid points."""
        self._n_grid_y.value = value

    @property
    def n_draws_cached(self) -> NumericDescriptor:
        """Descriptor holding the number of summarized draws."""
        return self._n_draws_cached

    def _set_n_draws_cached(self, value: int | float) -> None:
        """Internal setter for the number of summarized draws."""
        self._n_draws_cached.value = value


@BayesianPairCachesFactory.register
class BayesianPairCaches(CategoryCollection):
    """Collection of Bayesian pair-cache manifest rows."""

    type_info = TypeInfo(
        tag='default',
        description='Persisted Bayesian pair-cache manifests',
    )

    def __init__(self) -> None:
        super().__init__(item_type=BayesianPairCacheItem)

    def create(
        self,
        *,
        param_unique_name_x: str,
        param_unique_name_y: str,
        x_path: str,
        y_path: str,
        density_path: str,
        contour_level_path: str,
        n_grid_x: int | float,
        n_grid_y: int | float,
        n_draws_cached: int | float,
        id: str | None = None,
    ) -> None:
        """
        Add one pair-cache manifest row to the collection.

        The two parameter names are stored in canonical order, so the
        row is the same whichever name is passed as ``x``.

        Parameters
        ----------
        param_unique_name_x : str
            First unique parameter name in the cached pair.
        param_unique_name_y : str
            Second unique parameter name in the cached pair.
        x_path : str
            HDF5 dataset path for the pair-cache x-grid.
        y_path : str
            HDF5 dataset path for the pair-cache y-grid.
        density_path : str
            HDF5 dataset path for the pair-cache density grid.
        contour_level_path : str
            HDF5 dataset path for cached contour levels.
        n_grid_x : int | float
            Number of x-grid points in the cached pair.
        n_grid_y : int | float
            Number of y-grid points in the cached pair.
        n_draws_cached : int | float
            Number of draws summarized into the cached pair.
        id : str | None, default=None
            Explicit row id. A falsy value (``None`` or an empty
            string) selects the id derived from the ordered names.
        """
        first_name, second_name = _normalized_parameter_pair(
            param_unique_name_x,
            param_unique_name_y,
        )

        row = BayesianPairCacheItem()
        row._set_param_unique_name_x(first_name)
        row._set_param_unique_name_y(second_name)
        row._set_x_path(x_path)
        row._set_y_path(y_path)
        row._set_density_path(density_path)
        row._set_contour_level_path(contour_level_path)
        row._set_n_grid_x(n_grid_x)
        row._set_n_grid_y(n_grid_y)
        row._set_n_draws_cached(n_draws_cached)

        if id:
            row_id = id
        else:
            row_id = _default_pair_cache_id(
                param_unique_name_x=first_name,
                param_unique_name_y=second_name,
            )
        row._set_id(row_id)

        self.add(row)
class BayesianPairCachesFactory(FactoryBase):
    """Factory that resolves Bayesian pair-cache collections by tag."""

    # With no conditions given, the 'default' tag is selected.
    _default_rules: ClassVar[dict] = {frozenset(): 'default'}


class BayesianPredictiveDatasetsFactory(FactoryBase):
    """Factory that resolves predictive-dataset collections by tag."""

    # With no conditions given, the 'default' tag is selected.
    _default_rules: ClassVar[dict] = {frozenset(): 'default'}


class BayesianPredictiveDatasetItem(CategoryItem):
    """One row of the persisted Bayesian predictive-dataset manifest."""

    _category_code = 'bayesian_predictive_dataset'
    _category_entry_name = 'experiment_name'

    def __init__(self) -> None:
        super().__init__()

        def cif_tag(field: str) -> CifHandler:
            # All columns of this category share one CIF prefix.
            return CifHandler(names=[f'_bayesian_predictive_dataset.{field}'])

        def required_text(field: str, description: str) -> StringDescriptor:
            # Mandatory text columns default to the empty string.
            return StringDescriptor(
                name=field,
                description=description,
                value_spec=AttributeSpec(default=''),
                cif_handler=cif_tag(field),
            )

        def optional_path(field: str, description: str) -> StringDescriptor:
            # Optional datasets default to None, which is an allowed
            # value for these columns.
            return StringDescriptor(
                name=field,
                description=description,
                value_spec=AttributeSpec(default=None, allow_none=True),
                cif_handler=cif_tag(field),
            )

        def counter(field: str, description: str) -> NumericDescriptor:
            # Size and count columns default to zero.
            return NumericDescriptor(
                name=field,
                description=description,
                value_spec=AttributeSpec(default=0),
                cif_handler=cif_tag(field),
            )

        # Attributes are assigned in the same order as the columns were
        # originally declared.
        self._experiment_name = required_text(
            'experiment_name',
            'Experiment name for the cached predictive dataset.',
        )
        self._x_axis_name = required_text(
            'x_axis_name',
            'Name of the predictive dataset x-axis.',
        )
        self._x_path = required_text(
            'x_path',
            'HDF5 dataset path for the predictive x-axis values.',
        )
        self._best_sample_prediction_path = required_text(
            'best_sample_prediction_path',
            'HDF5 dataset path for the committed predictive curve.',
        )
        self._lower_95_path = optional_path(
            'lower_95_path',
            'HDF5 dataset path for the lower 95% predictive band.',
        )
        self._upper_95_path = optional_path(
            'upper_95_path',
            'HDF5 dataset path for the upper 95% predictive band.',
        )
        self._lower_68_path = optional_path(
            'lower_68_path',
            'HDF5 dataset path for the lower 68% predictive band.',
        )
        self._upper_68_path = optional_path(
            'upper_68_path',
            'HDF5 dataset path for the upper 68% predictive band.',
        )
        self._draws_path = optional_path(
            'draws_path',
            'HDF5 dataset path for cached predictive draws.',
        )
        self._n_x = counter(
            'n_x',
            'Number of x-axis points in the cached predictive dataset.',
        )
        self._n_draws_cached = counter(
            'n_draws_cached',
            'Number of cached predictive draws.',
        )

    @property
    def experiment_name(self) -> StringDescriptor:
        """Descriptor holding the experiment name of the row."""
        return self._experiment_name

    def _set_experiment_name(self, value: str) -> None:
        """Internal setter for the experiment name."""
        self._experiment_name.value = value

    @property
    def x_axis_name(self) -> StringDescriptor:
        """Descriptor holding the x-axis name."""
        return self._x_axis_name

    def _set_x_axis_name(self, value: str) -> None:
        """Internal setter for the x-axis name."""
        self._x_axis_name.value = value

    @property
    def x_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the x-axis values."""
        return self._x_path

    def _set_x_path(self, value: str) -> None:
        """Internal setter for the x-axis values path."""
        self._x_path.value = value

    @property
    def best_sample_prediction_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the committed curve."""
        return self._best_sample_prediction_path

    def _set_best_sample_prediction_path(self, value: str) -> None:
        """Internal setter for the committed-curve path."""
        self._best_sample_prediction_path.value = value

    @property
    def lower_95_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the lower 95% band."""
        return self._lower_95_path

    def _set_lower_95_path(self, value: str | None) -> None:
        """Internal setter for the lower 95% band path."""
        self._lower_95_path.value = value

    @property
    def upper_95_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the upper 95% band."""
        return self._upper_95_path

    def _set_upper_95_path(self, value: str | None) -> None:
        """Internal setter for the upper 95% band path."""
        self._upper_95_path.value = value

    @property
    def lower_68_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the lower 68% band."""
        return self._lower_68_path

    def _set_lower_68_path(self, value: str | None) -> None:
        """Internal setter for the lower 68% band path."""
        self._lower_68_path.value = value

    @property
    def upper_68_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the upper 68% band."""
        return self._upper_68_path

    def _set_upper_68_path(self, value: str | None) -> None:
        """Internal setter for the upper 68% band path."""
        self._upper_68_path.value = value

    @property
    def draws_path(self) -> StringDescriptor:
        """Descriptor holding the HDF5 path of the cached draws."""
        return self._draws_path

    def _set_draws_path(self, value: str | None) -> None:
        """Internal setter for the cached-draws path."""
        self._draws_path.value = value

    @property
    def n_x(self) -> NumericDescriptor:
        """Descriptor holding the number of x-axis points."""
        return self._n_x

    def _set_n_x(self, value: int | float) -> None:
        """Internal setter for the number of x-axis points."""
        self._n_x.value = value

    @property
    def n_draws_cached(self) -> NumericDescriptor:
        """Descriptor holding the number of cached draws."""
        return self._n_draws_cached

    def _set_n_draws_cached(self, value: int | float) -> None:
        """Internal setter for the number of cached draws."""
        self._n_draws_cached.value = value


@BayesianPredictiveDatasetsFactory.register
class BayesianPredictiveDatasets(CategoryCollection):
    """Collection of Bayesian predictive-dataset manifest rows."""

    type_info = TypeInfo(
        tag='default',
        description='Persisted Bayesian predictive-dataset manifests',
    )

    def __init__(self) -> None:
        super().__init__(item_type=BayesianPredictiveDatasetItem)

    def create(
        self,
        *,
        experiment_name: str,
        x_axis_name: str,
        x_path: str,
        best_sample_prediction_path: str,
        lower_95_path: str | None = None,
        upper_95_path: str | None = None,
        lower_68_path: str | None = None,
        upper_68_path: str | None = None,
        draws_path: str | None = None,
        n_x: int | float,
        n_draws_cached: int | float,
    ) -> None:
        """
        Add one predictive-dataset manifest row to the collection.

        Parameters
        ----------
        experiment_name : str
            Experiment name for the cached predictive dataset.
        x_axis_name : str
            Name of the predictive dataset x-axis.
        x_path : str
            HDF5 dataset path for the predictive x-axis values.
        best_sample_prediction_path : str
            HDF5 dataset path for the committed predictive curve.
        lower_95_path : str | None, default=None
            HDF5 dataset path for the lower 95% predictive band.
        upper_95_path : str | None, default=None
            HDF5 dataset path for the upper 95% predictive band.
        lower_68_path : str | None, default=None
            HDF5 dataset path for the lower 68% predictive band.
        upper_68_path : str | None, default=None
            HDF5 dataset path for the upper 68% predictive band.
        draws_path : str | None, default=None
            HDF5 dataset path for cached predictive draws.
        n_x : int | float
            Number of x-axis points in the cached predictive dataset.
        n_draws_cached : int | float
            Number of cached predictive draws.
        """
        row = BayesianPredictiveDatasetItem()

        # Each value is paired with its internal setter; the setters
        # run in column order.
        assignments = (
            (row._set_experiment_name, experiment_name),
            (row._set_x_axis_name, x_axis_name),
            (row._set_x_path, x_path),
            (row._set_best_sample_prediction_path, best_sample_prediction_path),
            (row._set_lower_95_path, lower_95_path),
            (row._set_upper_95_path, upper_95_path),
            (row._set_lower_68_path, lower_68_path),
            (row._set_upper_68_path, upper_68_path),
            (row._set_draws_path, draws_path),
            (row._set_n_x, n_x),
            (row._set_n_draws_cached, n_draws_cached),
        )
        for assign, value in assignments:
            assign(value)

        self.add(row)
5c8a2513fd0b1f2786bd1e61ddb1dd7b5f3f8bd0 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:51:42 +0200 Subject: [PATCH 17/72] Add project save step to tutorial --- docs/docs/tutorials/ed-21.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/docs/tutorials/ed-21.py b/docs/docs/tutorials/ed-21.py index 87f003e4..b6a52791 100644 --- a/docs/docs/tutorials/ed-21.py +++ b/docs/docs/tutorials/ed-21.py @@ -33,10 +33,16 @@ # The project object keeps structures, experiments, fit settings, and # plotting utilities together in a single place. We will build the full # workflow inside this object. +# +# Save the project to a directory early on so that you can easily reload +# it later if needed. # %% project = ed.Project() +# %% +project.save_as('projects/lbco_hrpt_bayesian') + # %% [markdown] # ## Step 2: Build the Structural Model # From 643061d8d346b43edda343f36a1a096577b1d911 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:54:00 +0200 Subject: [PATCH 18/72] Wire analysis fit-state CIF restore --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/analysis.py | 151 +++++++++++++++++++++++ src/easydiffraction/io/cif/serialize.py | 88 +++++++++++++ src/easydiffraction/project/project.py | 24 ++-- 4 files changed, 257 insertions(+), 8 deletions(-) diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index bb9a1492..fed14aa3 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -38,7 +38,7 @@ structure or experiment CIF files. - [x] Phase 1 step 3: add deterministic result category models. - [x] Phase 1 step 4: add Bayesian metadata category models. - [x] Phase 1 step 5: add Bayesian cache manifest category models. -- [ ] Phase 1 step 6: wire analysis CIF save/load for fit state. +- [x] Phase 1 step 6: wire analysis CIF save/load for fit state. - [ ] Phase 1 step 7: capture fit projections after fitting. 
- [ ] Phase 1 step 8: add HDF5 sidecar save/load. - [ ] Phase 1 step 9: restore result objects and display cache inputs. diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index a8d4e257..9f0e2577 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -10,7 +10,30 @@ import pandas as pd from easydiffraction.analysis.categories.aliases.factory import AliasesFactory +from easydiffraction.analysis.categories.bayesian_convergence import BayesianConvergence +from easydiffraction.analysis.categories.bayesian_distribution_caches import ( + BayesianDistributionCaches, +) +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches +from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( + BayesianParameterPosteriors, +) +from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( + BayesianPredictiveDatasets, +) +from easydiffraction.analysis.categories.bayesian_result import BayesianResult +from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.constraints.factory import ConstraintsFactory +from easydiffraction.analysis.categories.deterministic_parameter_results import ( + DeterministicParameterResults, +) +from easydiffraction.analysis.categories.deterministic_result import DeterministicResult +from easydiffraction.analysis.categories.fit_parameter_correlations import ( + FitParameterCorrelations, +) +from easydiffraction.analysis.categories.fit_parameters import FitParameters +from easydiffraction.analysis.categories.fit_result import FitResult +from easydiffraction.analysis.categories.fit_state import FitState from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.fitting import FittingFactory from easydiffraction.analysis.categories.joint_fit import JointFitCollection @@ -19,6 +42,7 @@ from 
@property
def fit_state(self) -> FitState:
    """Category with the persisted fit-state schema metadata."""
    return self._fit_state


@property
def fit_parameters(self) -> FitParameters:
    """Category with the persisted fit-parameter control snapshots."""
    return self._fit_parameters


@property
def fit_result(self) -> FitResult:
    """Category with the persisted common fit-result status."""
    return self._fit_result


@property
def fit_parameter_correlations(self) -> FitParameterCorrelations:
    """Category with the persisted parameter correlation summaries."""
    return self._fit_parameter_correlations


@property
def deterministic_result(self) -> DeterministicResult:
    """Category with the persisted deterministic fit metadata."""
    return self._deterministic_result


@property
def deterministic_parameter_results(self) -> DeterministicParameterResults:
    """Category with the persisted deterministic parameter results."""
    return self._deterministic_parameter_results


@property
def bayesian_result(self) -> BayesianResult:
    """Category with the persisted Bayesian fit metadata."""
    return self._bayesian_result


@property
def bayesian_sampler(self) -> BayesianSampler:
    """Category with the persisted Bayesian sampler settings."""
    return self._bayesian_sampler


@property
def bayesian_convergence(self) -> BayesianConvergence:
    """Category with the persisted Bayesian convergence diagnostics."""
    return self._bayesian_convergence


@property
def bayesian_parameter_posteriors(self) -> BayesianParameterPosteriors:
    """Category with the persisted parameter posterior summaries."""
    return self._bayesian_parameter_posteriors


@property
def bayesian_distribution_caches(self) -> BayesianDistributionCaches:
    """Category with the persisted distribution-cache manifests."""
    return self._bayesian_distribution_caches


@property
def bayesian_pair_caches(self) -> BayesianPairCaches:
    """Category with the persisted pair-cache manifests."""
    return self._bayesian_pair_caches


@property
def bayesian_predictive_datasets(self) -> BayesianPredictiveDatasets:
    """Category with the persisted predictive-dataset manifests."""
    return self._bayesian_predictive_datasets


def _has_persisted_fit_state(self) -> bool:
    """Report the current value of the persisted fit-state flag."""
    return self._has_persisted_fit_state_data


def _set_has_persisted_fit_state(self, value: bool) -> None:
    """Store the persisted fit-state flag (internal callers only)."""
    self._has_persisted_fit_state_data = value


def _fit_state_categories(self) -> list[object]:
    """
    List the fit-state categories for the stored result kind.

    Returns
    -------
    list[object]
        The four common categories first. They are followed by the
        deterministic categories when the kind is deterministic and
        by the Bayesian categories for any other recognized kind.
        Only the common categories are returned, after a warning,
        when the stored kind is not a valid ``FitResultKindEnum``
        value.
    """
    shared: list[object] = [
        self.fit_state,
        self.fit_parameters,
        self.fit_result,
        self.fit_parameter_correlations,
    ]

    raw_kind = self.fit_result.result_kind.value
    try:
        kind = FitResultKindEnum(raw_kind)
    except ValueError:
        log.warning(
            'Unsupported fit_result.result_kind while serializing analysis CIF: '
            f"{raw_kind!r}. "
            'Saving only common fit-state categories.',
        )
        return shared

    if kind is FitResultKindEnum.DETERMINISTIC:
        kind_specific: list[object] = [
            self.deterministic_result,
            self.deterministic_parameter_results,
        ]
    else:
        # Every recognized kind other than DETERMINISTIC is handled
        # as Bayesian.
        kind_specific = [
            self.bayesian_result,
            self.bayesian_sampler,
            self.bayesian_convergence,
            self.bayesian_parameter_posteriors,
            self.bayesian_distribution_caches,
            self.bayesian_pair_caches,
            self.bayesian_predictive_datasets,
        ]
    return shared + kind_specific
def _has_persisted_fit_state_sections(block: object) -> bool:
    """
    Tell whether a CIF block carries any fit-state section.

    One representative tag is probed per category: single-value
    categories through ``_has_cif_value`` and loop categories through
    ``_has_cif_loop`` (both helpers are defined elsewhere in this
    module).

    Parameters
    ----------
    block : object
        Parsed CIF block to inspect.

    Returns
    -------
    bool
        ``True`` as soon as one probe succeeds, otherwise ``False``.
    """
    scalar_tags = (
        '_fit_state.schema_version',
        '_fit_result.result_kind',
        '_deterministic_result.optimizer_name',
        '_bayesian_result.sampler_name',
        '_bayesian_sampler.steps',
        '_bayesian_convergence.converged',
    )
    loop_tags = (
        '_fit_parameter.param_unique_name',
        '_fit_parameter_correlation.param_unique_name_i',
        '_deterministic_parameter_result.param_unique_name',
        '_bayesian_parameter_posterior.unique_name',
        '_bayesian_distribution_cache.param_unique_name',
        '_bayesian_pair_cache.param_unique_name_x',
        '_bayesian_predictive_dataset.experiment_name',
    )

    for tag in scalar_tags:
        if _has_cif_value(block, tag):
            return True
    for tag in loop_tags:
        if _has_cif_loop(block, tag):
            return True
    return False


def _warn_for_unsupported_fit_state_schema(analysis: object) -> None:
    """
    Log a warning when the restored schema version is not 1.

    Parameters
    ----------
    analysis : object
        Analysis whose ``fit_state.schema_version`` was just restored.
    """
    restored_version = analysis.fit_state.schema_version.value
    if restored_version != 1:
        message = (
            'Unsupported _fit_state.schema_version in analysis CIF: '
            f'{restored_version}. Attempting best-effort restore for schema version 1.'
        )
        log.warning(message)


def _restore_common_fit_state(analysis: object, block: object) -> None:
    """
    Restore the categories present for every kind of fit.

    Parameters
    ----------
    analysis : object
        Analysis that owns the fit-state categories.
    block : object
        Parsed CIF block to read from.
    """
    # The schema version is read first so that the warning is issued
    # before the remaining categories are parsed.
    analysis.fit_state.from_cif(block)
    _warn_for_unsupported_fit_state_schema(analysis)

    for category_name in (
        'fit_parameters',
        'fit_result',
        'fit_parameter_correlations',
    ):
        getattr(analysis, category_name).from_cif(block)


def _restore_deterministic_fit_state(analysis: object, block: object) -> None:
    """
    Restore the categories that only a deterministic fit writes.

    Parameters
    ----------
    analysis : object
        Analysis that owns the fit-state categories.
    block : object
        Parsed CIF block to read from.
    """
    for category_name in (
        'deterministic_result',
        'deterministic_parameter_results',
    ):
        getattr(analysis, category_name).from_cif(block)


def _restore_bayesian_fit_state(analysis: object, block: object) -> None:
    """
    Restore the categories that only a Bayesian fit writes.

    Parameters
    ----------
    analysis : object
        Analysis that owns the fit-state categories.
    block : object
        Parsed CIF block to read from.
    """
    for category_name in (
        'bayesian_result',
        'bayesian_sampler',
        'bayesian_convergence',
        'bayesian_parameter_posteriors',
        'bayesian_distribution_caches',
        'bayesian_pair_caches',
        'bayesian_predictive_datasets',
    ):
        getattr(analysis, category_name).from_cif(block)


def _restore_persisted_fit_state(analysis: object, block: object) -> None:
    """
    Restore all persisted fit-state categories from a CIF block.

    The presence flag is raised first, then the common categories are
    restored, and finally the categories matching the stored result
    kind. An unrecognized kind is logged and leaves only the common
    categories restored.

    Parameters
    ----------
    analysis : object
        Analysis that owns the fit-state categories.
    block : object
        Parsed CIF block to read from.
    """
    from easydiffraction.analysis.enums import FitResultKindEnum  # noqa: PLC0415

    analysis._set_has_persisted_fit_state(True)
    _restore_common_fit_state(analysis, block)

    raw_kind = analysis.fit_result.result_kind.value
    try:
        kind = FitResultKindEnum(raw_kind)
    except ValueError:
        log.warning(
            'Unsupported _fit_result.result_kind in analysis CIF: '
            f'{raw_kind!r}. Skipping kind-specific fit-state categories.',
        )
        return

    # Every recognized kind other than DETERMINISTIC is restored as
    # Bayesian.
    restore_kind_specific = (
        _restore_deterministic_fit_state
        if kind is FitResultKindEnum.DETERMINISTIC
        else _restore_bayesian_fit_state
    )
    restore_kind_specific(analysis, block)


def _build_parameter_map(self) -> dict[str, object]:
    """
    Map each ``unique_name`` to its live parameter object.

    Structure parameters are visited before experiment parameters.
    Objects without a ``unique_name`` attribute, or with it set to
    ``None``, are left out. When a name occurs more than once, the
    last object visited is kept.

    Returns
    -------
    dict[str, object]
        Lookup used by CIF restore steps to reconnect persisted names
        to live parameter objects.
    """
    candidates = self._structures.parameters + self._experiments.parameters
    return {
        name: candidate
        for candidate in candidates
        if (name := getattr(candidate, 'unique_name', None)) is not None
    }
+- .../fit_parameter_correlations/__init__.py | 2 +- .../fit_parameter_correlations/default.py | 6 +- .../fit_parameter_correlations/factory.py | 2 +- .../categories/fit_parameters/__init__.py | 2 +- .../categories/fit_parameters/default.py | 34 +++++----- .../categories/fit_parameters/factory.py | 2 +- .../categories/fit_result/__init__.py | 2 +- .../analysis/categories/fit_result/default.py | 24 ++++--- .../analysis/categories/fit_result/factory.py | 2 +- .../analysis/categories/fit_state/__init__.py | 2 +- .../analysis/categories/fit_state/default.py | 4 +- .../analysis/categories/fit_state/factory.py | 2 +- src/easydiffraction/io/cif/serialize.py | 13 +++- 43 files changed, 217 insertions(+), 214 deletions(-) diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index c08c60c8..57b2e447 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -2,9 +2,7 @@ # SPDX-License-Identifier: BSD-3-Clause from easydiffraction.analysis.categories.bayesian_convergence import BayesianConvergence -from easydiffraction.analysis.categories.bayesian_convergence import ( - BayesianConvergenceFactory, -) +from easydiffraction.analysis.categories.bayesian_convergence import BayesianConvergenceFactory from easydiffraction.analysis.categories.bayesian_distribution_caches import ( BayesianDistributionCacheItem, ) @@ -14,6 +12,9 @@ from easydiffraction.analysis.categories.bayesian_distribution_caches import ( BayesianDistributionCachesFactory, ) +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCacheItem +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCachesFactory from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriorItem, ) @@ -23,13 +24,6 @@ from 
easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriorsFactory, ) -from easydiffraction.analysis.categories.bayesian_pair_caches import ( - BayesianPairCacheItem, -) -from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches -from easydiffraction.analysis.categories.bayesian_pair_caches import ( - BayesianPairCachesFactory, -) from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( BayesianPredictiveDatasetItem, ) @@ -53,17 +47,11 @@ DeterministicParameterResultsFactory, ) from easydiffraction.analysis.categories.deterministic_result import DeterministicResult -from easydiffraction.analysis.categories.deterministic_result import ( - DeterministicResultFactory, -) -from easydiffraction.analysis.categories.fitting import Fitting -from easydiffraction.analysis.categories.fitting import FittingFactory +from easydiffraction.analysis.categories.deterministic_result import DeterministicResultFactory from easydiffraction.analysis.categories.fit_parameter_correlations import ( FitParameterCorrelationItem, ) -from easydiffraction.analysis.categories.fit_parameter_correlations import ( - FitParameterCorrelations, -) +from easydiffraction.analysis.categories.fit_parameter_correlations import FitParameterCorrelations from easydiffraction.analysis.categories.fit_parameter_correlations import ( FitParameterCorrelationsFactory, ) @@ -74,6 +62,8 @@ from easydiffraction.analysis.categories.fit_result import FitResultFactory from easydiffraction.analysis.categories.fit_state import FitState from easydiffraction.analysis.categories.fit_state import FitStateFactory +from easydiffraction.analysis.categories.fitting import Fitting +from easydiffraction.analysis.categories.fitting import FittingFactory from easydiffraction.analysis.categories.joint_fit import JointFitCollection from easydiffraction.analysis.categories.joint_fit import JointFitFactory from 
easydiffraction.analysis.categories.joint_fit import JointFitItem diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 9f0e2577..f4e681f0 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -28,9 +28,7 @@ DeterministicParameterResults, ) from easydiffraction.analysis.categories.deterministic_result import DeterministicResult -from easydiffraction.analysis.categories.fit_parameter_correlations import ( - FitParameterCorrelations, -) +from easydiffraction.analysis.categories.fit_parameter_correlations import FitParameterCorrelations from easydiffraction.analysis.categories.fit_parameters import FitParameters from easydiffraction.analysis.categories.fit_result import FitResult from easydiffraction.analysis.categories.fit_state import FitState @@ -42,8 +40,8 @@ from easydiffraction.analysis.categories.sequential_fit_extract import ( SequentialFitExtractCollection, ) -from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.enums import FitModeEnum +from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.fitting import Fitter from easydiffraction.core.category_owner import CategoryOwner from easydiffraction.core.guard import _apply_help_filter @@ -791,15 +789,22 @@ def bayesian_predictive_datasets(self) -> BayesianPredictiveDatasets: return self._bayesian_predictive_datasets def _has_persisted_fit_state(self) -> bool: - """Return whether a persisted fit-state projection is present.""" + """ + Return whether a persisted fit-state projection is present. + """ return self._has_persisted_fit_state_data def _set_has_persisted_fit_state(self, value: bool) -> None: - """Set the persisted fit-state presence flag for internal callers.""" + """ + Set the persisted fit-state presence flag for internal callers. 
+ """ self._has_persisted_fit_state_data = value def _fit_state_categories(self) -> list[object]: - """Return fit-state categories for the current persisted result kind.""" + """ + Return fit-state categories for the current persisted result + kind. + """ categories: list[object] = [ self.fit_state, self.fit_parameters, @@ -812,7 +817,7 @@ def _fit_state_categories(self) -> list[object]: except ValueError: log.warning( 'Unsupported fit_result.result_kind while serializing analysis CIF: ' - f"{self.fit_result.result_kind.value!r}. " + f'{self.fit_result.result_kind.value!r}. ' 'Saving only common fit-state categories.', ) return categories diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index f6abbd51..4a3faeb0 100644 --- a/src/easydiffraction/analysis/categories/__init__.py +++ b/src/easydiffraction/analysis/categories/__init__.py @@ -10,16 +10,14 @@ from easydiffraction.analysis.categories.bayesian_distribution_caches import ( BayesianDistributionCaches, ) +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCacheItem +from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriorItem, ) from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriors, ) -from easydiffraction.analysis.categories.bayesian_pair_caches import ( - BayesianPairCacheItem, -) -from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( BayesianPredictiveDatasetItem, ) @@ -40,9 +38,7 @@ from easydiffraction.analysis.categories.fit_parameter_correlations import ( FitParameterCorrelationItem, ) -from easydiffraction.analysis.categories.fit_parameter_correlations import ( - FitParameterCorrelations, -) +from 
easydiffraction.analysis.categories.fit_parameter_correlations import FitParameterCorrelations from easydiffraction.analysis.categories.fit_parameters import FitParameterItem from easydiffraction.analysis.categories.fit_parameters import FitParameters from easydiffraction.analysis.categories.fit_result import FitResult diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py b/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py index c77527f4..e9acbd71 100644 --- a/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/__init__.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause -from easydiffraction.analysis.categories.bayesian_convergence.default import ( - BayesianConvergence, -) +from easydiffraction.analysis.categories.bayesian_convergence.default import BayesianConvergence from easydiffraction.analysis.categories.bayesian_convergence.factory import ( BayesianConvergenceFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py index e6a76945..ef4a2acb 100644 --- a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py @@ -79,7 +79,7 @@ def max_r_hat(self) -> NumericDescriptor: """Maximum rank-normalized split-R-hat across parameters.""" return self._max_r_hat - def _set_max_r_hat(self, value: int | float | None) -> None: + def _set_max_r_hat(self, value: float | None) -> None: """Set the maximum R-hat for internal callers.""" self._max_r_hat.value = value @@ -88,7 +88,7 @@ def min_ess_bulk(self) -> NumericDescriptor: """Minimum bulk effective sample size across parameters.""" return self._min_ess_bulk - def _set_min_ess_bulk(self, 
value: int | float | None) -> None: + def _set_min_ess_bulk(self, value: float | None) -> None: """Set the minimum ESS bulk for internal callers.""" self._min_ess_bulk.value = value @@ -97,7 +97,7 @@ def n_draws(self) -> NumericDescriptor: """Number of stored posterior draws.""" return self._n_draws - def _set_n_draws(self, value: int | float) -> None: + def _set_n_draws(self, value: float) -> None: """Set the draw count for internal callers.""" self._n_draws.value = value @@ -106,7 +106,7 @@ def n_chains(self) -> NumericDescriptor: """Number of stored posterior chains.""" return self._n_chains - def _set_n_chains(self, value: int | float) -> None: + def _set_n_chains(self, value: float) -> None: """Set the chain count for internal callers.""" self._n_chains.value = value @@ -115,6 +115,6 @@ def n_parameters(self) -> NumericDescriptor: """Number of sampled parameters.""" return self._n_parameters - def _set_n_parameters(self, value: int | float) -> None: + def _set_n_parameters(self, value: float) -> None: """Set the sampled-parameter count for internal callers.""" - self._n_parameters.value = value \ No newline at end of file + self._n_parameters.value = value diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py b/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py index 208982d5..fbe5da38 100644 --- a/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/factory.py @@ -14,4 +14,4 @@ class BayesianConvergenceFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py index 590e1f6b..4ecf63f0 100644 --- a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py +++ 
b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/__init__.py @@ -9,4 +9,4 @@ ) from easydiffraction.analysis.categories.bayesian_distribution_caches.factory import ( BayesianDistributionCachesFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py index d067f667..59eb1354 100644 --- a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py @@ -91,7 +91,7 @@ def n_grid(self) -> NumericDescriptor: """Number of grid points in the cached distribution.""" return self._n_grid - def _set_n_grid(self, value: int | float) -> None: + def _set_n_grid(self, value: float) -> None: """Set the grid-size count for internal callers.""" self._n_grid.value = value @@ -100,7 +100,7 @@ def n_draws_cached(self) -> NumericDescriptor: """Number of draws summarized into the cached distribution.""" return self._n_draws_cached - def _set_n_draws_cached(self, value: int | float) -> None: + def _set_n_draws_cached(self, value: float) -> None: """Set the cached-draw count for internal callers.""" self._n_draws_cached.value = value @@ -123,8 +123,8 @@ def create( param_unique_name: str, x_path: str, density_path: str, - n_grid: int | float, - n_draws_cached: int | float, + n_grid: float, + n_draws_cached: float, ) -> None: """ Create a persisted Bayesian distribution-cache manifest row. 
@@ -148,4 +148,4 @@ def create( item._set_density_path(density_path) item._set_n_grid(n_grid) item._set_n_draws_cached(n_draws_cached) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py index 30f1d4e9..a45016f5 100644 --- a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/factory.py @@ -14,4 +14,4 @@ class BayesianDistributionCachesFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py b/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py index 7bf4be16..edc955fa 100644 --- a/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py +++ b/src/easydiffraction/analysis/categories/bayesian_pair_caches/__init__.py @@ -1,12 +1,8 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause -from easydiffraction.analysis.categories.bayesian_pair_caches.default import ( - BayesianPairCacheItem, -) -from easydiffraction.analysis.categories.bayesian_pair_caches.default import ( - BayesianPairCaches, -) +from easydiffraction.analysis.categories.bayesian_pair_caches.default import BayesianPairCacheItem +from easydiffraction.analysis.categories.bayesian_pair_caches.default import BayesianPairCaches from easydiffraction.analysis.categories.bayesian_pair_caches.factory import ( BayesianPairCachesFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py b/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py index a027cfd1..b1157722 100644 --- 
a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py @@ -186,7 +186,7 @@ def n_grid_x(self) -> NumericDescriptor: """Number of x-grid points in the cached pair.""" return self._n_grid_x - def _set_n_grid_x(self, value: int | float) -> None: + def _set_n_grid_x(self, value: float) -> None: """Set the x-grid size for internal callers.""" self._n_grid_x.value = value @@ -195,7 +195,7 @@ def n_grid_y(self) -> NumericDescriptor: """Number of y-grid points in the cached pair.""" return self._n_grid_y - def _set_n_grid_y(self, value: int | float) -> None: + def _set_n_grid_y(self, value: float) -> None: """Set the y-grid size for internal callers.""" self._n_grid_y.value = value @@ -204,7 +204,7 @@ def n_draws_cached(self) -> NumericDescriptor: """Number of draws summarized into the cached pair.""" return self._n_draws_cached - def _set_n_draws_cached(self, value: int | float) -> None: + def _set_n_draws_cached(self, value: float) -> None: """Set the cached-draw count for internal callers.""" self._n_draws_cached.value = value @@ -230,9 +230,9 @@ def create( y_path: str, density_path: str, contour_level_path: str, - n_grid_x: int | float, - n_grid_y: int | float, - n_draws_cached: int | float, + n_grid_x: float, + n_grid_y: float, + n_draws_cached: float, id: str | None = None, ) -> None: """ @@ -281,4 +281,4 @@ def create( param_unique_name_y=normalized_y, ) item._set_id(resolved_id) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/factory.py b/src/easydiffraction/analysis/categories/bayesian_pair_caches/factory.py index 3bd97cb4..5c6df89b 100644 --- a/src/easydiffraction/analysis/categories/bayesian_pair_caches/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_pair_caches/factory.py @@ -14,4 +14,4 @@ class BayesianPairCachesFactory(FactoryBase): _default_rules: 
ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py index b8e4f251..10ec7695 100644 --- a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/__init__.py @@ -9,4 +9,4 @@ ) from easydiffraction.analysis.categories.bayesian_parameter_posteriors.factory import ( BayesianParameterPosteriorsFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py index 872d9d8c..8f74f39b 100644 --- a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py @@ -50,9 +50,7 @@ def __init__(self) -> None: name='best_sample_value', description='Committed sampled parameter value.', value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler( - names=['_bayesian_parameter_posterior.best_sample_value'] - ), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.best_sample_value']), ) self._median = NumericDescriptor( name='median', @@ -70,33 +68,25 @@ def __init__(self) -> None: name='interval_68_lower', description='Lower bound of the 68% credible interval.', value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler( - names=['_bayesian_parameter_posterior.interval_68_lower'] - ), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.interval_68_lower']), ) self._interval_68_upper = NumericDescriptor( name='interval_68_upper', description='Upper bound of the 68% credible interval.', value_spec=AttributeSpec(default=None, allow_none=True), - 
cif_handler=CifHandler( - names=['_bayesian_parameter_posterior.interval_68_upper'] - ), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.interval_68_upper']), ) self._interval_95_lower = NumericDescriptor( name='interval_95_lower', description='Lower bound of the 95% credible interval.', value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler( - names=['_bayesian_parameter_posterior.interval_95_lower'] - ), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.interval_95_lower']), ) self._interval_95_upper = NumericDescriptor( name='interval_95_upper', description='Upper bound of the 95% credible interval.', value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler( - names=['_bayesian_parameter_posterior.interval_95_upper'] - ), + cif_handler=CifHandler(names=['_bayesian_parameter_posterior.interval_95_upper']), ) self._ess_bulk = NumericDescriptor( name='ess_bulk', @@ -116,7 +106,7 @@ def order_index(self) -> NumericDescriptor: """Parameter column order in posterior sample arrays.""" return self._order_index - def _set_order_index(self, value: int | float) -> None: + def _set_order_index(self, value: float) -> None: """Set the order index for internal callers.""" self._order_index.value = value @@ -143,7 +133,7 @@ def best_sample_value(self) -> NumericDescriptor: """Committed sampled parameter value.""" return self._best_sample_value - def _set_best_sample_value(self, value: int | float | None) -> None: + def _set_best_sample_value(self, value: float | None) -> None: """Set the best sampled parameter value for internal callers.""" self._best_sample_value.value = value @@ -152,7 +142,7 @@ def median(self) -> NumericDescriptor: """Posterior median value.""" return self._median - def _set_median(self, value: int | float | None) -> None: + def _set_median(self, value: float | None) -> None: """Set the posterior median for internal callers.""" self._median.value = value @@ -161,7 +151,7 @@ def 
uncertainty(self) -> NumericDescriptor: """Posterior standard deviation.""" return self._uncertainty - def _set_uncertainty(self, value: int | float | None) -> None: + def _set_uncertainty(self, value: float | None) -> None: """Set the posterior uncertainty for internal callers.""" self._uncertainty.value = value @@ -170,7 +160,7 @@ def interval_68_lower(self) -> NumericDescriptor: """Lower bound of the 68% credible interval.""" return self._interval_68_lower - def _set_interval_68_lower(self, value: int | float | None) -> None: + def _set_interval_68_lower(self, value: float | None) -> None: """Set the 68% interval lower bound for internal callers.""" self._interval_68_lower.value = value @@ -179,7 +169,7 @@ def interval_68_upper(self) -> NumericDescriptor: """Upper bound of the 68% credible interval.""" return self._interval_68_upper - def _set_interval_68_upper(self, value: int | float | None) -> None: + def _set_interval_68_upper(self, value: float | None) -> None: """Set the 68% interval upper bound for internal callers.""" self._interval_68_upper.value = value @@ -188,7 +178,7 @@ def interval_95_lower(self) -> NumericDescriptor: """Lower bound of the 95% credible interval.""" return self._interval_95_lower - def _set_interval_95_lower(self, value: int | float | None) -> None: + def _set_interval_95_lower(self, value: float | None) -> None: """Set the 95% interval lower bound for internal callers.""" self._interval_95_lower.value = value @@ -197,7 +187,7 @@ def interval_95_upper(self) -> NumericDescriptor: """Upper bound of the 95% credible interval.""" return self._interval_95_upper - def _set_interval_95_upper(self, value: int | float | None) -> None: + def _set_interval_95_upper(self, value: float | None) -> None: """Set the 95% interval upper bound for internal callers.""" self._interval_95_upper.value = value @@ -206,7 +196,7 @@ def ess_bulk(self) -> NumericDescriptor: """Bulk effective sample size when available.""" return self._ess_bulk - def 
_set_ess_bulk(self, value: int | float | None) -> None: + def _set_ess_bulk(self, value: float | None) -> None: """Set the ESS bulk value for internal callers.""" self._ess_bulk.value = value @@ -215,14 +205,16 @@ def r_hat(self) -> NumericDescriptor: """Rank-normalized split-R-hat when available.""" return self._r_hat - def _set_r_hat(self, value: int | float | None) -> None: + def _set_r_hat(self, value: float | None) -> None: """Set the R-hat value for internal callers.""" self._r_hat.value = value @BayesianParameterPosteriorsFactory.register class BayesianParameterPosteriors(CategoryCollection): - """Collection of persisted Bayesian parameter posterior summaries.""" + """ + Collection of persisted Bayesian parameter posterior summaries. + """ type_info = TypeInfo( tag='default', @@ -235,18 +227,18 @@ def __init__(self) -> None: def create( self, *, - order_index: int | float, + order_index: float, unique_name: str, display_name: str, - best_sample_value: int | float | None = None, - median: int | float | None = None, - uncertainty: int | float | None = None, - interval_68_lower: int | float | None = None, - interval_68_upper: int | float | None = None, - interval_95_lower: int | float | None = None, - interval_95_upper: int | float | None = None, - ess_bulk: int | float | None = None, - r_hat: int | float | None = None, + best_sample_value: float | None = None, + median: float | None = None, + uncertainty: float | None = None, + interval_68_lower: float | None = None, + interval_68_upper: float | None = None, + interval_95_lower: float | None = None, + interval_95_upper: float | None = None, + ess_bulk: float | None = None, + r_hat: float | None = None, ) -> None: """ Create a persisted Bayesian parameter posterior summary row. 
@@ -291,4 +283,4 @@ def create( item._set_interval_95_upper(interval_95_upper) item._set_ess_bulk(ess_bulk) item._set_r_hat(r_hat) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py index 2069feec..54eef2b8 100644 --- a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/factory.py @@ -14,4 +14,4 @@ class BayesianParameterPosteriorsFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/__init__.py b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/__init__.py index c6fdef73..f706de4f 100644 --- a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/__init__.py +++ b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/__init__.py @@ -9,4 +9,4 @@ ) from easydiffraction.analysis.categories.bayesian_predictive_datasets.factory import ( BayesianPredictiveDatasetsFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py index 1f615ebb..25f9b58f 100644 --- a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py @@ -179,7 +179,7 @@ def n_x(self) -> NumericDescriptor: """Number of x-axis points in the cached predictive dataset.""" return self._n_x - def _set_n_x(self, value: int | float) -> None: + def _set_n_x(self, value: float) -> None: """Set the predictive x-axis size for internal callers.""" self._n_x.value = value @@ 
-188,7 +188,7 @@ def n_draws_cached(self) -> NumericDescriptor: """Number of cached predictive draws.""" return self._n_draws_cached - def _set_n_draws_cached(self, value: int | float) -> None: + def _set_n_draws_cached(self, value: float) -> None: """Set the cached predictive-draw count for internal callers.""" self._n_draws_cached.value = value @@ -217,8 +217,8 @@ def create( lower_68_path: str | None = None, upper_68_path: str | None = None, draws_path: str | None = None, - n_x: int | float, - n_draws_cached: int | float, + n_x: float, + n_draws_cached: float, ) -> None: """ Create a persisted Bayesian predictive-dataset manifest row. @@ -260,4 +260,4 @@ def create( item._set_draws_path(draws_path) item._set_n_x(n_x) item._set_n_draws_cached(n_draws_cached) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/factory.py b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/factory.py index db242413..a8844926 100644 --- a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/factory.py @@ -14,4 +14,4 @@ class BayesianPredictiveDatasetsFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/bayesian_result/__init__.py b/src/easydiffraction/analysis/categories/bayesian_result/__init__.py index e5f58ab4..eab2a07f 100644 --- a/src/easydiffraction/analysis/categories/bayesian_result/__init__.py +++ b/src/easydiffraction/analysis/categories/bayesian_result/__init__.py @@ -2,4 +2,4 @@ # SPDX-License-Identifier: BSD-3-Clause from easydiffraction.analysis.categories.bayesian_result.default import BayesianResult -from easydiffraction.analysis.categories.bayesian_result.factory import BayesianResultFactory \ No newline at end of file +from 
easydiffraction.analysis.categories.bayesian_result.factory import BayesianResultFactory diff --git a/src/easydiffraction/analysis/categories/bayesian_result/default.py b/src/easydiffraction/analysis/categories/bayesian_result/default.py index d86dd687..66cf888e 100644 --- a/src/easydiffraction/analysis/categories/bayesian_result/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_result/default.py @@ -120,7 +120,9 @@ def _set_point_estimate_name(self, value: str) -> None: @property def success(self) -> BoolDescriptor: - """Whether the persisted Bayesian fit produced usable results.""" + """ + Whether the persisted Bayesian fit produced usable results. + """ return self._success def _set_success(self, value: bool) -> None: @@ -141,7 +143,7 @@ def best_log_posterior(self) -> NumericDescriptor: """Best log-posterior value reported by the sampler.""" return self._best_log_posterior - def _set_best_log_posterior(self, value: int | float | None) -> None: + def _set_best_log_posterior(self, value: float | None) -> None: """Set the best log-posterior for internal callers.""" self._best_log_posterior.value = value @@ -150,8 +152,10 @@ def credible_interval_inner(self) -> NumericDescriptor: """Inner credible-interval level used in summaries.""" return self._credible_interval_inner - def _set_credible_interval_inner(self, value: int | float) -> None: - """Set the inner credible-interval level for internal callers.""" + def _set_credible_interval_inner(self, value: float) -> None: + """ + Set the inner credible-interval level for internal callers. 
+ """ self._credible_interval_inner.value = value @property @@ -159,8 +163,10 @@ def credible_interval_outer(self) -> NumericDescriptor: """Outer credible-interval level used in summaries.""" return self._credible_interval_outer - def _set_credible_interval_outer(self, value: int | float) -> None: - """Set the outer credible-interval level for internal callers.""" + def _set_credible_interval_outer(self, value: float) -> None: + """ + Set the outer credible-interval level for internal callers. + """ self._credible_interval_outer.value = value @property @@ -206,4 +212,4 @@ def sidecar_file(self) -> StringDescriptor: def _set_sidecar_file(self, value: str) -> None: """Set the sidecar-file path for internal callers.""" - self._sidecar_file.value = value \ No newline at end of file + self._sidecar_file.value = value diff --git a/src/easydiffraction/analysis/categories/bayesian_result/factory.py b/src/easydiffraction/analysis/categories/bayesian_result/factory.py index 2d7decc3..3d437a0d 100644 --- a/src/easydiffraction/analysis/categories/bayesian_result/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_result/factory.py @@ -14,4 +14,4 @@ class BayesianResultFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py b/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py index efead92d..962e3fba 100644 --- a/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/__init__.py @@ -2,4 +2,4 @@ # SPDX-License-Identifier: BSD-3-Clause from easydiffraction.analysis.categories.bayesian_sampler.default import BayesianSampler -from easydiffraction.analysis.categories.bayesian_sampler.factory import BayesianSamplerFactory \ No newline at end of file +from easydiffraction.analysis.categories.bayesian_sampler.factory import 
BayesianSamplerFactory diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py index 27b42512..3512c468 100644 --- a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py @@ -75,7 +75,7 @@ def steps(self) -> NumericDescriptor: """Resolved number of sampler steps.""" return self._steps - def _set_steps(self, value: int | float) -> None: + def _set_steps(self, value: float) -> None: """Set the step count for internal callers.""" self._steps.value = value @@ -84,7 +84,7 @@ def burn(self) -> NumericDescriptor: """Resolved burn-in count.""" return self._burn - def _set_burn(self, value: int | float) -> None: + def _set_burn(self, value: float) -> None: """Set the burn-in count for internal callers.""" self._burn.value = value @@ -93,7 +93,7 @@ def thin(self) -> NumericDescriptor: """Resolved thinning interval.""" return self._thin - def _set_thin(self, value: int | float) -> None: + def _set_thin(self, value: float) -> None: """Set the thinning interval for internal callers.""" self._thin.value = value @@ -102,7 +102,7 @@ def pop(self) -> NumericDescriptor: """Resolved population size.""" return self._pop - def _set_pop(self, value: int | float) -> None: + def _set_pop(self, value: float) -> None: """Set the population size for internal callers.""" self._pop.value = value @@ -129,6 +129,6 @@ def random_seed(self) -> NumericDescriptor: """Resolved random seed used by the sampler.""" return self._random_seed - def _set_random_seed(self, value: int | float | None) -> None: + def _set_random_seed(self, value: float | None) -> None: """Set the random seed for internal callers.""" - self._random_seed.value = value \ No newline at end of file + self._random_seed.value = value diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py 
b/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py index 66858e6d..6f5d1033 100644 --- a/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/factory.py @@ -14,4 +14,4 @@ class BayesianSamplerFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py index 8e00501e..ea4e4467 100644 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py @@ -9,4 +9,4 @@ ) from easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( DeterministicParameterResultsFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py index 002d13dc..8054a4e1 100644 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py @@ -39,9 +39,7 @@ def __init__(self) -> None: default='_', validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), ), - cif_handler=CifHandler( - names=['_deterministic_parameter_result.param_unique_name'] - ), + cif_handler=CifHandler(names=['_deterministic_parameter_result.param_unique_name']), ) self._final_value = NumericDescriptor( name='final_value', @@ -53,33 +51,29 @@ def __init__(self) -> None: name='final_uncertainty', description='Final uncertainty for the persisted parameter result.', value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler( - 
names=['_deterministic_parameter_result.final_uncertainty'] - ), + cif_handler=CifHandler(names=['_deterministic_parameter_result.final_uncertainty']), ) self._at_lower_bound = BoolDescriptor( name='at_lower_bound', description='Whether the parameter finished at the lower fit bound.', value_spec=AttributeSpec(default=False), - cif_handler=CifHandler( - names=['_deterministic_parameter_result.at_lower_bound'] - ), + cif_handler=CifHandler(names=['_deterministic_parameter_result.at_lower_bound']), ) self._at_upper_bound = BoolDescriptor( name='at_upper_bound', description='Whether the parameter finished at the upper fit bound.', value_spec=AttributeSpec(default=False), - cif_handler=CifHandler( - names=['_deterministic_parameter_result.at_upper_bound'] - ), + cif_handler=CifHandler(names=['_deterministic_parameter_result.at_upper_bound']), ) @property def order_index(self) -> NumericDescriptor: - """Display and array order for the persisted parameter result.""" + """ + Display and array order for the persisted parameter result. 
+ """ return self._order_index - def _set_order_index(self, value: int | float) -> None: + def _set_order_index(self, value: float) -> None: """Set the order index for internal callers.""" self._order_index.value = value @@ -97,7 +91,7 @@ def final_value(self) -> NumericDescriptor: """Final fitted value for the persisted parameter result.""" return self._final_value - def _set_final_value(self, value: int | float | None) -> None: + def _set_final_value(self, value: float | None) -> None: """Set the final fitted value for internal callers.""" self._final_value.value = value @@ -106,7 +100,7 @@ def final_uncertainty(self) -> NumericDescriptor: """Final uncertainty for the persisted parameter result.""" return self._final_uncertainty - def _set_final_uncertainty(self, value: int | float | None) -> None: + def _set_final_uncertainty(self, value: float | None) -> None: """Set the final uncertainty for internal callers.""" self._final_uncertainty.value = value @@ -144,10 +138,10 @@ def __init__(self) -> None: def create( self, *, - order_index: int | float, + order_index: float, param_unique_name: str, - final_value: int | float | None = None, - final_uncertainty: int | float | None = None, + final_value: float | None = None, + final_uncertainty: float | None = None, at_lower_bound: bool = False, at_upper_bound: bool = False, ) -> None: @@ -176,4 +170,4 @@ def create( item._set_final_uncertainty(final_uncertainty) item._set_at_lower_bound(at_lower_bound) item._set_at_upper_bound(at_upper_bound) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py index 47b0db4c..ff50b256 100644 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py @@ -14,4 +14,4 @@ class 
DeterministicParameterResultsFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/deterministic_result/__init__.py b/src/easydiffraction/analysis/categories/deterministic_result/__init__.py index 851d2a1c..18ed7af8 100644 --- a/src/easydiffraction/analysis/categories/deterministic_result/__init__.py +++ b/src/easydiffraction/analysis/categories/deterministic_result/__init__.py @@ -1,9 +1,7 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause -from easydiffraction.analysis.categories.deterministic_result.default import ( - DeterministicResult, -) +from easydiffraction.analysis.categories.deterministic_result.default import DeterministicResult from easydiffraction.analysis.categories.deterministic_result.factory import ( DeterministicResultFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/deterministic_result/default.py b/src/easydiffraction/analysis/categories/deterministic_result/default.py index 51f51984..d929ca41 100644 --- a/src/easydiffraction/analysis/categories/deterministic_result/default.py +++ b/src/easydiffraction/analysis/categories/deterministic_result/default.py @@ -81,17 +81,13 @@ def __init__(self) -> None: name='covariance_available', description='Whether covariance was available for the persisted deterministic fit.', value_spec=AttributeSpec(default=False), - cif_handler=CifHandler( - names=['_deterministic_result.covariance_available'] - ), + cif_handler=CifHandler(names=['_deterministic_result.covariance_available']), ) self._correlation_available = BoolDescriptor( name='correlation_available', description='Whether correlations were available for the persisted deterministic fit.', value_spec=AttributeSpec(default=False), - cif_handler=CifHandler( - names=['_deterministic_result.correlation_available'] - ), + 
cif_handler=CifHandler(names=['_deterministic_result.correlation_available']), ) @property @@ -114,7 +110,9 @@ def _set_method_name(self, value: str) -> None: @property def objective_name(self) -> StringDescriptor: - """Objective function name for the persisted deterministic fit.""" + """ + Objective function name for the persisted deterministic fit. + """ return self._objective_name def _set_objective_name(self, value: str) -> None: @@ -126,34 +124,41 @@ def objective_value(self) -> NumericDescriptor: """Objective value for the persisted deterministic fit.""" return self._objective_value - def _set_objective_value(self, value: int | float | None) -> None: + def _set_objective_value(self, value: float | None) -> None: """Set the objective value for internal callers.""" self._objective_value.value = value @property def n_data_points(self) -> NumericDescriptor: - """Number of data points used in the persisted deterministic fit.""" + """ + Number of data points used in the persisted deterministic fit. + """ return self._n_data_points - def _set_n_data_points(self, value: int | float) -> None: + def _set_n_data_points(self, value: float) -> None: """Set the data-point count for internal callers.""" self._n_data_points.value = value @property def n_parameters(self) -> NumericDescriptor: - """Number of parameters considered in the persisted deterministic fit.""" + """ + Number of parameters considered in the persisted deterministic + fit. + """ return self._n_parameters - def _set_n_parameters(self, value: int | float) -> None: + def _set_n_parameters(self, value: float) -> None: """Set the parameter count for internal callers.""" self._n_parameters.value = value @property def n_free_parameters(self) -> NumericDescriptor: - """Number of free parameters in the persisted deterministic fit.""" + """ + Number of free parameters in the persisted deterministic fit. 
+ """ return self._n_free_parameters - def _set_n_free_parameters(self, value: int | float) -> None: + def _set_n_free_parameters(self, value: float) -> None: """Set the free-parameter count for internal callers.""" self._n_free_parameters.value = value @@ -162,13 +167,16 @@ def degrees_of_freedom(self) -> NumericDescriptor: """Degrees of freedom for the persisted deterministic fit.""" return self._degrees_of_freedom - def _set_degrees_of_freedom(self, value: int | float) -> None: + def _set_degrees_of_freedom(self, value: float) -> None: """Set the degrees of freedom for internal callers.""" self._degrees_of_freedom.value = value @property def covariance_available(self) -> BoolDescriptor: - """Whether covariance was available for the persisted deterministic fit.""" + """ + Whether covariance was available for the persisted deterministic + fit. + """ return self._covariance_available def _set_covariance_available(self, value: bool) -> None: @@ -177,9 +185,12 @@ def _set_covariance_available(self, value: bool) -> None: @property def correlation_available(self) -> BoolDescriptor: - """Whether correlations were available for the persisted deterministic fit.""" + """ + Whether correlations were available for the persisted + deterministic fit. 
+ """ return self._correlation_available def _set_correlation_available(self, value: bool) -> None: """Set the correlation-available flag for internal callers.""" - self._correlation_available.value = value \ No newline at end of file + self._correlation_available.value = value diff --git a/src/easydiffraction/analysis/categories/deterministic_result/factory.py b/src/easydiffraction/analysis/categories/deterministic_result/factory.py index 44416cee..1ee96acf 100644 --- a/src/easydiffraction/analysis/categories/deterministic_result/factory.py +++ b/src/easydiffraction/analysis/categories/deterministic_result/factory.py @@ -14,4 +14,4 @@ class DeterministicResultFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py index 5c6f22b8..bbb74736 100644 --- a/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/__init__.py @@ -9,4 +9,4 @@ ) from easydiffraction.analysis.categories.fit_parameter_correlations.factory import ( FitParameterCorrelationsFactory, -) \ No newline at end of file +) diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py index 5d678fde..48ae4b51 100644 --- a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py @@ -141,7 +141,7 @@ def correlation(self) -> NumericDescriptor: """Persisted correlation coefficient for the parameter pair.""" return self._correlation - def _set_correlation(self, value: int | float) -> None: + def _set_correlation(self, value: float) -> None: """Set the correlation coefficient 
for internal callers.""" self._correlation.value = value @@ -164,7 +164,7 @@ def create( source_kind: str, param_unique_name_i: str, param_unique_name_j: str, - correlation: int | float, + correlation: float, id: str | None = None, ) -> None: """ @@ -199,4 +199,4 @@ def create( param_unique_name_j=normalized_j, ) item._set_id(resolved_id) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py index 8a6f77a0..09541108 100644 --- a/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/factory.py @@ -14,4 +14,4 @@ class FitParameterCorrelationsFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/fit_parameters/__init__.py b/src/easydiffraction/analysis/categories/fit_parameters/__init__.py index ad06e0df..64e72b41 100644 --- a/src/easydiffraction/analysis/categories/fit_parameters/__init__.py +++ b/src/easydiffraction/analysis/categories/fit_parameters/__init__.py @@ -3,4 +3,4 @@ from easydiffraction.analysis.categories.fit_parameters.default import FitParameterItem from easydiffraction.analysis.categories.fit_parameters.default import FitParameters -from easydiffraction.analysis.categories.fit_parameters.factory import FitParametersFactory \ No newline at end of file +from easydiffraction.analysis.categories.fit_parameters.factory import FitParametersFactory diff --git a/src/easydiffraction/analysis/categories/fit_parameters/default.py b/src/easydiffraction/analysis/categories/fit_parameters/default.py index 31572fcf..79dc8549 100644 --- a/src/easydiffraction/analysis/categories/fit_parameters/default.py +++ b/src/easydiffraction/analysis/categories/fit_parameters/default.py @@ 
-50,9 +50,7 @@ def __init__(self) -> None: name='fit_bounds_uncertainty_multiplier', description='Multiplier used to derive fit bounds from uncertainty.', value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler( - names=['_fit_parameter.fit_bounds_uncertainty_multiplier'] - ), + cif_handler=CifHandler(names=['_fit_parameter.fit_bounds_uncertainty_multiplier']), ) self._start_value = NumericDescriptor( name='start_value', @@ -73,7 +71,9 @@ def param_unique_name(self) -> StringDescriptor: return self._param_unique_name def _set_param_unique_name(self, value: str) -> None: - """Set the referenced parameter unique name for internal callers.""" + """ + Set the referenced parameter unique name for internal callers. + """ self._param_unique_name.value = value @property @@ -81,7 +81,7 @@ def fit_min(self) -> NumericDescriptor: """Persisted lower fit bound.""" return self._fit_min - def _set_fit_min(self, value: int | float) -> None: + def _set_fit_min(self, value: float) -> None: """Set the persisted lower fit bound for internal callers.""" self._fit_min.value = value @@ -90,7 +90,7 @@ def fit_max(self) -> NumericDescriptor: """Persisted upper fit bound.""" return self._fit_max - def _set_fit_max(self, value: int | float) -> None: + def _set_fit_max(self, value: float) -> None: """Set the persisted upper fit bound for internal callers.""" self._fit_max.value = value @@ -101,9 +101,11 @@ def fit_bounds_uncertainty_multiplier(self) -> NumericDescriptor: def _set_fit_bounds_uncertainty_multiplier( self, - value: int | float | None, + value: float | None, ) -> None: - """Set the fit-bounds uncertainty multiplier for internal callers.""" + """ + Set the fit-bounds uncertainty multiplier for internal callers. 
+ """ self._fit_bounds_uncertainty_multiplier.value = value @property @@ -111,7 +113,7 @@ def start_value(self) -> NumericDescriptor: """Persisted pre-fit value snapshot.""" return self._start_value - def _set_start_value(self, value: int | float | None) -> None: + def _set_start_value(self, value: float | None) -> None: """Set the pre-fit value snapshot for internal callers.""" self._start_value.value = value @@ -120,7 +122,7 @@ def start_uncertainty(self) -> NumericDescriptor: """Persisted pre-fit uncertainty snapshot.""" return self._start_uncertainty - def _set_start_uncertainty(self, value: int | float | None) -> None: + def _set_start_uncertainty(self, value: float | None) -> None: """Set the pre-fit uncertainty snapshot for internal callers.""" self._start_uncertainty.value = value @@ -141,11 +143,11 @@ def create( self, *, param_unique_name: str, - fit_min: int | float, - fit_max: int | float, - fit_bounds_uncertainty_multiplier: int | float | None = None, - start_value: int | float | None = None, - start_uncertainty: int | float | None = None, + fit_min: float, + fit_max: float, + fit_bounds_uncertainty_multiplier: float | None = None, + start_value: float | None = None, + start_uncertainty: float | None = None, ) -> None: """ Create a persisted fit-parameter control snapshot row. 
@@ -172,4 +174,4 @@ def create( item._set_fit_bounds_uncertainty_multiplier(fit_bounds_uncertainty_multiplier) item._set_start_value(start_value) item._set_start_uncertainty(start_uncertainty) - self.add(item) \ No newline at end of file + self.add(item) diff --git a/src/easydiffraction/analysis/categories/fit_parameters/factory.py b/src/easydiffraction/analysis/categories/fit_parameters/factory.py index a8ae8c34..4b73be48 100644 --- a/src/easydiffraction/analysis/categories/fit_parameters/factory.py +++ b/src/easydiffraction/analysis/categories/fit_parameters/factory.py @@ -14,4 +14,4 @@ class FitParametersFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/fit_result/__init__.py b/src/easydiffraction/analysis/categories/fit_result/__init__.py index 22163bc0..8578ab47 100644 --- a/src/easydiffraction/analysis/categories/fit_result/__init__.py +++ b/src/easydiffraction/analysis/categories/fit_result/__init__.py @@ -2,4 +2,4 @@ # SPDX-License-Identifier: BSD-3-Clause from easydiffraction.analysis.categories.fit_result.default import FitResult -from easydiffraction.analysis.categories.fit_result.factory import FitResultFactory \ No newline at end of file +from easydiffraction.analysis.categories.fit_result.factory import FitResultFactory diff --git a/src/easydiffraction/analysis/categories/fit_result/default.py b/src/easydiffraction/analysis/categories/fit_result/default.py index b3c04844..87466f9f 100644 --- a/src/easydiffraction/analysis/categories/fit_result/default.py +++ b/src/easydiffraction/analysis/categories/fit_result/default.py @@ -82,7 +82,9 @@ def _set_result_kind(self, value: str) -> None: @property def success(self) -> BoolDescriptor: - """Whether the latest persisted fit-result projection succeeded.""" + """ + Whether the latest persisted fit-result projection succeeded. 
+ """ return self._success def _set_success(self, value: bool) -> None: @@ -91,7 +93,9 @@ def _set_success(self, value: bool) -> None: @property def message(self) -> StringDescriptor: - """Status message for the latest persisted fit-result projection.""" + """ + Status message for the latest persisted fit-result projection. + """ return self._message def _set_message(self, value: str) -> None: @@ -100,19 +104,23 @@ def _set_message(self, value: str) -> None: @property def iterations(self) -> NumericDescriptor: - """Iteration count for the latest persisted fit-result projection.""" + """ + Iteration count for the latest persisted fit-result projection. + """ return self._iterations - def _set_iterations(self, value: int | float) -> None: + def _set_iterations(self, value: float) -> None: """Set the iteration count for internal callers.""" self._iterations.value = value @property def fitting_time(self) -> NumericDescriptor: - """Fitting time in seconds for the latest persisted projection.""" + """ + Fitting time in seconds for the latest persisted projection. 
+ """ return self._fitting_time - def _set_fitting_time(self, value: int | float | None) -> None: + def _set_fitting_time(self, value: float | None) -> None: """Set the fitting time for internal callers.""" self._fitting_time.value = value @@ -121,6 +129,6 @@ def reduced_chi_square(self) -> NumericDescriptor: """Reduced chi-square for the latest persisted projection.""" return self._reduced_chi_square - def _set_reduced_chi_square(self, value: int | float | None) -> None: + def _set_reduced_chi_square(self, value: float | None) -> None: """Set the reduced chi-square for internal callers.""" - self._reduced_chi_square.value = value \ No newline at end of file + self._reduced_chi_square.value = value diff --git a/src/easydiffraction/analysis/categories/fit_result/factory.py b/src/easydiffraction/analysis/categories/fit_result/factory.py index 8637e65a..66cbd32b 100644 --- a/src/easydiffraction/analysis/categories/fit_result/factory.py +++ b/src/easydiffraction/analysis/categories/fit_result/factory.py @@ -14,4 +14,4 @@ class FitResultFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/analysis/categories/fit_state/__init__.py b/src/easydiffraction/analysis/categories/fit_state/__init__.py index 6036277b..43b065a9 100644 --- a/src/easydiffraction/analysis/categories/fit_state/__init__.py +++ b/src/easydiffraction/analysis/categories/fit_state/__init__.py @@ -2,4 +2,4 @@ # SPDX-License-Identifier: BSD-3-Clause from easydiffraction.analysis.categories.fit_state.default import FitState -from easydiffraction.analysis.categories.fit_state.factory import FitStateFactory \ No newline at end of file +from easydiffraction.analysis.categories.fit_state.factory import FitStateFactory diff --git a/src/easydiffraction/analysis/categories/fit_state/default.py b/src/easydiffraction/analysis/categories/fit_state/default.py index ac8e955b..7ec6dac2 100644 --- 
a/src/easydiffraction/analysis/categories/fit_state/default.py +++ b/src/easydiffraction/analysis/categories/fit_state/default.py @@ -37,6 +37,6 @@ def schema_version(self) -> NumericDescriptor: """Persisted fit-state schema version.""" return self._schema_version - def _set_schema_version(self, value: int | float) -> None: + def _set_schema_version(self, value: float) -> None: """Set the fit-state schema version for internal callers.""" - self._schema_version.value = value \ No newline at end of file + self._schema_version.value = value diff --git a/src/easydiffraction/analysis/categories/fit_state/factory.py b/src/easydiffraction/analysis/categories/fit_state/factory.py index 1d394c5a..1705bcc8 100644 --- a/src/easydiffraction/analysis/categories/fit_state/factory.py +++ b/src/easydiffraction/analysis/categories/fit_state/factory.py @@ -14,4 +14,4 @@ class FitStateFactory(FactoryBase): _default_rules: ClassVar[dict] = { frozenset(): 'default', - } \ No newline at end of file + } diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index 88eed412..8833fd57 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -590,7 +590,9 @@ def _has_persisted_fit_state_sections(block: object) -> bool: def _warn_for_unsupported_fit_state_schema(analysis: object) -> None: - """Warn when the persisted fit-state schema version is unsupported.""" + """ + Warn when the persisted fit-state schema version is unsupported. + """ schema_version = analysis.fit_state.schema_version.value if schema_version != 1: log.warning( @@ -600,7 +602,10 @@ def _warn_for_unsupported_fit_state_schema(analysis: object) -> None: def _restore_common_fit_state(analysis: object, block: object) -> None: - """Restore fit-state categories shared by deterministic and Bayesian fits.""" + """ + Restore fit-state categories shared by deterministic and Bayesian + fits. 
+ """ analysis.fit_state.from_cif(block) _warn_for_unsupported_fit_state_schema(analysis) analysis.fit_parameters.from_cif(block) @@ -626,7 +631,9 @@ def _restore_bayesian_fit_state(analysis: object, block: object) -> None: def _restore_persisted_fit_state(analysis: object, block: object) -> None: - """Restore persisted fit-state categories after analysis configuration.""" + """ + Restore persisted fit-state categories after analysis configuration. + """ from easydiffraction.analysis.enums import FitResultKindEnum # noqa: PLC0415 analysis._set_has_persisted_fit_state(True) From c27e3892911da7d221b38b502bcc23bd719d0d8b Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 15:55:42 +0200 Subject: [PATCH 20/72] Add h5py dependency --- pixi.lock | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/pixi.lock b/pixi.lock index 927dd568..974cbe77 100644 --- a/pixi.lock +++ b/pixi.lock @@ -8150,6 +8150,7 @@ packages: - diffpy-pdffit2 - diffpy-utils - gemmi + - h5py - lmfit - numpy - pandas diff --git a/pyproject.toml b/pyproject.toml index 3a4b78a0..507a069d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dependencies = [ 'diffpy.pdffit2', # Calculations of Pair Distribution Function (PDF) 'diffpy.utils', # Utilities for PDF calculations 'uncertainties', # Propagation of uncertainties + 'h5py', # HDF5 file handling 'typeguard', # Runtime type checking 'darkdetect', # Detecting dark mode (system-level) 'pandas', # Displaying tables in Jupyter notebooks From 407389b0b374f179c89dd2ca5f29bfaa24d355dc Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 16:01:08 +0200 Subject: [PATCH 21/72] Capture persisted fit-state projections --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/analysis.py | 333 +++++++++++++++++++++++ src/easydiffraction/analysis/fitting.py | 62 +++++ 3 files changed, 396 insertions(+), 1 deletion(-) diff --git a/docs/dev/plans/analysis-cif-fit-state.md 
b/docs/dev/plans/analysis-cif-fit-state.md index fed14aa3..948c04fa 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -39,7 +39,7 @@ structure or experiment CIF files. - [x] Phase 1 step 4: add Bayesian metadata category models. - [x] Phase 1 step 5: add Bayesian cache manifest category models. - [x] Phase 1 step 6: wire analysis CIF save/load for fit state. -- [ ] Phase 1 step 7: capture fit projections after fitting. +- [x] Phase 1 step 7: capture fit projections after fitting. - [ ] Phase 1 step 8: add HDF5 sidecar save/load. - [ ] Phase 1 step 9: restore result objects and display cache inputs. - [ ] Phase 1 review gate: stop for human review. diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index f4e681f0..324d1aea 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -40,15 +40,20 @@ from easydiffraction.analysis.categories.sequential_fit_extract import ( SequentialFitExtractCollection, ) +from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults +from easydiffraction.analysis.fit_helpers.reporting import FitResults +from easydiffraction.analysis.enums import FitCorrelationSourceEnum from easydiffraction.analysis.enums import FitModeEnum from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.fitting import Fitter +from easydiffraction.analysis.minimizers.base import BOUNDARY_PROXIMITY_FRACTION from easydiffraction.core.category_owner import CategoryOwner from easydiffraction.core.guard import _apply_help_filter from easydiffraction.core.singleton import ConstraintsHandler from easydiffraction.core.variable import NumericDescriptor from easydiffraction.core.variable import Parameter from easydiffraction.core.variable import StringDescriptor +from easydiffraction.datablocks.experiment.item.base import intensity_category_for from 
easydiffraction.display.progress import make_display_handle from easydiffraction.display.tables import TableRenderer from easydiffraction.io.cif.serialize import analysis_to_cif @@ -840,6 +845,332 @@ def _fit_state_categories(self) -> list[object]: ]) return categories + def _clear_persisted_fit_state(self) -> None: + """Reset all persisted fit-state categories before a new fit.""" + self._fit_state = FitState() + self._fit_parameters = FitParameters() + self._fit_result = FitResult() + self._fit_parameter_correlations = FitParameterCorrelations() + self._deterministic_result = DeterministicResult() + self._deterministic_parameter_results = DeterministicParameterResults() + self._bayesian_result = BayesianResult() + self._bayesian_sampler = BayesianSampler() + self._bayesian_convergence = BayesianConvergence() + self._bayesian_parameter_posteriors = BayesianParameterPosteriors() + self._bayesian_distribution_caches = BayesianDistributionCaches() + self._bayesian_pair_caches = BayesianPairCaches() + self._bayesian_predictive_datasets = BayesianPredictiveDatasets() + self._set_has_persisted_fit_state(False) + + def _capture_fit_parameter_state(self, parameters: list[Parameter]) -> None: + """Capture pre-fit parameter state into persisted fit-state categories.""" + self._clear_persisted_fit_state() + self.fit_state._set_schema_version(1) + + for param in parameters: + self.fit_parameters.create( + param_unique_name=param.unique_name, + fit_min=param.fit_min, + fit_max=param.fit_max, + fit_bounds_uncertainty_multiplier=param.fit_bounds_uncertainty_multiplier, + start_value=param.value, + start_uncertainty=param.uncertainty, + ) + + self._set_has_persisted_fit_state(True) + + @staticmethod + def _parameter_is_at_fit_bound( + param: Parameter, + *, + use_upper_bound: bool, + ) -> bool: + """Return whether a parameter finished within tolerance of a fit bound.""" + value = param.value + if value is None: + return False + + bound = param.fit_max if use_upper_bound else 
param.fit_min + if not np.isfinite(bound): + return False + + span = param.fit_max - param.fit_min + if np.isfinite(span) and span > 0: + tolerance = BOUNDARY_PROXIMITY_FRACTION * span + else: + tolerance = BOUNDARY_PROXIMITY_FRACTION * max(abs(bound), 1.0) + return abs(value - bound) <= tolerance + + def _selected_parameters_for_fit(self, experiments: list[object]) -> list[Parameter]: + """Return unique live parameters involved in the current fit slice.""" + selected_parameters: list[Parameter] = [] + seen_unique_names: set[str] = set() + + for param in self.project.structures.parameters: + if not isinstance(param, Parameter): + continue + if param.unique_name in seen_unique_names: + continue + selected_parameters.append(param) + seen_unique_names.add(param.unique_name) + + for experiment in experiments: + for param in experiment.parameters: + if not isinstance(param, Parameter): + continue + if param.unique_name in seen_unique_names: + continue + selected_parameters.append(param) + seen_unique_names.add(param.unique_name) + + return selected_parameters + + @staticmethod + def _fit_data_point_count(experiments: list[object]) -> int: + """Return the total number of observed data points in the fit slice.""" + total = 0 + for experiment in experiments: + intensity_category = intensity_category_for(experiment) + total += int(np.asarray(intensity_category.intensity_meas).size) + return total + + @staticmethod + def _resolve_covariance_matrix(results: FitResults) -> np.ndarray | None: + """Return a covariance matrix when the raw fit result exposes one.""" + raw_result = results.engine_result + for attribute_name in ('covar', 'covariance_matrix'): + covariance = getattr(raw_result, attribute_name, None) + if covariance is None: + continue + + covariance_array = np.asarray(covariance, dtype=float) + if covariance_array.ndim != 2: + continue + if covariance_array.shape[0] != covariance_array.shape[1]: + continue + return covariance_array + + return None + + @staticmethod + 
def _correlation_matrix_from_covariance(covariance: np.ndarray) -> np.ndarray | None: + """Return a correlation matrix derived from a covariance matrix.""" + diagonal = np.diag(covariance) + if np.any(diagonal <= 0): + return None + + scales = np.sqrt(diagonal) + denominator = np.outer(scales, scales) + with np.errstate(invalid='ignore', divide='ignore'): + correlation = covariance / denominator + + if not np.all(np.isfinite(correlation)): + return None + return correlation + + @staticmethod + def _resolve_objective_value(results: FitResults) -> float | None: + """Return the objective value stored for a fit result.""" + if results.chi_square is None: + return None + return float(results.chi_square) + + def _store_common_fit_result_projection( + self, + results: FitResults, + *, + result_kind: FitResultKindEnum, + ) -> None: + """Store fields shared by deterministic and Bayesian fit results.""" + self.fit_state._set_schema_version(1) + self.fit_result._set_result_kind(result_kind.value) + self.fit_result._set_success(results.success) + self.fit_result._set_message(results.message) + self.fit_result._set_iterations(results.iterations) + self.fit_result._set_fitting_time(results.fitting_time) + self.fit_result._set_reduced_chi_square(results.reduced_chi_square) + self._set_has_persisted_fit_state(True) + + def _store_correlation_projection( + self, + *, + unique_names: list[str], + correlation_matrix: np.ndarray, + source_kind: FitCorrelationSourceEnum, + ) -> None: + """Store upper-triangle parameter correlations from a correlation matrix.""" + if len(unique_names) <= 1: + return + if correlation_matrix.shape != (len(unique_names), len(unique_names)): + return + + for row_index, unique_name_i in enumerate(unique_names[:-1]): + for column_index in range(row_index + 1, len(unique_names)): + correlation = correlation_matrix[row_index, column_index] + if not np.isfinite(correlation): + continue + self.fit_parameter_correlations.create( + source_kind=source_kind.value, + 
param_unique_name_i=unique_name_i, + param_unique_name_j=unique_names[column_index], + correlation=float(np.clip(correlation, -1.0, 1.0)), + ) + + def _store_deterministic_result_projection( + self, + results: FitResults, + *, + experiments: list[object], + fitted_parameters: list[Parameter], + ) -> None: + """Store deterministic fit-result projections into persisted categories.""" + selected_parameters = self._selected_parameters_for_fit(experiments) + n_parameters = len(selected_parameters) + n_free_parameters = len(fitted_parameters) + n_data_points = self._fit_data_point_count(experiments) + degrees_of_freedom = max(n_data_points - n_free_parameters, 0) + covariance = self._resolve_covariance_matrix(results) + correlation_matrix = ( + self._correlation_matrix_from_covariance(covariance) + if covariance is not None + else None + ) + + self.deterministic_result._set_optimizer_name( + str(self.fitter.minimizer.name or self.fitter.selection) + ) + self.deterministic_result._set_method_name(str(self.fitter.minimizer.method or '')) + self.deterministic_result._set_objective_name('chi_square') + self.deterministic_result._set_objective_value(self._resolve_objective_value(results)) + self.deterministic_result._set_n_data_points(n_data_points) + self.deterministic_result._set_n_parameters(n_parameters) + self.deterministic_result._set_n_free_parameters(n_free_parameters) + self.deterministic_result._set_degrees_of_freedom(degrees_of_freedom) + self.deterministic_result._set_covariance_available(covariance is not None) + self.deterministic_result._set_correlation_available(correlation_matrix is not None) + + for order_index, param in enumerate(fitted_parameters): + self.deterministic_parameter_results.create( + order_index=order_index, + param_unique_name=param.unique_name, + final_value=param.value, + final_uncertainty=param.uncertainty, + at_lower_bound=self._parameter_is_at_fit_bound( + param, + use_upper_bound=False, + ), + 
at_upper_bound=self._parameter_is_at_fit_bound( + param, + use_upper_bound=True, + ), + ) + + if correlation_matrix is not None: + self._store_correlation_projection( + unique_names=[param.unique_name for param in fitted_parameters], + correlation_matrix=correlation_matrix, + source_kind=FitCorrelationSourceEnum.DETERMINISTIC, + ) + + def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None: + """Store Bayesian fit-result projections into persisted categories.""" + credible_interval_inner = 0.68 + credible_interval_outer = 0.95 + if len(results.credible_interval_levels) >= 2: + credible_interval_inner = float(results.credible_interval_levels[0]) + credible_interval_outer = float(results.credible_interval_levels[1]) + + point_estimate_name = results.point_estimate_name or 'best_sample' + sampler_settings = results.sampler_settings + convergence = results.convergence_diagnostics + + self.bayesian_result._set_sampler_name(results.sampler_name) + self.bayesian_result._set_point_estimate_name(point_estimate_name) + self.bayesian_result._set_success(results.success) + self.bayesian_result._set_sampler_completed(results.sampler_completed) + self.bayesian_result._set_best_log_posterior(results.best_log_posterior) + self.bayesian_result._set_credible_interval_inner(credible_interval_inner) + self.bayesian_result._set_credible_interval_outer(credible_interval_outer) + self.bayesian_result._set_has_posterior_samples(results.posterior_samples is not None) + self.bayesian_result._set_has_distribution_cache(False) + self.bayesian_result._set_has_pair_cache(False) + self.bayesian_result._set_has_posterior_predictive(bool(results.posterior_predictive)) + self.bayesian_result._set_sidecar_file('results.h5') + + self.bayesian_sampler._set_steps(int(sampler_settings.get('steps', 0))) + self.bayesian_sampler._set_burn(int(sampler_settings.get('burn', 0))) + self.bayesian_sampler._set_thin(int(sampler_settings.get('thin', 0))) + 
self.bayesian_sampler._set_pop(int(sampler_settings.get('pop', 0))) + self.bayesian_sampler._set_parallel(bool(sampler_settings.get('parallel', False))) + self.bayesian_sampler._set_init(str(sampler_settings.get('init', ''))) + random_seed = sampler_settings.get('random_seed') + self.bayesian_sampler._set_random_seed( + None if random_seed is None else int(random_seed) + ) + + self.bayesian_convergence._set_converged(bool(convergence.get('converged', False))) + self.bayesian_convergence._set_max_r_hat(convergence.get('max_r_hat')) + self.bayesian_convergence._set_min_ess_bulk(convergence.get('min_ess_bulk')) + self.bayesian_convergence._set_n_draws(int(convergence.get('n_draws', 0))) + self.bayesian_convergence._set_n_chains(int(convergence.get('n_chains', 0))) + self.bayesian_convergence._set_n_parameters(int(convergence.get('n_parameters', 0))) + + for order_index, summary in enumerate(results.posterior_parameter_summaries): + self.bayesian_parameter_posteriors.create( + order_index=order_index, + unique_name=summary.unique_name, + display_name=summary.display_name, + best_sample_value=summary.best_sample_value, + median=summary.median, + uncertainty=summary.standard_deviation, + interval_68_lower=summary.interval_68[0], + interval_68_upper=summary.interval_68[1], + interval_95_lower=summary.interval_95[0], + interval_95_upper=summary.interval_95[1], + ess_bulk=summary.ess_bulk, + r_hat=summary.r_hat, + ) + + posterior_samples = results.posterior_samples + if posterior_samples is None: + return + if len(posterior_samples.parameter_names) <= 1: + return + + flattened = posterior_samples.flattened() + correlation_matrix = np.corrcoef(flattened, rowvar=False) + self._store_correlation_projection( + unique_names=list(posterior_samples.parameter_names), + correlation_matrix=correlation_matrix, + source_kind=FitCorrelationSourceEnum.POSTERIOR, + ) + + def _store_fit_result_projection( + self, + results: FitResults, + *, + experiments: list[object], + fitted_parameters: 
list[Parameter], + ) -> None: + """Store the latest fit result into persisted fit-state categories.""" + if isinstance(results, BayesianFitResults): + self._store_common_fit_result_projection( + results, + result_kind=FitResultKindEnum.BAYESIAN, + ) + self._store_bayesian_result_projection(results) + return + + self._store_common_fit_result_projection( + results, + result_kind=FitResultKindEnum.DETERMINISTIC, + ) + self._store_deterministic_result_projection( + results, + experiments=experiments, + fitted_parameters=fitted_parameters, + ) + def _resolve_sequential_data_dir(self) -> Path: """ Resolve the sequential-fit data directory to an absolute path. @@ -924,6 +1255,7 @@ def _run_sequential(self) -> None: self._set_fitting_mode_type(FitModeEnum.SEQUENTIAL.value) self._update_categories() + self._clear_persisted_fit_state() max_workers_value = self._sequential_fit.max_workers.value max_workers = max_workers_value if max_workers_value == 'auto' else int(max_workers_value) @@ -946,6 +1278,7 @@ def _run_sequential(self) -> None: finally: self.fit_results = None self.fitter.results = None + self._clear_persisted_fit_state() if self.project.info.path is not None: self.project.save() diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index 343c1574..a25f04fc 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -21,6 +21,50 @@ from easydiffraction.datablocks.structure.collection import Structures +def _resolve_fit_result_message(results: FitResults) -> str: + """Return a normalized fit-result message.""" + if results.message: + return results.message + + raw_result = results.engine_result + message = getattr(raw_result, 'message', '') + return str(message) if message is not None else '' + + +def _resolve_fit_result_iterations(results: FitResults) -> int: + """Return a normalized iteration or evaluation count.""" + if results.iterations: + return int(results.iterations) + + 
raw_result = results.engine_result + for attribute_name in ('nfev', 'nit', 'iterations', 'niter'): + value = getattr(raw_result, attribute_name, None) + if value is not None: + return int(value) + return 0 + + +def _resolve_fit_result_chi_square(results: FitResults) -> float | None: + """Return a normalized chi-square-like objective value.""" + if results.chi_square is not None: + return float(results.chi_square) + + raw_result = results.engine_result + chisqr = getattr(raw_result, 'chisqr', None) + if chisqr is not None: + return float(chisqr) + + fun = getattr(raw_result, 'fun', None) + if fun is None: + return None + + if np.isscalar(fun): + return float(fun) + + fun_array = np.asarray(fun, dtype=float) + return float(np.sum(fun_array**2)) + + class Fitter: """Handles the fitting workflow using a pluggable minimizer.""" @@ -86,9 +130,16 @@ def fit( params = structures.free_parameters + expt_free_params if not params: + if analysis is not None: + analysis._clear_persisted_fit_state() + analysis.fit_results = None + self.results = None print('⚠️ No parameters selected for fitting.') return + if analysis is not None: + analysis._capture_fit_parameter_state(params) + for param in params: param._fit_start_value = param.value @@ -124,6 +175,17 @@ def objective_function(engine_params: dict[str, Any]) -> np.ndarray: random_seed=random_seed, ) + if self.results is not None: + self.results.message = _resolve_fit_result_message(self.results) + self.results.iterations = _resolve_fit_result_iterations(self.results) + self.results.chi_square = _resolve_fit_result_chi_square(self.results) + if analysis is not None: + analysis._store_fit_result_projection( + self.results, + experiments=experiments, + fitted_parameters=params, + ) + def _process_fit_results( self, structures: Structures, From 386ccec3ddc720d298ef8ec9bf4a00c304ce2ec9 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 16:10:05 +0200 Subject: [PATCH 22/72] Persist Bayesian fit arrays in results 
sidecar --- docs/dev/plans/analysis-cif-fit-state.md | 8 +- src/easydiffraction/analysis/analysis.py | 2 + src/easydiffraction/io/results_sidecar.py | 569 ++++++++++++++++++++++ src/easydiffraction/project/project.py | 11 + 4 files changed, 587 insertions(+), 3 deletions(-) create mode 100644 src/easydiffraction/io/results_sidecar.py diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 948c04fa..5cc4a7bc 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -40,7 +40,7 @@ structure or experiment CIF files. - [x] Phase 1 step 5: add Bayesian cache manifest category models. - [x] Phase 1 step 6: wire analysis CIF save/load for fit state. - [x] Phase 1 step 7: capture fit projections after fitting. -- [ ] Phase 1 step 8: add HDF5 sidecar save/load. +- [x] Phase 1 step 8: add HDF5 sidecar save/load. - [ ] Phase 1 step 9: restore result objects and display cache inputs. - [ ] Phase 1 review gate: stop for human review. - [ ] Phase 2 step 1: add unit tests for new categories. @@ -465,7 +465,8 @@ Capture persisted fit-state projections ### Step 8: Add HDF5 Sidecar Save And Load -The HDF5 dependency decision is approved: add `h5py` directly. +The HDF5 dependency decision is approved and already satisfied: +`pyproject.toml` already lists `h5py` directly. Files likely to change: @@ -477,7 +478,8 @@ Files likely to change: Actions: -1. Add `h5py` as a direct dependency. +1. Confirm `h5py` remains a direct dependency; no edit is needed while + it is already present. 2. Add a small sidecar module for `analysis/results.h5`; keep imports local if the package is heavy. 3. 
Write canonical posterior arrays when available: diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 324d1aea..238b1ddd 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -420,6 +420,7 @@ def __init__(self, project: object) -> None: self._bayesian_pair_caches = BayesianPairCaches() self._bayesian_predictive_datasets = BayesianPredictiveDatasets() self._has_persisted_fit_state_data = False + self._persisted_fit_state_sidecar: dict[str, object] = {} self._fitter = Fitter(self._fitting.minimizer_type.value) self._fit_results = None self._parameter_snapshots: dict[str, dict[str, dict]] = {} @@ -861,6 +862,7 @@ def _clear_persisted_fit_state(self) -> None: self._bayesian_pair_caches = BayesianPairCaches() self._bayesian_predictive_datasets = BayesianPredictiveDatasets() self._set_has_persisted_fit_state(False) + self._persisted_fit_state_sidecar = {} def _capture_fit_parameter_state(self, parameters: list[Parameter]) -> None: """Capture pre-fit parameter state into persisted fit-state categories.""" diff --git a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py new file mode 100644 index 00000000..8dc171a4 --- /dev/null +++ b/src/easydiffraction/io/results_sidecar.py @@ -0,0 +1,569 @@ +"""Read and write persisted Bayesian fit arrays in ``analysis/results.h5``.""" + +from __future__ import annotations + +from pathlib import Path + +import numpy as np + +from easydiffraction.utils.logging import log + +_POSTERIOR_PARAMETER_SAMPLES_PATH = '/posterior/parameter_samples' +_POSTERIOR_LOG_POSTERIOR_PATH = '/posterior/log_posterior' +_POSTERIOR_DRAW_INDEX_PATH = '/posterior/draw_index' + + +def _normalized_hdf5_path(path: str) -> str: + """Return an HDF5 path without a leading slash.""" + return path.lstrip('/') + + +def _sidecar_file_name(analysis: object) -> str: + """Return the configured sidecar file name for an analysis.""" + 
bayesian_result = getattr(analysis, 'bayesian_result', None) + if bayesian_result is None: + return 'results.h5' + + file_name = bayesian_result.sidecar_file.value + if isinstance(file_name, str) and file_name.strip(): + return file_name + return 'results.h5' + + +def _sidecar_path(*, analysis: object, analysis_dir: Path) -> Path: + """Return the results sidecar path inside the analysis directory.""" + return analysis_dir / _sidecar_file_name(analysis) + + +def _should_use_sidecar(analysis: object) -> bool: + """Return whether the analysis currently expects a Bayesian sidecar.""" + has_fit_state = getattr(analysis, '_has_persisted_fit_state', None) + if not callable(has_fit_state) or not has_fit_state(): + return False + + if analysis.fit_result.result_kind.value != 'bayesian': + return False + + return any( + ( + analysis.bayesian_result.has_posterior_samples.value, + len(analysis.bayesian_distribution_caches) > 0, + len(analysis.bayesian_pair_caches) > 0, + len(analysis.bayesian_predictive_datasets) > 0, + ) + ) + + +def _delete_stale_sidecar(sidecar_path: Path) -> None: + """Delete an existing sidecar when no persisted arrays should remain.""" + if sidecar_path.is_file(): + sidecar_path.unlink() + + +def _create_dataset(handle: object, path: str, data: np.ndarray) -> None: + """Create or replace one dataset in an open HDF5 file.""" + normalized_path = _normalized_hdf5_path(path) + group_name, _, dataset_name = normalized_path.rpartition('/') + group = handle.require_group(group_name) if group_name else handle + if dataset_name in group: + del group[dataset_name] + group.create_dataset(dataset_name, data=data) + + +def _read_dataset(handle: object, path: str) -> np.ndarray | None: + """Read one dataset from an open HDF5 file when it exists.""" + normalized_path = _normalized_hdf5_path(path) + if normalized_path not in handle: + return None + return np.asarray(handle[normalized_path]) + + +def _posterior_payload_from_analysis(analysis: object) -> dict[str, 
np.ndarray | None]: + """Return canonical posterior arrays from runtime results or restored sidecar data.""" + fit_results = getattr(analysis, 'fit_results', None) + posterior_samples = getattr(fit_results, 'posterior_samples', None) + if posterior_samples is not None: + return { + 'parameter_samples': np.asarray(posterior_samples.parameter_samples, dtype=float), + 'log_posterior': ( + None + if posterior_samples.log_posterior is None + else np.asarray(posterior_samples.log_posterior, dtype=float) + ), + 'draw_index': ( + None + if posterior_samples.draw_index is None + else np.asarray(posterior_samples.draw_index) + ), + } + + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + return dict(sidecar_data.get('posterior', {})) + + +def _distribution_cache_payload(analysis: object) -> dict[str, dict[str, np.ndarray]]: + """Return persisted distribution-cache arrays keyed by parameter name.""" + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + return dict(sidecar_data.get('distribution_caches', {})) + + +def _pair_cache_payload(analysis: object) -> dict[str, dict[str, np.ndarray]]: + """Return persisted pair-cache arrays keyed by cache id.""" + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + return dict(sidecar_data.get('pair_caches', {})) + + +def _predictive_payload(analysis: object) -> dict[str, dict[str, np.ndarray]]: + """Return persisted predictive arrays keyed by experiment name.""" + fit_results = getattr(analysis, 'fit_results', None) + posterior_predictive = getattr(fit_results, 'posterior_predictive', None) + if posterior_predictive: + payload: dict[str, dict[str, np.ndarray]] = {} + for experiment_name, summary in posterior_predictive.items(): + payload[experiment_name] = { + 'x': np.asarray(summary.x, dtype=float), + 'best_sample_prediction': np.asarray(summary.best_sample_prediction, dtype=float), + } + if summary.lower_95 is not None: + payload[experiment_name]['lower_95'] = 
np.asarray(summary.lower_95, dtype=float) + if summary.upper_95 is not None: + payload[experiment_name]['upper_95'] = np.asarray(summary.upper_95, dtype=float) + if summary.lower_68 is not None: + payload[experiment_name]['lower_68'] = np.asarray(summary.lower_68, dtype=float) + if summary.upper_68 is not None: + payload[experiment_name]['upper_68'] = np.asarray(summary.upper_68, dtype=float) + if summary.draws is not None: + payload[experiment_name]['draws'] = np.asarray(summary.draws, dtype=float) + return payload + + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + return dict(sidecar_data.get('predictive_datasets', {})) + + +def _validate_posterior_payload( + analysis: object, + payload: dict[str, np.ndarray | None], +) -> bool: + """Return whether canonical posterior arrays match manifest metadata.""" + parameter_samples = payload.get('parameter_samples') + if parameter_samples is None: + if analysis.bayesian_result.has_posterior_samples.value: + log.warning('Bayesian fit-state expects posterior samples, but none are available.') + return False + + parameter_samples = np.asarray(parameter_samples, dtype=float) + if parameter_samples.ndim != 3: + log.warning( + 'Posterior parameter samples must have shape (n_draws, n_chains, n_parameters).' + ) + return False + + n_draws, n_chains, n_parameters = parameter_samples.shape + if analysis.bayesian_convergence.n_draws.value not in (0, n_draws): + log.warning('Posterior sample draw count does not match bayesian_convergence.n_draws.') + return False + if analysis.bayesian_convergence.n_chains.value not in (0, n_chains): + log.warning('Posterior sample chain count does not match bayesian_convergence.n_chains.') + return False + if analysis.bayesian_convergence.n_parameters.value not in (0, n_parameters): + log.warning( + 'Posterior sample parameter count does not match bayesian_convergence.n_parameters.' 
+ ) + return False + + log_posterior = payload.get('log_posterior') + if log_posterior is not None and np.asarray(log_posterior).shape != (n_draws, n_chains): + log.warning( + 'Posterior log-posterior array does not match posterior sample draw and chain axes.' + ) + return False + + draw_index = payload.get('draw_index') + if draw_index is not None and np.asarray(draw_index).shape != (n_draws,): + log.warning('Posterior draw-index array does not match posterior sample draw count.') + return False + + return True + + +def _write_posterior_payload(handle: object, analysis: object) -> bool: + """Write canonical posterior arrays when they are available and valid.""" + payload = _posterior_payload_from_analysis(analysis) + if not _validate_posterior_payload(analysis, payload): + return False + + parameter_samples = np.asarray(payload['parameter_samples'], dtype=float) + _create_dataset(handle, _POSTERIOR_PARAMETER_SAMPLES_PATH, parameter_samples) + + log_posterior = payload.get('log_posterior') + if log_posterior is not None: + _create_dataset(handle, _POSTERIOR_LOG_POSTERIOR_PATH, np.asarray(log_posterior)) + + draw_index = payload.get('draw_index') + if draw_index is not None: + _create_dataset(handle, _POSTERIOR_DRAW_INDEX_PATH, np.asarray(draw_index)) + + return True + + +def _write_distribution_caches(handle: object, analysis: object) -> bool: + """Write cached posterior distribution arrays for manifest rows.""" + payload = _distribution_cache_payload(analysis) + wrote_any = False + for cache in analysis.bayesian_distribution_caches: + cache_data = payload.get(cache.param_unique_name.value) + if cache_data is None: + continue + + x_values = np.asarray(cache_data.get('x')) + density_values = np.asarray(cache_data.get('density')) + n_grid = int(cache.n_grid.value) + if x_values.shape != (n_grid,) or density_values.shape != (n_grid,): + log.warning( + 'Skipping Bayesian distribution cache with shape mismatch for ' + f"{cache.param_unique_name.value!r}." 
+ ) + continue + + _create_dataset(handle, cache.x_path.value, x_values) + _create_dataset(handle, cache.density_path.value, density_values) + wrote_any = True + + return wrote_any + + +def _write_pair_caches(handle: object, analysis: object) -> bool: + """Write cached posterior pair-density arrays for manifest rows.""" + payload = _pair_cache_payload(analysis) + wrote_any = False + for cache in analysis.bayesian_pair_caches: + cache_data = payload.get(cache.id.value) + if cache_data is None: + continue + + x_values = np.asarray(cache_data.get('x')) + y_values = np.asarray(cache_data.get('y')) + density_values = np.asarray(cache_data.get('density')) + contour_levels = np.asarray(cache_data.get('contour_levels')) + n_grid_x = int(cache.n_grid_x.value) + n_grid_y = int(cache.n_grid_y.value) + + valid_density_shape = density_values.shape in ( + (n_grid_y, n_grid_x), + (n_grid_x, n_grid_y), + ) + if x_values.shape != (n_grid_x,) or y_values.shape != (n_grid_y,) or not valid_density_shape: + log.warning( + 'Skipping Bayesian pair cache with shape mismatch for ' + f"{cache.id.value!r}." 
+ ) + continue + + _create_dataset(handle, cache.x_path.value, x_values) + _create_dataset(handle, cache.y_path.value, y_values) + _create_dataset(handle, cache.density_path.value, density_values) + _create_dataset(handle, cache.contour_level_path.value, contour_levels) + wrote_any = True + + return wrote_any + + +def _write_predictive_datasets(handle: object, analysis: object) -> bool: + """Write cached posterior predictive arrays for manifest rows.""" + payload = _predictive_payload(analysis) + wrote_any = False + for dataset in analysis.bayesian_predictive_datasets: + dataset_data = payload.get(dataset.experiment_name.value) + if dataset_data is None: + continue + + x_values = np.asarray(dataset_data.get('x')) + best_sample_prediction = np.asarray(dataset_data.get('best_sample_prediction')) + n_x = int(dataset.n_x.value) + if x_values.shape != (n_x,) or best_sample_prediction.shape != (n_x,): + log.warning( + 'Skipping Bayesian predictive dataset with shape mismatch for ' + f"{dataset.experiment_name.value!r}." + ) + continue + + _create_dataset(handle, dataset.x_path.value, x_values) + _create_dataset( + handle, + dataset.best_sample_prediction_path.value, + best_sample_prediction, + ) + + for field_name, path_value in ( + ('lower_95', dataset.lower_95_path.value), + ('upper_95', dataset.upper_95_path.value), + ('lower_68', dataset.lower_68_path.value), + ('upper_68', dataset.upper_68_path.value), + ): + values = dataset_data.get(field_name) + if values is None or path_value is None: + continue + values_array = np.asarray(values) + if values_array.shape != (n_x,): + log.warning( + 'Skipping Bayesian predictive band with shape mismatch for ' + f"{dataset.experiment_name.value!r}:{field_name}." 
+ ) + continue + _create_dataset(handle, path_value, values_array) + + draws = dataset_data.get('draws') + if draws is not None and dataset.draws_path.value is not None: + draws_array = np.asarray(draws) + if draws_array.ndim != 2 or draws_array.shape[1] != n_x: + log.warning( + 'Skipping Bayesian predictive draws with shape mismatch for ' + f"{dataset.experiment_name.value!r}." + ) + else: + if dataset.n_draws_cached.value not in (0, draws_array.shape[0]): + log.warning( + 'Skipping Bayesian predictive draws whose draw count does not match ' + 'the manifest metadata.' + ) + else: + _create_dataset(handle, dataset.draws_path.value, draws_array) + + wrote_any = True + + return wrote_any + + +def write_analysis_results_sidecar( + *, + analysis: object, + analysis_dir: Path, +) -> None: + """ + Write persisted Bayesian arrays to ``analysis/results.h5``. + + Parameters + ---------- + analysis : object + Analysis instance that owns fit-state categories and runtime + fit results. + analysis_dir : Path + The project ``analysis/`` directory. 
+ """ + sidecar_path = _sidecar_path(analysis=analysis, analysis_dir=analysis_dir) + if not _should_use_sidecar(analysis): + _delete_stale_sidecar(sidecar_path) + return + + import h5py # noqa: PLC0415 + from tempfile import NamedTemporaryFile # noqa: PLC0415 + + analysis_dir.mkdir(parents=True, exist_ok=True) + temporary_file = NamedTemporaryFile( + delete=False, + dir=analysis_dir, + prefix=f'{sidecar_path.stem}.', + suffix=sidecar_path.suffix, + ) + temporary_path = Path(temporary_file.name) + temporary_file.close() + + try: + with h5py.File(temporary_path, 'w') as handle: + wrote_any = _write_posterior_payload(handle, analysis) + wrote_any = _write_distribution_caches(handle, analysis) or wrote_any + wrote_any = _write_pair_caches(handle, analysis) or wrote_any + wrote_any = _write_predictive_datasets(handle, analysis) or wrote_any + except Exception: + if temporary_path.exists(): + temporary_path.unlink() + raise + + if not wrote_any: + temporary_path.unlink() + _delete_stale_sidecar(sidecar_path) + return + + temporary_path.replace(sidecar_path) + + +def _read_posterior_payload(handle: object, analysis: object) -> dict[str, np.ndarray]: + """Read canonical posterior arrays from a sidecar file.""" + parameter_samples = _read_dataset(handle, _POSTERIOR_PARAMETER_SAMPLES_PATH) + if parameter_samples is None: + return {} + + payload: dict[str, np.ndarray] = { + 'parameter_samples': np.asarray(parameter_samples, dtype=float), + } + log_posterior = _read_dataset(handle, _POSTERIOR_LOG_POSTERIOR_PATH) + if log_posterior is not None: + payload['log_posterior'] = np.asarray(log_posterior, dtype=float) + draw_index = _read_dataset(handle, _POSTERIOR_DRAW_INDEX_PATH) + if draw_index is not None: + payload['draw_index'] = np.asarray(draw_index) + + if not _validate_posterior_payload(analysis, payload): + return {} + return payload + + +def _read_distribution_caches(handle: object, analysis: object) -> dict[str, dict[str, np.ndarray]]: + """Read cached posterior 
distribution arrays for manifest rows.""" + payload: dict[str, dict[str, np.ndarray]] = {} + for cache in analysis.bayesian_distribution_caches: + x_values = _read_dataset(handle, cache.x_path.value) + density_values = _read_dataset(handle, cache.density_path.value) + if x_values is None or density_values is None: + continue + if x_values.shape != (int(cache.n_grid.value),) or density_values.shape != ( + int(cache.n_grid.value), + ): + log.warning( + 'Skipping restored Bayesian distribution cache with shape mismatch for ' + f"{cache.param_unique_name.value!r}." + ) + continue + payload[cache.param_unique_name.value] = { + 'x': np.asarray(x_values), + 'density': np.asarray(density_values), + } + return payload + + +def _read_pair_caches(handle: object, analysis: object) -> dict[str, dict[str, np.ndarray]]: + """Read cached posterior pair-density arrays for manifest rows.""" + payload: dict[str, dict[str, np.ndarray]] = {} + for cache in analysis.bayesian_pair_caches: + x_values = _read_dataset(handle, cache.x_path.value) + y_values = _read_dataset(handle, cache.y_path.value) + density_values = _read_dataset(handle, cache.density_path.value) + contour_levels = _read_dataset(handle, cache.contour_level_path.value) + if any(value is None for value in (x_values, y_values, density_values, contour_levels)): + continue + + n_grid_x = int(cache.n_grid_x.value) + n_grid_y = int(cache.n_grid_y.value) + valid_density_shape = density_values.shape in ( + (n_grid_y, n_grid_x), + (n_grid_x, n_grid_y), + ) + if x_values.shape != (n_grid_x,) or y_values.shape != (n_grid_y,) or not valid_density_shape: + log.warning( + 'Skipping restored Bayesian pair cache with shape mismatch for ' + f"{cache.id.value!r}." 
+ ) + continue + + payload[cache.id.value] = { + 'x': np.asarray(x_values), + 'y': np.asarray(y_values), + 'density': np.asarray(density_values), + 'contour_levels': np.asarray(contour_levels), + } + return payload + + +def _read_predictive_datasets(handle: object, analysis: object) -> dict[str, dict[str, np.ndarray]]: + """Read cached posterior predictive arrays for manifest rows.""" + payload: dict[str, dict[str, np.ndarray]] = {} + for dataset in analysis.bayesian_predictive_datasets: + x_values = _read_dataset(handle, dataset.x_path.value) + best_sample_prediction = _read_dataset(handle, dataset.best_sample_prediction_path.value) + if x_values is None or best_sample_prediction is None: + continue + + n_x = int(dataset.n_x.value) + if x_values.shape != (n_x,) or best_sample_prediction.shape != (n_x,): + log.warning( + 'Skipping restored Bayesian predictive dataset with shape mismatch for ' + f"{dataset.experiment_name.value!r}." + ) + continue + + dataset_payload: dict[str, np.ndarray] = { + 'x': np.asarray(x_values), + 'best_sample_prediction': np.asarray(best_sample_prediction), + } + + for field_name, path_value in ( + ('lower_95', dataset.lower_95_path.value), + ('upper_95', dataset.upper_95_path.value), + ('lower_68', dataset.lower_68_path.value), + ('upper_68', dataset.upper_68_path.value), + ('draws', dataset.draws_path.value), + ): + if path_value is None: + continue + values = _read_dataset(handle, path_value) + if values is None: + continue + values_array = np.asarray(values) + if field_name == 'draws': + if values_array.ndim != 2 or values_array.shape[1] != n_x: + log.warning( + 'Skipping restored Bayesian predictive draws with shape mismatch for ' + f"{dataset.experiment_name.value!r}." + ) + continue + elif values_array.shape != (n_x,): + log.warning( + 'Skipping restored Bayesian predictive band with shape mismatch for ' + f"{dataset.experiment_name.value!r}:{field_name}." 
+ ) + continue + dataset_payload[field_name] = values_array + + payload[dataset.experiment_name.value] = dataset_payload + return payload + + +def read_analysis_results_sidecar( + *, + analysis: object, + analysis_dir: Path, +) -> None: + """ + Read persisted Bayesian arrays from ``analysis/results.h5``. + + Parameters + ---------- + analysis : object + Analysis instance that owns fit-state categories. + analysis_dir : Path + The project ``analysis/`` directory. + """ + analysis._persisted_fit_state_sidecar = {} + if not _should_use_sidecar(analysis): + return + + sidecar_path = _sidecar_path(analysis=analysis, analysis_dir=analysis_dir) + if not sidecar_path.is_file(): + log.warning( + 'Expected Bayesian results sidecar is missing: ' + f"'{sidecar_path}'. Restoring available CIF summaries only." + ) + return + + import h5py # noqa: PLC0415 + + with h5py.File(sidecar_path, 'r') as handle: + sidecar_data: dict[str, object] = {} + + posterior_payload = _read_posterior_payload(handle, analysis) + if posterior_payload: + sidecar_data['posterior'] = posterior_payload + + distribution_caches = _read_distribution_caches(handle, analysis) + if distribution_caches: + sidecar_data['distribution_caches'] = distribution_caches + + pair_caches = _read_pair_caches(handle, analysis) + if pair_caches: + sidecar_data['pair_caches'] = pair_caches + + predictive_datasets = _read_predictive_datasets(handle, analysis) + if predictive_datasets: + sidecar_data['predictive_datasets'] = predictive_datasets + + analysis._persisted_fit_state_sidecar = sidecar_data \ No newline at end of file diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index f023f5c3..016b697d 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -300,6 +300,7 @@ def load(cls, dir_path: str) -> Project: """ from easydiffraction.io.cif.serialize import analysis_from_cif # noqa: PLC0415 from easydiffraction.io.cif.serialize import 
project_config_from_cif # noqa: PLC0415 + from easydiffraction.io.results_sidecar import read_analysis_results_sidecar # noqa: PLC0415 project_path = pathlib.Path(dir_path) if not project_path.is_dir(): @@ -344,6 +345,10 @@ def load(cls, dir_path: str) -> Project: if analysis_cif_path.is_file(): cif_text = analysis_cif_path.read_text() analysis_from_cif(project._analysis, cif_text) + read_analysis_results_sidecar( + analysis=project._analysis, + analysis_dir=analysis_cif_path.parent, + ) # 5. Resolve alias param references project._resolve_alias_references() @@ -398,6 +403,8 @@ def _build_parameter_map(self) -> dict[str, object]: def save(self) -> None: """Save the project into the existing project directory.""" + from easydiffraction.io.results_sidecar import write_analysis_results_sidecar # noqa: PLC0415 + if self.info.path is None: log.error('Project path not specified. Use save_as() to define the path first.') return @@ -446,6 +453,10 @@ def save(self) -> None: with (analysis_dir / 'analysis.cif').open('w') as f: f.write(self.analysis.as_cif) console.print('├── 📁 analysis/') + write_analysis_results_sidecar( + analysis=self.analysis, + analysis_dir=analysis_dir, + ) analysis_file_names = sorted( path.name for path in analysis_dir.iterdir() if path.is_file() From 7cf25bdba65aa4e015e19d9eb12ce47ba0c2f78f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 16:25:50 +0200 Subject: [PATCH 23/72] Restore fit results from saved analysis state --- docs/dev/plans/analysis-cif-fit-state.md | 2 +- src/easydiffraction/analysis/analysis.py | 289 +++++++++++++++++++++++ src/easydiffraction/display/plotting.py | 250 +++++++++++++++++--- src/easydiffraction/project/display.py | 3 +- 4 files changed, 504 insertions(+), 40 deletions(-) diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 5cc4a7bc..74795ba9 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ 
-41,7 +41,7 @@ structure or experiment CIF files. - [x] Phase 1 step 6: wire analysis CIF save/load for fit state. - [x] Phase 1 step 7: capture fit projections after fitting. - [x] Phase 1 step 8: add HDF5 sidecar save/load. -- [ ] Phase 1 step 9: restore result objects and display cache inputs. +- [x] Phase 1 step 9: restore result objects and display cache inputs. - [ ] Phase 1 review gate: stop for human review. - [ ] Phase 2 step 1: add unit tests for new categories. - [ ] Phase 2 step 2: add CIF and project save/load tests. diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 238b1ddd..68bcc160 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -41,6 +41,9 @@ SequentialFitExtractCollection, ) from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults +from easydiffraction.analysis.fit_helpers.bayesian import PosteriorParameterSummary +from easydiffraction.analysis.fit_helpers.bayesian import PosteriorPredictiveSummary +from easydiffraction.analysis.fit_helpers.bayesian import PosteriorSamples from easydiffraction.analysis.fit_helpers.reporting import FitResults from easydiffraction.analysis.enums import FitCorrelationSourceEnum from easydiffraction.analysis.enums import FitModeEnum @@ -458,11 +461,297 @@ def fitter(self, value: Fitter) -> None: @property def fit_results(self) -> object | None: """Results from the most recent fit, if any.""" + if self._fit_results is None and self._has_persisted_fit_state(): + self._restore_fit_results_from_projection() return self._fit_results @fit_results.setter def fit_results(self, value: object | None) -> None: self._fit_results = value + self._fitter.results = value + + @staticmethod + def _predictive_cache_key( + experiment_name: str, + x_axis_name: str, + *, + include_draws: bool = True, + ) -> str: + """Return the runtime cache key for one predictive summary.""" + key_suffix = 'draws' if 
include_draws else 'band' + return f'{experiment_name}:{x_axis_name}:{key_suffix}' + + def _live_parameter_map(self) -> dict[str, Parameter]: + """Return live structure and experiment parameters keyed by unique name.""" + all_parameters = self.project.structures.parameters + self.project.experiments.parameters + return { + param.unique_name: param + for param in all_parameters + if isinstance(param, Parameter) and hasattr(param, 'unique_name') + } + + def _ordered_restored_parameter_names(self) -> list[str]: + """Return persisted parameter names in display and array order.""" + if self.fit_result.result_kind.value == FitResultKindEnum.BAYESIAN.value: + posterior_rows = sorted( + list(self.bayesian_parameter_posteriors), + key=lambda row: int(row.order_index.value), + ) + if posterior_rows: + return [row.unique_name.value for row in posterior_rows] + + deterministic_rows = sorted( + list(self.deterministic_parameter_results), + key=lambda row: int(row.order_index.value), + ) + if deterministic_rows: + return [row.param_unique_name.value for row in deterministic_rows] + + return [row.param_unique_name.value for row in self.fit_parameters] + + def _restore_live_parameter_state(self, param_map: dict[str, Parameter]) -> None: + """Restore persisted fit metadata onto live parameter objects.""" + for row in self.fit_parameters: + parameter = param_map.get(row.param_unique_name.value) + if parameter is None: + log.warning( + 'Persisted fit-state references unknown parameter ' + f"{row.param_unique_name.value!r}." 
+ ) + continue + + parameter.fit_min = row.fit_min.value + parameter.fit_max = row.fit_max.value + parameter.fit_bounds_uncertainty_multiplier = ( + row.fit_bounds_uncertainty_multiplier.value + ) + parameter._fit_start_value = row.start_value.value + parameter._fit_start_uncertainty = row.start_uncertainty.value + + for row in self.deterministic_parameter_results: + parameter = param_map.get(row.param_unique_name.value) + if parameter is None or row.final_uncertainty.value is None: + continue + parameter.uncertainty = float(row.final_uncertainty.value) + + for row in self.bayesian_parameter_posteriors: + parameter = param_map.get(row.unique_name.value) + if parameter is None or row.uncertainty.value is None: + continue + parameter.uncertainty = float(row.uncertainty.value) + + def _restored_fit_parameters(self, param_map: dict[str, Parameter]) -> list[Parameter]: + """Return live parameters in the persisted fit-result order.""" + restored_parameters: list[Parameter] = [] + for unique_name in self._ordered_restored_parameter_names(): + parameter = param_map.get(unique_name) + if parameter is not None: + restored_parameters.append(parameter) + return restored_parameters + + def _restored_posterior_samples(self) -> PosteriorSamples | None: + """Return restored posterior samples from the HDF5 sidecar.""" + if not self.bayesian_result.has_posterior_samples.value: + return None + + posterior_data = self._persisted_fit_state_sidecar.get('posterior', {}) + parameter_samples = posterior_data.get('parameter_samples') + if parameter_samples is None: + return None + + posterior_rows = sorted( + list(self.bayesian_parameter_posteriors), + key=lambda row: int(row.order_index.value), + ) + parameter_names = [row.unique_name.value for row in posterior_rows] + if not parameter_names: + parameter_names = [row.param_unique_name.value for row in self.fit_parameters] + + parameter_sample_array = np.asarray(parameter_samples, dtype=float) + if parameter_sample_array.ndim != 3: + 
log.warning('Persisted posterior samples have an invalid shape for restore.') + return None + if parameter_sample_array.shape[2] != len(parameter_names): + log.warning( + 'Persisted posterior samples do not match restored posterior parameter names.' + ) + return None + + log_posterior = posterior_data.get('log_posterior') + draw_index = posterior_data.get('draw_index') + return PosteriorSamples( + parameter_names=parameter_names, + parameter_samples=parameter_sample_array, + log_posterior=( + None if log_posterior is None else np.asarray(log_posterior, dtype=float) + ), + draw_index=None if draw_index is None else np.asarray(draw_index), + ) + + def _restored_posterior_summaries(self) -> list[PosteriorParameterSummary]: + """Return posterior summary rows as runtime summary objects.""" + restored_summaries: list[PosteriorParameterSummary] = [] + posterior_rows = sorted( + list(self.bayesian_parameter_posteriors), + key=lambda row: int(row.order_index.value), + ) + for row in posterior_rows: + restored_summaries.append( + PosteriorParameterSummary( + unique_name=row.unique_name.value, + display_name=row.display_name.value, + best_sample_value=float(row.best_sample_value.value), + median=float(row.median.value), + standard_deviation=float(row.uncertainty.value), + interval_68=( + float(row.interval_68_lower.value), + float(row.interval_68_upper.value), + ), + interval_95=( + float(row.interval_95_lower.value), + float(row.interval_95_upper.value), + ), + ess_bulk=row.ess_bulk.value, + r_hat=row.r_hat.value, + ) + ) + return restored_summaries + + def _restored_predictive_summaries(self) -> dict[str, PosteriorPredictiveSummary]: + """Return restored posterior predictive summaries keyed for runtime reuse.""" + restored_predictive: dict[str, PosteriorPredictiveSummary] = {} + predictive_data = self._persisted_fit_state_sidecar.get('predictive_datasets', {}) + for row in self.bayesian_predictive_datasets: + dataset = predictive_data.get(row.experiment_name.value) + if 
dataset is None: + continue + + summary = PosteriorPredictiveSummary( + experiment_name=row.experiment_name.value, + x_axis_name=row.x_axis_name.value, + x=np.asarray(dataset['x'], dtype=float), + best_sample_prediction=np.asarray( + dataset['best_sample_prediction'], + dtype=float, + ), + lower_95=( + None + if dataset.get('lower_95') is None + else np.asarray(dataset['lower_95'], dtype=float) + ), + upper_95=( + None + if dataset.get('upper_95') is None + else np.asarray(dataset['upper_95'], dtype=float) + ), + lower_68=( + None + if dataset.get('lower_68') is None + else np.asarray(dataset['lower_68'], dtype=float) + ), + upper_68=( + None + if dataset.get('upper_68') is None + else np.asarray(dataset['upper_68'], dtype=float) + ), + draws=( + None + if dataset.get('draws') is None + else np.asarray(dataset['draws'], dtype=float) + ), + ) + restored_predictive[row.experiment_name.value] = summary + restored_predictive[ + self._predictive_cache_key( + row.experiment_name.value, + row.x_axis_name.value, + include_draws=False, + ) + ] = summary + if summary.draws is not None: + restored_predictive[ + self._predictive_cache_key( + row.experiment_name.value, + row.x_axis_name.value, + include_draws=True, + ) + ] = summary + return restored_predictive + + def _restore_fit_results_from_projection(self) -> object | None: + """Rebuild a lightweight runtime fit-result object from persisted state.""" + if not self._has_persisted_fit_state(): + return None + + param_map = self._live_parameter_map() + self._restore_live_parameter_state(param_map) + restored_parameters = self._restored_fit_parameters(param_map) + fitting_time = self.fit_result.fitting_time.value + reduced_chi_square = self.fit_result.reduced_chi_square.value + + if self.fit_result.result_kind.value == FitResultKindEnum.BAYESIAN.value: + restored_results = BayesianFitResults( + success=bool(self.fit_result.success.value), + parameters=restored_parameters, + reduced_chi_square=reduced_chi_square, + 
starting_parameters=list(restored_parameters), + fitting_time=fitting_time, + sampler_name=self.bayesian_result.sampler_name.value, + point_estimate_name=self.bayesian_result.point_estimate_name.value, + posterior_samples=self._restored_posterior_samples(), + posterior_parameter_summaries=self._restored_posterior_summaries(), + posterior_predictive=self._restored_predictive_summaries(), + credible_interval_levels=( + float(self.bayesian_result.credible_interval_inner.value), + float(self.bayesian_result.credible_interval_outer.value), + ), + sampler_settings={ + 'steps': int(self.bayesian_sampler.steps.value), + 'burn': int(self.bayesian_sampler.burn.value), + 'thin': int(self.bayesian_sampler.thin.value), + 'pop': int(self.bayesian_sampler.pop.value), + 'parallel': bool(self.bayesian_sampler.parallel.value), + 'init': self.bayesian_sampler.init.value, + 'random_seed': self.bayesian_sampler.random_seed.value, + }, + convergence_diagnostics={ + 'converged': bool(self.bayesian_convergence.converged.value), + 'max_r_hat': self.bayesian_convergence.max_r_hat.value, + 'min_ess_bulk': self.bayesian_convergence.min_ess_bulk.value, + 'n_draws': int(self.bayesian_convergence.n_draws.value), + 'n_chains': int(self.bayesian_convergence.n_chains.value), + 'n_parameters': int(self.bayesian_convergence.n_parameters.value), + }, + sampler_completed=bool(self.bayesian_result.sampler_completed.value), + best_log_posterior=self.bayesian_result.best_log_posterior.value, + ) + restored_results.message = self.fit_result.message.value + restored_results.iterations = int(self.fit_result.iterations.value) + self.fit_results = restored_results + return restored_results + + restored_results = FitResults( + success=bool(self.fit_result.success.value), + parameters=restored_parameters, + reduced_chi_square=reduced_chi_square, + starting_parameters=list(restored_parameters), + fitting_time=fitting_time, + optimizer_name=self.deterministic_result.optimizer_name.value, + 
method_name=self.deterministic_result.method_name.value, + objective_name=self.deterministic_result.objective_name.value, + objective_value=self.deterministic_result.objective_value.value, + n_data_points=int(self.deterministic_result.n_data_points.value), + n_parameters=int(self.deterministic_result.n_parameters.value), + n_free_parameters=int(self.deterministic_result.n_free_parameters.value), + degrees_of_freedom=int(self.deterministic_result.degrees_of_freedom.value), + covariance_available=bool(self.deterministic_result.covariance_available.value), + correlation_available=bool(self.deterministic_result.correlation_available.value), + ) + restored_results.message = self.fit_result.message.value + restored_results.iterations = int(self.fit_result.iterations.value) + restored_results.chi_square = self.deterministic_result.objective_value.value + self.fit_results = restored_results + return restored_results def help(self) -> None: """Print a summary of analysis properties and methods.""" diff --git a/src/easydiffraction/display/plotting.py b/src/easydiffraction/display/plotting.py index 85cb1545..16e3e680 100644 --- a/src/easydiffraction/display/plotting.py +++ b/src/easydiffraction/display/plotting.py @@ -16,6 +16,8 @@ import numpy as np import pandas as pd +from easydiffraction.analysis.enums import FitCorrelationSourceEnum +from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.fit_helpers.bayesian import PosteriorPredictiveSummary from easydiffraction.datablocks.experiment.item.base import intensity_category_for from easydiffraction.datablocks.experiment.item.enums import SampleFormEnum @@ -1573,13 +1575,15 @@ def _get_param_correlation_dataframe(self) -> pd.DataFrame | None: return corr_df raw_result = self._raw_fit_result_for_correlation(fit_results) - if raw_result is None: - return None + if raw_result is not None: + corr_df = self._correlation_dataframe_from_engine_result( + raw_result=raw_result, + 
parameters=fit_results.parameters, + ) + if corr_df is not None: + return corr_df - corr_df = self._correlation_dataframe_from_engine_result( - raw_result=raw_result, - parameters=fit_results.parameters, - ) + corr_df = self._correlation_dataframe_from_persisted_projection(fit_results) if corr_df is not None: return corr_df @@ -1606,15 +1610,71 @@ def _raw_fit_result_for_correlation(fit_results: object) -> object | None: if raw_result is None: raw_result = getattr(fit_results, 'engine_result', None) if raw_result is None: - log.warning('No raw fit result available. Correlation matrix cannot be plotted.') return None var_names = getattr(raw_result, 'var_names', None) if not var_names: - log.warning('Fit result does not expose variable names for a correlation matrix.') return None return raw_result + def _correlation_dataframe_from_persisted_projection( + self, + fit_results: object, + ) -> pd.DataFrame | None: + """Return correlations restored from persisted fit-state rows.""" + if self._project is None: + return None + + analysis = self._project.analysis + source_kind = ( + FitCorrelationSourceEnum.POSTERIOR.value + if analysis.fit_result.result_kind.value == FitResultKindEnum.BAYESIAN.value + else FitCorrelationSourceEnum.DETERMINISTIC.value + ) + correlation_rows = [ + row + for row in analysis.fit_parameter_correlations + if row.source_kind.value == source_kind + ] + if not correlation_rows: + return None + + parameter_names = [ + getattr(parameter, 'unique_name', '') + for parameter in getattr(fit_results, 'parameters', []) + if getattr(parameter, 'unique_name', None) + ] + if not parameter_names: + parameter_names = [ + summary.unique_name + for summary in getattr(fit_results, 'posterior_parameter_summaries', []) + ] + + for row in correlation_rows: + parameter_names.extend( + [row.param_unique_name_i.value, row.param_unique_name_j.value] + ) + parameter_names = list(dict.fromkeys(parameter_names)) + if len(parameter_names) < 2: + return None + + 
correlation_values = np.eye(len(parameter_names), dtype=float) + corr_df = pd.DataFrame( + correlation_values, + index=parameter_names, + columns=parameter_names, + ) + wrote_any = False + for row in correlation_rows: + i_name = row.param_unique_name_i.value + j_name = row.param_unique_name_j.value + if i_name not in corr_df.index or j_name not in corr_df.index: + continue + corr_df.loc[i_name, j_name] = float(row.correlation.value) + corr_df.loc[j_name, i_name] = float(row.correlation.value) + wrote_any = True + return corr_df if wrote_any else None + def _correlation_dataframe_from_engine_result( self, *, @@ -2446,31 +2506,41 @@ def _posterior_contour_traces( """ go = __import__('plotly.graph_objects', fromlist=['Contour']) - bounds = self._posterior_pair_bounds( - fit_results=fit_results, + cached_surface = self._cached_posterior_pair_surface( x_parameter_name=x_parameter_name, y_parameter_name=y_parameter_name, - x_values=x_values, - y_values=y_values, - ) - density_surface = self._posterior_pair_density_surface( - x_values=x_values, - y_values=y_values, - x_bounds=bounds[0], - y_bounds=bounds[1], - grid_size=grid_size, ) - if density_surface is None: - return None + contour_levels = None + if cached_surface is None: + bounds = self._posterior_pair_bounds( + fit_results=fit_results, + x_parameter_name=x_parameter_name, + y_parameter_name=y_parameter_name, + x_values=x_values, + y_values=y_values, + ) + density_surface = self._posterior_pair_density_surface( + x_values=x_values, + y_values=y_values, + x_bounds=bounds[0], + y_bounds=bounds[1], + grid_size=grid_size, + ) + if density_surface is None: + return None + + x_grid, y_grid, density = density_surface + else: + x_grid, y_grid, density, contour_levels = cached_surface - x_grid, y_grid, density = density_surface fill_colorscale, line_colorscale = self._posterior_pair_contour_colorscales( x_values, y_values, ) - contour_start = float(np.max(density) * 0.20) - contour_end = float(np.max(density) * 0.95) - 
contour_size = float(np.max(density) * 0.15) + contour_start, contour_end, contour_size = self._posterior_contour_levels( + density=density, + contour_levels=contour_levels, + ) fill_density = np.array(density, copy=True) fill_density[fill_density < contour_start] = np.nan fill_trace = go.Contour( @@ -2516,6 +2586,78 @@ def _posterior_contour_traces( ) return fill_trace, line_trace + def _cached_posterior_pair_surface( + self, + *, + x_parameter_name: str, + y_parameter_name: str, + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray | None] | None: + """Return a restored posterior pair-density surface when available.""" + if self._project is None: + return None + + analysis = self._project.analysis + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + pair_caches = sidecar_data.get('pair_caches', {}) + for cache in analysis.bayesian_pair_caches: + cache_x = cache.param_unique_name_x.value + cache_y = cache.param_unique_name_y.value + if {cache_x, cache_y} != {x_parameter_name, y_parameter_name}: + continue + + cache_data = pair_caches.get(cache.id.value) + if cache_data is None: + return None + + x_grid = np.asarray(cache_data.get('x'), dtype=float) + y_grid = np.asarray(cache_data.get('y'), dtype=float) + density = np.asarray(cache_data.get('density'), dtype=float) + contour_levels = cache_data.get('contour_levels') + if contour_levels is not None: + contour_levels = np.asarray(contour_levels, dtype=float) + + if x_parameter_name != cache_x or y_parameter_name != cache_y: + x_grid, y_grid = y_grid, x_grid + if density.ndim == 2: + density = density.T + + expected_shape = (y_grid.size, x_grid.size) + if x_grid.ndim != 1 or y_grid.ndim != 1 or density.shape != expected_shape: + log.warning( + 'Persisted posterior pair cache is invalid for ' + f'{x_parameter_name!r} and {y_parameter_name!r}.' 
+ ) + return None + return x_grid, y_grid, density, contour_levels + + return None + + @staticmethod + def _posterior_contour_levels( + *, + density: np.ndarray, + contour_levels: np.ndarray | None, + ) -> tuple[float, float, float]: + """Return contour start, end, and step for one pair-density surface.""" + if contour_levels is not None and contour_levels.ndim == 1 and contour_levels.size > 0: + finite_levels = contour_levels[np.isfinite(contour_levels)] + if finite_levels.size > 0: + start = float(finite_levels[0]) + end = float(finite_levels[-1]) + if finite_levels.size > 1: + size = float(np.min(np.diff(finite_levels))) + else: + size = max(end - start, abs(end) * 0.15, 1e-6) + if end > start and size > 0: + return start, end, size + + density_max = float(np.max(density)) + return ( + density_max * 0.20, + density_max * 0.95, + density_max * 0.15, + ) + def _build_param_distribution_plot( self, param: object, @@ -2608,11 +2750,13 @@ def _plot_ascii_param_distribution( fit_results=context.fit_results, parameter_name=context.parameter_name, ) - density_curve = self._posterior_density_curve( - context.values, - lower_bound=lower_bound, - upper_bound=upper_bound, - ) + density_curve = self._cached_posterior_density_curve(context.parameter_name) + if density_curve is None: + density_curve = self._posterior_density_curve( + context.values, + lower_bound=lower_bound, + upper_bound=upper_bound, + ) if density_curve is None: log.warning( f'Posterior distribution is unavailable for parameter {context.parameter_name}.' 
@@ -2629,6 +2773,28 @@ def _plot_ascii_param_distribution( height=self.height, ) + def _cached_posterior_density_curve( + self, + parameter_name: str, + ) -> tuple[np.ndarray, np.ndarray] | None: + """Return a restored posterior density curve for one parameter.""" + if self._project is None: + return None + + sidecar_data = getattr(self._project.analysis, '_persisted_fit_state_sidecar', {}) + cache_data = sidecar_data.get('distribution_caches', {}).get(parameter_name) + if cache_data is None: + return None + + x_values = np.asarray(cache_data.get('x'), dtype=float) + density_values = np.asarray(cache_data.get('density'), dtype=float) + if x_values.ndim != 1 or density_values.shape != x_values.shape: + log.warning( + f'Persisted posterior distribution cache is invalid for {parameter_name!r}.' + ) + return None + return x_values, density_values + def _posterior_distribution_context( self, param: object, @@ -2974,11 +3140,13 @@ def _posterior_density_trace( fit_results=fit_results, parameter_name=parameter_name, ) - density_curve = self._posterior_density_curve( - values, - lower_bound=lower_bound, - upper_bound=upper_bound, - ) + density_curve = self._cached_posterior_density_curve(parameter_name) + if density_curve is None: + density_curve = self._posterior_density_curve( + values, + lower_bound=lower_bound, + upper_bound=upper_bound, + ) if density_curve is None: return None @@ -3312,8 +3480,7 @@ def _get_or_build_posterior_predictive_summary( return None posterior_predictive = getattr(fit_results, 'posterior_predictive', None) - posterior_samples = getattr(fit_results, 'posterior_samples', None) - if posterior_predictive is None or posterior_samples is None: + if posterior_predictive is None: return None x_axis_name = getattr(x_axis, 'value', x_axis) @@ -3331,9 +3498,18 @@ def _get_or_build_posterior_predictive_summary( summary = posterior_predictive.get(cache_key) if summary is None and not include_draws: summary = posterior_predictive.get(draw_cache_key) + if 
summary is None: + summary = posterior_predictive.get(expt_name) + summary_x_axis = getattr(summary, 'x_axis_name', None) + if summary is not None and str(summary_x_axis) != str(x_axis_name): + summary = None if summary is not None: return summary + posterior_samples = getattr(fit_results, 'posterior_samples', None) + if posterior_samples is None: + return None + summary = self._build_posterior_predictive_summary( fit_results=fit_results, experiment=experiment, diff --git a/src/easydiffraction/project/display.py b/src/easydiffraction/project/display.py index e47c8ef7..219ac451 100644 --- a/src/easydiffraction/project/display.py +++ b/src/easydiffraction/project/display.py @@ -739,9 +739,8 @@ def _uncertainty_status( if fit_results is None: return False, 'No fit results are available.' - posterior_samples = getattr(fit_results, 'posterior_samples', None) posterior_predictive = getattr(fit_results, 'posterior_predictive', None) - if posterior_samples is None or posterior_predictive is None: + if not posterior_predictive: return False, 'Posterior predictive data is unavailable.' 
active_chart_engine = getattr(self._project.rendering.plotter, 'engine', None) From 0de17d43dc45a6221af30e62923318786d02060e Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 16:37:06 +0200 Subject: [PATCH 24/72] Align persisted fit-state schema with review feedback --- .../suggestions/analysis-cif-fit-state.md | 72 ++++++++----------- docs/dev/plans/analysis-cif-fit-state.md | 70 +++++++++--------- src/easydiffraction/analysis/__init__.py | 2 - src/easydiffraction/analysis/analysis.py | 42 +++-------- .../analysis/categories/__init__.py | 1 - .../bayesian_pair_caches/default.py | 22 +----- .../bayesian_parameter_posteriors/default.py | 19 ----- .../categories/bayesian_sampler/default.py | 15 ++-- .../default.py | 21 ------ .../fit_parameter_correlations/default.py | 24 +------ src/easydiffraction/io/cif/serialize.py | 15 ---- 11 files changed, 87 insertions(+), 216 deletions(-) diff --git a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md index a3a0df9e..0a287a4b 100644 --- a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md +++ b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md @@ -54,7 +54,6 @@ Existing categories remain responsible for existing configuration: New common fit-state categories are: -- `_fit_state` - `_fit_parameter` - `_fit_result` - `_fit_parameter_correlation` @@ -77,19 +76,11 @@ Bayesian-specific categories are: Bulk arrays referenced by Bayesian categories live in `analysis/results.h5`. -### 2. Add `_fit_state` for schema versioning +### 2. Do not add a dedicated `_fit_state` schema category -`_fit_state` is a single-item category for the persisted fit-state -schema: - -```cif -_fit_state.schema_version 1 -``` - -This version applies to the fit-state CIF categories and any HDF5 -sidecar manifests they reference. It is not the EasyDiffraction package -version. 
Individual result categories should not repeat `schema_version` -unless they later need independent evolution. +Persisted fit state is detected from the presence of `_fit_result` and +the related fit-state loops. Do not add a dedicated `_fit_state` +category or a standalone `schema_version` tag for this feature. ### 3. Add `_fit_parameter` for per-parameter fit controls @@ -167,7 +158,7 @@ _fit_parameter_correlation.source_kind _fit_parameter_correlation.param_unique_name_i _fit_parameter_correlation.param_unique_name_j _fit_parameter_correlation.correlation -"posterior:lbco.cell.length_a:hrpt.peak.broad_gauss_u" posterior lbco.cell.length_a hrpt.peak.broad_gauss_u 0.87 +1 posterior lbco.cell.length_a hrpt.peak.broad_gauss_u 0.87 ``` Fields: @@ -180,9 +171,8 @@ Fields: Rows are keyed by the persisted `id` field so each correlation pair has stable collection identity in both Python and CIF. When a caller does -not provide an explicit `id`, implementations should derive one from -the normalized `source_kind`, `param_unique_name_i`, and -`param_unique_name_j` values. +not provide an explicit `id`, implementations should generate a simple +sequential numeric identifier such as `1`, `2`, `3`, and so on. Only the upper triangle excluding the diagonal is stored. Correlation heatmaps can be restored from this loop alone. Posterior pair plots @@ -210,13 +200,15 @@ categories for optimizer details and parameter-result display state. `_deterministic_parameter_result` stores one row per parameter varied in the latest deterministic fit: -- `order_index` - `param_unique_name` - `final_value` - `final_uncertainty` - `at_lower_bound` - `at_upper_bound` +Loop order is the display order for restored deterministic parameter +results. + `final_value` and `final_uncertainty` are a result projection for display and consistency checks. 
The calculation source of truth remains the live parameter value and uncertainty restored from structure and @@ -259,6 +251,9 @@ Bayesian categories in `analysis/analysis.cif`. - `init` - `random_seed` +`parallel` stores the resolved non-negative DREAM worker count. `0` +means use all CPUs. + `_bayesian_convergence` stores top-level diagnostics and shapes: - `converged` @@ -271,7 +266,6 @@ Bayesian categories in `analysis/analysis.cif`. `_bayesian_parameter_posterior` stores one posterior summary row per sampled parameter: -- `order_index` - `unique_name` - `display_name` - `best_sample_value` @@ -284,10 +278,10 @@ sampled parameter: - `ess_bulk` - `r_hat` -`order_index` defines the parameter column order in posterior sample -arrays stored in the HDF5 sidecar. `parameter.posterior` is rebuilt from -this loop on load; posterior summary data is not duplicated in structure -or experiment CIF files. +Loop order defines the parameter column order in posterior sample arrays +stored in the HDF5 sidecar. `parameter.posterior` is rebuilt from this +loop on load; posterior summary data is not duplicated in structure or +experiment CIF files. ### 8. Store plot-ready Bayesian caches in explicit manifest categories @@ -321,8 +315,8 @@ caches therefore have their own manifest categories in `_bayesian_pair_cache` rows are keyed by the persisted `id` field so each cached parameter pair has stable identity in both Python and CIF. When a caller does not provide an explicit `id`, implementations should -derive one from the normalized `param_unique_name_x` and -`param_unique_name_y` values. +generate a simple sequential numeric identifier such as `1`, `2`, `3`, +and so on. `_bayesian_predictive_dataset` supports `project.display.posterior.predictive(...)`: @@ -437,14 +431,13 @@ Load order should be: 1. standard analysis configuration 2. aliases and constraints 3. active mode-specific settings -4. `_fit_state` -5. `_fit_parameter` -6. `_fit_result` -7. `_fit_parameter_correlation` -8. 
deterministic metadata categories when `result_kind` is +4. `_fit_parameter` +5. `_fit_result` +6. `_fit_parameter_correlation` +7. deterministic metadata categories when `result_kind` is `deterministic` -9. Bayesian metadata categories when `result_kind` is `bayesian` -10. Bayesian HDF5 sidecar arrays and plot caches +8. Bayesian metadata categories when `result_kind` is `bayesian` +9. Bayesian HDF5 sidecar arrays and plot caches This ensures bounds and live parameter references are available before fit-specific summaries and cached plot data are attached. @@ -457,8 +450,6 @@ Suggested deterministic `analysis/analysis.cif` fragment: _fitting.mode_type single _fitting.minimizer_type "lmfit (leastsq)" -_fit_state.schema_version 1 - loop_ _fit_parameter.param_unique_name _fit_parameter.fit_min @@ -488,21 +479,21 @@ _deterministic_result.covariance_available true _deterministic_result.correlation_available true loop_ -_deterministic_parameter_result.order_index _deterministic_parameter_result.param_unique_name _deterministic_parameter_result.final_value _deterministic_parameter_result.final_uncertainty _deterministic_parameter_result.at_lower_bound _deterministic_parameter_result.at_upper_bound -0 lbco.cell.length_a 3.89091 0.0003 false false -1 hrpt.peak.broad_gauss_u 0.08 0.007 false false +lbco.cell.length_a 3.89091 0.0003 false false +hrpt.peak.broad_gauss_u 0.08 0.007 false false loop_ +_fit_parameter_correlation.id _fit_parameter_correlation.source_kind _fit_parameter_correlation.param_unique_name_i _fit_parameter_correlation.param_unique_name_j _fit_parameter_correlation.correlation -deterministic lbco.cell.length_a hrpt.peak.broad_gauss_u 0.42 +1 deterministic lbco.cell.length_a hrpt.peak.broad_gauss_u 0.42 ``` Suggested Bayesian `analysis/analysis.cif` fragment: @@ -511,8 +502,6 @@ Suggested Bayesian `analysis/analysis.cif` fragment: _fitting.mode_type single _fitting.minimizer_type "bumps (dream)" -_fit_state.schema_version 1 - loop_ 
_fit_parameter.param_unique_name _fit_parameter.fit_min @@ -559,7 +548,6 @@ _bayesian_convergence.n_chains 20 _bayesian_convergence.n_parameters 2 loop_ -_bayesian_parameter_posterior.order_index _bayesian_parameter_posterior.unique_name _bayesian_parameter_posterior.display_name _bayesian_parameter_posterior.best_sample_value @@ -571,7 +559,7 @@ _bayesian_parameter_posterior.interval_95_lower _bayesian_parameter_posterior.interval_95_upper _bayesian_parameter_posterior.ess_bulk _bayesian_parameter_posterior.r_hat -0 lbco.cell.length_a "length_a" 3.89091 3.89090 0.0003 3.8906 3.8912 3.8903 3.8915 812.4 1.01 +lbco.cell.length_a "length_a" 3.89091 3.89090 0.0003 3.8906 3.8912 3.8903 3.8915 812.4 1.01 loop_ _bayesian_distribution_cache.param_unique_name diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 74795ba9..0405b869 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -30,7 +30,8 @@ structure or experiment CIF files. - [x] Gather planning context from ADRs, source files, and tests. - [x] Confirm ADR status: implement from the suggestion for now. - [x] Confirm HDF5 strategy: add `h5py` as a direct dependency. -- [x] Confirm composite-key loop strategy: add persisted `id` columns. +- [x] Confirm schema strategy: do not add a dedicated `_fit_state` category. +- [x] Confirm loop identity strategy: keep persisted `id` columns with simple autogenerated numeric ids. - [x] Confirm public surface: expose read-only `Analysis` properties. - [x] Confirm predictive cache identity: key by `experiment_name`. - [x] Phase 1 step 1: update the ADR suggestion with clarifications. @@ -55,11 +56,14 @@ These questions were answered on 2026-05-18. 1. Implement from `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` for now. Do not move the ADR to `accepted/` as part of this plan. 2. Add `h5py` as a direct dependency for `analysis/results.h5`. -3. 
Add persisted `id` columns for composite-key fit-state loops instead - of using computed runtime-only keys. -4. Expose all new fit-state categories as public read-only properties on +3. Do not add a dedicated `_fit_state` category or `schema_version` + field for persisted analysis fit state. +4. Keep persisted `id` columns where the collection layer needs a + single key, but auto-generate simple numeric ids instead of derived + user-facing composite strings. +5. Expose all new fit-state categories as public read-only properties on `Analysis`. -5. Key `_bayesian_predictive_dataset` rows by `experiment_name`, keeping +6. Key `_bayesian_predictive_dataset` rows by `experiment_name`, keeping one cached predictive dataset per experiment. No remaining required gates are known. If implementation uncovers a new @@ -173,7 +177,6 @@ names for collections: | Python attribute | CIF category | Shape | | --------------------------------- | --------------------------------- | ----------- | -| `fit_state` | `_fit_state` | single item | | `fit_parameters` | `_fit_parameter` | collection | | `fit_result` | `_fit_result` | single item | | `fit_parameter_correlations` | `_fit_parameter_correlation` | collection | @@ -207,9 +210,10 @@ Files likely to change: Actions: 1. Keep the ADR in `suggestions/`; do not move it to `accepted/`. -2. Amend the ADR suggestion so composite-key loops have persisted `id` - columns. At minimum this applies to `_fit_parameter_correlation` and - `_bayesian_pair_cache`. +2. Amend the ADR suggestion so loops that need a persisted single-key + identity keep `id` columns, but autogenerated values are simple + numeric strings. At minimum this applies to + `_fit_parameter_correlation` and `_bayesian_pair_cache`. 3. Document that `_bayesian_predictive_dataset` remains keyed by `experiment_name`. 4. Document that `analysis/results.h5` uses `h5py` as a direct @@ -242,31 +246,28 @@ Actions: `deterministic` and `posterior`. 2. 
Add category modules following existing analysis category patterns: `default.py`, `factory.py`, and `__init__.py` with explicit imports. -3. Add `FitState` as a `CategoryItem` with - `_category_code = 'fit_state'` and numeric `schema_version` default - `1`. -4. Add `FitParameterItem` and `FitParameters` for `_fit_parameter`. Use +3. Add `FitParameterItem` and `FitParameters` for `_fit_parameter`. Use `_category_entry_name = 'param_unique_name'`. -5. Add `FitResult` for `_fit_result` with `result_kind`, `success`, +4. Add `FitResult` for `_fit_result` with `result_kind`, `success`, `message`, `iterations`, `fitting_time`, and `reduced_chi_square`. -6. Add `FitParameterCorrelationItem` and collection for +5. Add `FitParameterCorrelationItem` and collection for `_fit_parameter_correlation`. Include persisted `_fit_parameter_correlation.id` and use - `_category_entry_name = 'id'`. Generate a stable default id from the - normalized source and parameter pair when callers do not provide one. -7. Normalize correlation pairs so only upper-triangle rows are stored. -8. Use `StringDescriptor`, `NumericDescriptor`, and `BoolDescriptor` as + `_category_entry_name = 'id'`. Generate a simple numeric id when + callers do not provide one. +6. Normalize correlation pairs so only upper-triangle rows are stored. +7. Use `StringDescriptor`, `NumericDescriptor`, and `BoolDescriptor` as appropriate. Avoid raw Python attributes for persisted fields. -9. Do not add JSON fields or loose tags. -10. Update imports in the package `__init__.py` files so concrete +8. Do not add JSON fields or loose tags. +9. Update imports in the package `__init__.py` files so concrete classes are registered and importable. -11. Update this plan checklist for Step 2. +10. Update this plan checklist for Step 2. Implementation notes: - The collection `add()` path assumes one key. 
For categories with a persisted `id`, set `_category_entry_name = 'id'` on the item and - generate a stable default `id` before adding the item to the + generate a simple numeric-string `id` before adding the item to the collection. - Keep CIF tag names exactly as in the ADR, for example `_fit_parameter.param_unique_name`. @@ -297,11 +298,10 @@ Actions: `n_free_parameters`, `degrees_of_freedom`, `covariance_available`, and `correlation_available`. 2. Add `DeterministicParameterResultItem` and collection for - `_deterministic_parameter_result` with `order_index`, - `param_unique_name`, `final_value`, `final_uncertainty`, + `_deterministic_parameter_result` with `param_unique_name`, `final_value`, `final_uncertainty`, `at_lower_bound`, and `at_upper_bound`. 3. Use `_category_entry_name = 'param_unique_name'` for deterministic - parameter result rows. Keep `order_index` as display and array order. + parameter result rows. Preserve display order from CIF loop order. 4. Do not duplicate pre-fit values here; those belong to `_fit_parameter`. 5. Add explicit package imports. @@ -330,12 +330,15 @@ Actions: 1. Add `BayesianResult` as a single-item category with all ADR fields. 2. Add `BayesianSampler` as a single-item category with resolved DREAM sampler settings: `steps`, `burn`, `thin`, `pop`, `parallel`, `init`, - and `random_seed`. + and `random_seed`. Persist `parallel` as the non-negative worker + count; `0` means all CPUs. 3. Add `BayesianConvergence` as a single-item category with `converged`, `max_r_hat`, `min_ess_bulk`, `n_draws`, `n_chains`, and `n_parameters`. 4. Add `BayesianParameterPosteriorItem` and collection with all ADR - posterior summary fields. Use `_category_entry_name = 'unique_name'`. + posterior summary fields except `order_index`. Use + `_category_entry_name = 'unique_name'` and preserve parameter order + from CIF loop order. 5. 
Preserve the repo naming rule from prior Bayesian work: `best_sample` and `Best posterior sample` refer to the committed sampled point, not a continuous MAP estimate. @@ -363,8 +366,8 @@ Actions: 1. Add distribution cache manifest rows keyed by `param_unique_name`. 2. Add pair cache manifest rows with persisted `_bayesian_pair_cache.id` - and `_category_entry_name = 'id'`. Generate a stable default id from - the normalized parameter pair when callers do not provide one. + and `_category_entry_name = 'id'`. Generate a simple numeric id when + callers do not provide one. 3. Add predictive dataset manifest rows keyed by `experiment_name`. If multiple predictive datasets per experiment become necessary, stop and ask before changing the ADR schema. @@ -396,17 +399,16 @@ Actions: 4. Update `Analysis._serializable_categories()` so fit-state categories are appended only when a fit-state projection exists. 5. Keep the order from the ADR: normal analysis configuration first, - then `_fit_state`, `_fit_parameter`, `_fit_result`, correlations, + then `_fit_parameter`, `_fit_result`, correlations, deterministic categories, Bayesian categories, and cache manifests. 6. Update `analysis_from_cif()` to restore the new categories after existing fitting, aliases, constraints, and active mode-specific configuration. 7. Make missing fit-state categories a no-op for older saved projects. -8. Add clear warnings for unsupported `_fit_state.schema_version`. -9. Add a project-level helper to build a `{unique_name: parameter}` map +8. Add a project-level helper to build a `{unique_name: parameter}` map from structures and experiments. Reuse it for alias and fit-state reference restoration if practical. -10. Update this plan checklist for Step 6. +9. Update this plan checklist for Step 6. 
Suggested commit message: diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index 57b2e447..17d10219 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -60,8 +60,6 @@ from easydiffraction.analysis.categories.fit_parameters import FitParametersFactory from easydiffraction.analysis.categories.fit_result import FitResult from easydiffraction.analysis.categories.fit_result import FitResultFactory -from easydiffraction.analysis.categories.fit_state import FitState -from easydiffraction.analysis.categories.fit_state import FitStateFactory from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.fitting import FittingFactory from easydiffraction.analysis.categories.joint_fit import JointFitCollection diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 68bcc160..35ffc53e 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -31,7 +31,6 @@ from easydiffraction.analysis.categories.fit_parameter_correlations import FitParameterCorrelations from easydiffraction.analysis.categories.fit_parameters import FitParameters from easydiffraction.analysis.categories.fit_result import FitResult -from easydiffraction.analysis.categories.fit_state import FitState from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.fitting import FittingFactory from easydiffraction.analysis.categories.joint_fit import JointFitCollection @@ -409,7 +408,6 @@ def __init__(self, project: object) -> None: SequentialFitFactory.default_tag() ) self._sequential_fit_extract = SequentialFitExtractCollection() - self._fit_state = FitState() self._fit_parameters = FitParameters() self._fit_result = FitResult() self._fit_parameter_correlations = FitParameterCorrelations() @@ -493,17 +491,11 @@ def _live_parameter_map(self) 
-> dict[str, Parameter]: def _ordered_restored_parameter_names(self) -> list[str]: """Return persisted parameter names in display and array order.""" if self.fit_result.result_kind.value == FitResultKindEnum.BAYESIAN.value: - posterior_rows = sorted( - list(self.bayesian_parameter_posteriors), - key=lambda row: int(row.order_index.value), - ) + posterior_rows = list(self.bayesian_parameter_posteriors) if posterior_rows: return [row.unique_name.value for row in posterior_rows] - deterministic_rows = sorted( - list(self.deterministic_parameter_results), - key=lambda row: int(row.order_index.value), - ) + deterministic_rows = list(self.deterministic_parameter_results) if deterministic_rows: return [row.param_unique_name.value for row in deterministic_rows] @@ -559,10 +551,7 @@ def _restored_posterior_samples(self) -> PosteriorSamples | None: if parameter_samples is None: return None - posterior_rows = sorted( - list(self.bayesian_parameter_posteriors), - key=lambda row: int(row.order_index.value), - ) + posterior_rows = list(self.bayesian_parameter_posteriors) parameter_names = [row.unique_name.value for row in posterior_rows] if not parameter_names: parameter_names = [row.param_unique_name.value for row in self.fit_parameters] @@ -591,11 +580,7 @@ def _restored_posterior_samples(self) -> PosteriorSamples | None: def _restored_posterior_summaries(self) -> list[PosteriorParameterSummary]: """Return posterior summary rows as runtime summary objects.""" restored_summaries: list[PosteriorParameterSummary] = [] - posterior_rows = sorted( - list(self.bayesian_parameter_posteriors), - key=lambda row: int(row.order_index.value), - ) - for row in posterior_rows: + for row in self.bayesian_parameter_posteriors: restored_summaries.append( PosteriorParameterSummary( unique_name=row.unique_name.value, @@ -710,7 +695,7 @@ def _restore_fit_results_from_projection(self) -> object | None: 'burn': int(self.bayesian_sampler.burn.value), 'thin': int(self.bayesian_sampler.thin.value), 
'pop': int(self.bayesian_sampler.pop.value), - 'parallel': bool(self.bayesian_sampler.parallel.value), + 'parallel': int(self.bayesian_sampler.parallel.value), 'init': self.bayesian_sampler.init.value, 'random_seed': self.bayesian_sampler.random_seed.value, }, @@ -1018,11 +1003,6 @@ def sequential_fit_extract(self) -> SequentialFitExtractCollection: """Persisted extract rules for sequential fitting.""" return self._sequential_fit_extract - @property - def fit_state(self) -> FitState: - """Persisted fit-state schema metadata.""" - return self._fit_state - @property def fit_parameters(self) -> FitParameters: """Persisted fit-parameter control snapshots.""" @@ -1101,7 +1081,6 @@ def _fit_state_categories(self) -> list[object]: kind. """ categories: list[object] = [ - self.fit_state, self.fit_parameters, self.fit_result, self.fit_parameter_correlations, @@ -1137,7 +1116,6 @@ def _fit_state_categories(self) -> list[object]: def _clear_persisted_fit_state(self) -> None: """Reset all persisted fit-state categories before a new fit.""" - self._fit_state = FitState() self._fit_parameters = FitParameters() self._fit_result = FitResult() self._fit_parameter_correlations = FitParameterCorrelations() @@ -1156,7 +1134,6 @@ def _clear_persisted_fit_state(self) -> None: def _capture_fit_parameter_state(self, parameters: list[Parameter]) -> None: """Capture pre-fit parameter state into persisted fit-state categories.""" self._clear_persisted_fit_state() - self.fit_state._set_schema_version(1) for param in parameters: self.fit_parameters.create( @@ -1273,7 +1250,6 @@ def _store_common_fit_result_projection( result_kind: FitResultKindEnum, ) -> None: """Store fields shared by deterministic and Bayesian fit results.""" - self.fit_state._set_schema_version(1) self.fit_result._set_result_kind(result_kind.value) self.fit_result._set_success(results.success) self.fit_result._set_message(results.message) @@ -1340,9 +1316,8 @@ def _store_deterministic_result_projection( 
self.deterministic_result._set_covariance_available(covariance is not None) self.deterministic_result._set_correlation_available(correlation_matrix is not None) - for order_index, param in enumerate(fitted_parameters): + for param in fitted_parameters: self.deterministic_parameter_results.create( - order_index=order_index, param_unique_name=param.unique_name, final_value=param.value, final_uncertainty=param.uncertainty, @@ -1392,7 +1367,7 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_sampler._set_burn(int(sampler_settings.get('burn', 0))) self.bayesian_sampler._set_thin(int(sampler_settings.get('thin', 0))) self.bayesian_sampler._set_pop(int(sampler_settings.get('pop', 0))) - self.bayesian_sampler._set_parallel(bool(sampler_settings.get('parallel', False))) + self.bayesian_sampler._set_parallel(int(sampler_settings.get('parallel', 0))) self.bayesian_sampler._set_init(str(sampler_settings.get('init', ''))) random_seed = sampler_settings.get('random_seed') self.bayesian_sampler._set_random_seed( @@ -1406,9 +1381,8 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_convergence._set_n_chains(int(convergence.get('n_chains', 0))) self.bayesian_convergence._set_n_parameters(int(convergence.get('n_parameters', 0))) - for order_index, summary in enumerate(results.posterior_parameter_summaries): + for summary in results.posterior_parameter_summaries: self.bayesian_parameter_posteriors.create( - order_index=order_index, unique_name=summary.unique_name, display_name=summary.display_name, best_sample_value=summary.best_sample_value, diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index 4a3faeb0..278f2692 100644 --- a/src/easydiffraction/analysis/categories/__init__.py +++ b/src/easydiffraction/analysis/categories/__init__.py @@ -42,7 +42,6 @@ from easydiffraction.analysis.categories.fit_parameters import 
FitParameterItem from easydiffraction.analysis.categories.fit_parameters import FitParameters from easydiffraction.analysis.categories.fit_result import FitResult -from easydiffraction.analysis.categories.fit_state import FitState from easydiffraction.analysis.categories.fitting import Fitting from easydiffraction.analysis.categories.joint_fit import JointFitCollection from easydiffraction.analysis.categories.joint_fit import JointFitItem diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py b/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py index b1157722..7a2561fe 100644 --- a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py @@ -27,19 +27,6 @@ def _normalized_parameter_pair( return param_unique_name_y, param_unique_name_x -def _default_pair_cache_id( - *, - param_unique_name_x: str, - param_unique_name_y: str, -) -> str: - """Return the default persisted id for a pair-cache row.""" - normalized_x, normalized_y = _normalized_parameter_pair( - param_unique_name_x, - param_unique_name_y, - ) - return f'{normalized_x}:{normalized_y}' - - class BayesianPairCacheItem(CategoryItem): """Single persisted Bayesian pair-cache manifest row.""" @@ -259,8 +246,8 @@ def create( n_draws_cached : int | float Number of draws summarized into the cached pair. id : str | None, default=None - Explicit persisted row id. When omitted, a stable id is - derived from the normalized parameter pair. + Explicit persisted row id. When omitted, a simple + sequential identifier is generated. 
""" normalized_x, normalized_y = _normalized_parameter_pair( param_unique_name_x, @@ -276,9 +263,6 @@ def create( item._set_n_grid_x(n_grid_x) item._set_n_grid_y(n_grid_y) item._set_n_draws_cached(n_draws_cached) - resolved_id = id or _default_pair_cache_id( - param_unique_name_x=normalized_x, - param_unique_name_y=normalized_y, - ) + resolved_id = id or str(len(self) + 1) item._set_id(resolved_id) self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py index 8f74f39b..6bc9173e 100644 --- a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py @@ -25,12 +25,6 @@ class BayesianParameterPosteriorItem(CategoryItem): def __init__(self) -> None: super().__init__() - self._order_index = NumericDescriptor( - name='order_index', - description='Parameter column order in posterior sample arrays.', - value_spec=AttributeSpec(default=0), - cif_handler=CifHandler(names=['_bayesian_parameter_posterior.order_index']), - ) self._unique_name = StringDescriptor( name='unique_name', description='Unique EasyDiffraction parameter name.', @@ -101,15 +95,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_bayesian_parameter_posterior.r_hat']), ) - @property - def order_index(self) -> NumericDescriptor: - """Parameter column order in posterior sample arrays.""" - return self._order_index - - def _set_order_index(self, value: float) -> None: - """Set the order index for internal callers.""" - self._order_index.value = value - @property def unique_name(self) -> StringDescriptor: """Unique EasyDiffraction parameter name.""" @@ -227,7 +212,6 @@ def __init__(self) -> None: def create( self, *, - order_index: float, unique_name: str, display_name: str, best_sample_value: float | None = None, @@ -245,8 +229,6 @@ def create( Parameters 
---------- - order_index : int | float - Parameter column order in posterior sample arrays. unique_name : str Unique EasyDiffraction parameter name. display_name : str @@ -271,7 +253,6 @@ def create( Rank-normalized split-R-hat when available. """ item = BayesianParameterPosteriorItem() - item._set_order_index(order_index) item._set_unique_name(unique_name) item._set_display_name(display_name) item._set_best_sample_value(best_sample_value) diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py index 3512c468..88ffed06 100644 --- a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py @@ -8,7 +8,6 @@ from easydiffraction.core.category import CategoryItem from easydiffraction.core.metadata import TypeInfo from easydiffraction.core.validation import AttributeSpec -from easydiffraction.core.variable import BoolDescriptor from easydiffraction.core.variable import NumericDescriptor from easydiffraction.core.variable import StringDescriptor from easydiffraction.io.cif.handler import CifHandler @@ -51,10 +50,10 @@ def __init__(self) -> None: value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_sampler.pop']), ) - self._parallel = BoolDescriptor( + self._parallel = NumericDescriptor( name='parallel', - description='Whether sampling ran in parallel.', - value_spec=AttributeSpec(default=False), + description='Resolved DREAM worker count; 0 uses all CPUs.', + value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_sampler.parallel']), ) self._init = StringDescriptor( @@ -107,12 +106,12 @@ def _set_pop(self, value: float) -> None: self._pop.value = value @property - def parallel(self) -> BoolDescriptor: - """Whether sampling ran in parallel.""" + def parallel(self) -> NumericDescriptor: + """Resolved DREAM worker count; 0 uses all CPUs.""" return 
self._parallel - def _set_parallel(self, value: bool) -> None: - """Set the parallel flag for internal callers.""" + def _set_parallel(self, value: float) -> None: + """Set the DREAM worker count for internal callers.""" self._parallel.value = value @property diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py index 8054a4e1..defa5845 100644 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py @@ -26,12 +26,6 @@ class DeterministicParameterResultItem(CategoryItem): def __init__(self) -> None: super().__init__() - self._order_index = NumericDescriptor( - name='order_index', - description='Display and array order for the persisted parameter result.', - value_spec=AttributeSpec(default=0), - cif_handler=CifHandler(names=['_deterministic_parameter_result.order_index']), - ) self._param_unique_name = StringDescriptor( name='param_unique_name', description='Unique name of the persisted parameter result row.', @@ -66,17 +60,6 @@ def __init__(self) -> None: cif_handler=CifHandler(names=['_deterministic_parameter_result.at_upper_bound']), ) - @property - def order_index(self) -> NumericDescriptor: - """ - Display and array order for the persisted parameter result. 
- """ - return self._order_index - - def _set_order_index(self, value: float) -> None: - """Set the order index for internal callers.""" - self._order_index.value = value - @property def param_unique_name(self) -> StringDescriptor: """Unique name of the persisted parameter result row.""" @@ -138,7 +121,6 @@ def __init__(self) -> None: def create( self, *, - order_index: float, param_unique_name: str, final_value: float | None = None, final_uncertainty: float | None = None, @@ -150,8 +132,6 @@ def create( Parameters ---------- - order_index : int | float - Display and array order for the persisted parameter result. param_unique_name : str Unique name of the persisted parameter result row. final_value : int | float | None, default=None @@ -164,7 +144,6 @@ def create( Whether the parameter finished at the upper fit bound. """ item = DeterministicParameterResultItem() - item._set_order_index(order_index) item._set_param_unique_name(param_unique_name) item._set_final_value(final_value) item._set_final_uncertainty(final_uncertainty) diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py index 48ae4b51..d1d02ce1 100644 --- a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py @@ -30,20 +30,6 @@ def _normalized_parameter_pair( return param_unique_name_j, param_unique_name_i -def _default_correlation_id( - *, - source_kind: str, - param_unique_name_i: str, - param_unique_name_j: str, -) -> str: - """Return the default persisted id for a correlation row.""" - normalized_i, normalized_j = _normalized_parameter_pair( - param_unique_name_i, - param_unique_name_j, - ) - return f'{source_kind}:{normalized_i}:{normalized_j}' - - class FitParameterCorrelationItem(CategoryItem): """Single persisted fit-parameter correlation row.""" @@ -181,8 +167,8 @@ def 
create( correlation : int | float Correlation coefficient for the parameter pair. id : str | None, default=None - Explicit persisted row identifier. When omitted, a stable id - is derived from the normalized parameter pair. + Explicit persisted row identifier. When omitted, a simple + sequential identifier is generated. """ normalized_i, normalized_j = _normalized_parameter_pair( param_unique_name_i, @@ -193,10 +179,6 @@ def create( item._set_param_unique_name_i(normalized_i) item._set_param_unique_name_j(normalized_j) item._set_correlation(correlation) - resolved_id = id or _default_correlation_id( - source_kind=source_kind, - param_unique_name_i=normalized_i, - param_unique_name_j=normalized_j, - ) + resolved_id = id or str(len(self) + 1) item._set_id(resolved_id) self.add(item) diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index 8833fd57..b21a70e4 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -567,7 +567,6 @@ def analysis_from_cif(analysis: object, cif_text: str) -> None: def _has_persisted_fit_state_sections(block: object) -> bool: """Return True when any persisted fit-state section is present.""" scalar_tags = ( - '_fit_state.schema_version', '_fit_result.result_kind', '_deterministic_result.optimizer_name', '_bayesian_result.sampler_name', @@ -589,25 +588,11 @@ def _has_persisted_fit_state_sections(block: object) -> bool: ) -def _warn_for_unsupported_fit_state_schema(analysis: object) -> None: - """ - Warn when the persisted fit-state schema version is unsupported. - """ - schema_version = analysis.fit_state.schema_version.value - if schema_version != 1: - log.warning( - 'Unsupported _fit_state.schema_version in analysis CIF: ' - f'{schema_version}. Attempting best-effort restore for schema version 1.', - ) - - def _restore_common_fit_state(analysis: object, block: object) -> None: """ Restore fit-state categories shared by deterministic and Bayesian fits. 
""" - analysis.fit_state.from_cif(block) - _warn_for_unsupported_fit_state_schema(analysis) analysis.fit_parameters.from_cif(block) analysis.fit_result.from_cif(block) analysis.fit_parameter_correlations.from_cif(block) From 6020753a5594ecd050a9d471a106f341e138b837 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 17:42:57 +0200 Subject: [PATCH 25/72] Persist Bayesian plot caches in sidecar --- docs/docs/tutorials/ed-24.py | 99 ++++++++ docs/docs/tutorials/index.md | 4 + src/easydiffraction/analysis/analysis.py | 270 +++++++++++++++++++++- src/easydiffraction/core/variable.py | 4 + src/easydiffraction/io/results_sidecar.py | 26 ++- 5 files changed, 383 insertions(+), 20 deletions(-) create mode 100644 docs/docs/tutorials/ed-24.py diff --git a/docs/docs/tutorials/ed-24.py b/docs/docs/tutorials/ed-24.py new file mode 100644 index 00000000..7e287ba9 --- /dev/null +++ b/docs/docs/tutorials/ed-24.py @@ -0,0 +1,99 @@ +# %% [markdown] +# # Load Saved Bayesian Project: LBCO, HRPT +# +# This tutorial shows how to reopen the Bayesian project created in +# `ed-21.py` and inspect the saved fit results without rerunning DREAM. +# +# The project already contains posterior samples together with cached +# posterior density, pair, and predictive data, so the plots below are +# restored directly from disk. + +# %% [markdown] +# ## Import Library + +# %% +from pathlib import Path + +import easydiffraction as ed + +# %% [markdown] +# ## Locate the Saved Project +# +# In the repository, the saved project currently lives under +# `tmp/tutorials/projects/lbco_hrpt_bayesian`. Once a downloadable +# archive is available, replace this path with the extracted project +# directory instead. + +# %% +project_dir = Path('../../../tmp/tutorials/projects/lbco_hrpt_bayesian') + +# %% + +# %% [markdown] +# ## Load the Saved Bayesian Project +# +# Loading restores the persisted fit state, posterior samples, and plot +# caches. No new fit is launched in this tutorial. 
+ +# %% +project = ed.Project.load(project_dir) + +# %% [markdown] +# ## Review the Saved Fit Summary +# +# The fit summary reports the committed point estimate, sampler +# settings, convergence diagnostics, and posterior parameter summaries +# from the saved Bayesian run. + +# %% +project.display.fit.results() + +# %% [markdown] +# ## Show Correlations and the Fitted Pattern +# +# The correlation matrix and measured-vs-calculated pattern are restored +# from the saved project state. + +# %% +project.display.fit.correlations() + +# %% [markdown] +# Show the standard measured vs calculated pattern for the full range. + +# %% +project.display.pattern(expt_name='hrpt') + +# %% [markdown] +# A zoomed view is useful for checking the fit quality in a narrow +# region of the diffraction pattern. + +# %% +project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93) + +# %% [markdown] +# ## Inspect Posterior Densities and Pair Structure +# +# The pair plot and one-dimensional posterior distributions now load +# from the persisted caches generated when the Bayesian fit was saved. + +# %% +project.display.posterior.pairs() + +# %% +project.display.posterior.distribution() + +# %% [markdown] +# ## Plot Posterior Predictive Checks +# +# The posterior predictive view reuses the cached predictive summary +# stored in the project rather than recalculating it on first display. + +# %% +project.display.posterior.predictive(expt_name='hrpt') + +# %% [markdown] +# A zoomed view is useful for checking the propagated uncertainty in a +# narrow region of the diffraction pattern. 
+ +# %% +project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93) diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index a8960109..1c733ca9 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -109,6 +109,10 @@ The tutorials are organized into the following categories: tutorial covers the use of Markov Chain Monte Carlo (MCMC) sampling to explore the posterior distribution of the refined parameters, providing insights into parameter uncertainties and correlations. +- [LBCO Bayesian, saved project](ed-24.ipynb) – Shows how to load the + saved Bayesian LBCO project created in the previous tutorial and + inspect the persisted fit summary, correlation matrix, posterior + plots, and predictive checks without rerunning DREAM. - [Tb2TiO7 Bayesian](ed-22.ipynb) – Another example of a Bayesian analysis. This tutorial focuses on the Tb2TiO7 crystal structure using constant wavelength neutron single crystal diffraction data from HEiDi diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 35ffc53e..303b8760 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -4,6 +4,7 @@ from __future__ import annotations from contextlib import suppress +from itertools import combinations from pathlib import Path import numpy as np @@ -514,7 +515,7 @@ def _restore_live_parameter_state(self, param_map: dict[str, Parameter]) -> None parameter.fit_min = row.fit_min.value parameter.fit_max = row.fit_max.value - parameter.fit_bounds_uncertainty_multiplier = ( + parameter._set_fit_bounds_uncertainty_multiplier( row.fit_bounds_uncertainty_multiplier.value ) parameter._fit_start_value = row.start_value.value @@ -607,13 +608,15 @@ def _restored_predictive_summaries(self) -> dict[str, PosteriorPredictiveSummary restored_predictive: dict[str, PosteriorPredictiveSummary] = {} predictive_data = 
self._persisted_fit_state_sidecar.get('predictive_datasets', {}) for row in self.bayesian_predictive_datasets: - dataset = predictive_data.get(row.experiment_name.value) + experiment_name = str(row.experiment_name.value) + x_axis_name = str(row.x_axis_name.value) + dataset = predictive_data.get(experiment_name) if dataset is None: continue summary = PosteriorPredictiveSummary( - experiment_name=row.experiment_name.value, - x_axis_name=row.x_axis_name.value, + experiment_name=experiment_name, + x_axis_name=x_axis_name, x=np.asarray(dataset['x'], dtype=float), best_sample_prediction=np.asarray( dataset['best_sample_prediction'], @@ -645,19 +648,19 @@ def _restored_predictive_summaries(self) -> dict[str, PosteriorPredictiveSummary else np.asarray(dataset['draws'], dtype=float) ), ) - restored_predictive[row.experiment_name.value] = summary + restored_predictive[experiment_name] = summary restored_predictive[ self._predictive_cache_key( - row.experiment_name.value, - row.x_axis_name.value, + experiment_name, + x_axis_name, include_draws=False, ) ] = summary if summary.draws is not None: restored_predictive[ self._predictive_cache_key( - row.experiment_name.value, - row.x_axis_name.value, + experiment_name, + x_axis_name, include_draws=True, ) ] = summary @@ -1338,6 +1341,252 @@ def _store_deterministic_result_projection( source_kind=FitCorrelationSourceEnum.DETERMINISTIC, ) + def _store_bayesian_distribution_cache_projection( + self, + *, + plotter: object, + results: BayesianFitResults, + flattened_samples: np.ndarray, + parameter_names: list[str], + ) -> dict[str, dict[str, np.ndarray]]: + """Store cached posterior density curves into persisted manifests.""" + payload: dict[str, dict[str, np.ndarray]] = {} + for parameter_index, parameter_name in enumerate(parameter_names): + lower_bound, upper_bound = plotter._posterior_parameter_bounds( + fit_results=results, + parameter_name=parameter_name, + ) + density_curve = plotter._posterior_density_curve( + 
flattened_samples[:, parameter_index], + lower_bound=lower_bound, + upper_bound=upper_bound, + ) + if density_curve is None: + continue + + x_values, density_values = density_curve + x_array = np.asarray(x_values, dtype=float) + density_array = np.asarray(density_values, dtype=float) + cache_index = len(payload) + self.bayesian_distribution_caches.create( + param_unique_name=parameter_name, + x_path=f'/posterior/distribution/{cache_index}/x', + density_path=f'/posterior/distribution/{cache_index}/density', + n_grid=float(x_array.size), + n_draws_cached=float(np.isfinite(flattened_samples[:, parameter_index]).sum()), + ) + payload[parameter_name] = { + 'x': x_array, + 'density': density_array, + } + return payload + + @staticmethod + def _posterior_pair_contour_levels(density: np.ndarray) -> np.ndarray: + """Return default contour levels for a cached posterior pair surface.""" + density_max = float(np.max(density)) + if not np.isfinite(density_max) or density_max <= 0: + return np.asarray([], dtype=float) + return density_max * np.asarray([0.20, 0.35, 0.50, 0.65, 0.80, 0.95], dtype=float) + + def _store_bayesian_pair_cache_projection( + self, + *, + plotter: object, + results: BayesianFitResults, + flattened_samples: np.ndarray, + parameter_names: list[str], + ) -> dict[str, dict[str, np.ndarray]]: + """Store cached posterior pair-density surfaces into persisted manifests.""" + n_parameters = len(parameter_names) + if n_parameters <= 1: + return {} + + density_samples = plotter._thin_posterior_samples( + flattened_samples, + max_points=plotter._posterior_pair_density_max_points(n_parameters), + ) + contour_grid_size = plotter._posterior_pair_contour_grid_size(n_parameters) + payload: dict[str, dict[str, np.ndarray]] = {} + for first_index, second_index in combinations(range(n_parameters), 2): + x_index = first_index + y_index = second_index + x_name = parameter_names[x_index] + y_name = parameter_names[y_index] + if x_name > y_name: + x_index, y_index = y_index, 
x_index + x_name, y_name = y_name, x_name + + x_values = density_samples[:, x_index] + y_values = density_samples[:, y_index] + x_bounds, y_bounds = plotter._posterior_pair_bounds( + fit_results=results, + x_parameter_name=x_name, + y_parameter_name=y_name, + x_values=x_values, + y_values=y_values, + ) + density_surface = plotter._posterior_pair_density_surface( + x_values=x_values, + y_values=y_values, + x_bounds=x_bounds, + y_bounds=y_bounds, + grid_size=contour_grid_size, + ) + if density_surface is None: + continue + + x_grid, y_grid, density = density_surface + x_grid_array = np.asarray(x_grid, dtype=float) + y_grid_array = np.asarray(y_grid, dtype=float) + density_array = np.asarray(density, dtype=float) + contour_levels = self._posterior_pair_contour_levels(density_array) + pair_id = str(len(payload) + 1) + self.bayesian_pair_caches.create( + id=pair_id, + param_unique_name_x=x_name, + param_unique_name_y=y_name, + x_path=f'/posterior/pairs/{pair_id}/x', + y_path=f'/posterior/pairs/{pair_id}/y', + density_path=f'/posterior/pairs/{pair_id}/density', + contour_level_path=f'/posterior/pairs/{pair_id}/contour_levels', + n_grid_x=float(x_grid_array.size), + n_grid_y=float(y_grid_array.size), + n_draws_cached=float(density_samples.shape[0]), + ) + payload[pair_id] = { + 'x': x_grid_array, + 'y': y_grid_array, + 'density': density_array, + 'contour_levels': contour_levels, + } + return payload + + @staticmethod + def _predictive_dataset_payload( + summary: PosteriorPredictiveSummary, + ) -> dict[str, np.ndarray]: + """Return persisted predictive arrays for one summary.""" + payload: dict[str, np.ndarray] = { + 'x': np.asarray(summary.x, dtype=float), + 'best_sample_prediction': np.asarray(summary.best_sample_prediction, dtype=float), + } + if summary.lower_95 is not None: + payload['lower_95'] = np.asarray(summary.lower_95, dtype=float) + if summary.upper_95 is not None: + payload['upper_95'] = np.asarray(summary.upper_95, dtype=float) + if summary.lower_68 is not 
None: + payload['lower_68'] = np.asarray(summary.lower_68, dtype=float) + if summary.upper_68 is not None: + payload['upper_68'] = np.asarray(summary.upper_68, dtype=float) + if summary.draws is not None: + payload['draws'] = np.asarray(summary.draws, dtype=float) + return payload + + def _store_bayesian_predictive_projection( + self, + *, + plotter: object, + results: BayesianFitResults, + ) -> dict[str, dict[str, np.ndarray]]: + """Store posterior predictive summaries into persisted manifests.""" + predictive_payload: dict[str, dict[str, np.ndarray]] = {} + for experiment_name in self.project.experiments.names: + experiment = self.project.experiments[experiment_name] + x_axis, x_axis_name, _, _, _ = plotter._resolve_x_axis(experiment.type, None) + summary = plotter._get_or_build_posterior_predictive_summary( + experiment=experiment, + expt_name=experiment_name, + x_axis=x_axis, + include_draws=True, + ) + if summary is None: + continue + + results.posterior_predictive[summary.experiment_name] = summary + predictive_payload[summary.experiment_name] = self._predictive_dataset_payload( + summary, + ) + predictive_root = f'/predictive/{summary.experiment_name}' + self.bayesian_predictive_datasets.create( + experiment_name=summary.experiment_name, + x_axis_name=str(x_axis_name), + x_path=f'{predictive_root}/x', + best_sample_prediction_path=( + f'{predictive_root}/best_sample_prediction' + ), + lower_95_path=( + None if summary.lower_95 is None else f'{predictive_root}/lower_95' + ), + upper_95_path=( + None if summary.upper_95 is None else f'{predictive_root}/upper_95' + ), + lower_68_path=( + None if summary.lower_68 is None else f'{predictive_root}/lower_68' + ), + upper_68_path=( + None if summary.upper_68 is None else f'{predictive_root}/upper_68' + ), + draws_path=(None if summary.draws is None else f'{predictive_root}/draws'), + n_x=float(np.asarray(summary.x).size), + n_draws_cached=( + 0.0 if summary.draws is None else 
float(np.asarray(summary.draws).shape[0]) + ), + ) + return predictive_payload + + def _store_bayesian_plot_cache_projection(self, results: BayesianFitResults) -> None: + """Populate persisted Bayesian plot caches from live posterior results.""" + posterior_samples = results.posterior_samples + if posterior_samples is None: + self._persisted_fit_state_sidecar['distribution_caches'] = {} + self._persisted_fit_state_sidecar['pair_caches'] = {} + self._persisted_fit_state_sidecar['predictive_datasets'] = {} + self.bayesian_result._set_has_distribution_cache(False) + self.bayesian_result._set_has_pair_cache(False) + self.bayesian_result._set_has_posterior_predictive(False) + return + + flattened_samples = np.asarray(posterior_samples.flattened(), dtype=float) + parameter_names = list(posterior_samples.parameter_names) + if ( + flattened_samples.ndim != 2 + or not parameter_names + or flattened_samples.shape[1] != len(parameter_names) + ): + self._persisted_fit_state_sidecar['distribution_caches'] = {} + self._persisted_fit_state_sidecar['pair_caches'] = {} + self._persisted_fit_state_sidecar['predictive_datasets'] = {} + self.bayesian_result._set_has_distribution_cache(False) + self.bayesian_result._set_has_pair_cache(False) + self.bayesian_result._set_has_posterior_predictive(False) + return + + plotter = self.project.rendering.plotter + distribution_payload = self._store_bayesian_distribution_cache_projection( + plotter=plotter, + results=results, + flattened_samples=flattened_samples, + parameter_names=parameter_names, + ) + pair_payload = self._store_bayesian_pair_cache_projection( + plotter=plotter, + results=results, + flattened_samples=flattened_samples, + parameter_names=parameter_names, + ) + predictive_payload = self._store_bayesian_predictive_projection( + plotter=plotter, + results=results, + ) + + self._persisted_fit_state_sidecar['distribution_caches'] = distribution_payload + self._persisted_fit_state_sidecar['pair_caches'] = pair_payload + 
self._persisted_fit_state_sidecar['predictive_datasets'] = predictive_payload + self.bayesian_result._set_has_distribution_cache(bool(distribution_payload)) + self.bayesian_result._set_has_pair_cache(bool(pair_payload)) + self.bayesian_result._set_has_posterior_predictive(bool(predictive_payload)) + def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None: """Store Bayesian fit-result projections into persisted categories.""" credible_interval_inner = 0.68 @@ -1360,7 +1609,7 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_result._set_has_posterior_samples(results.posterior_samples is not None) self.bayesian_result._set_has_distribution_cache(False) self.bayesian_result._set_has_pair_cache(False) - self.bayesian_result._set_has_posterior_predictive(bool(results.posterior_predictive)) + self.bayesian_result._set_has_posterior_predictive(False) self.bayesian_result._set_sidecar_file('results.h5') self.bayesian_sampler._set_steps(int(sampler_settings.get('steps', 0))) @@ -1409,6 +1658,7 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None correlation_matrix=correlation_matrix, source_kind=FitCorrelationSourceEnum.POSTERIOR, ) + self._store_bayesian_plot_cache_projection(results) def _store_fit_result_projection( self, diff --git a/src/easydiffraction/core/variable.py b/src/easydiffraction/core/variable.py index 257a5771..79ae879d 100644 --- a/src/easydiffraction/core/variable.py +++ b/src/easydiffraction/core/variable.py @@ -461,6 +461,10 @@ def fit_bounds_uncertainty_multiplier(self) -> float | None: """ return self._fit_bounds_uncertainty_multiplier + def _set_fit_bounds_uncertainty_multiplier(self, value: float | None) -> None: + """Set the cached uncertainty-derived fit-bounds multiplier.""" + self._fit_bounds_uncertainty_multiplier = value + def set_fit_bounds_from_uncertainty( self, multiplier: float = DEFAULT_FIT_BOUNDS_MULTIPLIER, diff --git 
a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py index 8dc171a4..db56ef23 100644 --- a/src/easydiffraction/io/results_sidecar.py +++ b/src/easydiffraction/io/results_sidecar.py @@ -119,21 +119,27 @@ def _predictive_payload(analysis: object) -> dict[str, dict[str, np.ndarray]]: posterior_predictive = getattr(fit_results, 'posterior_predictive', None) if posterior_predictive: payload: dict[str, dict[str, np.ndarray]] = {} - for experiment_name, summary in posterior_predictive.items(): - payload[experiment_name] = { - 'x': np.asarray(summary.x, dtype=float), - 'best_sample_prediction': np.asarray(summary.best_sample_prediction, dtype=float), - } + for runtime_key, summary in posterior_predictive.items(): + experiment_name = getattr(summary, 'experiment_name', None) + if not isinstance(experiment_name, str) or not experiment_name.strip(): + experiment_name = runtime_key + + dataset_payload = payload.setdefault(experiment_name, {}) + dataset_payload['x'] = np.asarray(summary.x, dtype=float) + dataset_payload['best_sample_prediction'] = np.asarray( + summary.best_sample_prediction, + dtype=float, + ) if summary.lower_95 is not None: - payload[experiment_name]['lower_95'] = np.asarray(summary.lower_95, dtype=float) + dataset_payload['lower_95'] = np.asarray(summary.lower_95, dtype=float) if summary.upper_95 is not None: - payload[experiment_name]['upper_95'] = np.asarray(summary.upper_95, dtype=float) + dataset_payload['upper_95'] = np.asarray(summary.upper_95, dtype=float) if summary.lower_68 is not None: - payload[experiment_name]['lower_68'] = np.asarray(summary.lower_68, dtype=float) + dataset_payload['lower_68'] = np.asarray(summary.lower_68, dtype=float) if summary.upper_68 is not None: - payload[experiment_name]['upper_68'] = np.asarray(summary.upper_68, dtype=float) + dataset_payload['upper_68'] = np.asarray(summary.upper_68, dtype=float) if summary.draws is not None: - payload[experiment_name]['draws'] = 
np.asarray(summary.draws, dtype=float) + dataset_payload['draws'] = np.asarray(summary.draws, dtype=float) return payload sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) From 09186c1de9e70977bec0e102ae51b667fba83ba8 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 18:02:17 +0200 Subject: [PATCH 26/72] Add integer type and skip redundant processing --- src/easydiffraction/analysis/analysis.py | 17 ++- .../bayesian_convergence/default.py | 19 +-- .../categories/bayesian_sampler/default.py | 37 +++--- .../analysis/categories/fit_result/default.py | 7 +- src/easydiffraction/core/validation.py | 2 + src/easydiffraction/core/variable.py | 36 +++++ src/easydiffraction/io/cif/serialize.py | 33 ++++- src/easydiffraction/project/display.py | 125 +++++++++++++++--- 8 files changed, 224 insertions(+), 52 deletions(-) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 303b8760..da85f3d4 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1494,7 +1494,8 @@ def _store_bayesian_predictive_projection( for experiment_name in self.project.experiments.names: experiment = self.project.experiments[experiment_name] x_axis, x_axis_name, _, _, _ = plotter._resolve_x_axis(experiment.type, None) - summary = plotter._get_or_build_posterior_predictive_summary( + summary = plotter._build_posterior_predictive_summary( + fit_results=results, experiment=experiment, expt_name=experiment_name, x_axis=x_axis, @@ -1504,6 +1505,20 @@ def _store_bayesian_predictive_projection( continue results.posterior_predictive[summary.experiment_name] = summary + results.posterior_predictive[ + self._predictive_cache_key( + summary.experiment_name, + str(x_axis_name), + include_draws=False, + ) + ] = summary + results.posterior_predictive[ + self._predictive_cache_key( + summary.experiment_name, + str(x_axis_name), + include_draws=True, + ) + ] = summary 
predictive_payload[summary.experiment_name] = self._predictive_dataset_payload( summary, ) diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py index ef4a2acb..06584f25 100644 --- a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py @@ -11,6 +11,7 @@ from easydiffraction.core.metadata import TypeInfo from easydiffraction.core.validation import AttributeSpec from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import IntegerDescriptor from easydiffraction.core.variable import NumericDescriptor from easydiffraction.io.cif.handler import CifHandler @@ -46,19 +47,19 @@ def __init__(self) -> None: value_spec=AttributeSpec(default=None, allow_none=True), cif_handler=CifHandler(names=['_bayesian_convergence.min_ess_bulk']), ) - self._n_draws = NumericDescriptor( + self._n_draws = IntegerDescriptor( name='n_draws', description='Number of stored posterior draws.', value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_convergence.n_draws']), ) - self._n_chains = NumericDescriptor( + self._n_chains = IntegerDescriptor( name='n_chains', description='Number of stored posterior chains.', value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_convergence.n_chains']), ) - self._n_parameters = NumericDescriptor( + self._n_parameters = IntegerDescriptor( name='n_parameters', description='Number of sampled parameters.', value_spec=AttributeSpec(default=0), @@ -93,28 +94,28 @@ def _set_min_ess_bulk(self, value: float | None) -> None: self._min_ess_bulk.value = value @property - def n_draws(self) -> NumericDescriptor: + def n_draws(self) -> IntegerDescriptor: """Number of stored posterior draws.""" return self._n_draws - def _set_n_draws(self, value: float) -> None: + def _set_n_draws(self, value: 
int) -> None: """Set the draw count for internal callers.""" self._n_draws.value = value @property - def n_chains(self) -> NumericDescriptor: + def n_chains(self) -> IntegerDescriptor: """Number of stored posterior chains.""" return self._n_chains - def _set_n_chains(self, value: float) -> None: + def _set_n_chains(self, value: int) -> None: """Set the chain count for internal callers.""" self._n_chains.value = value @property - def n_parameters(self) -> NumericDescriptor: + def n_parameters(self) -> IntegerDescriptor: """Number of sampled parameters.""" return self._n_parameters - def _set_n_parameters(self, value: float) -> None: + def _set_n_parameters(self, value: int) -> None: """Set the sampled-parameter count for internal callers.""" self._n_parameters.value = value diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py index 88ffed06..12b7c661 100644 --- a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py @@ -8,6 +8,7 @@ from easydiffraction.core.category import CategoryItem from easydiffraction.core.metadata import TypeInfo from easydiffraction.core.validation import AttributeSpec +from easydiffraction.core.variable import IntegerDescriptor from easydiffraction.core.variable import NumericDescriptor from easydiffraction.core.variable import StringDescriptor from easydiffraction.io.cif.handler import CifHandler @@ -26,31 +27,31 @@ class BayesianSampler(CategoryItem): def __init__(self) -> None: super().__init__() - self._steps = NumericDescriptor( + self._steps = IntegerDescriptor( name='steps', description='Resolved number of sampler steps.', value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_sampler.steps']), ) - self._burn = NumericDescriptor( + self._burn = IntegerDescriptor( name='burn', description='Resolved burn-in count.', 
value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_sampler.burn']), ) - self._thin = NumericDescriptor( + self._thin = IntegerDescriptor( name='thin', description='Resolved thinning interval.', value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_sampler.thin']), ) - self._pop = NumericDescriptor( + self._pop = IntegerDescriptor( name='pop', description='Resolved population size.', value_spec=AttributeSpec(default=0), cif_handler=CifHandler(names=['_bayesian_sampler.pop']), ) - self._parallel = NumericDescriptor( + self._parallel = IntegerDescriptor( name='parallel', description='Resolved DREAM worker count; 0 uses all CPUs.', value_spec=AttributeSpec(default=0), @@ -62,7 +63,7 @@ def __init__(self) -> None: value_spec=AttributeSpec(default=''), cif_handler=CifHandler(names=['_bayesian_sampler.init']), ) - self._random_seed = NumericDescriptor( + self._random_seed = IntegerDescriptor( name='random_seed', description='Resolved random seed used by the sampler.', value_spec=AttributeSpec(default=None, allow_none=True), @@ -70,47 +71,47 @@ def __init__(self) -> None: ) @property - def steps(self) -> NumericDescriptor: + def steps(self) -> IntegerDescriptor: """Resolved number of sampler steps.""" return self._steps - def _set_steps(self, value: float) -> None: + def _set_steps(self, value: int) -> None: """Set the step count for internal callers.""" self._steps.value = value @property - def burn(self) -> NumericDescriptor: + def burn(self) -> IntegerDescriptor: """Resolved burn-in count.""" return self._burn - def _set_burn(self, value: float) -> None: + def _set_burn(self, value: int) -> None: """Set the burn-in count for internal callers.""" self._burn.value = value @property - def thin(self) -> NumericDescriptor: + def thin(self) -> IntegerDescriptor: """Resolved thinning interval.""" return self._thin - def _set_thin(self, value: float) -> None: + def _set_thin(self, value: int) -> None: """Set the thinning 
interval for internal callers.""" self._thin.value = value @property - def pop(self) -> NumericDescriptor: + def pop(self) -> IntegerDescriptor: """Resolved population size.""" return self._pop - def _set_pop(self, value: float) -> None: + def _set_pop(self, value: int) -> None: """Set the population size for internal callers.""" self._pop.value = value @property - def parallel(self) -> NumericDescriptor: + def parallel(self) -> IntegerDescriptor: """Resolved DREAM worker count; 0 uses all CPUs.""" return self._parallel - def _set_parallel(self, value: float) -> None: + def _set_parallel(self, value: int) -> None: """Set the DREAM worker count for internal callers.""" self._parallel.value = value @@ -124,10 +125,10 @@ def _set_init(self, value: str) -> None: self._init.value = value @property - def random_seed(self) -> NumericDescriptor: + def random_seed(self) -> IntegerDescriptor: """Resolved random seed used by the sampler.""" return self._random_seed - def _set_random_seed(self, value: float | None) -> None: + def _set_random_seed(self, value: int | None) -> None: """Set the random seed for internal callers.""" self._random_seed.value = value diff --git a/src/easydiffraction/analysis/categories/fit_result/default.py b/src/easydiffraction/analysis/categories/fit_result/default.py index 87466f9f..e75775e9 100644 --- a/src/easydiffraction/analysis/categories/fit_result/default.py +++ b/src/easydiffraction/analysis/categories/fit_result/default.py @@ -11,6 +11,7 @@ from easydiffraction.core.validation import AttributeSpec from easydiffraction.core.validation import MembershipValidator from easydiffraction.core.variable import BoolDescriptor +from easydiffraction.core.variable import IntegerDescriptor from easydiffraction.core.variable import NumericDescriptor from easydiffraction.core.variable import StringDescriptor from easydiffraction.io.cif.handler import CifHandler @@ -52,7 +53,7 @@ def __init__(self) -> None: value_spec=AttributeSpec(default=''), 
cif_handler=CifHandler(names=['_fit_result.message']), ) - self._iterations = NumericDescriptor( + self._iterations = IntegerDescriptor( name='iterations', description='Iteration count for the latest persisted fit-result projection.', value_spec=AttributeSpec(default=0), @@ -103,13 +104,13 @@ def _set_message(self, value: str) -> None: self._message.value = value @property - def iterations(self) -> NumericDescriptor: + def iterations(self) -> IntegerDescriptor: """ Iteration count for the latest persisted fit-result projection. """ return self._iterations - def _set_iterations(self, value: float) -> None: + def _set_iterations(self, value: int) -> None: """Set the iteration count for internal callers.""" self._iterations.value = value diff --git a/src/easydiffraction/core/validation.py b/src/easydiffraction/core/validation.py index 3fd88208..314d7e94 100644 --- a/src/easydiffraction/core/validation.py +++ b/src/easydiffraction/core/validation.py @@ -27,6 +27,7 @@ class DataTypeHints: """Type hint aliases for numeric, string, and boolean types.""" Numeric = int | float | np.integer | np.floating + Integer = int | np.integer String = str Bool = bool @@ -38,6 +39,7 @@ class DataTypes(Enum): """Enumeration of supported data types for descriptors.""" NUMERIC = (int, float, np.integer, np.floating) + INTEGER = (int, np.integer) STRING = (str,) BOOL = (bool,) ANY = (object,) # fallback for unconstrained diff --git a/src/easydiffraction/core/variable.py b/src/easydiffraction/core/variable.py index 79ae879d..074afb71 100644 --- a/src/easydiffraction/core/variable.py +++ b/src/easydiffraction/core/variable.py @@ -276,6 +276,15 @@ def units(self) -> str: # ====================================================================== +class GenericIntegerDescriptor(GenericNumericDescriptor): + """Base descriptor that constrains values to integers.""" + + _value_type = DataTypes.INTEGER + + +# ====================================================================== + + class 
GenericParameter(GenericNumericDescriptor): """ Numeric descriptor extended with fitting-related attributes. @@ -616,6 +625,33 @@ def __init__( # ====================================================================== +class IntegerDescriptor(GenericIntegerDescriptor): + """Integer descriptor bound to a CIF handler.""" + + def __init__( + self, + *, + cif_handler: CifHandler, + **kwargs: object, + ) -> None: + """ + Integer descriptor bound to a CIF handler. + + Parameters + ---------- + cif_handler : CifHandler + Object that tracks CIF identifiers. + **kwargs : object + Forwarded to GenericIntegerDescriptor. + """ + super().__init__(**kwargs) + self._cif_handler = cif_handler + self._cif_handler.attach(self) + + +# ====================================================================== + + class Parameter(GenericParameter): """Fittable parameter bound to a CIF handler.""" diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index b21a70e4..b844b1ff 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -51,9 +51,9 @@ def format_value(value: object) -> str: # Booleans use CIF true/false tokens elif isinstance(value, bool): value = 'true' if value else 'false' - # Convert ints to floats - elif isinstance(value, int): - value = float(value) + # Preserve integers as integers in CIF output + elif isinstance(value, (int, np.integer)): + value = str(int(value)) # Empty strings → CIF unknown marker elif isinstance(value, str) and not value.strip(): value = '?' @@ -839,7 +839,19 @@ def param_from_cif( return # If numeric, parse with uncertainty if present - if self._value_type == DataTypes.NUMERIC: + if self._value_type == DataTypes.INTEGER: + numeric_value = str_to_ufloat(raw).n + integer_value = int(round(numeric_value)) + if not np.isclose(numeric_value, integer_value): + log.warning( + f'Ignoring non-integer CIF value {raw!r} for integer field ' + f'{self.unique_name}.' 
+ ) + return + self.value = integer_value + + # If numeric, parse with uncertainty if present + elif self._value_type == DataTypes.NUMERIC: has_brackets = '(' in raw u = str_to_ufloat(raw) self.value = u.n @@ -891,7 +903,18 @@ def _set_param_from_raw_cif_value( if raw in {'?', '.'}: return - if param._value_type == DataTypes.NUMERIC: + if param._value_type == DataTypes.INTEGER: + numeric_value = str_to_ufloat(raw).n + integer_value = int(round(numeric_value)) + if not np.isclose(numeric_value, integer_value): + log.warning( + f'Ignoring non-integer CIF value {raw!r} for integer field ' + f'{param.unique_name}.' + ) + return + param.value = integer_value + + elif param._value_type == DataTypes.NUMERIC: has_brackets = '(' in raw u = str_to_ufloat(raw) param.value = u.n diff --git a/src/easydiffraction/project/display.py b/src/easydiffraction/project/display.py index 219ac451..2f74245d 100644 --- a/src/easydiffraction/project/display.py +++ b/src/easydiffraction/project/display.py @@ -4,6 +4,7 @@ from __future__ import annotations +from contextlib import nullcontext from dataclasses import dataclass from typing import TYPE_CHECKING @@ -134,6 +135,66 @@ class PosteriorDisplay: def __init__(self, project: Project) -> None: self._project = project + def _pairs_need_processing_indicator( + self, + *, + parameters: list[object] | None, + ) -> bool: + """Return whether posterior pair plotting still needs processing.""" + if parameters is not None: + return True + + analysis = self._project.analysis + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + pair_caches = sidecar_data.get('pair_caches', {}) + return not ( + analysis.bayesian_result.has_pair_cache.value + and len(analysis.bayesian_pair_caches) > 0 + and bool(pair_caches) + ) + + def _predictive_needs_processing_indicator( + self, + *, + expt_name: str, + style: str, + x: object | None, + ) -> bool: + """Return whether posterior predictive plotting still needs processing.""" + analysis = 
self._project.analysis + sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) + predictive_datasets = sidecar_data.get('predictive_datasets', {}) + if not ( + analysis.bayesian_result.has_posterior_predictive.value + and bool(predictive_datasets) + and expt_name in predictive_datasets + ): + return True + + experiment = self._project.experiments[expt_name] + plotter = self._project.rendering.plotter + _, x_axis_name, _, _, _ = plotter._resolve_x_axis(experiment.type, x) + require_draws = ( + plotter.engine == PlotterEngineEnum.PLOTLY.value + and style in {'draws', 'band+draws'} + ) + + matching_rows = [ + row + for row in analysis.bayesian_predictive_datasets + if row.experiment_name.value == expt_name + and str(row.x_axis_name.value) == str(x_axis_name) + ] + if not matching_rows: + return True + if not require_draws: + return False + return not any( + row.draws_path.value is not None + and predictive_datasets[expt_name].get('draws') is not None + for row in matching_rows + ) + def pairs( self, parameters: list[object] | None = None, @@ -143,10 +204,15 @@ def pairs( max_parameters: int = 6, ) -> None: """Plot posterior pair relationships for sampled parameters.""" - with activity_indicator( - ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity.fit.value), - ): + indicator_context = ( + activity_indicator( + ACTIVITY_LABEL_PROCESSING, + verbosity=VerbosityEnum(self._project.verbosity.fit.value), + ) + if self._pairs_need_processing_indicator(parameters=parameters) + else nullcontext() + ) + with indicator_context: self._project.rendering.plotter.plot_posterior_pairs( parameters=parameters, style=style, @@ -182,10 +248,19 @@ def predictive( x: object | None = None, ) -> None: """Plot posterior predictive summaries for one experiment.""" - with activity_indicator( - ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity.fit.value), - ): + indicator_context = ( + activity_indicator( + 
ACTIVITY_LABEL_PROCESSING, + verbosity=VerbosityEnum(self._project.verbosity.fit.value), + ) + if self._predictive_needs_processing_indicator( + expt_name=expt_name, + style=style, + x=x, + ) + else nullcontext() + ) + with indicator_context: self._project.rendering.plotter.plot_posterior_predictive( expt_name=expt_name, style=style, @@ -249,10 +324,19 @@ def pattern( msg = self._status_by_name(statuses, 'auto').reason raise ValueError(msg) if 'uncertainty' in auto_include: - with activity_indicator( - ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity.fit.value), - ): + indicator_context = ( + activity_indicator( + ACTIVITY_LABEL_PROCESSING, + verbosity=VerbosityEnum(self._project.verbosity.fit.value), + ) + if self._posterior._predictive_needs_processing_indicator( + expt_name=expt_name, + style='band', + x=x, + ) + else nullcontext() + ) + with indicator_context: self._project.rendering.plotter._plot_posterior_predictive_request( expt_name=expt_name, style='band', @@ -283,10 +367,19 @@ def pattern( raise ValueError(msg) if 'uncertainty' in normalized_include: - with activity_indicator( - ACTIVITY_LABEL_PROCESSING, - verbosity=VerbosityEnum(self._project.verbosity.fit.value), - ): + indicator_context = ( + activity_indicator( + ACTIVITY_LABEL_PROCESSING, + verbosity=VerbosityEnum(self._project.verbosity.fit.value), + ) + if self._posterior._predictive_needs_processing_indicator( + expt_name=expt_name, + style='band', + x=x, + ) + else nullcontext() + ) + with indicator_context: self._project.rendering.plotter._plot_posterior_predictive_request( expt_name=expt_name, style='band', From 9cef32a92d23cbd12ba909922e142eef30df8bad Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 18:02:42 +0200 Subject: [PATCH 27/72] Update tutorial notebooks with consistent IDs and saves --- docs/docs/tutorials/ed-17.ipynb | 2 +- docs/docs/tutorials/ed-2.ipynb | 18 +++ docs/docs/tutorials/ed-20.ipynb | 2 +- docs/docs/tutorials/ed-21.ipynb | 
123 ++++++++------- docs/docs/tutorials/ed-23.ipynb | 21 --- docs/docs/tutorials/ed-24.ipynb | 270 ++++++++++++++++++++++++++++++++ docs/docs/tutorials/ed-24.py | 2 +- 7 files changed, 359 insertions(+), 79 deletions(-) create mode 100644 docs/docs/tutorials/ed-24.ipynb diff --git a/docs/docs/tutorials/ed-17.ipynb b/docs/docs/tutorials/ed-17.ipynb index f32b0371..cdc6d761 100644 --- a/docs/docs/tutorials/ed-17.ipynb +++ b/docs/docs/tutorials/ed-17.ipynb @@ -87,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "project.save_as('projects/cosio', temporary=False)" + "project.save_as(dir_path='projects/cosio', temporary=False)" ] }, { diff --git a/docs/docs/tutorials/ed-2.ipynb b/docs/docs/tutorials/ed-2.ipynb index e3338eec..1212cbcc 100644 --- a/docs/docs/tutorials/ed-2.ipynb +++ b/docs/docs/tutorials/ed-2.ipynb @@ -519,6 +519,24 @@ "source": [ "project.display.pattern(expt_name='hrpt')" ] + }, + { + "cell_type": "markdown", + "id": "42", + "metadata": {}, + "source": [ + "## Step 7: Save Project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43", + "metadata": {}, + "outputs": [], + "source": [ + "project.save_as('projects/lbco_hrpt')" + ] } ], "metadata": { diff --git a/docs/docs/tutorials/ed-20.ipynb b/docs/docs/tutorials/ed-20.ipynb index 688fc593..086d7d18 100644 --- a/docs/docs/tutorials/ed-20.ipynb +++ b/docs/docs/tutorials/ed-20.ipynb @@ -459,7 +459,7 @@ "outputs": [], "source": [ "project = Project(name='beer')\n", - "project.save_as(dir_path='beer_mcstas')" + "project.save_as(dir_path='projects/beer_mcstas')" ] }, { diff --git a/docs/docs/tutorials/ed-21.ipynb b/docs/docs/tutorials/ed-21.ipynb index 4905c6f0..4722b50d 100644 --- a/docs/docs/tutorials/ed-21.ipynb +++ b/docs/docs/tutorials/ed-21.ipynb @@ -74,7 +74,10 @@ "\n", "The project object keeps structures, experiments, fit settings, and\n", "plotting utilities together in a single place. We will build the full\n", - "workflow inside this object." 
+ "workflow inside this object.\n", + "\n", + "Save the project to a directory early on so that you can easily reload\n", + "it later if needed." ] }, { @@ -88,9 +91,19 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "6", "metadata": {}, + "outputs": [], + "source": [ + "project.save_as('projects/lbco_hrpt_bayesian')" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, "source": [ "## Step 2: Build the Structural Model\n", "\n", @@ -102,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -112,7 +125,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +135,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -133,7 +146,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -142,7 +155,7 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "12", "metadata": {}, "source": [ "The atom-site definitions below form the starting structural model. The\n", @@ -153,7 +166,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -203,7 +216,7 @@ }, { "cell_type": "markdown", - "id": "13", + "id": "14", "metadata": {}, "source": [ "## Step 3: Define the Diffraction Experiment\n", @@ -215,7 +228,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "15", "metadata": {}, "source": [ "Download the measured data from the repository. 
Alternatively, you\n", @@ -226,7 +239,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -235,7 +248,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "17", "metadata": {}, "source": [ "Create the experiment object and specify the sample form, beam mode,\n", @@ -245,7 +258,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -261,7 +274,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ @@ -270,7 +283,7 @@ }, { "cell_type": "markdown", - "id": "19", + "id": "20", "metadata": {}, "source": [ "Link the structural phase to the experiment." @@ -279,7 +292,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "21", "metadata": {}, "outputs": [], "source": [ @@ -288,7 +301,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "22", "metadata": {}, "source": [ "Set instrument and peak profile parameters.\n", @@ -300,7 +313,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -311,7 +324,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "24", "metadata": {}, "outputs": [], "source": [ @@ -323,7 +336,7 @@ }, { "cell_type": "markdown", - "id": "24", + "id": "25", "metadata": {}, "source": [ "Add background points and excluded regions.\n", @@ -335,7 +348,7 @@ { "cell_type": "code", "execution_count": null, - "id": "25", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -348,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -358,7 +371,7 @@ }, { "cell_type": "markdown", - "id": "27", + "id": "28", "metadata": {}, "source": [ "## Step 4: Run an Initial Local Refinement\n", @@ -378,7 +391,7 @@ { "cell_type": "code", "execution_count": null, 
- "id": "28", + "id": "29", "metadata": {}, "outputs": [], "source": [ @@ -388,7 +401,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -400,7 +413,7 @@ }, { "cell_type": "markdown", - "id": "30", + "id": "31", "metadata": {}, "source": [ "We choose the BUMPS Levenberg-Marquardt minimizer as a fast local\n", @@ -411,7 +424,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -421,7 +434,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -431,7 +444,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -441,7 +454,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -450,7 +463,7 @@ }, { "cell_type": "markdown", - "id": "35", + "id": "36", "metadata": {}, "source": [ "The correlation plot shows how strongly the fitted parameters move\n", @@ -462,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -472,7 +485,7 @@ { "cell_type": "code", "execution_count": null, - "id": "37", + "id": "38", "metadata": {}, "outputs": [], "source": [ @@ -481,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "38", + "id": "39", "metadata": {}, "source": [ "## Step 5: Prepare for Bayesian Sampling\n", @@ -503,7 +516,7 @@ { "cell_type": "code", "execution_count": null, - "id": "39", + "id": "40", "metadata": {}, "outputs": [], "source": [ @@ -512,7 +525,7 @@ }, { "cell_type": "markdown", - "id": "40", + "id": "41", "metadata": {}, "source": [ "Set fit bounds for all free parameters using the default multiplier of\n", @@ -524,7 +537,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41", + "id": "42", "metadata": {}, "outputs": [], 
"source": [ @@ -534,7 +547,7 @@ }, { "cell_type": "markdown", - "id": "42", + "id": "43", "metadata": {}, "source": [ "Displaying the free parameters again is a convenient way to confirm\n", @@ -545,7 +558,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43", + "id": "44", "metadata": {}, "outputs": [], "source": [ @@ -554,7 +567,7 @@ }, { "cell_type": "markdown", - "id": "44", + "id": "45", "metadata": {}, "source": [ "## Step 6: Configure and Run DREAM\n", @@ -580,7 +593,7 @@ { "cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": {}, "outputs": [], "source": [ @@ -590,7 +603,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46", + "id": "47", "metadata": {}, "outputs": [], "source": [ @@ -600,7 +613,7 @@ { "cell_type": "code", "execution_count": null, - "id": "47", + "id": "48", "metadata": {}, "outputs": [], "source": [ @@ -610,7 +623,7 @@ { "cell_type": "code", "execution_count": null, - "id": "48", + "id": "49", "metadata": {}, "outputs": [], "source": [ @@ -619,7 +632,7 @@ }, { "cell_type": "markdown", - "id": "49", + "id": "50", "metadata": {}, "source": [ "## Step 7: Inspect Bayesian Results\n", @@ -632,7 +645,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50", + "id": "51", "metadata": {}, "outputs": [], "source": [ @@ -641,7 +654,7 @@ }, { "cell_type": "markdown", - "id": "51", + "id": "52", "metadata": {}, "source": [ "The correlation and posterior-pair plots are complementary:\n", @@ -658,7 +671,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52", + "id": "53", "metadata": {}, "outputs": [], "source": [ @@ -668,7 +681,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53", + "id": "54", "metadata": {}, "outputs": [], "source": [ @@ -677,7 +690,7 @@ }, { "cell_type": "markdown", - "id": "54", + "id": "55", "metadata": {}, "source": [ "The one-dimensional posterior distributions below make it easier to\n", @@ -688,7 +701,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "55", + "id": "56", "metadata": {}, "outputs": [], "source": [ @@ -697,7 +710,7 @@ }, { "cell_type": "markdown", - "id": "56", + "id": "57", "metadata": {}, "source": [ "Finally, the posterior predictive plot propagates the sampled parameter\n", @@ -709,7 +722,7 @@ { "cell_type": "code", "execution_count": null, - "id": "57", + "id": "58", "metadata": {}, "outputs": [], "source": [ @@ -718,7 +731,7 @@ }, { "cell_type": "markdown", - "id": "58", + "id": "59", "metadata": {}, "source": [ "A final zoomed measured-vs-calculated plot is useful for checking how\n", @@ -729,7 +742,7 @@ { "cell_type": "code", "execution_count": null, - "id": "59", + "id": "60", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/docs/tutorials/ed-23.ipynb b/docs/docs/tutorials/ed-23.ipynb index d20045c6..02318d4e 100644 --- a/docs/docs/tutorials/ed-23.ipynb +++ b/docs/docs/tutorials/ed-23.ipynb @@ -206,27 +206,6 @@ "source": [ "project.display.fit.series(versus=temperature)" ] - }, - { - "cell_type": "markdown", - "id": "19", - "metadata": {}, - "source": [ - "## Save Project\n", - "\n", - "Save the updated project so the appended `analysis/results.csv` and\n", - "refreshed summary files remain on disk." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20", - "metadata": {}, - "outputs": [], - "source": [ - "project.save()" - ] } ], "metadata": { diff --git a/docs/docs/tutorials/ed-24.ipynb b/docs/docs/tutorials/ed-24.ipynb new file mode 100644 index 00000000..1f1112f3 --- /dev/null +++ b/docs/docs/tutorials/ed-24.ipynb @@ -0,0 +1,270 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": { + "tags": [ + "hide-in-docs" + ] + }, + "outputs": [], + "source": [ + "# Check whether easydiffraction is installed; install it if needed.\n", + "# Required for remote environments such as Google Colab.\n", + "import importlib.util\n", + "\n", + "if importlib.util.find_spec('easydiffraction') is None:\n", + " %pip install easydiffraction" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "# Load Saved Bayesian Project: LBCO, HRPT\n", + "\n", + "This tutorial shows how to reopen the Bayesian project created in\n", + "`ed-21.py` and inspect the saved fit results without rerunning DREAM.\n", + "\n", + "The project already contains posterior samples together with cached\n", + "posterior density, pair, and predictive data, so the plots below are\n", + "restored directly from disk." + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Import Library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import easydiffraction as ed" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Locate the Saved Project\n", + "\n", + "In the repository, the saved project currently lives under\n", + "`tmp/tutorials/projects/lbco_hrpt_bayesian`. Once a downloadable\n", + "archive is available, replace this path with the extracted project\n", + "directory instead." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "project_dir = Path('../../../tmp/tutorials/projects/lbco_hrpt_bayesian')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "## Load the Saved Bayesian Project\n", + "\n", + "Loading restores the persisted fit state, posterior samples, and plot\n", + "caches. No new fit is launched in this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "project = ed.Project.load(project_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "## Review the Saved Fit Summary\n", + "\n", + "The fit summary reports the committed point estimate, sampler\n", + "settings, convergence diagnostics, and posterior parameter summaries\n", + "from the saved Bayesian run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.fit.results()" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "## Show Correlations and the Fitted Pattern\n", + "\n", + "The correlation matrix and measured-vs-calculated pattern are restored\n", + "from the saved project state." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.fit.correlations()" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "Show the standard measured vs calculated pattern for the full range." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.pattern(expt_name='hrpt')" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "A zoomed view is useful for checking the fit quality in a narrow\n", + "region of the diffraction pattern." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.pattern(expt_name='hrpt', x_min=92, x_max=93)" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "## Inspect Posterior Densities and Pair Structure\n", + "\n", + "The pair plot and one-dimensional posterior distributions now load\n", + "from the persisted caches generated when the Bayesian fit was saved." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.pairs()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.distribution()" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": {}, + "source": [ + "## Plot Posterior Predictive Checks\n", + "\n", + "The posterior predictive view reuses the cached predictive summary\n", + "stored in the project rather than recalculating it on first display." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.predictive(expt_name='hrpt')" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "A zoomed view is useful for checking the propagated uncertainty in a\n", + "narrow region of the diffraction pattern." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93)" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/tutorials/ed-24.py b/docs/docs/tutorials/ed-24.py index 7e287ba9..ff5f5fd7 100644 --- a/docs/docs/tutorials/ed-24.py +++ b/docs/docs/tutorials/ed-24.py @@ -68,7 +68,7 @@ # region of the diffraction pattern. # %% -project.display.posterior.predictive(expt_name='hrpt', x_min=92, x_max=93) +project.display.pattern(expt_name='hrpt', x_min=92, x_max=93) # %% [markdown] # ## Inspect Posterior Densities and Pair Structure From 6fda12f0d4bfa6bf3ee7460e039b740802a79a72 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 19:17:32 +0200 Subject: [PATCH 28/72] Add pre/post-processing tracking to Bayesian fits --- src/easydiffraction/analysis/analysis.py | 37 +++++++-- .../analysis/fit_helpers/tracking.py | 78 ++++++++++++++++++- src/easydiffraction/analysis/fitting.py | 37 ++++++--- .../analysis/minimizers/base.py | 3 +- .../analysis/minimizers/bumps_dream.py | 5 +- src/easydiffraction/display/progress.py | 2 + 6 files changed, 139 insertions(+), 23 deletions(-) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index da85f3d4..fb11ca58 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1499,7 +1499,7 @@ def _store_bayesian_predictive_projection( experiment=experiment, expt_name=experiment_name, x_axis=x_axis, - include_draws=True, + include_draws=False, ) if summary is None: continue @@ -1512,13 +1512,6 @@ def _store_bayesian_predictive_projection( include_draws=False, ) ] = summary - results.posterior_predictive[ - self._predictive_cache_key( - 
summary.experiment_name, - str(x_axis_name), - include_draws=True, - ) - ] = summary predictive_payload[summary.experiment_name] = self._predictive_dataset_payload( summary, ) @@ -1602,6 +1595,33 @@ def _store_bayesian_plot_cache_projection(self, results: BayesianFitResults) -> self.bayesian_result._set_has_pair_cache(bool(pair_payload)) self.bayesian_result._set_has_posterior_predictive(bool(predictive_payload)) + def _store_bayesian_posterior_sidecar_projection( + self, + results: BayesianFitResults, + ) -> None: + """Persist canonical posterior arrays while live samples are available.""" + posterior_samples = results.posterior_samples + if posterior_samples is None: + self._persisted_fit_state_sidecar['posterior'] = {} + return + + self._persisted_fit_state_sidecar['posterior'] = { + 'parameter_samples': np.asarray( + posterior_samples.parameter_samples, + dtype=float, + ), + 'log_posterior': ( + None + if posterior_samples.log_posterior is None + else np.asarray(posterior_samples.log_posterior, dtype=float) + ), + 'draw_index': ( + None + if posterior_samples.draw_index is None + else np.asarray(posterior_samples.draw_index) + ), + } + def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None: """Store Bayesian fit-result projections into persisted categories.""" credible_interval_inner = 0.68 @@ -1626,6 +1646,7 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_result._set_has_pair_cache(False) self.bayesian_result._set_has_posterior_predictive(False) self.bayesian_result._set_sidecar_file('results.h5') + self._store_bayesian_posterior_sidecar_projection(results) self.bayesian_sampler._set_steps(int(sampler_settings.get('steps', 0))) self.bayesian_sampler._set_burn(int(sampler_settings.get('burn', 0))) diff --git a/src/easydiffraction/analysis/fit_helpers/tracking.py b/src/easydiffraction/analysis/fit_helpers/tracking.py index 0f8e4c97..79c2cfb2 100644 --- 
a/src/easydiffraction/analysis/fit_helpers/tracking.py +++ b/src/easydiffraction/analysis/fit_helpers/tracking.py @@ -11,6 +11,8 @@ from easydiffraction.analysis.fit_helpers.metrics import calculate_reduced_chi_square from easydiffraction.display.progress import ACTIVITY_LABEL_BURN_IN from easydiffraction.display.progress import ACTIVITY_LABEL_FITTING +from easydiffraction.display.progress import ACTIVITY_LABEL_POST_PROCESSING +from easydiffraction.display.progress import ACTIVITY_LABEL_PRE_PROCESSING from easydiffraction.display.progress import ACTIVITY_LABEL_PROCESSING from easydiffraction.display.progress import ACTIVITY_LABEL_SAMPLING from easydiffraction.display.progress import ActivityIndicator @@ -28,6 +30,8 @@ SAMPLER_PROGRESS_UPDATE_SECONDS = 5.0 TRACKING_MODE_FIT = 'fit' TRACKING_MODE_SAMPLER = 'sampling' +SAMPLER_PHASE_POST_PROCESSING = 'post-processing' +SAMPLER_PHASE_PRE_PROCESSING = 'pre-processing' DEFAULT_HEADERS = ['iteration', 'time (s)', 'χ²', 'change / status'] DEFAULT_ALIGNMENTS = ['center', 'center', 'center', 'center'] SAMPLER_HEADERS = ['iteration', 'progress', 'time (s)', 'log posterior', 'phase'] @@ -85,6 +89,7 @@ def __init__(self) -> None: self._last_sampler_progress_percent: float | None = None self._last_sampler_log_posterior: float | None = None self._last_sampler_elapsed_time: float | None = None + self._sampler_pre_processing_pending: bool = False self._df_rows: list[list[str]] = [] self._activity_indicator: ActivityIndicator | None = None @@ -111,6 +116,7 @@ def reset(self) -> None: self._last_sampler_progress_percent = None self._last_sampler_log_posterior = None self._last_sampler_elapsed_time = None + self._sampler_pre_processing_pending = False self._df_rows = [] self._activity_label = ACTIVITY_LABEL_FITTING @@ -240,6 +246,37 @@ def track_sampler_progress(self, update: SamplerProgressUpdate) -> None: self._last_chi2 = update.reduced_chi2 self._last_iteration = update.iteration + def start_sampler_pre_processing(self, *, 
total_iterations: int) -> None: + """Mark sampler setup so its status row appears on first progress update.""" + self._tracking_mode = TRACKING_MODE_SAMPLER + self._sampler_total_iterations = max(1, total_iterations) + self._last_sampler_phase = SAMPLER_PHASE_PRE_PROCESSING + self._last_sampler_progress_percent = None + self._last_sampler_log_posterior = None + self._last_sampler_elapsed_time = None + self._sampler_pre_processing_pending = True + self._set_activity_label(ACTIVITY_LABEL_PRE_PROCESSING) + + def start_sampler_post_processing( + self, + *, + log_posterior: float | None = None, + ) -> None: + """Switch the activity indicator to post-processing.""" + if self._tracking_mode != TRACKING_MODE_SAMPLER: + return + + if self._sampler_total_iterations is None: + self._sampler_total_iterations = max(1, self._last_iteration or 1) + + elapsed_time = self._elapsed_since_start() + self._last_sampler_phase = SAMPLER_PHASE_POST_PROCESSING + self._last_sampler_progress_percent = 100.0 + if log_posterior is not None: + self._last_sampler_log_posterior = float(log_posterior) + self._last_sampler_elapsed_time = elapsed_time + self._set_activity_label(ACTIVITY_LABEL_POST_PROCESSING) + @property def best_chi2(self) -> float | None: """Best recorded reduced chi-square value or None.""" @@ -273,6 +310,14 @@ def stop_timer(self) -> None: self._end_time = time.perf_counter() self._fitting_time = self._end_time - self._start_time + def _elapsed_since_start(self) -> float | None: + """Return elapsed wall time using the active timer when available.""" + if self._start_time is None: + return None + if self._end_time is not None: + return self._end_time - self._start_time + return time.perf_counter() - self._start_time + def start_tracking(self, minimizer_name: str, *, mode: str = TRACKING_MODE_FIT) -> None: """ Initialize display and headers and announce the minimizer. 
@@ -354,6 +399,12 @@ def _initial_sampler_progress_row( self._best_chi2 = update.reduced_chi2 self._best_iteration = update.iteration self._last_progress_time = update.elapsed_time + if self._sampler_pre_processing_pending: + self._sampler_pre_processing_pending = False + return self._sampler_status_row( + phase=SAMPLER_PHASE_PRE_PROCESSING, + elapsed_time=update.elapsed_time, + ) return self._sampler_progress_row( clamped_iteration=clamped_iteration, clamped_progress=clamped_progress, @@ -431,6 +482,21 @@ def _sampler_progress_row( phase, ] + def _sampler_status_row( + self, + *, + phase: str, + elapsed_time: float | None, + ) -> list[str]: + """Return a status-only sampler row without iteration metrics.""" + return [ + '', + '', + self._format_elapsed_time(elapsed_time), + '', + phase, + ] + def _finalize_sampler_tracking_row(self) -> None: row = self._final_sampler_tracking_row() if row is None: @@ -453,6 +519,12 @@ def _final_sampler_tracking_row(self) -> list[str] | None: final_progress = self._resolved_final_sampler_progress() elapsed_time = self._resolved_final_sampler_elapsed_time() + if self._last_sampler_phase == SAMPLER_PHASE_POST_PROCESSING: + return self._sampler_status_row( + phase=SAMPLER_PHASE_POST_PROCESSING, + elapsed_time=elapsed_time, + ) + log_posterior = ( f'{self._last_sampler_log_posterior:.2f}' if self._last_sampler_log_posterior is not None @@ -596,14 +668,18 @@ def _replace_last_tracking_row(self, row: list[str]) -> None: def _default_activity_label(self) -> str: if self._tracking_mode == TRACKING_MODE_SAMPLER: - return ACTIVITY_LABEL_PROCESSING + return ACTIVITY_LABEL_PRE_PROCESSING return ACTIVITY_LABEL_FITTING @staticmethod def _activity_label_for_sampler_phase(phase: str) -> str: normalized_phase = phase.strip().lower() + if normalized_phase == SAMPLER_PHASE_PRE_PROCESSING: + return ACTIVITY_LABEL_PRE_PROCESSING if normalized_phase == 'burn-in': return ACTIVITY_LABEL_BURN_IN + if normalized_phase == SAMPLER_PHASE_POST_PROCESSING: + 
return ACTIVITY_LABEL_POST_PROCESSING if normalized_phase == 'sampling': return ACTIVITY_LABEL_SAMPLING if normalized_phase: diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index a25f04fc..f628a9ab 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -8,12 +8,14 @@ import numpy as np +from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults from easydiffraction.analysis.fit_helpers.metrics import get_reliability_inputs from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import MinimizerFactory from easydiffraction.core.variable import Parameter from easydiffraction.datablocks.experiment.item.base import intensity_category_for from easydiffraction.utils.enums import VerbosityEnum +from easydiffraction.utils.logging import log if TYPE_CHECKING: from easydiffraction.analysis.fit_helpers.reporting import FitResults @@ -175,16 +177,31 @@ def objective_function(engine_params: dict[str, Any]) -> np.ndarray: random_seed=random_seed, ) - if self.results is not None: - self.results.message = _resolve_fit_result_message(self.results) - self.results.iterations = _resolve_fit_result_iterations(self.results) - self.results.chi_square = _resolve_fit_result_chi_square(self.results) - if analysis is not None: - analysis._store_fit_result_projection( - self.results, - experiments=experiments, - fitted_parameters=params, - ) + warn_poorly_mixed = False + try: + if self.results is not None: + self.results.message = _resolve_fit_result_message(self.results) + self.results.iterations = _resolve_fit_result_iterations(self.results) + self.results.chi_square = _resolve_fit_result_chi_square(self.results) + if analysis is not None: + if isinstance(self.results, BayesianFitResults): + warn_poorly_mixed = not self.results.convergence_diagnostics.get( + 'converged', + True, + ) + 
self.minimizer.tracker.start_sampler_post_processing( + log_posterior=self.results.best_log_posterior, + ) + analysis._store_fit_result_projection( + self.results, + experiments=experiments, + fitted_parameters=params, + ) + finally: + self.minimizer._stop_tracking() + + if warn_poorly_mixed: + log.warning('Convergence diagnostics indicate the posterior may be poorly mixed.') def _process_fit_results( self, diff --git a/src/easydiffraction/analysis/minimizers/base.py b/src/easydiffraction/analysis/minimizers/base.py index bbc639f1..c1ff8d17 100644 --- a/src/easydiffraction/analysis/minimizers/base.py +++ b/src/easydiffraction/analysis/minimizers/base.py @@ -343,8 +343,9 @@ def fit( if resolved_random_seed is not None: solver_args['random_seed'] = resolved_random_seed raw_result = self._run_solver(objective_function, **solver_args) - finally: + except Exception: self._stop_tracking() + raise return self._finalize_fit(parameters, raw_result) diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 1cb42fb5..fdf248b5 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -601,6 +601,8 @@ def _run_solver( object Normalized DREAM result stored in an ``OptimizeResult``. 
""" + total_iterations = int(self.steps + self._resolved_burn(self.steps) + 1) + self.tracker.start_sampler_pre_processing(total_iterations=total_iterations) context = self._prepare_run_context(objective_function=objective_function, kwargs=kwargs) driver_result = self._execute_driver( driver=context.driver, @@ -896,9 +898,6 @@ def _build_success_result( posterior_parameter_summaries ) - if not convergence_diagnostics.get('converged', True): - log.warning('Convergence diagnostics indicate the posterior may be poorly mixed.') - return OptimizeResult( x=best_sample_values, dx=posterior_standard_deviations, diff --git a/src/easydiffraction/display/progress.py b/src/easydiffraction/display/progress.py index ef381c50..6123e626 100644 --- a/src/easydiffraction/display/progress.py +++ b/src/easydiffraction/display/progress.py @@ -31,6 +31,8 @@ ACTIVITY_LABEL_BURN_IN = 'Burn-in...' ACTIVITY_LABEL_FITTING = 'Fitting...' +ACTIVITY_LABEL_POST_PROCESSING = 'Post-processing...' +ACTIVITY_LABEL_PRE_PROCESSING = 'Pre-processing...' ACTIVITY_LABEL_PROCESSING = 'Processing...' ACTIVITY_LABEL_SAMPLING = 'Sampling...' ACTIVITY_ACCENT_COLOR = '#d97706' From 9a13a28b710f368daf6f8229053b55c38ef351bf Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 19:17:38 +0200 Subject: [PATCH 29/72] Add project-first CLI commands for saved projects --- docs/docs/cli/index.md | 34 ++++++++- src/easydiffraction/__main__.py | 119 +++++++++++++++++++++++++++++--- 2 files changed, 142 insertions(+), 11 deletions(-) diff --git a/docs/docs/cli/index.md b/docs/docs/cli/index.md index 5fc13c77..b42e1f23 100644 --- a/docs/docs/cli/index.md +++ b/docs/docs/cli/index.md @@ -71,7 +71,7 @@ existing files. 
Load a saved project and run structural refinement: ```bash -python -m easydiffraction fit PROJECT_DIR +python -m easydiffraction PROJECT_DIR fit ``` `PROJECT_DIR` is the path to a project directory previously created by @@ -86,5 +86,35 @@ Use the `--dry` flag to run the fit **without overwriting** the project files: ```bash -python -m easydiffraction fit PROJECT_DIR --dry +python -m easydiffraction PROJECT_DIR fit --dry ``` + +EasyDiffraction also accepts the legacy subcommand-first form +`python -m easydiffraction fit PROJECT_DIR`, but the project-first form +is recommended because it makes it easy to rerun the same command and +swap only the action. + +### Display a Project + +Load a saved project and show the outputs that match its current fit +state and rendering backend: + +```bash +python -m easydiffraction PROJECT_DIR display +``` + +For typical non-sequential projects this includes the latest fit +results, parameter correlations, default pattern views, and when the +saved state is Bayesian also posterior distributions and predictive +checks. Plotly-only views such as posterior pair plots are shown only +when the active chart engine is Plotly. + +### Undo the Last Fit + +The CLI already reserves the project-first undo command shape: + +```bash +python -m easydiffraction PROJECT_DIR undo +``` + +This command currently reports that undo support is not implemented yet. diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index ca1d3f8c..090632b4 100644 --- a/src/easydiffraction/__main__.py +++ b/src/easydiffraction/__main__.py @@ -1,6 +1,8 @@ # SPDX-FileCopyrightText: 2025 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +from __future__ import annotations + import sys # Ensure UTF-8 output on all platforms (e.g. 
Windows with cp1252) @@ -14,6 +16,85 @@ app = typer.Typer(add_completion=False) +_PROJECT_COMMAND_NAMES = frozenset({'fit', 'display', 'undo'}) +_GLOBAL_COMMAND_NAMES = frozenset({ + 'list-tutorials', + 'download-tutorial', + 'download-all-tutorials', + *_PROJECT_COMMAND_NAMES, +}) + + +def _normalized_cli_args(args: list[str]) -> list[str]: + """Return CLI args rewritten to support project-first commands.""" + if len(args) < 2: + return args + + first_arg = args[0] + if first_arg.startswith('-') or first_arg in _GLOBAL_COMMAND_NAMES: + return args + + if args[1] not in _PROJECT_COMMAND_NAMES: + return args + + return [args[1], first_arg, *args[2:]] + + +def _load_project(project_dir: str) -> object: + """Load one saved project directory.""" + return ed.Project.load(project_dir) + + +def _display_project_patterns(project: object) -> None: + """Render default pattern views for all experiments.""" + for experiment in project.experiments: + project.display.pattern(expt_name=experiment.name) + + +def _project_fit_mode(project: object) -> str | None: + """Return the resolved fitting mode type for one project.""" + return getattr(project.analysis, 'fitting_mode_type', None) + + +def _project_result_kind(project: object) -> str | None: + """Return the resolved fit result kind for one project.""" + result_kind = getattr(getattr(project.analysis, 'fit_result', None), 'result_kind', None) + return getattr(result_kind, 'value', None) + + +def _display_fit_outputs(project: object) -> None: + """Render the standard post-fit CLI outputs.""" + if _project_fit_mode(project) != 'sequential': + project.display.fit.results() + project.display.fit.correlations() + _display_project_patterns(project) + + +def _display_project_outputs(project: object) -> None: + """Render the typical displays for the loaded project state.""" + if _project_fit_mode(project) == 'sequential': + project.display.fit.series() + _display_project_patterns(project) + return + + project.display.fit.results() + 
project.display.fit.correlations() + + if _project_result_kind(project) == 'bayesian': + if project.rendering.plotter.engine == 'plotly': + project.display.posterior.pairs() + project.display.posterior.distribution() + for experiment in project.experiments: + project.display.posterior.predictive(expt_name=experiment.name) + + _display_project_patterns(project) + + +def run_cli(args: list[str] | None = None) -> None: + """Run the EasyDiffraction CLI with project-first argument support.""" + cli_args = list(sys.argv[1:] if args is None else args) + app(args=_normalized_cli_args(cli_args)) + @app.callback(invoke_without_command=True) def main( @@ -94,18 +175,38 @@ def fit( help='Run fitting without saving results back to the project directory.', ), ) -> None: - """Fit a saved project: easydiffraction fit PROJECT_DIR [--dry].""" - project = ed.Project.load(project_dir) + """Fit a saved project: easydiffraction PROJECT_DIR fit [--dry].""" + project = _load_project(project_dir) if dry: project.info._path = None project.analysis.fit() - if getattr(project.analysis, 'fitting_mode_type', None) != 'sequential': - project.display.fit.results() - project.display.fit.correlations() - for expt in project.experiments: - project.display.pattern(expt_name=expt.name) - # project.summary.show_report() + _display_fit_outputs(project) + + +@app.command('display') +def display( + project_dir: str = typer.Argument( + ..., + help='Path to the project directory (must contain project.cif).', + ), +) -> None: + """Display the typical outputs for a saved project state.""" + project = _load_project(project_dir) + _display_project_outputs(project) + + +@app.command('undo') +def undo( + project_dir: str = typer.Argument( + ..., + help='Path to the project directory (must contain project.cif).', + ), +) -> None: + """Undo the last fit for a saved project when fit-history support exists.""" + _load_project(project_dir) + typer.echo('Undo is not implemented yet. 
See undo-fit.md ADR.') + raise typer.Exit(code=1) if __name__ == '__main__': - app() + run_cli() From 970ef96480b188bf050a912e537aa4220d941998 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 19:35:16 +0200 Subject: [PATCH 30/72] Fix missing pre-processing row in DREAM progress table --- src/easydiffraction/analysis/fit_helpers/tracking.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/easydiffraction/analysis/fit_helpers/tracking.py b/src/easydiffraction/analysis/fit_helpers/tracking.py index 79c2cfb2..5d716831 100644 --- a/src/easydiffraction/analysis/fit_helpers/tracking.py +++ b/src/easydiffraction/analysis/fit_helpers/tracking.py @@ -392,7 +392,7 @@ def _initial_sampler_progress_row( clamped_iteration: int, clamped_progress: float, ) -> list[str]: - if self._previous_chi2 is not None and self._best_chi2 is not None: + if self._df_rows: return [] self._previous_chi2 = update.reduced_chi2 @@ -402,8 +402,10 @@ def _initial_sampler_progress_row( if self._sampler_pre_processing_pending: self._sampler_pre_processing_pending = False return self._sampler_status_row( + iteration_label=self._sampler_iteration_label(clamped_iteration), phase=SAMPLER_PHASE_PRE_PROCESSING, elapsed_time=update.elapsed_time, + log_posterior=update.log_posterior, ) return self._sampler_progress_row( clamped_iteration=clamped_iteration, @@ -485,15 +487,17 @@ def _sampler_progress_row( def _sampler_status_row( self, *, + iteration_label: str = '', phase: str, elapsed_time: float | None, + log_posterior: float | None = None, ) -> list[str]: """Return a status-only sampler row without iteration metrics.""" return [ - '', + iteration_label, '', self._format_elapsed_time(elapsed_time), - '', + '' if log_posterior is None else f'{log_posterior:.2f}', phase, ] From 3b59134f71a593315c18739f0178582307e177d0 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 19:44:20 +0200 Subject: [PATCH 31/72] Add CLI commands for example data 
downloads --- docs/docs/cli/index.md | 41 ++++++++++++++++ docs/docs/quick-reference/index.md | 5 ++ src/easydiffraction/__init__.py | 1 + src/easydiffraction/__main__.py | 28 +++++++++++ src/easydiffraction/utils/utils.py | 76 ++++++++++++++++++++++++++++-- 5 files changed, 147 insertions(+), 4 deletions(-) diff --git a/docs/docs/cli/index.md b/docs/docs/cli/index.md index b42e1f23..f0896fb2 100644 --- a/docs/docs/cli/index.md +++ b/docs/docs/cli/index.md @@ -48,6 +48,43 @@ List all available tutorial notebooks: python -m easydiffraction list-tutorials ``` +### List Example Data + +List all available example data files and downloadable project +archives: + +```bash +python -m easydiffraction list-data +``` + +The table includes the data ID, file name, record kind, and +description. + +### Download Example Data + +Download a specific example data record by ID: + +```bash +python -m easydiffraction download-data 3 +``` + +For downloadable saved projects, the ZIP archive is extracted +automatically, the ZIP file is removed, and the extracted project path +is reported: + +```bash +python -m easydiffraction download-data 30 +``` + +This makes it possible to go straight from download to a project-first +CLI command such as: + +```bash +python -m easydiffraction EXTRACTED_PROJECT_DIR display +``` + +Use the extracted path printed by `download-data`. + ### Download Tutorials Download a specific tutorial by ID: @@ -66,6 +103,10 @@ Both commands accept `--destination` (`-d`) to specify the output directory (default: `tutorials/`) and `--overwrite` (`-o`) to replace existing files. +`download-data` also accepts `--destination` (`-d`) and `--overwrite` +(`-o`). For project archives, `--overwrite` replaces the extracted +project directory before downloading and unpacking a fresh copy. 
+ ### Fit a Project Load a saved project and run structural refinement: diff --git a/docs/docs/quick-reference/index.md b/docs/docs/quick-reference/index.md index c035d3fe..c3aa9f74 100644 --- a/docs/docs/quick-reference/index.md +++ b/docs/docs/quick-reference/index.md @@ -39,10 +39,15 @@ ed.show_version() Download a dataset by ID into a local directory: ```python +ed.list_data() + structure_path = ed.download_data(id=1, destination='data') data_path = ed.download_data(id=3, destination='data') ``` +Project archives are extracted automatically, and `download_data()` +returns the extracted project directory path. + For tutorial notebooks: ```python diff --git a/src/easydiffraction/__init__.py b/src/easydiffraction/__init__.py index 11ea117c..0e69a9c7 100644 --- a/src/easydiffraction/__init__.py +++ b/src/easydiffraction/__init__.py @@ -14,5 +14,6 @@ from easydiffraction.utils.utils import download_all_tutorials from easydiffraction.utils.utils import download_data from easydiffraction.utils.utils import download_tutorial +from easydiffraction.utils.utils import list_data from easydiffraction.utils.utils import list_tutorials from easydiffraction.utils.utils import show_version diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index 090632b4..f7b96336 100644 --- a/src/easydiffraction/__main__.py +++ b/src/easydiffraction/__main__.py @@ -18,6 +18,8 @@ _PROJECT_COMMAND_NAMES = frozenset({'fit', 'display', 'undo'}) _GLOBAL_COMMAND_NAMES = frozenset({ + 'list-data', + 'download-data', 'list-tutorials', 'download-tutorial', 'download-all-tutorials', @@ -124,6 +126,32 @@ def list_tutorials() -> None: ed.list_tutorials() +@app.command('list-data') +def list_data() -> None: + """List available example data and project archives.""" + ed.list_data() + + +@app.command('download-data') +def download_data( + id: int = typer.Argument(..., help='Data ID to download.'), + destination: str = typer.Option( + 'data', + '--destination', + '-d', + 
help='Directory to save the data or extracted project into.', + ), + overwrite: bool = typer.Option( # noqa: FBT001 + False, # noqa: FBT003 + '--overwrite', + '-o', + help='Overwrite an existing file or extracted project if present.', + ), +) -> None: + """Download one example data record by ID.""" + ed.download_data(id=id, destination=destination, overwrite=overwrite) + + @app.command('download-tutorial') def download_tutorial( id: int = typer.Argument(..., help='Tutorial ID to download.'), diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 874c362f..dde42928 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -6,6 +6,7 @@ import functools import json import pathlib +import shutil import urllib.request from importlib.metadata import PackageNotFoundError from importlib.metadata import version @@ -20,6 +21,7 @@ from uncertainties import ufloat_fromstr from easydiffraction.display.tables import TableRenderer +from easydiffraction.io.ascii import extract_project_from_zip from easydiffraction.utils.environment import resolve_artifact_path from easydiffraction.utils.logging import console from easydiffraction.utils.logging import log @@ -113,6 +115,14 @@ def _fetch_data_index() -> dict: return json.load(f) +def _existing_project_dir(extraction_dir: pathlib.Path) -> pathlib.Path | None: + """Return one extracted project directory from a destination.""" + project_files = sorted(extraction_dir.rglob('project.cif')) + if not project_files: + return None + return project_files[0].parent.resolve() + + @functools.lru_cache(maxsize=1) def _fetch_tutorials_index() -> dict: """ @@ -161,16 +171,18 @@ def download_data( id : int | str Numeric dataset id (e.g. 12). destination : str, default='data' - Directory to save the file into (created if missing). Relative - destinations are resolved against the configured artifact root - when ``EASYDIFFRACTION_ARTIFACT_ROOT`` is set. 
+ Directory to save the downloaded file or extracted project into + (created if missing). Relative destinations are resolved + against the configured artifact root when + ``EASYDIFFRACTION_ARTIFACT_ROOT`` is set. overwrite : bool, default=False Whether to overwrite the file if it already exists. Returns ------- str - Full path to the downloaded file as string. + Full path to the downloaded file, or to the extracted project + directory for project ZIP archives, as string. Raises ------ @@ -193,10 +205,12 @@ def download_data( url = _build_data_url(record_path) _validate_url(url) fname = _filename_for_id_from_path(id, record_path) + is_project_archive = record.get('kind') == 'project' and fname.endswith('.zip') dest_path = resolve_artifact_path(destination) dest_path.mkdir(parents=True, exist_ok=True) file_path = dest_path / fname + extraction_dir = dest_path / pathlib.Path(fname).stem description = record.get('description', '') message = f'Data #{id}' @@ -206,7 +220,21 @@ def download_data( console.paragraph('Getting data...') console.print(f'{message}') + if is_project_archive and extraction_dir.exists() and not overwrite: + existing_project_dir = _existing_project_dir(extraction_dir) + if existing_project_dir is not None: + console.print( + f"✅ Data #{id} already extracted at '{existing_project_dir}'. " + 'Keeping existing project.' + ) + return str(existing_project_dir) + if file_path.exists(): + if is_project_archive and not overwrite: + project_dir = extract_project_from_zip(file_path, destination=extraction_dir) + file_path.unlink() + console.print(f"✅ Data #{id} extracted to '{project_dir}'") + return str(project_dir) if not overwrite: console.print( f"✅ Data #{id} already present at '{file_path}'. Keeping existing file." 
@@ -217,6 +245,9 @@ def download_data( known_hash = _normalize_known_hash(record.get('hash')) + if is_project_archive and extraction_dir.exists() and overwrite: + shutil.rmtree(extraction_dir) + # Pooch downloads to destination with our controlled filename. pooch.retrieve( url=url, @@ -225,10 +256,47 @@ def download_data( path=str(dest_path), ) + if is_project_archive: + project_dir = extract_project_from_zip(file_path, destination=extraction_dir) + file_path.unlink() + console.print(f"✅ Data #{id} downloaded and extracted to '{project_dir}'") + return str(project_dir) + console.print(f"✅ Data #{id} downloaded to '{file_path}'") return str(file_path) +def list_data() -> None: + """Display a table of available example data records.""" + index = _fetch_data_index() + if not index: + console.print('❌ No example data available.') + return + + console.paragraph('Example data available for download:') + + columns_headers = ['id', 'file', 'kind', 'description'] + columns_alignment = ['right', 'left', 'left', 'left'] + columns_data = [] + + for data_id in sorted(index, key=lambda value: int(value) if value.isdigit() else value): + record = index[data_id] + columns_data.append( + [ + data_id, + pathlib.PurePosixPath(_record_path(record)).name, + record.get('kind', ''), + record.get('description', ''), + ] + ) + + render_table( + columns_headers=columns_headers, + columns_data=columns_data, + columns_alignment=columns_alignment, + ) + + def package_version(package_name: str) -> str | None: """ Get the installed version string of the specified package. 
From b99a71933c0e62efcf206e6fe1520260c2b9ec6c Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 19:57:18 +0200 Subject: [PATCH 32/72] Remove pattern display from tutorial --- docs/docs/tutorials/ed-24.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/docs/docs/tutorials/ed-24.py b/docs/docs/tutorials/ed-24.py index ff5f5fd7..affa3151 100644 --- a/docs/docs/tutorials/ed-24.py +++ b/docs/docs/tutorials/ed-24.py @@ -49,27 +49,13 @@ project.display.fit.results() # %% [markdown] -# ## Show Correlations and the Fitted Pattern +# ## Show Correlations # -# The correlation matrix and measured-vs-calculated pattern are restored -# from the saved project state. +# The correlation matrix is restored from the saved project state. # %% project.display.fit.correlations() -# %% [markdown] -# Show the standard measured vs calculated pattern for the full range. - -# %% -project.display.pattern(expt_name='hrpt') - -# %% [markdown] -# A zoomed view is useful for checking the fit quality in a narrow -# region of the diffraction pattern. - -# %% -project.display.pattern(expt_name='hrpt', x_min=92, x_max=93) - # %% [markdown] # ## Inspect Posterior Densities and Pair Structure # @@ -87,6 +73,8 @@ # # The posterior predictive view reuses the cached predictive summary # stored in the project rather than recalculating it on first display. +# It overlays the 95% credible interval propagated from the posterior +# samples. 
# %% project.display.posterior.predictive(expt_name='hrpt') From 72a0f9e657e68d862f7738fe9bd09b82b7148280 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 19:58:32 +0200 Subject: [PATCH 33/72] Group saved-project tutorials under Load Project --- docs/docs/tutorials/ed-24.py | 2 -- docs/docs/tutorials/index.md | 25 ++++++++++++++----------- docs/mkdocs.yml | 6 ++++-- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/docs/docs/tutorials/ed-24.py b/docs/docs/tutorials/ed-24.py index affa3151..d5e08ad2 100644 --- a/docs/docs/tutorials/ed-24.py +++ b/docs/docs/tutorials/ed-24.py @@ -27,8 +27,6 @@ # %% project_dir = Path('../../../tmp/tutorials/projects/lbco_hrpt_bayesian') -# %% - # %% [markdown] # ## Load the Saved Bayesian Project # diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index 1c733ca9..365d647e 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -17,10 +17,6 @@ The tutorials are organized into the following categories: ## Getting Started -- [LBCO `quick` `load`](ed-18.ipynb) – The most minimal example showing - how to load a previously saved project from a directory and run - refinement. Useful when a project has already been set up and saved in - a prior session. - [LBCO `quick` `code`](ed-2.ipynb) – A minimal example intended as a quick reference for users already familiar with the EasyDiffraction API or who want to see an example refinement when both the structure @@ -42,6 +38,20 @@ The tutorials are organized into the following categories: descriptions of every step, making it suitable for users who are new to EasyDiffraction or those who prefer a more guided approach. +## Load Project + +- [LBCO `quick` `load`](ed-18.ipynb) – The most minimal example showing + how to load a previously saved project from a directory and run + refinement. Useful when a project has already been set up and saved in + a prior session. 
+- [Co2SiO4 Temperature scan, resumed](ed-23.ipynb) – Continue a saved + sequential refinement of Co2SiO4 from an existing + `analysis/results.csv` after an incomplete previous run. +- [LBCO Bayesian, saved project](ed-24.ipynb) – Shows how to load the + saved Bayesian LBCO project created in the previous tutorial and + inspect the persisted fit summary, correlation matrix, posterior + plots, and predictive checks without rerunning DREAM. + ## Powder Diffraction - [Co2SiO4 `pd-neut-cwl`](ed-5.ipynb) – Demonstrates a Rietveld @@ -89,9 +99,6 @@ The tutorials are organized into the following categories: - [Co2SiO4 Temperature scan](ed-17.ipynb) – Sequential Rietveld refinement of Co2SiO4 using constant wavelength neutron powder diffraction data from D20 at ILL across a temperature scan. -- [Co2SiO4 Temperature scan, resumed](ed-23.ipynb) – Continue a saved - sequential refinement of Co2SiO4 from an existing - `analysis/results.csv` after an incomplete previous run. ## Simulated Data @@ -109,10 +116,6 @@ The tutorials are organized into the following categories: tutorial covers the use of Markov Chain Monte Carlo (MCMC) sampling to explore the posterior distribution of the refined parameters, providing insights into parameter uncertainties and correlations. -- [LBCO Bayesian, saved project](ed-24.ipynb) – Shows how to load the - saved Bayesian LBCO project created in the previous tutorial and - inspect the persisted fit summary, correlation matrix, posterior - plots, and predictive checks without rerunning DREAM. - [Tb2TiO7 Bayesian](ed-22.ipynb) – Another example of a Bayesian analysis. 
This tutorial focuses on the Tb2TiO7 crystal structure using constant wavelength neutron single crystal diffraction data from HEiDi diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c54ff935..ff5c2722 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -193,10 +193,13 @@ nav: - Tutorials: - Tutorials: tutorials/index.md - Getting Started: - - LBCO quick load: tutorials/ed-18.ipynb - LBCO quick code: tutorials/ed-2.ipynb - LBCO basic load: tutorials/ed-1.ipynb - LBCO complete: tutorials/ed-3.ipynb + - Load Project: + - LBCO quick load: tutorials/ed-18.ipynb + - Co2SiO4 T-scan resumed: tutorials/ed-23.ipynb + - LBCO Bayesian, saved project: tutorials/ed-24.ipynb - Powder Diffraction: - Co2SiO4 pd-neut-cwl: tutorials/ed-5.ipynb - HS pd-neut-cwl: tutorials/ed-6.ipynb @@ -213,7 +216,6 @@ nav: - PbSO4 NPD+XRD: tutorials/ed-4.ipynb - Si Bragg+PDF: tutorials/ed-16.ipynb - Co2SiO4 T-scan: tutorials/ed-17.ipynb - - Co2SiO4 T-scan resumed: tutorials/ed-23.ipynb - Simulated Data: - LBCO+Si McStas: tutorials/ed-9.ipynb - BEER McStas: tutorials/ed-20.ipynb From fec75c2d4944cc141636caedb20689dede8bc9e5 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 20:06:02 +0200 Subject: [PATCH 34/72] Consolidate download and extract in tutorials --- docs/docs/tutorials/ed-18.py | 14 +++++--------- docs/docs/tutorials/ed-23.py | 18 +++++------------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/docs/docs/tutorials/ed-18.py b/docs/docs/tutorials/ed-18.py index 7c4fff9f..fe44056e 100644 --- a/docs/docs/tutorials/ed-18.py +++ b/docs/docs/tutorials/ed-18.py @@ -17,19 +17,15 @@ # %% from easydiffraction import Project from easydiffraction import download_data -from easydiffraction import extract_project_from_zip # %% [markdown] -# ## Download Project Archive - -# %% -zip_path = download_data(id=30, destination='data') - -# %% [markdown] -# ## Extract Project +# ## Download Project +# +# Project archives are extracted automatically, and the returned path 
+# points to the saved project directory. # %% -project_dir = extract_project_from_zip(zip_path, destination='data') +project_dir = download_data(id=30, destination='projects') # %% [markdown] # ## Load Project diff --git a/docs/docs/tutorials/ed-23.py b/docs/docs/tutorials/ed-23.py index 659cbcbb..6388efcd 100644 --- a/docs/docs/tutorials/ed-23.py +++ b/docs/docs/tutorials/ed-23.py @@ -14,22 +14,14 @@ import easydiffraction as ed # %% [markdown] -# ## Download Saved Project Archive +# ## Download Saved Project # -# The archive should contain a saved project directory with a partially -# completed sequential fit, including `analysis/results.csv`. +# The returned path points directly to the saved project directory with +# a partially completed sequential fit, including +# `analysis/results.csv`. # %% -zip_path = ed.download_data(id=34, destination='data') - -# %% [markdown] -# ## Extract Project -# -# Extract the saved project directory locally. For a project you -# already have on disk, set `project_dir` directly instead. 
- -# %% -project_dir = ed.extract_project_from_zip(zip_path, destination='projects') +project_dir = ed.download_data(id=34, destination='projects') # %% [markdown] # ## Load Saved Project From ef953f673efb677a85b99ba7a6d0949d260663e5 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 22:13:48 +0200 Subject: [PATCH 35/72] Refactor Bayesian fit-state manifest create APIs --- .../suggestions/analysis-cif-fit-state.md | 4 +- docs/dev/package-structure/full.md | 98 ++- docs/dev/package-structure/short.md | 55 +- docs/dev/plans/analysis-cif-fit-state.md | 25 +- docs/docs/cli/index.md | 6 +- docs/docs/tutorials/index.md | 18 +- docs/mkdocs.yml | 6 +- src/easydiffraction/__main__.py | 9 +- src/easydiffraction/analysis/analysis.py | 578 ++++++++++-------- .../bayesian_convergence/default.py | 2 +- .../bayesian_distribution_caches/default.py | 4 +- .../bayesian_pair_caches/default.py | 61 +- .../bayesian_parameter_posteriors/default.py | 63 +- .../bayesian_predictive_datasets/default.py | 57 +- .../categories/bayesian_result/default.py | 12 +- .../categories/bayesian_sampler/default.py | 1 - .../default.py | 12 +- .../deterministic_result/default.py | 19 +- .../fit_parameter_correlations/default.py | 2 +- .../categories/fit_parameters/default.py | 10 +- .../analysis/categories/fit_result/default.py | 2 +- src/easydiffraction/analysis/enums.py | 5 +- .../analysis/fit_helpers/tracking.py | 12 +- src/easydiffraction/analysis/fitting.py | 136 +++-- .../analysis/minimizers/base.py | 6 +- .../analysis/minimizers/bumps_dream.py | 2 + src/easydiffraction/display/plotting.py | 91 ++- src/easydiffraction/io/cif/serialize.py | 52 +- src/easydiffraction/io/results_sidecar.py | 164 +++-- src/easydiffraction/project/display.py | 14 +- src/easydiffraction/project/project.py | 113 ++-- src/easydiffraction/utils/utils.py | 65 +- .../fitting/test_bayesian_dream.py | 11 +- .../categories/test_bayesian_convergence.py | 17 + .../test_bayesian_distribution_caches.py | 17 + 
.../categories/test_bayesian_pair_caches.py | 15 + .../test_bayesian_parameter_posteriors.py | 17 + .../test_bayesian_predictive_datasets.py | 17 + .../categories/test_bayesian_result.py | 13 + .../categories/test_bayesian_sampler.py | 13 + .../test_deterministic_parameter_results.py | 17 + .../categories/test_deterministic_result.py | 17 + .../test_fit_parameter_correlations.py | 17 + .../categories/test_fit_parameters.py | 13 + .../analysis/categories/test_fit_result.py | 13 + .../analysis/categories/test_fit_state.py | 207 +++++++ .../analysis/minimizers/test_base.py | 3 +- .../easydiffraction/analysis/test_enums.py | 14 + .../easydiffraction/analysis/test_fitting.py | 9 +- .../easydiffraction/io/cif/test_serialize.py | 2 +- .../io/cif/test_serialize_more.py | 6 +- .../io/test_results_sidecar.py | 104 ++++ .../easydiffraction/project/test_display.py | 76 ++- .../project/test_project_load.py | 52 ++ .../project/test_project_save.py | 12 + tests/unit/easydiffraction/test___main__.py | 27 +- 56 files changed, 1691 insertions(+), 722 deletions(-) create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_convergence.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_distribution_caches.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_pair_caches.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_parameter_posteriors.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_predictive_datasets.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_result.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_bayesian_sampler.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_deterministic_result.py create mode 100644 
tests/unit/easydiffraction/analysis/categories/test_fit_parameter_correlations.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_fit_parameters.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_fit_result.py create mode 100644 tests/unit/easydiffraction/analysis/categories/test_fit_state.py create mode 100644 tests/unit/easydiffraction/io/test_results_sidecar.py diff --git a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md index 0a287a4b..f493ed95 100644 --- a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md +++ b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md @@ -347,8 +347,8 @@ arrays large enough to make CIF unwieldy are stored in: - `analysis/results.h5` -The reference implementation uses a direct `h5py` dependency to read -and write this sidecar. +The reference implementation uses a direct `h5py` dependency to read and +write this sidecar. Required canonical posterior arrays, when available: diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index 1e786df4..06c3aa60 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -24,6 +24,54 @@ │ │ │ │ └── 🏷️ class Aliases │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class AliasesFactory +│ │ ├── 📁 bayesian_convergence +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class BayesianConvergence +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianConvergenceFactory +│ │ ├── 📁 bayesian_distribution_caches +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ ├── 🏷️ class BayesianDistributionCacheItem +│ │ │ │ └── 🏷️ class BayesianDistributionCaches +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianDistributionCachesFactory +│ │ ├── 📁 bayesian_pair_caches +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ ├── 🏷️ class BayesianPairCachePaths +│ │ │ │ ├── 🏷️ class BayesianPairCacheItem +│ │ │ │ └── 🏷️ class BayesianPairCaches 
+│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianPairCachesFactory +│ │ ├── 📁 bayesian_parameter_posteriors +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ ├── 🏷️ class BayesianParameterPosteriorItem +│ │ │ │ └── 🏷️ class BayesianParameterPosteriors +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianParameterPosteriorsFactory +│ │ ├── 📁 bayesian_predictive_datasets +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ ├── 🏷️ class BayesianPredictiveDatasetPaths +│ │ │ │ ├── 🏷️ class BayesianPredictiveDatasetItem +│ │ │ │ └── 🏷️ class BayesianPredictiveDatasets +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianPredictiveDatasetsFactory +│ │ ├── 📁 bayesian_result +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class BayesianResult +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianResultFactory +│ │ ├── 📁 bayesian_sampler +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class BayesianSampler +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class BayesianSamplerFactory │ │ ├── 📁 constraints │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -31,6 +79,45 @@ │ │ │ │ └── 🏷️ class Constraints │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class ConstraintsFactory +│ │ ├── 📁 deterministic_parameter_results +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ ├── 🏷️ class DeterministicParameterResultItem +│ │ │ │ └── 🏷️ class DeterministicParameterResults +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class DeterministicParameterResultsFactory +│ │ ├── 📁 deterministic_result +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class DeterministicResult +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class DeterministicResultFactory +│ │ ├── 📁 fit_parameter_correlations +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ ├── 🏷️ class FitParameterCorrelationItem +│ │ │ │ └── 🏷️ class FitParameterCorrelations +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class FitParameterCorrelationsFactory +│ │ ├── 📁 fit_parameters +│ │ │ ├── 📄 __init__.py +│ │ │ 
├── 📄 default.py +│ │ │ │ ├── 🏷️ class FitParameterItem +│ │ │ │ └── 🏷️ class FitParameters +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class FitParametersFactory +│ │ ├── 📁 fit_result +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class FitResult +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class FitResultFactory +│ │ ├── 📁 fit_state +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ │ └── 🏷️ class FitState +│ │ │ └── 📄 factory.py +│ │ │ └── 🏷️ class FitStateFactory │ │ ├── 📁 fitting │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -105,9 +192,13 @@ │ ├── 📄 __init__.py │ ├── 📄 analysis.py │ │ ├── 🏷️ class AnalysisDisplay +│ │ ├── 🏷️ class _AnalysisOwnerAccessorsMixin +│ │ ├── 🏷️ class _AnalysisPersistedCategoryAccessorsMixin │ │ └── 🏷️ class Analysis │ ├── 📄 enums.py -│ │ └── 🏷️ class FitModeEnum +│ │ ├── 🏷️ class FitModeEnum +│ │ ├── 🏷️ class FitResultKindEnum +│ │ └── 🏷️ class FitCorrelationSourceEnum │ ├── 📄 fitting.py │ │ └── 🏷️ class Fitter │ └── 📄 sequential.py @@ -159,10 +250,12 @@ │ ├── 🏷️ class GenericStringDescriptor │ ├── 🏷️ class GenericBoolDescriptor │ ├── 🏷️ class GenericNumericDescriptor +│ ├── 🏷️ class GenericIntegerDescriptor │ ├── 🏷️ class GenericParameter │ ├── 🏷️ class StringDescriptor │ ├── 🏷️ class BoolDescriptor │ ├── 🏷️ class NumericDescriptor +│ ├── 🏷️ class IntegerDescriptor │ └── 🏷️ class Parameter ├── 📁 crystallography │ ├── 📄 __init__.py @@ -424,7 +517,8 @@ │ │ ├── 📄 parse.py │ │ └── 📄 serialize.py │ ├── 📄 __init__.py -│ └── 📄 ascii.py +│ ├── 📄 ascii.py +│ └── 📄 results_sidecar.py ├── 📁 project │ ├── 📁 categories │ │ ├── 📁 info diff --git a/docs/dev/package-structure/short.md b/docs/dev/package-structure/short.md index b60e1bf9..3faaf97b 100644 --- a/docs/dev/package-structure/short.md +++ b/docs/dev/package-structure/short.md @@ -15,10 +15,62 @@ │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py │ │ │ └── 📄 factory.py +│ │ ├── 📁 bayesian_convergence +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ 
├── 📁 bayesian_distribution_caches +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 bayesian_pair_caches +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 bayesian_parameter_posteriors +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 bayesian_predictive_datasets +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 bayesian_result +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 bayesian_sampler +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py │ │ ├── 📁 constraints │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py │ │ │ └── 📄 factory.py +│ │ ├── 📁 deterministic_parameter_results +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 deterministic_result +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 fit_parameter_correlations +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 fit_parameters +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 fit_result +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py +│ │ ├── 📁 fit_state +│ │ │ ├── 📄 __init__.py +│ │ │ ├── 📄 default.py +│ │ │ └── 📄 factory.py │ │ ├── 📁 fitting │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -205,7 +257,8 @@ │ │ ├── 📄 parse.py │ │ └── 📄 serialize.py │ ├── 📄 __init__.py -│ └── 📄 ascii.py +│ ├── 📄 ascii.py +│ └── 📄 results_sidecar.py ├── 📁 project │ ├── 📁 categories │ │ ├── 📁 info diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md index 0405b869..a22ead8e 100644 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ b/docs/dev/plans/analysis-cif-fit-state.md @@ -30,8 +30,10 @@ structure or experiment CIF files. - [x] Gather planning context from ADRs, source files, and tests. 
- [x] Confirm ADR status: implement from the suggestion for now. - [x] Confirm HDF5 strategy: add `h5py` as a direct dependency. -- [x] Confirm schema strategy: do not add a dedicated `_fit_state` category. -- [x] Confirm loop identity strategy: keep persisted `id` columns with simple autogenerated numeric ids. +- [x] Confirm schema strategy: do not add a dedicated `_fit_state` + category. +- [x] Confirm loop identity strategy: keep persisted `id` columns with + simple autogenerated numeric ids. - [x] Confirm public surface: expose read-only `Analysis` properties. - [x] Confirm predictive cache identity: key by `experiment_name`. - [x] Phase 1 step 1: update the ADR suggestion with clarifications. @@ -58,8 +60,8 @@ These questions were answered on 2026-05-18. 2. Add `h5py` as a direct dependency for `analysis/results.h5`. 3. Do not add a dedicated `_fit_state` category or `schema_version` field for persisted analysis fit state. -4. Keep persisted `id` columns where the collection layer needs a - single key, but auto-generate simple numeric ids instead of derived +4. Keep persisted `id` columns where the collection layer needs a single + key, but auto-generate simple numeric ids instead of derived user-facing composite strings. 5. Expose all new fit-state categories as public read-only properties on `Analysis`. @@ -259,15 +261,15 @@ Actions: 7. Use `StringDescriptor`, `NumericDescriptor`, and `BoolDescriptor` as appropriate. Avoid raw Python attributes for persisted fields. 8. Do not add JSON fields or loose tags. -9. Update imports in the package `__init__.py` files so concrete - classes are registered and importable. +9. Update imports in the package `__init__.py` files so concrete classes + are registered and importable. 10. Update this plan checklist for Step 2. Implementation notes: - The collection `add()` path assumes one key. 
For categories with a persisted `id`, set `_category_entry_name = 'id'` on the item and - generate a simple numeric-string `id` before adding the item to the + generate a simple numeric-string `id` before adding the item to the collection. - Keep CIF tag names exactly as in the ADR, for example `_fit_parameter.param_unique_name`. @@ -298,8 +300,9 @@ Actions: `n_free_parameters`, `degrees_of_freedom`, `covariance_available`, and `correlation_available`. 2. Add `DeterministicParameterResultItem` and collection for - `_deterministic_parameter_result` with `param_unique_name`, `final_value`, `final_uncertainty`, - `at_lower_bound`, and `at_upper_bound`. + `_deterministic_parameter_result` with `param_unique_name`, + `final_value`, `final_uncertainty`, `at_lower_bound`, and + `at_upper_bound`. 3. Use `_category_entry_name = 'param_unique_name'` for deterministic parameter result rows. Preserve display order from CIF loop order. 4. Do not duplicate pre-fit values here; those belong to @@ -399,8 +402,8 @@ Actions: 4. Update `Analysis._serializable_categories()` so fit-state categories are appended only when a fit-state projection exists. 5. Keep the order from the ADR: normal analysis configuration first, - then `_fit_parameter`, `_fit_result`, correlations, - deterministic categories, Bayesian categories, and cache manifests. + then `_fit_parameter`, `_fit_result`, correlations, deterministic + categories, Bayesian categories, and cache manifests. 6. Update `analysis_from_cif()` to restore the new categories after existing fitting, aliases, constraints, and active mode-specific configuration. 
diff --git a/docs/docs/cli/index.md b/docs/docs/cli/index.md index f0896fb2..46c28387 100644 --- a/docs/docs/cli/index.md +++ b/docs/docs/cli/index.md @@ -50,15 +50,13 @@ python -m easydiffraction list-tutorials ### List Example Data -List all available example data files and downloadable project -archives: +List all available example data files and downloadable project archives: ```bash python -m easydiffraction list-data ``` -The table includes the data ID, file name, record kind, and -description. +The table includes the data ID, file name, record kind, and description. ### Download Example Data diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index 365d647e..8aeffb4d 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -40,17 +40,17 @@ The tutorials are organized into the following categories: ## Load Project -- [LBCO `quick` `load`](ed-18.ipynb) – The most minimal example showing - how to load a previously saved project from a directory and run - refinement. Useful when a project has already been set up and saved in - a prior session. -- [Co2SiO4 Temperature scan, resumed](ed-23.ipynb) – Continue a saved +- [LBCO Standard, continuation](ed-18.ipynb) – The most minimal example + showing how to load a previously saved project from a directory and + run refinement. Useful when a project has already been set up and + saved in a prior session. +- [Co2SiO4 Sequential, resumed](ed-23.ipynb) – Continue a saved sequential refinement of Co2SiO4 from an existing `analysis/results.csv` after an incomplete previous run. -- [LBCO Bayesian, saved project](ed-24.ipynb) – Shows how to load the - saved Bayesian LBCO project created in the previous tutorial and - inspect the persisted fit summary, correlation matrix, posterior - plots, and predictive checks without rerunning DREAM. 
+- [LBCO Bayesian, display](ed-24.ipynb) – Shows how to load the saved + Bayesian LBCO project created in the previous tutorial and inspect the + persisted fit summary, correlation matrix, posterior plots, and + predictive checks without rerunning DREAM. ## Powder Diffraction diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index ff5c2722..fafb563f 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -197,9 +197,9 @@ nav: - LBCO basic load: tutorials/ed-1.ipynb - LBCO complete: tutorials/ed-3.ipynb - Load Project: - - LBCO quick load: tutorials/ed-18.ipynb - - Co2SiO4 T-scan resumed: tutorials/ed-23.ipynb - - LBCO Bayesian, saved project: tutorials/ed-24.ipynb + - LBCO Standard continuation: tutorials/ed-18.ipynb + - Co2SiO4 Sequential resumed: tutorials/ed-23.ipynb + - LBCO Bayesian display: tutorials/ed-24.ipynb - Powder Diffraction: - Co2SiO4 pd-neut-cwl: tutorials/ed-5.ipynb - HS pd-neut-cwl: tutorials/ed-6.ipynb diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index f7b96336..0945331a 100644 --- a/src/easydiffraction/__main__.py +++ b/src/easydiffraction/__main__.py @@ -16,6 +16,7 @@ app = typer.Typer(add_completion=False) +_MIN_PROJECT_FIRST_ARG_COUNT = 2 _PROJECT_COMMAND_NAMES = frozenset({'fit', 'display', 'undo'}) _GLOBAL_COMMAND_NAMES = frozenset({ 'list-data', @@ -29,7 +30,7 @@ def _normalized_cli_args(args: list[str]) -> list[str]: """Return CLI args rewritten to support project-first commands.""" - if len(args) < 2: + if len(args) < _MIN_PROJECT_FIRST_ARG_COUNT: return args first_arg = args[0] @@ -93,7 +94,9 @@ def _display_project_outputs(project: object) -> None: def run_cli(args: list[str] | None = None) -> None: - """Run the EasyDiffraction CLI with project-first argument support.""" + """ + Run the EasyDiffraction CLI with project-first argument support. 
+ """ cli_args = list(sys.argv[1:] if args is None else args) app(args=_normalized_cli_args(cli_args)) @@ -230,7 +233,7 @@ def undo( help='Path to the project directory (must contain project.cif).', ), ) -> None: - """Undo the last fit for a saved project when fit-history support exists.""" + """Undo the last fit when fit-history support exists.""" _load_project(project_dir) typer.echo('Undo is not implemented yet. See undo-fit.md ADR.') raise typer.Exit(code=1) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index fb11ca58..352f2d1f 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -16,12 +16,16 @@ BayesianDistributionCaches, ) from easydiffraction.analysis.categories.bayesian_pair_caches import BayesianPairCaches +from easydiffraction.analysis.categories.bayesian_pair_caches.default import BayesianPairCachePaths from easydiffraction.analysis.categories.bayesian_parameter_posteriors import ( BayesianParameterPosteriors, ) from easydiffraction.analysis.categories.bayesian_predictive_datasets import ( BayesianPredictiveDatasets, ) +from easydiffraction.analysis.categories.bayesian_predictive_datasets.default import ( + BayesianPredictiveDatasetPaths, +) from easydiffraction.analysis.categories.bayesian_result import BayesianResult from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.constraints.factory import ConstraintsFactory @@ -40,14 +44,14 @@ from easydiffraction.analysis.categories.sequential_fit_extract import ( SequentialFitExtractCollection, ) +from easydiffraction.analysis.enums import FitCorrelationSourceEnum +from easydiffraction.analysis.enums import FitModeEnum +from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults from easydiffraction.analysis.fit_helpers.bayesian import PosteriorParameterSummary 
from easydiffraction.analysis.fit_helpers.bayesian import PosteriorPredictiveSummary from easydiffraction.analysis.fit_helpers.bayesian import PosteriorSamples from easydiffraction.analysis.fit_helpers.reporting import FitResults -from easydiffraction.analysis.enums import FitCorrelationSourceEnum -from easydiffraction.analysis.enums import FitModeEnum -from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.fitting import Fitter from easydiffraction.analysis.minimizers.base import BOUNDARY_PROXIMITY_FRACTION from easydiffraction.core.category_owner import CategoryOwner @@ -70,6 +74,9 @@ from easydiffraction.utils.utils import render_table _SUMMARY_HIDDEN_PARAMETER_CATEGORIES = frozenset({'pd_data', 'total_data', 'refln'}) +_POSTERIOR_SAMPLE_NDIM = 3 +_FLATTENED_POSTERIOR_SAMPLE_NDIM = 2 +_CREDIBLE_INTERVAL_LEVEL_COUNT = 2 def _discover_property_rows(cls: type) -> list[list[str]]: @@ -377,7 +384,116 @@ def as_cif(self) -> None: self._analysis.show_as_cif() -class Analysis(CategoryOwner): +class _AnalysisOwnerAccessorsMixin: + @property + def project(self) -> object: + """Project that owns this analysis section.""" + return self._project + + @property + def aliases(self) -> object: + """Alias mappings used by symbolic constraints and displays.""" + return self._aliases + + @property + def constraints(self) -> object: + """Symbolic constraints owned by this analysis section.""" + return self._constraints + + @property + def display(self) -> AnalysisDisplay: + """Display helper for parameter tables, CIF, and fit results.""" + return self._display + + @property + def fitter(self) -> Fitter: + """Fitting engine used by this analysis object.""" + return self._fitter + + @fitter.setter + def fitter(self, value: Fitter) -> None: + self._fitter = value + + @property + def fit_results(self) -> object | None: + """Results from the most recent fit, if any.""" + if self._fit_results is None and self._has_persisted_fit_state(): + 
self._restore_fit_results_from_projection() + return self._fit_results + + @fit_results.setter + def fit_results(self, value: object | None) -> None: + self._fit_results = value + self._fitter.results = value + + +class _AnalysisPersistedCategoryAccessorsMixin: + @property + def fit_parameters(self) -> FitParameters: + """Persisted fit-parameter control snapshots.""" + return self._fit_parameters + + @property + def fit_result(self) -> FitResult: + """Persisted common fit-result status metadata.""" + return self._fit_result + + @property + def fit_parameter_correlations(self) -> FitParameterCorrelations: + """Persisted fit-parameter correlation summaries.""" + return self._fit_parameter_correlations + + @property + def deterministic_result(self) -> DeterministicResult: + """Persisted deterministic fit-result metadata.""" + return self._deterministic_result + + @property + def deterministic_parameter_results(self) -> DeterministicParameterResults: + """Persisted deterministic parameter-result summaries.""" + return self._deterministic_parameter_results + + @property + def bayesian_result(self) -> BayesianResult: + """Persisted Bayesian fit-result metadata.""" + return self._bayesian_result + + @property + def bayesian_sampler(self) -> BayesianSampler: + """Persisted Bayesian sampler settings.""" + return self._bayesian_sampler + + @property + def bayesian_convergence(self) -> BayesianConvergence: + """Persisted Bayesian convergence diagnostics.""" + return self._bayesian_convergence + + @property + def bayesian_parameter_posteriors(self) -> BayesianParameterPosteriors: + """Persisted Bayesian parameter posterior summaries.""" + return self._bayesian_parameter_posteriors + + @property + def bayesian_distribution_caches(self) -> BayesianDistributionCaches: + """Persisted Bayesian distribution-cache manifests.""" + return self._bayesian_distribution_caches + + @property + def bayesian_pair_caches(self) -> BayesianPairCaches: + """Persisted Bayesian pair-cache 
manifests.""" + return self._bayesian_pair_caches + + @property + def bayesian_predictive_datasets(self) -> BayesianPredictiveDatasets: + """Persisted Bayesian predictive-dataset manifests.""" + return self._bayesian_predictive_datasets + + +class Analysis( + _AnalysisOwnerAccessorsMixin, + _AnalysisPersistedCategoryAccessorsMixin, + CategoryOwner, +): """ High-level orchestration of analysis tasks for a Project. @@ -428,47 +544,6 @@ def __init__(self, project: object) -> None: self._parameter_snapshots: dict[str, dict[str, dict]] = {} self._display = AnalysisDisplay(self) - @property - def project(self) -> object: - """Project that owns this analysis section.""" - return self._project - - @property - def aliases(self) -> object: - """Alias mappings used by symbolic constraints and displays.""" - return self._aliases - - @property - def constraints(self) -> object: - """Symbolic constraints owned by this analysis section.""" - return self._constraints - - @property - def display(self) -> AnalysisDisplay: - """Display helper for parameter tables, CIF, and fit results.""" - return self._display - - @property - def fitter(self) -> Fitter: - """Fitting engine used by this analysis object.""" - return self._fitter - - @fitter.setter - def fitter(self, value: Fitter) -> None: - self._fitter = value - - @property - def fit_results(self) -> object | None: - """Results from the most recent fit, if any.""" - if self._fit_results is None and self._has_persisted_fit_state(): - self._restore_fit_results_from_projection() - return self._fit_results - - @fit_results.setter - def fit_results(self, value: object | None) -> None: - self._fit_results = value - self._fitter.results = value - @staticmethod def _predictive_cache_key( experiment_name: str, @@ -481,7 +556,7 @@ def _predictive_cache_key( return f'{experiment_name}:{x_axis_name}:{key_suffix}' def _live_parameter_map(self) -> dict[str, Parameter]: - """Return live structure and experiment parameters keyed by unique name.""" 
+ """Return live parameters keyed by unique name.""" all_parameters = self.project.structures.parameters + self.project.experiments.parameters return { param.unique_name: param @@ -490,7 +565,9 @@ def _live_parameter_map(self) -> dict[str, Parameter]: } def _ordered_restored_parameter_names(self) -> list[str]: - """Return persisted parameter names in display and array order.""" + """ + Return persisted parameter names in display and array order. + """ if self.fit_result.result_kind.value == FitResultKindEnum.BAYESIAN.value: posterior_rows = list(self.bayesian_parameter_posteriors) if posterior_rows: @@ -503,13 +580,13 @@ def _ordered_restored_parameter_names(self) -> list[str]: return [row.param_unique_name.value for row in self.fit_parameters] def _restore_live_parameter_state(self, param_map: dict[str, Parameter]) -> None: - """Restore persisted fit metadata onto live parameter objects.""" + """Restore saved fit metadata onto live parameter objects.""" for row in self.fit_parameters: parameter = param_map.get(row.param_unique_name.value) if parameter is None: log.warning( 'Persisted fit-state references unknown parameter ' - f"{row.param_unique_name.value!r}." + f'{row.param_unique_name.value!r}.' 
) continue @@ -558,7 +635,7 @@ def _restored_posterior_samples(self) -> PosteriorSamples | None: parameter_names = [row.param_unique_name.value for row in self.fit_parameters] parameter_sample_array = np.asarray(parameter_samples, dtype=float) - if parameter_sample_array.ndim != 3: + if parameter_sample_array.ndim != _POSTERIOR_SAMPLE_NDIM: log.warning('Persisted posterior samples have an invalid shape for restore.') return None if parameter_sample_array.shape[2] != len(parameter_names): @@ -580,31 +657,29 @@ def _restored_posterior_samples(self) -> PosteriorSamples | None: def _restored_posterior_summaries(self) -> list[PosteriorParameterSummary]: """Return posterior summary rows as runtime summary objects.""" - restored_summaries: list[PosteriorParameterSummary] = [] - for row in self.bayesian_parameter_posteriors: - restored_summaries.append( - PosteriorParameterSummary( - unique_name=row.unique_name.value, - display_name=row.display_name.value, - best_sample_value=float(row.best_sample_value.value), - median=float(row.median.value), - standard_deviation=float(row.uncertainty.value), - interval_68=( - float(row.interval_68_lower.value), - float(row.interval_68_upper.value), - ), - interval_95=( - float(row.interval_95_lower.value), - float(row.interval_95_upper.value), - ), - ess_bulk=row.ess_bulk.value, - r_hat=row.r_hat.value, - ) + return [ + PosteriorParameterSummary( + unique_name=row.unique_name.value, + display_name=row.display_name.value, + best_sample_value=float(row.best_sample_value.value), + median=float(row.median.value), + standard_deviation=float(row.uncertainty.value), + interval_68=( + float(row.interval_68_lower.value), + float(row.interval_68_upper.value), + ), + interval_95=( + float(row.interval_95_lower.value), + float(row.interval_95_upper.value), + ), + ess_bulk=row.ess_bulk.value, + r_hat=row.r_hat.value, ) - return restored_summaries + for row in self.bayesian_parameter_posteriors + ] def _restored_predictive_summaries(self) -> 
dict[str, PosteriorPredictiveSummary]: - """Return restored posterior predictive summaries keyed for runtime reuse.""" + """Return restored predictive summaries for runtime reuse.""" restored_predictive: dict[str, PosteriorPredictiveSummary] = {} predictive_data = self._persisted_fit_state_sidecar.get('predictive_datasets', {}) for row in self.bayesian_predictive_datasets: @@ -667,7 +742,7 @@ def _restored_predictive_summaries(self) -> dict[str, PosteriorPredictiveSummary return restored_predictive def _restore_fit_results_from_projection(self) -> object | None: - """Rebuild a lightweight runtime fit-result object from persisted state.""" + """Rebuild a runtime fit-result object from saved state.""" if not self._has_persisted_fit_state(): return None @@ -1006,83 +1081,18 @@ def sequential_fit_extract(self) -> SequentialFitExtractCollection: """Persisted extract rules for sequential fitting.""" return self._sequential_fit_extract - @property - def fit_parameters(self) -> FitParameters: - """Persisted fit-parameter control snapshots.""" - return self._fit_parameters - - @property - def fit_result(self) -> FitResult: - """Persisted common fit-result status metadata.""" - return self._fit_result - - @property - def fit_parameter_correlations(self) -> FitParameterCorrelations: - """Persisted fit-parameter correlation summaries.""" - return self._fit_parameter_correlations - - @property - def deterministic_result(self) -> DeterministicResult: - """Persisted deterministic fit-result metadata.""" - return self._deterministic_result - - @property - def deterministic_parameter_results(self) -> DeterministicParameterResults: - """Persisted deterministic parameter-result summaries.""" - return self._deterministic_parameter_results - - @property - def bayesian_result(self) -> BayesianResult: - """Persisted Bayesian fit-result metadata.""" - return self._bayesian_result - - @property - def bayesian_sampler(self) -> BayesianSampler: - """Persisted Bayesian sampler settings.""" - 
return self._bayesian_sampler - - @property - def bayesian_convergence(self) -> BayesianConvergence: - """Persisted Bayesian convergence diagnostics.""" - return self._bayesian_convergence - - @property - def bayesian_parameter_posteriors(self) -> BayesianParameterPosteriors: - """Persisted Bayesian parameter posterior summaries.""" - return self._bayesian_parameter_posteriors - - @property - def bayesian_distribution_caches(self) -> BayesianDistributionCaches: - """Persisted Bayesian distribution-cache manifests.""" - return self._bayesian_distribution_caches - - @property - def bayesian_pair_caches(self) -> BayesianPairCaches: - """Persisted Bayesian pair-cache manifests.""" - return self._bayesian_pair_caches - - @property - def bayesian_predictive_datasets(self) -> BayesianPredictiveDatasets: - """Persisted Bayesian predictive-dataset manifests.""" - return self._bayesian_predictive_datasets - def _has_persisted_fit_state(self) -> bool: """ Return whether a persisted fit-state projection is present. """ return self._has_persisted_fit_state_data - def _set_has_persisted_fit_state(self, value: bool) -> None: - """ - Set the persisted fit-state presence flag for internal callers. - """ + def _set_has_persisted_fit_state(self, *, value: bool) -> None: + """Set the persisted fit-state presence flag.""" self._has_persisted_fit_state_data = value def _fit_state_categories(self) -> list[object]: - """ - Return fit-state categories for the current persisted result - kind. 
- """ + """Return fit-state categories for the current result kind.""" categories: list[object] = [ self.fit_parameters, self.fit_result, @@ -1131,11 +1141,11 @@ def _clear_persisted_fit_state(self) -> None: self._bayesian_distribution_caches = BayesianDistributionCaches() self._bayesian_pair_caches = BayesianPairCaches() self._bayesian_predictive_datasets = BayesianPredictiveDatasets() - self._set_has_persisted_fit_state(False) + self._set_has_persisted_fit_state(value=False) self._persisted_fit_state_sidecar = {} def _capture_fit_parameter_state(self, parameters: list[Parameter]) -> None: - """Capture pre-fit parameter state into persisted fit-state categories.""" + """Capture pre-fit parameter state.""" self._clear_persisted_fit_state() for param in parameters: @@ -1148,7 +1158,7 @@ def _capture_fit_parameter_state(self, parameters: list[Parameter]) -> None: start_uncertainty=param.uncertainty, ) - self._set_has_persisted_fit_state(True) + self._set_has_persisted_fit_state(value=True) @staticmethod def _parameter_is_at_fit_bound( @@ -1156,7 +1166,7 @@ def _parameter_is_at_fit_bound( *, use_upper_bound: bool, ) -> bool: - """Return whether a parameter finished within tolerance of a fit bound.""" + """Return whether a parameter finished near a fit bound.""" value = param.value if value is None: return False @@ -1173,7 +1183,9 @@ def _parameter_is_at_fit_bound( return abs(value - bound) <= tolerance def _selected_parameters_for_fit(self, experiments: list[object]) -> list[Parameter]: - """Return unique live parameters involved in the current fit slice.""" + """ + Return unique live parameters involved in the current fit slice. 
+ """ selected_parameters: list[Parameter] = [] seen_unique_names: set[str] = set() @@ -1198,7 +1210,7 @@ def _selected_parameters_for_fit(self, experiments: list[object]) -> list[Parame @staticmethod def _fit_data_point_count(experiments: list[object]) -> int: - """Return the total number of observed data points in the fit slice.""" + """Return observed data-point count for one fit slice.""" total = 0 for experiment in experiments: intensity_category = intensity_category_for(experiment) @@ -1207,7 +1219,9 @@ def _fit_data_point_count(experiments: list[object]) -> int: @staticmethod def _resolve_covariance_matrix(results: FitResults) -> np.ndarray | None: - """Return a covariance matrix when the raw fit result exposes one.""" + """ + Return a covariance matrix when the raw fit result exposes one. + """ raw_result = results.engine_result for attribute_name in ('covar', 'covariance_matrix'): covariance = getattr(raw_result, attribute_name, None) @@ -1215,7 +1229,7 @@ def _resolve_covariance_matrix(results: FitResults) -> np.ndarray | None: continue covariance_array = np.asarray(covariance, dtype=float) - if covariance_array.ndim != 2: + if covariance_array.ndim != _FLATTENED_POSTERIOR_SAMPLE_NDIM: continue if covariance_array.shape[0] != covariance_array.shape[1]: continue @@ -1225,7 +1239,9 @@ def _resolve_covariance_matrix(results: FitResults) -> np.ndarray | None: @staticmethod def _correlation_matrix_from_covariance(covariance: np.ndarray) -> np.ndarray | None: - """Return a correlation matrix derived from a covariance matrix.""" + """ + Return a correlation matrix derived from a covariance matrix. + """ diagonal = np.diag(covariance) if np.any(diagonal <= 0): return None @@ -1252,14 +1268,16 @@ def _store_common_fit_result_projection( *, result_kind: FitResultKindEnum, ) -> None: - """Store fields shared by deterministic and Bayesian fit results.""" + """ + Store fields shared by deterministic and Bayesian fit results. 
+ """ self.fit_result._set_result_kind(result_kind.value) - self.fit_result._set_success(results.success) + self.fit_result._set_success(value=results.success) self.fit_result._set_message(results.message) self.fit_result._set_iterations(results.iterations) self.fit_result._set_fitting_time(results.fitting_time) self.fit_result._set_reduced_chi_square(results.reduced_chi_square) - self._set_has_persisted_fit_state(True) + self._set_has_persisted_fit_state(value=True) def _store_correlation_projection( self, @@ -1268,7 +1286,7 @@ def _store_correlation_projection( correlation_matrix: np.ndarray, source_kind: FitCorrelationSourceEnum, ) -> None: - """Store upper-triangle parameter correlations from a correlation matrix.""" + """Store upper-triangle correlations from one matrix.""" if len(unique_names) <= 1: return if correlation_matrix.shape != (len(unique_names), len(unique_names)): @@ -1293,7 +1311,7 @@ def _store_deterministic_result_projection( experiments: list[object], fitted_parameters: list[Parameter], ) -> None: - """Store deterministic fit-result projections into persisted categories.""" + """Store deterministic fit results in persisted categories.""" selected_parameters = self._selected_parameters_for_fit(experiments) n_parameters = len(selected_parameters) n_free_parameters = len(fitted_parameters) @@ -1316,8 +1334,8 @@ def _store_deterministic_result_projection( self.deterministic_result._set_n_parameters(n_parameters) self.deterministic_result._set_n_free_parameters(n_free_parameters) self.deterministic_result._set_degrees_of_freedom(degrees_of_freedom) - self.deterministic_result._set_covariance_available(covariance is not None) - self.deterministic_result._set_correlation_available(correlation_matrix is not None) + self.deterministic_result._set_covariance_available(value=covariance is not None) + self.deterministic_result._set_correlation_available(value=correlation_matrix is not None) for param in fitted_parameters: 
self.deterministic_parameter_results.create( @@ -1349,7 +1367,9 @@ def _store_bayesian_distribution_cache_projection( flattened_samples: np.ndarray, parameter_names: list[str], ) -> dict[str, dict[str, np.ndarray]]: - """Store cached posterior density curves into persisted manifests.""" + """ + Store cached posterior density curves into persisted manifests. + """ payload: dict[str, dict[str, np.ndarray]] = {} for parameter_index, parameter_name in enumerate(parameter_names): lower_bound, upper_bound = plotter._posterior_parameter_bounds( @@ -1383,12 +1403,83 @@ def _store_bayesian_distribution_cache_projection( @staticmethod def _posterior_pair_contour_levels(density: np.ndarray) -> np.ndarray: - """Return default contour levels for a cached posterior pair surface.""" + """Return default contour levels for one cached pair.""" density_max = float(np.max(density)) if not np.isfinite(density_max) or density_max <= 0: return np.asarray([], dtype=float) return density_max * np.asarray([0.20, 0.35, 0.50, 0.65, 0.80, 0.95], dtype=float) + @staticmethod + def _ordered_pair_metadata( + parameter_names: list[str], + first_index: int, + second_index: int, + ) -> tuple[int, int, str, str]: + """Return ordered pair indices and parameter names.""" + x_index = first_index + y_index = second_index + x_name = parameter_names[x_index] + y_name = parameter_names[y_index] + if x_name > y_name: + x_index, y_index = y_index, x_index + x_name, y_name = y_name, x_name + return x_index, y_index, x_name, y_name + + def _store_one_bayesian_pair_cache_projection( + self, + *, + plotter: object, + results: BayesianFitResults, + density_samples: np.ndarray, + pair_metadata: tuple[int, int, str, str], + contour_grid_size: int, + pair_id: str, + ) -> tuple[str, dict[str, np.ndarray]] | None: + """Store one cached pair surface and return its payload.""" + x_index, y_index, x_name, y_name = pair_metadata + + x_values = density_samples[:, x_index] + y_values = density_samples[:, y_index] + x_bounds, 
y_bounds = plotter._posterior_pair_bounds( + fit_results=results, + x_parameter_name=x_name, + y_parameter_name=y_name, + x_values=x_values, + y_values=y_values, + ) + density_surface = plotter._posterior_pair_density_surface( + x_values=x_values, + y_values=y_values, + x_bounds=x_bounds, + y_bounds=y_bounds, + grid_size=contour_grid_size, + ) + if density_surface is None: + return None + + x_grid_array = np.asarray(density_surface[0], dtype=float) + y_grid_array = np.asarray(density_surface[1], dtype=float) + density_array = np.asarray(density_surface[2], dtype=float) + contour_levels = self._posterior_pair_contour_levels(density_array) + self.bayesian_pair_caches.create( + id=pair_id, + parameter_names=(x_name, y_name), + paths=BayesianPairCachePaths( + x_path=f'/posterior/pairs/{pair_id}/x', + y_path=f'/posterior/pairs/{pair_id}/y', + density_path=f'/posterior/pairs/{pair_id}/density', + contour_level_path=f'/posterior/pairs/{pair_id}/contour_levels', + ), + grid_shape=(float(x_grid_array.size), float(y_grid_array.size)), + n_draws_cached=float(density_samples.shape[0]), + ) + return pair_id, { + 'x': x_grid_array, + 'y': y_grid_array, + 'density': density_array, + 'contour_levels': contour_levels, + } + def _store_bayesian_pair_cache_projection( self, *, @@ -1397,7 +1488,7 @@ def _store_bayesian_pair_cache_projection( flattened_samples: np.ndarray, parameter_names: list[str], ) -> dict[str, dict[str, np.ndarray]]: - """Store cached posterior pair-density surfaces into persisted manifests.""" + """Store cached pair-density surfaces in manifests.""" n_parameters = len(parameter_names) if n_parameters <= 1: return {} @@ -1409,57 +1500,24 @@ def _store_bayesian_pair_cache_projection( contour_grid_size = plotter._posterior_pair_contour_grid_size(n_parameters) payload: dict[str, dict[str, np.ndarray]] = {} for first_index, second_index in combinations(range(n_parameters), 2): - x_index = first_index - y_index = second_index - x_name = parameter_names[x_index] - 
y_name = parameter_names[y_index] - if x_name > y_name: - x_index, y_index = y_index, x_index - x_name, y_name = y_name, x_name - - x_values = density_samples[:, x_index] - y_values = density_samples[:, y_index] - x_bounds, y_bounds = plotter._posterior_pair_bounds( - fit_results=results, - x_parameter_name=x_name, - y_parameter_name=y_name, - x_values=x_values, - y_values=y_values, - ) - density_surface = plotter._posterior_pair_density_surface( - x_values=x_values, - y_values=y_values, - x_bounds=x_bounds, - y_bounds=y_bounds, - grid_size=contour_grid_size, + pair_id = str(len(payload) + 1) + cache_projection = self._store_one_bayesian_pair_cache_projection( + plotter=plotter, + results=results, + density_samples=density_samples, + pair_metadata=self._ordered_pair_metadata( + parameter_names, + first_index, + second_index, + ), + contour_grid_size=contour_grid_size, + pair_id=pair_id, ) - if density_surface is None: + if cache_projection is None: continue - x_grid, y_grid, density = density_surface - x_grid_array = np.asarray(x_grid, dtype=float) - y_grid_array = np.asarray(y_grid, dtype=float) - density_array = np.asarray(density, dtype=float) - contour_levels = self._posterior_pair_contour_levels(density_array) - pair_id = str(len(payload) + 1) - self.bayesian_pair_caches.create( - id=pair_id, - param_unique_name_x=x_name, - param_unique_name_y=y_name, - x_path=f'/posterior/pairs/{pair_id}/x', - y_path=f'/posterior/pairs/{pair_id}/y', - density_path=f'/posterior/pairs/{pair_id}/density', - contour_level_path=f'/posterior/pairs/{pair_id}/contour_levels', - n_grid_x=float(x_grid_array.size), - n_grid_y=float(y_grid_array.size), - n_draws_cached=float(density_samples.shape[0]), - ) - payload[pair_id] = { - 'x': x_grid_array, - 'y': y_grid_array, - 'density': density_array, - 'contour_levels': contour_levels, - } + pair_id, pair_payload = cache_projection + payload[pair_id] = pair_payload return payload @staticmethod @@ -1489,7 +1547,9 @@ def 
_store_bayesian_predictive_projection( plotter: object, results: BayesianFitResults, ) -> dict[str, dict[str, np.ndarray]]: - """Store posterior predictive summaries into persisted manifests.""" + """ + Store posterior predictive summaries into persisted manifests. + """ predictive_payload: dict[str, dict[str, np.ndarray]] = {} for experiment_name in self.project.experiments.names: experiment = self.project.experiments[experiment_name] @@ -1519,23 +1579,23 @@ def _store_bayesian_predictive_projection( self.bayesian_predictive_datasets.create( experiment_name=summary.experiment_name, x_axis_name=str(x_axis_name), - x_path=f'{predictive_root}/x', - best_sample_prediction_path=( - f'{predictive_root}/best_sample_prediction' - ), - lower_95_path=( - None if summary.lower_95 is None else f'{predictive_root}/lower_95' - ), - upper_95_path=( - None if summary.upper_95 is None else f'{predictive_root}/upper_95' - ), - lower_68_path=( - None if summary.lower_68 is None else f'{predictive_root}/lower_68' - ), - upper_68_path=( - None if summary.upper_68 is None else f'{predictive_root}/upper_68' + paths=BayesianPredictiveDatasetPaths( + x_path=f'{predictive_root}/x', + best_sample_prediction_path=(f'{predictive_root}/best_sample_prediction'), + lower_95_path=( + None if summary.lower_95 is None else f'{predictive_root}/lower_95' + ), + upper_95_path=( + None if summary.upper_95 is None else f'{predictive_root}/upper_95' + ), + lower_68_path=( + None if summary.lower_68 is None else f'{predictive_root}/lower_68' + ), + upper_68_path=( + None if summary.upper_68 is None else f'{predictive_root}/upper_68' + ), + draws_path=(None if summary.draws is None else f'{predictive_root}/draws'), ), - draws_path=(None if summary.draws is None else f'{predictive_root}/draws'), n_x=float(np.asarray(summary.x).size), n_draws_cached=( 0.0 if summary.draws is None else float(np.asarray(summary.draws).shape[0]) @@ -1544,30 +1604,30 @@ def _store_bayesian_predictive_projection( return 
predictive_payload def _store_bayesian_plot_cache_projection(self, results: BayesianFitResults) -> None: - """Populate persisted Bayesian plot caches from live posterior results.""" + """Populate persisted Bayesian plot caches.""" posterior_samples = results.posterior_samples if posterior_samples is None: self._persisted_fit_state_sidecar['distribution_caches'] = {} self._persisted_fit_state_sidecar['pair_caches'] = {} self._persisted_fit_state_sidecar['predictive_datasets'] = {} - self.bayesian_result._set_has_distribution_cache(False) - self.bayesian_result._set_has_pair_cache(False) - self.bayesian_result._set_has_posterior_predictive(False) + self.bayesian_result._set_has_distribution_cache(value=False) + self.bayesian_result._set_has_pair_cache(value=False) + self.bayesian_result._set_has_posterior_predictive(value=False) return flattened_samples = np.asarray(posterior_samples.flattened(), dtype=float) parameter_names = list(posterior_samples.parameter_names) if ( - flattened_samples.ndim != 2 + flattened_samples.ndim != _FLATTENED_POSTERIOR_SAMPLE_NDIM or not parameter_names or flattened_samples.shape[1] != len(parameter_names) ): self._persisted_fit_state_sidecar['distribution_caches'] = {} self._persisted_fit_state_sidecar['pair_caches'] = {} self._persisted_fit_state_sidecar['predictive_datasets'] = {} - self.bayesian_result._set_has_distribution_cache(False) - self.bayesian_result._set_has_pair_cache(False) - self.bayesian_result._set_has_posterior_predictive(False) + self.bayesian_result._set_has_distribution_cache(value=False) + self.bayesian_result._set_has_pair_cache(value=False) + self.bayesian_result._set_has_posterior_predictive(value=False) return plotter = self.project.rendering.plotter @@ -1591,15 +1651,15 @@ def _store_bayesian_plot_cache_projection(self, results: BayesianFitResults) -> self._persisted_fit_state_sidecar['distribution_caches'] = distribution_payload self._persisted_fit_state_sidecar['pair_caches'] = pair_payload 
self._persisted_fit_state_sidecar['predictive_datasets'] = predictive_payload - self.bayesian_result._set_has_distribution_cache(bool(distribution_payload)) - self.bayesian_result._set_has_pair_cache(bool(pair_payload)) - self.bayesian_result._set_has_posterior_predictive(bool(predictive_payload)) + self.bayesian_result._set_has_distribution_cache(value=bool(distribution_payload)) + self.bayesian_result._set_has_pair_cache(value=bool(pair_payload)) + self.bayesian_result._set_has_posterior_predictive(value=bool(predictive_payload)) def _store_bayesian_posterior_sidecar_projection( self, results: BayesianFitResults, ) -> None: - """Persist canonical posterior arrays while live samples are available.""" + """Persist posterior arrays while live samples exist.""" posterior_samples = results.posterior_samples if posterior_samples is None: self._persisted_fit_state_sidecar['posterior'] = {} @@ -1623,10 +1683,12 @@ def _store_bayesian_posterior_sidecar_projection( } def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None: - """Store Bayesian fit-result projections into persisted categories.""" + """ + Store Bayesian fit-result projections into persisted categories. 
+ """ credible_interval_inner = 0.68 credible_interval_outer = 0.95 - if len(results.credible_interval_levels) >= 2: + if len(results.credible_interval_levels) >= _CREDIBLE_INTERVAL_LEVEL_COUNT: credible_interval_inner = float(results.credible_interval_levels[0]) credible_interval_outer = float(results.credible_interval_levels[1]) @@ -1636,15 +1698,17 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_result._set_sampler_name(results.sampler_name) self.bayesian_result._set_point_estimate_name(point_estimate_name) - self.bayesian_result._set_success(results.success) - self.bayesian_result._set_sampler_completed(results.sampler_completed) + self.bayesian_result._set_success(value=results.success) + self.bayesian_result._set_sampler_completed(value=results.sampler_completed) self.bayesian_result._set_best_log_posterior(results.best_log_posterior) self.bayesian_result._set_credible_interval_inner(credible_interval_inner) self.bayesian_result._set_credible_interval_outer(credible_interval_outer) - self.bayesian_result._set_has_posterior_samples(results.posterior_samples is not None) - self.bayesian_result._set_has_distribution_cache(False) - self.bayesian_result._set_has_pair_cache(False) - self.bayesian_result._set_has_posterior_predictive(False) + self.bayesian_result._set_has_posterior_samples( + value=results.posterior_samples is not None + ) + self.bayesian_result._set_has_distribution_cache(value=False) + self.bayesian_result._set_has_pair_cache(value=False) + self.bayesian_result._set_has_posterior_predictive(value=False) self.bayesian_result._set_sidecar_file('results.h5') self._store_bayesian_posterior_sidecar_projection(results) @@ -1655,11 +1719,9 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_sampler._set_parallel(int(sampler_settings.get('parallel', 0))) self.bayesian_sampler._set_init(str(sampler_settings.get('init', ''))) random_seed = 
sampler_settings.get('random_seed') - self.bayesian_sampler._set_random_seed( - None if random_seed is None else int(random_seed) - ) + self.bayesian_sampler._set_random_seed(None if random_seed is None else int(random_seed)) - self.bayesian_convergence._set_converged(bool(convergence.get('converged', False))) + self.bayesian_convergence._set_converged(value=bool(convergence.get('converged', False))) self.bayesian_convergence._set_max_r_hat(convergence.get('max_r_hat')) self.bayesian_convergence._set_min_ess_bulk(convergence.get('min_ess_bulk')) self.bayesian_convergence._set_n_draws(int(convergence.get('n_draws', 0))) @@ -1667,19 +1729,7 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None self.bayesian_convergence._set_n_parameters(int(convergence.get('n_parameters', 0))) for summary in results.posterior_parameter_summaries: - self.bayesian_parameter_posteriors.create( - unique_name=summary.unique_name, - display_name=summary.display_name, - best_sample_value=summary.best_sample_value, - median=summary.median, - uncertainty=summary.standard_deviation, - interval_68_lower=summary.interval_68[0], - interval_68_upper=summary.interval_68[1], - interval_95_lower=summary.interval_95[0], - interval_95_upper=summary.interval_95[1], - ess_bulk=summary.ess_bulk, - r_hat=summary.r_hat, - ) + self.bayesian_parameter_posteriors.create(summary=summary) posterior_samples = results.posterior_samples if posterior_samples is None: @@ -1703,7 +1753,9 @@ def _store_fit_result_projection( experiments: list[object], fitted_parameters: list[Parameter], ) -> None: - """Store the latest fit result into persisted fit-state categories.""" + """ + Store the latest fit result into persisted fit-state categories. 
+ """ if isinstance(results, BayesianFitResults): self._store_common_fit_result_projection( results, diff --git a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py index 06584f25..48d00fa4 100644 --- a/src/easydiffraction/analysis/categories/bayesian_convergence/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_convergence/default.py @@ -71,7 +71,7 @@ def converged(self) -> BoolDescriptor: """Whether the Bayesian fit met convergence criteria.""" return self._converged - def _set_converged(self, value: bool) -> None: + def _set_converged(self, *, value: bool) -> None: """Set the convergence flag for internal callers.""" self._converged.value = value diff --git a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py index 59eb1354..41183dc3 100644 --- a/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_distribution_caches/default.py @@ -137,9 +137,9 @@ def create( HDF5 dataset path for the distribution x-grid. density_path : str HDF5 dataset path for the cached density values. - n_grid : int | float + n_grid : float Number of grid points in the cached distribution. - n_draws_cached : int | float + n_draws_cached : float Number of draws summarized into the cached distribution. 
""" item = BayesianDistributionCacheItem() diff --git a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py b/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py index 7a2561fe..937c77a5 100644 --- a/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_pair_caches/default.py @@ -4,6 +4,8 @@ from __future__ import annotations +from dataclasses import dataclass + from easydiffraction.analysis.categories.bayesian_pair_caches.factory import ( BayesianPairCachesFactory, ) @@ -27,6 +29,16 @@ def _normalized_parameter_pair( return param_unique_name_y, param_unique_name_x +@dataclass(frozen=True, slots=True) +class BayesianPairCachePaths: + """HDF5 dataset paths for one persisted pair cache.""" + + x_path: str + y_path: str + density_path: str + contour_level_path: str + + class BayesianPairCacheItem(CategoryItem): """Single persisted Bayesian pair-cache manifest row.""" @@ -211,14 +223,9 @@ def __init__(self) -> None: def create( self, *, - param_unique_name_x: str, - param_unique_name_y: str, - x_path: str, - y_path: str, - density_path: str, - contour_level_path: str, - n_grid_x: float, - n_grid_y: float, + parameter_names: tuple[str, str], + paths: BayesianPairCachePaths, + grid_shape: tuple[float, float], n_draws_cached: float, id: str | None = None, ) -> None: @@ -227,39 +234,31 @@ def create( Parameters ---------- - param_unique_name_x : str - First unique parameter name in the cached pair. - param_unique_name_y : str - Second unique parameter name in the cached pair. - x_path : str - HDF5 dataset path for the pair-cache x-grid. - y_path : str - HDF5 dataset path for the pair-cache y-grid. - density_path : str - HDF5 dataset path for the pair-cache density grid. - contour_level_path : str - HDF5 dataset path for cached contour levels. - n_grid_x : int | float - Number of x-grid points in the cached pair. 
- n_grid_y : int | float - Number of y-grid points in the cached pair. - n_draws_cached : int | float + parameter_names : tuple[str, str] + Unique parameter names for the cached pair. + paths : BayesianPairCachePaths + HDF5 dataset paths for the cached pair payloads. + grid_shape : tuple[float, float] + Number of x-grid and y-grid points in the cached pair. + n_draws_cached : float Number of draws summarized into the cached pair. id : str | None, default=None - Explicit persisted row id. When omitted, a simple - sequential identifier is generated. + Explicit persisted row id. When omitted, a simple sequential + identifier is generated. """ + param_unique_name_x, param_unique_name_y = parameter_names normalized_x, normalized_y = _normalized_parameter_pair( param_unique_name_x, param_unique_name_y, ) + n_grid_x, n_grid_y = grid_shape item = BayesianPairCacheItem() item._set_param_unique_name_x(normalized_x) item._set_param_unique_name_y(normalized_y) - item._set_x_path(x_path) - item._set_y_path(y_path) - item._set_density_path(density_path) - item._set_contour_level_path(contour_level_path) + item._set_x_path(paths.x_path) + item._set_y_path(paths.y_path) + item._set_density_path(paths.density_path) + item._set_contour_level_path(paths.contour_level_path) item._set_n_grid_x(n_grid_x) item._set_n_grid_y(n_grid_y) item._set_n_draws_cached(n_draws_cached) diff --git a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py index 6bc9173e..ea234f0e 100644 --- a/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/default.py @@ -4,6 +4,8 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from easydiffraction.analysis.categories.bayesian_parameter_posteriors.factory import ( BayesianParameterPosteriorsFactory, ) @@ -16,6 +18,9 @@ from 
easydiffraction.core.variable import StringDescriptor from easydiffraction.io.cif.handler import CifHandler +if TYPE_CHECKING: + from easydiffraction.analysis.fit_helpers.bayesian import PosteriorParameterSummary + class BayesianParameterPosteriorItem(CategoryItem): """Single persisted Bayesian parameter posterior summary row.""" @@ -212,56 +217,26 @@ def __init__(self) -> None: def create( self, *, - unique_name: str, - display_name: str, - best_sample_value: float | None = None, - median: float | None = None, - uncertainty: float | None = None, - interval_68_lower: float | None = None, - interval_68_upper: float | None = None, - interval_95_lower: float | None = None, - interval_95_upper: float | None = None, - ess_bulk: float | None = None, - r_hat: float | None = None, + summary: PosteriorParameterSummary, ) -> None: """ Create a persisted Bayesian parameter posterior summary row. Parameters ---------- - unique_name : str - Unique EasyDiffraction parameter name. - display_name : str - Human-readable parameter label. - best_sample_value : int | float | None, default=None - Committed sampled parameter value. - median : int | float | None, default=None - Posterior median value. - uncertainty : int | float | None, default=None - Posterior standard deviation. - interval_68_lower : int | float | None, default=None - Lower bound of the 68% credible interval. - interval_68_upper : int | float | None, default=None - Upper bound of the 68% credible interval. - interval_95_lower : int | float | None, default=None - Lower bound of the 95% credible interval. - interval_95_upper : int | float | None, default=None - Upper bound of the 95% credible interval. - ess_bulk : int | float | None, default=None - Bulk effective sample size when available. - r_hat : int | float | None, default=None - Rank-normalized split-R-hat when available. + summary : PosteriorParameterSummary + Runtime posterior summary to persist. 
""" item = BayesianParameterPosteriorItem() - item._set_unique_name(unique_name) - item._set_display_name(display_name) - item._set_best_sample_value(best_sample_value) - item._set_median(median) - item._set_uncertainty(uncertainty) - item._set_interval_68_lower(interval_68_lower) - item._set_interval_68_upper(interval_68_upper) - item._set_interval_95_lower(interval_95_lower) - item._set_interval_95_upper(interval_95_upper) - item._set_ess_bulk(ess_bulk) - item._set_r_hat(r_hat) + item._set_unique_name(summary.unique_name) + item._set_display_name(summary.display_name) + item._set_best_sample_value(summary.best_sample_value) + item._set_median(summary.median) + item._set_uncertainty(summary.standard_deviation) + item._set_interval_68_lower(summary.interval_68[0]) + item._set_interval_68_upper(summary.interval_68[1]) + item._set_interval_95_lower(summary.interval_95[0]) + item._set_interval_95_upper(summary.interval_95[1]) + item._set_ess_bulk(summary.ess_bulk) + item._set_r_hat(summary.r_hat) self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py index 25f9b58f..9bb8acf0 100644 --- a/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_predictive_datasets/default.py @@ -4,6 +4,8 @@ from __future__ import annotations +from dataclasses import dataclass + from easydiffraction.analysis.categories.bayesian_predictive_datasets.factory import ( BayesianPredictiveDatasetsFactory, ) @@ -16,6 +18,19 @@ from easydiffraction.io.cif.handler import CifHandler +@dataclass(frozen=True, slots=True) +class BayesianPredictiveDatasetPaths: + """HDF5 dataset paths for one predictive dataset.""" + + x_path: str + best_sample_prediction_path: str + lower_95_path: str | None = None + upper_95_path: str | None = None + lower_68_path: str | None = None + upper_68_path: str | 
None = None + draws_path: str | None = None + + class BayesianPredictiveDatasetItem(CategoryItem): """Single persisted Bayesian predictive-dataset manifest row.""" @@ -210,13 +225,7 @@ def create( *, experiment_name: str, x_axis_name: str, - x_path: str, - best_sample_prediction_path: str, - lower_95_path: str | None = None, - upper_95_path: str | None = None, - lower_68_path: str | None = None, - upper_68_path: str | None = None, - draws_path: str | None = None, + paths: BayesianPredictiveDatasetPaths, n_x: float, n_draws_cached: float, ) -> None: @@ -229,35 +238,23 @@ def create( Experiment name for the cached predictive dataset. x_axis_name : str Name of the predictive dataset x-axis. - x_path : str - HDF5 dataset path for the predictive x-axis values. - best_sample_prediction_path : str - HDF5 dataset path for the committed predictive curve. - lower_95_path : str | None, default=None - HDF5 dataset path for the lower 95% predictive band. - upper_95_path : str | None, default=None - HDF5 dataset path for the upper 95% predictive band. - lower_68_path : str | None, default=None - HDF5 dataset path for the lower 68% predictive band. - upper_68_path : str | None, default=None - HDF5 dataset path for the upper 68% predictive band. - draws_path : str | None, default=None - HDF5 dataset path for cached predictive draws. - n_x : int | float + paths : BayesianPredictiveDatasetPaths + HDF5 dataset paths for the predictive dataset payloads. + n_x : float Number of x-axis points in the cached predictive dataset. - n_draws_cached : int | float + n_draws_cached : float Number of cached predictive draws. 
""" item = BayesianPredictiveDatasetItem() item._set_experiment_name(experiment_name) item._set_x_axis_name(x_axis_name) - item._set_x_path(x_path) - item._set_best_sample_prediction_path(best_sample_prediction_path) - item._set_lower_95_path(lower_95_path) - item._set_upper_95_path(upper_95_path) - item._set_lower_68_path(lower_68_path) - item._set_upper_68_path(upper_68_path) - item._set_draws_path(draws_path) + item._set_x_path(paths.x_path) + item._set_best_sample_prediction_path(paths.best_sample_prediction_path) + item._set_lower_95_path(paths.lower_95_path) + item._set_upper_95_path(paths.upper_95_path) + item._set_lower_68_path(paths.lower_68_path) + item._set_upper_68_path(paths.upper_68_path) + item._set_draws_path(paths.draws_path) item._set_n_x(n_x) item._set_n_draws_cached(n_draws_cached) self.add(item) diff --git a/src/easydiffraction/analysis/categories/bayesian_result/default.py b/src/easydiffraction/analysis/categories/bayesian_result/default.py index 66cf888e..dd421071 100644 --- a/src/easydiffraction/analysis/categories/bayesian_result/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_result/default.py @@ -125,7 +125,7 @@ def success(self) -> BoolDescriptor: """ return self._success - def _set_success(self, value: bool) -> None: + def _set_success(self, *, value: bool) -> None: """Set the success flag for internal callers.""" self._success.value = value @@ -134,7 +134,7 @@ def sampler_completed(self) -> BoolDescriptor: """Whether the sampler completed and returned posterior data.""" return self._sampler_completed - def _set_sampler_completed(self, value: bool) -> None: + def _set_sampler_completed(self, *, value: bool) -> None: """Set the sampler-completed flag for internal callers.""" self._sampler_completed.value = value @@ -174,7 +174,7 @@ def has_posterior_samples(self) -> BoolDescriptor: """Whether posterior samples were persisted.""" return self._has_posterior_samples - def _set_has_posterior_samples(self, value: bool) -> 
None: + def _set_has_posterior_samples(self, *, value: bool) -> None: """Set the posterior-samples flag for internal callers.""" self._has_posterior_samples.value = value @@ -183,7 +183,7 @@ def has_distribution_cache(self) -> BoolDescriptor: """Whether distribution-cache manifests were persisted.""" return self._has_distribution_cache - def _set_has_distribution_cache(self, value: bool) -> None: + def _set_has_distribution_cache(self, *, value: bool) -> None: """Set the distribution-cache flag for internal callers.""" self._has_distribution_cache.value = value @@ -192,7 +192,7 @@ def has_pair_cache(self) -> BoolDescriptor: """Whether pair-cache manifests were persisted.""" return self._has_pair_cache - def _set_has_pair_cache(self, value: bool) -> None: + def _set_has_pair_cache(self, *, value: bool) -> None: """Set the pair-cache flag for internal callers.""" self._has_pair_cache.value = value @@ -201,7 +201,7 @@ def has_posterior_predictive(self) -> BoolDescriptor: """Whether posterior predictive manifests were persisted.""" return self._has_posterior_predictive - def _set_has_posterior_predictive(self, value: bool) -> None: + def _set_has_posterior_predictive(self, *, value: bool) -> None: """Set the posterior-predictive flag for internal callers.""" self._has_posterior_predictive.value = value diff --git a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py index 12b7c661..c75ffa15 100644 --- a/src/easydiffraction/analysis/categories/bayesian_sampler/default.py +++ b/src/easydiffraction/analysis/categories/bayesian_sampler/default.py @@ -9,7 +9,6 @@ from easydiffraction.core.metadata import TypeInfo from easydiffraction.core.validation import AttributeSpec from easydiffraction.core.variable import IntegerDescriptor -from easydiffraction.core.variable import NumericDescriptor from easydiffraction.core.variable import StringDescriptor from easydiffraction.io.cif.handler import 
CifHandler diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py index defa5845..2f13d759 100644 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py +++ b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py @@ -92,7 +92,7 @@ def at_lower_bound(self) -> BoolDescriptor: """Whether the parameter finished at the lower fit bound.""" return self._at_lower_bound - def _set_at_lower_bound(self, value: bool) -> None: + def _set_at_lower_bound(self, *, value: bool) -> None: """Set the lower-bound flag for internal callers.""" self._at_lower_bound.value = value @@ -101,7 +101,7 @@ def at_upper_bound(self) -> BoolDescriptor: """Whether the parameter finished at the upper fit bound.""" return self._at_upper_bound - def _set_at_upper_bound(self, value: bool) -> None: + def _set_at_upper_bound(self, *, value: bool) -> None: """Set the upper-bound flag for internal callers.""" self._at_upper_bound.value = value @@ -134,9 +134,9 @@ def create( ---------- param_unique_name : str Unique name of the persisted parameter result row. - final_value : int | float | None, default=None + final_value : float | None, default=None Final fitted value for the persisted parameter result. - final_uncertainty : int | float | None, default=None + final_uncertainty : float | None, default=None Final uncertainty for the persisted parameter result. at_lower_bound : bool, default=False Whether the parameter finished at the lower fit bound. 
@@ -147,6 +147,6 @@ def create( item._set_param_unique_name(param_unique_name) item._set_final_value(final_value) item._set_final_uncertainty(final_uncertainty) - item._set_at_lower_bound(at_lower_bound) - item._set_at_upper_bound(at_upper_bound) + item._set_at_lower_bound(value=at_lower_bound) + item._set_at_upper_bound(value=at_upper_bound) self.add(item) diff --git a/src/easydiffraction/analysis/categories/deterministic_result/default.py b/src/easydiffraction/analysis/categories/deterministic_result/default.py index d929ca41..f66f2018 100644 --- a/src/easydiffraction/analysis/categories/deterministic_result/default.py +++ b/src/easydiffraction/analysis/categories/deterministic_result/default.py @@ -141,10 +141,7 @@ def _set_n_data_points(self, value: float) -> None: @property def n_parameters(self) -> NumericDescriptor: - """ - Number of parameters considered in the persisted deterministic - fit. - """ + """Number of parameters considered in the persisted fit.""" return self._n_parameters def _set_n_parameters(self, value: float) -> None: @@ -173,24 +170,18 @@ def _set_degrees_of_freedom(self, value: float) -> None: @property def covariance_available(self) -> BoolDescriptor: - """ - Whether covariance was available for the persisted deterministic - fit. - """ + """Whether covariance was available for the persisted fit.""" return self._covariance_available - def _set_covariance_available(self, value: bool) -> None: + def _set_covariance_available(self, *, value: bool) -> None: """Set the covariance-available flag for internal callers.""" self._covariance_available.value = value @property def correlation_available(self) -> BoolDescriptor: - """ - Whether correlations were available for the persisted - deterministic fit. 
- """ + """Whether correlations were available for the persisted fit.""" return self._correlation_available - def _set_correlation_available(self, value: bool) -> None: + def _set_correlation_available(self, *, value: bool) -> None: """Set the correlation-available flag for internal callers.""" self._correlation_available.value = value diff --git a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py index d1d02ce1..4b08057e 100644 --- a/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py +++ b/src/easydiffraction/analysis/categories/fit_parameter_correlations/default.py @@ -164,7 +164,7 @@ def create( First unique parameter name in the pair. param_unique_name_j : str Second unique parameter name in the pair. - correlation : int | float + correlation : float Correlation coefficient for the parameter pair. id : str | None, default=None Explicit persisted row identifier. When omitted, a simple diff --git a/src/easydiffraction/analysis/categories/fit_parameters/default.py b/src/easydiffraction/analysis/categories/fit_parameters/default.py index 79dc8549..a5e1f94f 100644 --- a/src/easydiffraction/analysis/categories/fit_parameters/default.py +++ b/src/easydiffraction/analysis/categories/fit_parameters/default.py @@ -156,15 +156,15 @@ def create( ---------- param_unique_name : str Unique name of the referenced live parameter. - fit_min : int | float + fit_min : float Persisted lower fit bound. - fit_max : int | float + fit_max : float Persisted upper fit bound. - fit_bounds_uncertainty_multiplier : int | float | None, default=None + fit_bounds_uncertainty_multiplier : float | None, default=None Multiplier used to derive fit bounds from uncertainty. - start_value : int | float | None, default=None + start_value : float | None, default=None Persisted pre-fit value snapshot. 
- start_uncertainty : int | float | None, default=None + start_uncertainty : float | None, default=None Persisted pre-fit uncertainty snapshot. """ item = FitParameterItem() diff --git a/src/easydiffraction/analysis/categories/fit_result/default.py b/src/easydiffraction/analysis/categories/fit_result/default.py index e75775e9..564df008 100644 --- a/src/easydiffraction/analysis/categories/fit_result/default.py +++ b/src/easydiffraction/analysis/categories/fit_result/default.py @@ -88,7 +88,7 @@ def success(self) -> BoolDescriptor: """ return self._success - def _set_success(self, value: bool) -> None: + def _set_success(self, *, value: bool) -> None: """Set the success flag for internal callers.""" self._success.value = value diff --git a/src/easydiffraction/analysis/enums.py b/src/easydiffraction/analysis/enums.py index c70c81b8..94d78cc6 100644 --- a/src/easydiffraction/analysis/enums.py +++ b/src/easydiffraction/analysis/enums.py @@ -4,7 +4,6 @@ from __future__ import annotations -from enum import Enum from enum import StrEnum @@ -31,7 +30,7 @@ def description(self) -> str: return '' -class FitResultKindEnum(str, Enum): +class FitResultKindEnum(StrEnum): """Persisted kind of the latest fit-result projection.""" DETERMINISTIC = 'deterministic' @@ -43,7 +42,7 @@ def default(cls) -> FitResultKindEnum: return cls.DETERMINISTIC -class FitCorrelationSourceEnum(str, Enum): +class FitCorrelationSourceEnum(StrEnum): """Source of a persisted fit-parameter correlation summary.""" DETERMINISTIC = 'deterministic' diff --git a/src/easydiffraction/analysis/fit_helpers/tracking.py b/src/easydiffraction/analysis/fit_helpers/tracking.py index 5d716831..151512b7 100644 --- a/src/easydiffraction/analysis/fit_helpers/tracking.py +++ b/src/easydiffraction/analysis/fit_helpers/tracking.py @@ -247,7 +247,7 @@ def track_sampler_progress(self, update: SamplerProgressUpdate) -> None: self._last_iteration = update.iteration def start_sampler_pre_processing(self, *, total_iterations: int) -> 
None: - """Mark sampler setup so its status row appears on first progress update.""" + """Mark sampler setup so a status row appears on update.""" self._tracking_mode = TRACKING_MODE_SAMPLER self._sampler_total_iterations = max(1, total_iterations) self._last_sampler_phase = SAMPLER_PHASE_PRE_PROCESSING @@ -311,7 +311,9 @@ def stop_timer(self) -> None: self._fitting_time = self._end_time - self._start_time def _elapsed_since_start(self) -> float | None: - """Return elapsed wall time using the active timer when available.""" + """ + Return elapsed wall time using the active timer when available. + """ if self._start_time is None: return None if self._end_time is not None: @@ -492,7 +494,9 @@ def _sampler_status_row( elapsed_time: float | None, log_posterior: float | None = None, ) -> list[str]: - """Return a status-only sampler row without iteration metrics.""" + """ + Return a status-only sampler row without iteration metrics. + """ return [ iteration_label, '', @@ -672,7 +676,7 @@ def _replace_last_tracking_row(self, row: list[str]) -> None: def _default_activity_label(self) -> str: if self._tracking_mode == TRACKING_MODE_SAMPLER: - return ACTIVITY_LABEL_PRE_PROCESSING + return ACTIVITY_LABEL_PROCESSING return ACTIVITY_LABEL_FITTING @staticmethod diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index f628a9ab..f446e858 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -76,6 +76,80 @@ def __init__(self, selection: str = MinimizerTypeEnum.default()) -> None: self.minimizer = MinimizerFactory.create(selection) self.results: FitResults | None = None + @staticmethod + def _collect_fit_parameters( + structures: Structures, + experiments: list[ExperimentBase], + ) -> list[Parameter]: + """Return free parameters from structures and experiments.""" + expt_free_params: list[Parameter] = [] + for expt in experiments: + expt_free_params.extend( + p + for p in expt.parameters + if 
isinstance(p, Parameter) and not p.user_constrained and p.free + ) + return structures.free_parameters + expt_free_params + + def _build_objective_function( + self, + *, + params: list[Parameter], + structures: Structures, + experiments: list[ExperimentBase], + weights: np.ndarray | None, + analysis: object, + ) -> object: + """Return the residual function for the current fit context.""" + + def objective_function(engine_params: dict[str, Any]) -> np.ndarray: + """Evaluate residuals for the current minimizer state.""" + return self._residual_function( + engine_params=engine_params, + parameters=params, + structures=structures, + experiments=experiments, + weights=weights, + analysis=analysis, + ) + + return objective_function + + def _postprocess_fit_results( + self, + *, + analysis: object, + experiments: list[ExperimentBase], + fitted_parameters: list[Parameter], + ) -> bool: + """Populate result fields and persist fit projections.""" + if self.results is None: + return False + + self.results.message = _resolve_fit_result_message(self.results) + self.results.iterations = _resolve_fit_result_iterations(self.results) + self.results.chi_square = _resolve_fit_result_chi_square(self.results) + + if analysis is None: + return False + + warn_poorly_mixed = False + if isinstance(self.results, BayesianFitResults): + warn_poorly_mixed = not self.results.convergence_diagnostics.get( + 'converged', + True, + ) + self.minimizer.tracker.start_sampler_post_processing( + log_posterior=self.results.best_log_posterior, + ) + + analysis._store_fit_result_projection( + self.results, + experiments=experiments, + fitted_parameters=fitted_parameters, + ) + return warn_poorly_mixed + def fit( self, structures: Structures, @@ -122,14 +196,7 @@ def fit( structure._need_categories_update = True structure._update_categories() - expt_free_params: list[Parameter] = [] - for expt in experiments: - expt_free_params.extend( - p - for p in expt.parameters - if isinstance(p, Parameter) and not 
p.user_constrained and p.free - ) - params = structures.free_parameters + expt_free_params + params = self._collect_fit_parameters(structures, experiments) if not params: if analysis is not None: @@ -145,28 +212,13 @@ def fit( for param in params: param._fit_start_value = param.value - def objective_function(engine_params: dict[str, Any]) -> np.ndarray: - """ - Evaluate the residual for the current minimizer parameters. - - Parameters - ---------- - engine_params : dict[str, Any] - Parameter values provided by the minimizer engine. - - Returns - ------- - np.ndarray - Residual array passed back to the minimizer. - """ - return self._residual_function( - engine_params=engine_params, - parameters=params, - structures=structures, - experiments=experiments, - weights=weights, - analysis=analysis, - ) + objective_function = self._build_objective_function( + params=params, + structures=structures, + experiments=experiments, + weights=weights, + analysis=analysis, + ) # Perform fitting self.results = self.minimizer.fit( @@ -177,26 +229,12 @@ def objective_function(engine_params: dict[str, Any]) -> np.ndarray: random_seed=random_seed, ) - warn_poorly_mixed = False try: - if self.results is not None: - self.results.message = _resolve_fit_result_message(self.results) - self.results.iterations = _resolve_fit_result_iterations(self.results) - self.results.chi_square = _resolve_fit_result_chi_square(self.results) - if analysis is not None: - if isinstance(self.results, BayesianFitResults): - warn_poorly_mixed = not self.results.convergence_diagnostics.get( - 'converged', - True, - ) - self.minimizer.tracker.start_sampler_post_processing( - log_posterior=self.results.best_log_posterior, - ) - analysis._store_fit_result_projection( - self.results, - experiments=experiments, - fitted_parameters=params, - ) + warn_poorly_mixed = self._postprocess_fit_results( + analysis=analysis, + experiments=experiments, + fitted_parameters=params, + ) finally: self.minimizer._stop_tracking() 
diff --git a/src/easydiffraction/analysis/minimizers/base.py b/src/easydiffraction/analysis/minimizers/base.py index c1ff8d17..aecbba02 100644 --- a/src/easydiffraction/analysis/minimizers/base.py +++ b/src/easydiffraction/analysis/minimizers/base.py @@ -343,11 +343,9 @@ def fit( if resolved_random_seed is not None: solver_args['random_seed'] = resolved_random_seed raw_result = self._run_solver(objective_function, **solver_args) - except Exception: + return self._finalize_fit(parameters, raw_result) + finally: self._stop_tracking() - raise - - return self._finalize_fit(parameters, raw_result) def _objective_function( self, diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index fdf248b5..9d935776 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -887,6 +887,8 @@ def _build_success_result( draw_index=np.asarray(draw_index, dtype=float), ) convergence_diagnostics = compute_convergence_diagnostics(posterior_samples) + if not convergence_diagnostics.get('converged', True): + log.warning('Convergence diagnostics indicate the posterior may be poorly mixed.') posterior_parameter_summaries = summarize_posterior_parameters( parameter_names=context.parameter_names, posterior_samples=posterior_samples, diff --git a/src/easydiffraction/display/plotting.py b/src/easydiffraction/display/plotting.py index 16e3e680..7d85a36f 100644 --- a/src/easydiffraction/display/plotting.py +++ b/src/easydiffraction/display/plotting.py @@ -91,6 +91,7 @@ class PosteriorPairPlotStyleEnum(StrEnum): POSTERIOR_FLATTENED_SAMPLE_NDIM = 2 MIN_POSTERIOR_PARAMETER_COUNT = 2 MIN_POSTERIOR_SAMPLE_COUNT = 2 +PAIR_DENSITY_SURFACE_NDIM = 2 POSTERIOR_DENSITY_LINE_COLOR = 'rgb(99, 110, 250)' POSTERIOR_DENSITY_FILL_COLOR = 'rgba(99, 110, 250, 0.22)' POSTERIOR_PAIR_MARGINAL_DENSITY_LINE_COLOR = 'rgb(44, 160, 44)' @@ -1621,7 +1622,9 @@ def 
_correlation_dataframe_from_persisted_projection( self, fit_results: object, ) -> pd.DataFrame | None: - """Return correlations restored from persisted fit-state rows.""" + """ + Return correlations restored from persisted fit-state rows. + """ if self._project is None: return None @@ -1651,11 +1654,9 @@ def _correlation_dataframe_from_persisted_projection( ] for row in correlation_rows: - parameter_names.extend( - [row.param_unique_name_i.value, row.param_unique_name_j.value] - ) + parameter_names.extend([row.param_unique_name_i.value, row.param_unique_name_j.value]) parameter_names = list(dict.fromkeys(parameter_names)) - if len(parameter_names) < 2: + if len(parameter_names) < MIN_POSTERIOR_PARAMETER_COUNT: return None correlation_values = np.eye(len(parameter_names), dtype=float) @@ -2491,6 +2492,44 @@ def _plot_legend_background_color(self) -> str: return legend_background_color() return PlotlyPlotter._legend_background_color() + def _resolved_posterior_contour_surface( + self, + *, + fit_results: object, + x_parameter_name: str, + y_parameter_name: str, + x_values: np.ndarray, + y_values: np.ndarray, + grid_size: int, + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray | None] | None: + """Return cached or computed posterior contour surface data.""" + cached_surface = self._cached_posterior_pair_surface( + x_parameter_name=x_parameter_name, + y_parameter_name=y_parameter_name, + ) + if cached_surface is not None: + return cached_surface + + bounds = self._posterior_pair_bounds( + fit_results=fit_results, + x_parameter_name=x_parameter_name, + y_parameter_name=y_parameter_name, + x_values=x_values, + y_values=y_values, + ) + density_surface = self._posterior_pair_density_surface( + x_values=x_values, + y_values=y_values, + x_bounds=bounds[0], + y_bounds=bounds[1], + grid_size=grid_size, + ) + if density_surface is None: + return None + + x_grid, y_grid, density = density_surface + return x_grid, y_grid, density, None + def _posterior_contour_traces( 
self, *, @@ -2506,32 +2545,18 @@ def _posterior_contour_traces( """ go = __import__('plotly.graph_objects', fromlist=['Contour']) - cached_surface = self._cached_posterior_pair_surface( + surface = self._resolved_posterior_contour_surface( + fit_results=fit_results, x_parameter_name=x_parameter_name, y_parameter_name=y_parameter_name, + x_values=x_values, + y_values=y_values, + grid_size=grid_size, ) - contour_levels = None - if cached_surface is None: - bounds = self._posterior_pair_bounds( - fit_results=fit_results, - x_parameter_name=x_parameter_name, - y_parameter_name=y_parameter_name, - x_values=x_values, - y_values=y_values, - ) - density_surface = self._posterior_pair_density_surface( - x_values=x_values, - y_values=y_values, - x_bounds=bounds[0], - y_bounds=bounds[1], - grid_size=grid_size, - ) - if density_surface is None: - return None + if surface is None: + return None - x_grid, y_grid, density = density_surface - else: - x_grid, y_grid, density, contour_levels = cached_surface + x_grid, y_grid, density, contour_levels = surface fill_colorscale, line_colorscale = self._posterior_pair_contour_colorscales( x_values, @@ -2592,7 +2617,9 @@ def _cached_posterior_pair_surface( x_parameter_name: str, y_parameter_name: str, ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray | None] | None: - """Return a restored posterior pair-density surface when available.""" + """ + Return a restored posterior pair-density surface when available. 
+ """ if self._project is None: return None @@ -2618,7 +2645,7 @@ def _cached_posterior_pair_surface( if x_parameter_name != cache_x or y_parameter_name != cache_y: x_grid, y_grid = y_grid, x_grid - if density.ndim == 2: + if density.ndim == PAIR_DENSITY_SURFACE_NDIM: density = density.T expected_shape = (y_grid.size, x_grid.size) @@ -2638,7 +2665,7 @@ def _posterior_contour_levels( density: np.ndarray, contour_levels: np.ndarray | None, ) -> tuple[float, float, float]: - """Return contour start, end, and step for one pair-density surface.""" + """Return contour start, end, and step for pair density.""" if contour_levels is not None and contour_levels.ndim == 1 and contour_levels.size > 0: finite_levels = contour_levels[np.isfinite(contour_levels)] if finite_levels.size > 0: @@ -2777,7 +2804,9 @@ def _cached_posterior_density_curve( self, parameter_name: str, ) -> tuple[np.ndarray, np.ndarray] | None: - """Return a restored posterior density curve for one parameter.""" + """ + Return a restored posterior density curve for one parameter. + """ if self._project is None: return None diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index b844b1ff..b2436e06 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -589,10 +589,7 @@ def _has_persisted_fit_state_sections(block: object) -> bool: def _restore_common_fit_state(analysis: object, block: object) -> None: - """ - Restore fit-state categories shared by deterministic and Bayesian - fits. 
- """ + """Restore fit-state categories shared by both fit kinds.""" analysis.fit_parameters.from_cif(block) analysis.fit_result.from_cif(block) analysis.fit_parameter_correlations.from_cif(block) @@ -621,7 +618,7 @@ def _restore_persisted_fit_state(analysis: object, block: object) -> None: """ from easydiffraction.analysis.enums import FitResultKindEnum # noqa: PLC0415 - analysis._set_has_persisted_fit_state(True) + analysis._set_has_persisted_fit_state(value=True) _restore_common_fit_state(analysis, block) result_kind_value = analysis.fit_result.result_kind.value @@ -831,45 +828,9 @@ def param_from_cif( if not found_values: return - # If found, pick the one at the given index + # If found, pick the one at the given index. raw = found_values[idx] - - # CIF unknown / inapplicable markers → keep default - if raw in {'?', '.'}: - return - - # If numeric, parse with uncertainty if present - if self._value_type == DataTypes.INTEGER: - numeric_value = str_to_ufloat(raw).n - integer_value = int(round(numeric_value)) - if not np.isclose(numeric_value, integer_value): - log.warning( - f'Ignoring non-integer CIF value {raw!r} for integer field ' - f'{self.unique_name}.' 
- ) - return - self.value = integer_value - - # If numeric, parse with uncertainty if present - elif self._value_type == DataTypes.NUMERIC: - has_brackets = '(' in raw - u = str_to_ufloat(raw) - self.value = u.n - if has_brackets and hasattr(self, 'free'): - self.free = True # type: ignore[attr-defined] - if not np.isnan(u.s) and hasattr(self, 'uncertainty'): - self.uncertainty = u.s # type: ignore[attr-defined] - - # If string, strip quotes if present - elif self._value_type == DataTypes.STRING: - self.value = _strip_optional_quotes(raw) - - elif self._value_type == DataTypes.BOOL: - self.value = _parse_bool_cif_value(raw) - - # Other types are not supported - else: - log.debug(f'Unrecognized type: {self._value_type}') + _set_param_from_raw_cif_value(self, raw) def category_item_from_cif( @@ -905,11 +866,10 @@ def _set_param_from_raw_cif_value( if param._value_type == DataTypes.INTEGER: numeric_value = str_to_ufloat(raw).n - integer_value = int(round(numeric_value)) + integer_value = round(numeric_value) if not np.isclose(numeric_value, integer_value): log.warning( - f'Ignoring non-integer CIF value {raw!r} for integer field ' - f'{param.unique_name}.' + f'Ignoring non-integer CIF value {raw!r} for integer field {param.unique_name}.' 
) return param.value = integer_value diff --git a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py index db56ef23..a8157b7c 100644 --- a/src/easydiffraction/io/results_sidecar.py +++ b/src/easydiffraction/io/results_sidecar.py @@ -1,8 +1,11 @@ -"""Read and write persisted Bayesian fit arrays in ``analysis/results.h5``.""" +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Bayesian fit sidecar read/write helpers.""" from __future__ import annotations from pathlib import Path +from tempfile import NamedTemporaryFile import numpy as np @@ -11,6 +14,8 @@ _POSTERIOR_PARAMETER_SAMPLES_PATH = '/posterior/parameter_samples' _POSTERIOR_LOG_POSTERIOR_PATH = '/posterior/log_posterior' _POSTERIOR_DRAW_INDEX_PATH = '/posterior/draw_index' +_POSTERIOR_SAMPLE_NDIM = 3 +_PREDICTIVE_DRAWS_NDIM = 2 def _normalized_hdf5_path(path: str) -> str: @@ -36,7 +41,9 @@ def _sidecar_path(*, analysis: object, analysis_dir: Path) -> Path: def _should_use_sidecar(analysis: object) -> bool: - """Return whether the analysis currently expects a Bayesian sidecar.""" + """ + Return whether the analysis currently expects a Bayesian sidecar. 
+ """ has_fit_state = getattr(analysis, '_has_persisted_fit_state', None) if not callable(has_fit_state) or not has_fit_state(): return False @@ -44,18 +51,18 @@ def _should_use_sidecar(analysis: object) -> bool: if analysis.fit_result.result_kind.value != 'bayesian': return False - return any( - ( - analysis.bayesian_result.has_posterior_samples.value, - len(analysis.bayesian_distribution_caches) > 0, - len(analysis.bayesian_pair_caches) > 0, - len(analysis.bayesian_predictive_datasets) > 0, - ) - ) + return any(( + analysis.bayesian_result.has_posterior_samples.value, + len(analysis.bayesian_distribution_caches) > 0, + len(analysis.bayesian_pair_caches) > 0, + len(analysis.bayesian_predictive_datasets) > 0, + )) def _delete_stale_sidecar(sidecar_path: Path) -> None: - """Delete an existing sidecar when no persisted arrays should remain.""" + """ + Delete an existing sidecar when no persisted arrays should remain. + """ if sidecar_path.is_file(): sidecar_path.unlink() @@ -79,7 +86,7 @@ def _read_dataset(handle: object, path: str) -> np.ndarray | None: def _posterior_payload_from_analysis(analysis: object) -> dict[str, np.ndarray | None]: - """Return canonical posterior arrays from runtime results or restored sidecar data.""" + """Return posterior arrays from runtime or restored data.""" fit_results = getattr(analysis, 'fit_results', None) posterior_samples = getattr(fit_results, 'posterior_samples', None) if posterior_samples is not None: @@ -102,7 +109,7 @@ def _posterior_payload_from_analysis(analysis: object) -> dict[str, np.ndarray | def _distribution_cache_payload(analysis: object) -> dict[str, dict[str, np.ndarray]]: - """Return persisted distribution-cache arrays keyed by parameter name.""" + """Return persisted distribution caches keyed by parameter name.""" sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) return dict(sidecar_data.get('distribution_caches', {})) @@ -150,7 +157,7 @@ def _validate_posterior_payload( analysis: object, 
payload: dict[str, np.ndarray | None], ) -> bool: - """Return whether canonical posterior arrays match manifest metadata.""" + """Return whether posterior arrays match stored metadata.""" parameter_samples = payload.get('parameter_samples') if parameter_samples is None: if analysis.bayesian_result.has_posterior_samples.value: @@ -158,25 +165,58 @@ def _validate_posterior_payload( return False parameter_samples = np.asarray(parameter_samples, dtype=float) - if parameter_samples.ndim != 3: + if parameter_samples.ndim != _POSTERIOR_SAMPLE_NDIM: log.warning( 'Posterior parameter samples must have shape (n_draws, n_chains, n_parameters).' ) return False n_draws, n_chains, n_parameters = parameter_samples.shape - if analysis.bayesian_convergence.n_draws.value not in (0, n_draws): + if not _posterior_manifest_counts_match( + analysis, + n_draws=n_draws, + n_chains=n_chains, + n_parameters=n_parameters, + ): + return False + + return _posterior_aux_shapes_match( + payload, + n_draws=n_draws, + n_chains=n_chains, + ) + + +def _posterior_manifest_counts_match( + analysis: object, + *, + n_draws: int, + n_chains: int, + n_parameters: int, +) -> bool: + """Return whether manifest counts match the sample shape.""" + if analysis.bayesian_convergence.n_draws.value not in {0, n_draws}: log.warning('Posterior sample draw count does not match bayesian_convergence.n_draws.') return False - if analysis.bayesian_convergence.n_chains.value not in (0, n_chains): + if analysis.bayesian_convergence.n_chains.value not in {0, n_chains}: log.warning('Posterior sample chain count does not match bayesian_convergence.n_chains.') return False - if analysis.bayesian_convergence.n_parameters.value not in (0, n_parameters): + if analysis.bayesian_convergence.n_parameters.value not in {0, n_parameters}: log.warning( 'Posterior sample parameter count does not match bayesian_convergence.n_parameters.' 
) return False + return True + + +def _posterior_aux_shapes_match( + payload: dict[str, np.ndarray | None], + *, + n_draws: int, + n_chains: int, +) -> bool: + """Return whether auxiliary arrays match the sample shape.""" log_posterior = payload.get('log_posterior') if log_posterior is not None and np.asarray(log_posterior).shape != (n_draws, n_chains): log.warning( @@ -193,7 +233,7 @@ def _validate_posterior_payload( def _write_posterior_payload(handle: object, analysis: object) -> bool: - """Write canonical posterior arrays when they are available and valid.""" + """Write canonical posterior arrays when they are available.""" payload = _posterior_payload_from_analysis(analysis) if not _validate_posterior_payload(analysis, payload): return False @@ -227,7 +267,7 @@ def _write_distribution_caches(handle: object, analysis: object) -> bool: if x_values.shape != (n_grid,) or density_values.shape != (n_grid,): log.warning( 'Skipping Bayesian distribution cache with shape mismatch for ' - f"{cache.param_unique_name.value!r}." + f'{cache.param_unique_name.value!r}.' ) continue @@ -254,14 +294,17 @@ def _write_pair_caches(handle: object, analysis: object) -> bool: n_grid_x = int(cache.n_grid_x.value) n_grid_y = int(cache.n_grid_y.value) - valid_density_shape = density_values.shape in ( + valid_density_shape = density_values.shape in { (n_grid_y, n_grid_x), (n_grid_x, n_grid_y), - ) - if x_values.shape != (n_grid_x,) or y_values.shape != (n_grid_y,) or not valid_density_shape: + } + if ( + x_values.shape != (n_grid_x,) + or y_values.shape != (n_grid_y,) + or not valid_density_shape + ): log.warning( - 'Skipping Bayesian pair cache with shape mismatch for ' - f"{cache.id.value!r}." + f'Skipping Bayesian pair cache with shape mismatch for {cache.id.value!r}.' 
) continue @@ -289,7 +332,7 @@ def _write_predictive_datasets(handle: object, analysis: object) -> bool: if x_values.shape != (n_x,) or best_sample_prediction.shape != (n_x,): log.warning( 'Skipping Bayesian predictive dataset with shape mismatch for ' - f"{dataset.experiment_name.value!r}." + f'{dataset.experiment_name.value!r}.' ) continue @@ -313,7 +356,7 @@ def _write_predictive_datasets(handle: object, analysis: object) -> bool: if values_array.shape != (n_x,): log.warning( 'Skipping Bayesian predictive band with shape mismatch for ' - f"{dataset.experiment_name.value!r}:{field_name}." + f'{dataset.experiment_name.value!r}:{field_name}.' ) continue _create_dataset(handle, path_value, values_array) @@ -321,19 +364,18 @@ def _write_predictive_datasets(handle: object, analysis: object) -> bool: draws = dataset_data.get('draws') if draws is not None and dataset.draws_path.value is not None: draws_array = np.asarray(draws) - if draws_array.ndim != 2 or draws_array.shape[1] != n_x: + if draws_array.ndim != _PREDICTIVE_DRAWS_NDIM or draws_array.shape[1] != n_x: log.warning( 'Skipping Bayesian predictive draws with shape mismatch for ' - f"{dataset.experiment_name.value!r}." + f'{dataset.experiment_name.value!r}.' + ) + elif dataset.n_draws_cached.value not in {0, draws_array.shape[0]}: + log.warning( + 'Skipping Bayesian predictive draws whose draw count does not match ' + 'the manifest metadata.' ) else: - if dataset.n_draws_cached.value not in (0, draws_array.shape[0]): - log.warning( - 'Skipping Bayesian predictive draws whose draw count does not match ' - 'the manifest metadata.' - ) - else: - _create_dataset(handle, dataset.draws_path.value, draws_array) + _create_dataset(handle, dataset.draws_path.value, draws_array) wrote_any = True @@ -351,10 +393,16 @@ def write_analysis_results_sidecar( Parameters ---------- analysis : object - Analysis instance that owns fit-state categories and runtime - fit results. 
+ Analysis instance that owns fit-state categories and runtime fit + results. analysis_dir : Path The project ``analysis/`` directory. + + Raises + ------ + Exception + Propagated when sidecar writing fails after temporary-file + cleanup. """ sidecar_path = _sidecar_path(analysis=analysis, analysis_dir=analysis_dir) if not _should_use_sidecar(analysis): @@ -362,17 +410,15 @@ def write_analysis_results_sidecar( return import h5py # noqa: PLC0415 - from tempfile import NamedTemporaryFile # noqa: PLC0415 analysis_dir.mkdir(parents=True, exist_ok=True) - temporary_file = NamedTemporaryFile( + with NamedTemporaryFile( delete=False, dir=analysis_dir, prefix=f'{sidecar_path.stem}.', suffix=sidecar_path.suffix, - ) - temporary_path = Path(temporary_file.name) - temporary_file.close() + ) as temporary_file: + temporary_path = Path(temporary_file.name) try: with h5py.File(temporary_path, 'w') as handle: @@ -414,7 +460,9 @@ def _read_posterior_payload(handle: object, analysis: object) -> dict[str, np.nd return payload -def _read_distribution_caches(handle: object, analysis: object) -> dict[str, dict[str, np.ndarray]]: +def _read_distribution_caches( + handle: object, analysis: object +) -> dict[str, dict[str, np.ndarray]]: """Read cached posterior distribution arrays for manifest rows.""" payload: dict[str, dict[str, np.ndarray]] = {} for cache in analysis.bayesian_distribution_caches: @@ -427,7 +475,7 @@ def _read_distribution_caches(handle: object, analysis: object) -> dict[str, dic ): log.warning( 'Skipping restored Bayesian distribution cache with shape mismatch for ' - f"{cache.param_unique_name.value!r}." + f'{cache.param_unique_name.value!r}.' 
) continue payload[cache.param_unique_name.value] = { @@ -450,14 +498,18 @@ def _read_pair_caches(handle: object, analysis: object) -> dict[str, dict[str, n n_grid_x = int(cache.n_grid_x.value) n_grid_y = int(cache.n_grid_y.value) - valid_density_shape = density_values.shape in ( + valid_density_shape = density_values.shape in { (n_grid_y, n_grid_x), (n_grid_x, n_grid_y), - ) - if x_values.shape != (n_grid_x,) or y_values.shape != (n_grid_y,) or not valid_density_shape: + } + if ( + x_values.shape != (n_grid_x,) + or y_values.shape != (n_grid_y,) + or not valid_density_shape + ): log.warning( 'Skipping restored Bayesian pair cache with shape mismatch for ' - f"{cache.id.value!r}." + f'{cache.id.value!r}.' ) continue @@ -470,7 +522,9 @@ def _read_pair_caches(handle: object, analysis: object) -> dict[str, dict[str, n return payload -def _read_predictive_datasets(handle: object, analysis: object) -> dict[str, dict[str, np.ndarray]]: +def _read_predictive_datasets( + handle: object, analysis: object +) -> dict[str, dict[str, np.ndarray]]: """Read cached posterior predictive arrays for manifest rows.""" payload: dict[str, dict[str, np.ndarray]] = {} for dataset in analysis.bayesian_predictive_datasets: @@ -483,7 +537,7 @@ def _read_predictive_datasets(handle: object, analysis: object) -> dict[str, dic if x_values.shape != (n_x,) or best_sample_prediction.shape != (n_x,): log.warning( 'Skipping restored Bayesian predictive dataset with shape mismatch for ' - f"{dataset.experiment_name.value!r}." + f'{dataset.experiment_name.value!r}.' 
) continue @@ -506,16 +560,16 @@ def _read_predictive_datasets(handle: object, analysis: object) -> dict[str, dic continue values_array = np.asarray(values) if field_name == 'draws': - if values_array.ndim != 2 or values_array.shape[1] != n_x: + if values_array.ndim != _PREDICTIVE_DRAWS_NDIM or values_array.shape[1] != n_x: log.warning( 'Skipping restored Bayesian predictive draws with shape mismatch for ' - f"{dataset.experiment_name.value!r}." + f'{dataset.experiment_name.value!r}.' ) continue elif values_array.shape != (n_x,): log.warning( 'Skipping restored Bayesian predictive band with shape mismatch for ' - f"{dataset.experiment_name.value!r}:{field_name}." + f'{dataset.experiment_name.value!r}:{field_name}.' ) continue dataset_payload[field_name] = values_array @@ -572,4 +626,4 @@ def read_analysis_results_sidecar( if predictive_datasets: sidecar_data['predictive_datasets'] = predictive_datasets - analysis._persisted_fit_state_sidecar = sidecar_data \ No newline at end of file + analysis._persisted_fit_state_sidecar = sidecar_data diff --git a/src/easydiffraction/project/display.py b/src/easydiffraction/project/display.py index 2f74245d..5b760d99 100644 --- a/src/easydiffraction/project/display.py +++ b/src/easydiffraction/project/display.py @@ -140,7 +140,9 @@ def _pairs_need_processing_indicator( *, parameters: list[object] | None, ) -> bool: - """Return whether posterior pair plotting still needs processing.""" + """ + Return whether posterior pair plotting still needs processing. 
+ """ if parameters is not None: return True @@ -160,7 +162,7 @@ def _predictive_needs_processing_indicator( style: str, x: object | None, ) -> bool: - """Return whether posterior predictive plotting still needs processing.""" + """Return whether predictive plotting still needs processing.""" analysis = self._project.analysis sidecar_data = getattr(analysis, '_persisted_fit_state_sidecar', {}) predictive_datasets = sidecar_data.get('predictive_datasets', {}) @@ -174,10 +176,10 @@ def _predictive_needs_processing_indicator( experiment = self._project.experiments[expt_name] plotter = self._project.rendering.plotter _, x_axis_name, _, _, _ = plotter._resolve_x_axis(experiment.type, x) - require_draws = ( - plotter.engine == PlotterEngineEnum.PLOTLY.value - and style in {'draws', 'band+draws'} - ) + require_draws = plotter.engine == PlotterEngineEnum.PLOTLY.value and style in { + 'draws', + 'band+draws', + } matching_rows = [ row diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 016b697d..4a58eda3 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -16,8 +16,12 @@ from easydiffraction.core.guard import GuardedBase from easydiffraction.datablocks.experiment.collection import Experiments from easydiffraction.datablocks.structure.collection import Structures +from easydiffraction.io.cif.serialize import analysis_from_cif +from easydiffraction.io.cif.serialize import project_config_from_cif from easydiffraction.io.cif.serialize import project_config_to_cif from easydiffraction.io.cif.serialize import project_to_cif +from easydiffraction.io.results_sidecar import read_analysis_results_sidecar +from easydiffraction.io.results_sidecar import write_analysis_results_sidecar from easydiffraction.project.display import ProjectDisplay from easydiffraction.project.project_config import ProjectConfig from easydiffraction.summary.summary import Summary @@ -27,6 +31,8 @@ from 
easydiffraction.utils.logging import log if TYPE_CHECKING: + from collections.abc import Callable + from easydiffraction.project.categories.rendering import Rendering from easydiffraction.project.categories.verbosity import Verbosity from easydiffraction.project.project_info import ProjectInfo @@ -110,6 +116,63 @@ def _resolve_data_path_from_results_csv( return project_path / path +def _load_cif_directory( + cif_dir: pathlib.Path, + add_from_cif_path: Callable[[str], None], +) -> None: + """Load all CIF files from one directory using the given loader.""" + if not cif_dir.is_dir(): + return + + for cif_file in sorted(cif_dir.glob('*.cif')): + add_from_cif_path(str(cif_file)) + + +def _create_loading_project(project_cls: type[Project]) -> Project: + """Create a project instance while suppressing varname lookup.""" + project_cls._loading = True + try: + return project_cls() + finally: + project_cls._loading = False + + +def _load_project_info(project: Project, project_path: pathlib.Path) -> None: + """ + Restore project configuration from ``project.cif`` when present. 
+ """ + project_cif_path = project_path / 'project.cif' + if project_cif_path.is_file(): + project_config_from_cif(project, project_cif_path.read_text()) + + +def _resolved_analysis_cif_path(project_path: pathlib.Path) -> pathlib.Path | None: + """Return the preferred analysis CIF path for a saved project.""" + analysis_cif_path = project_path / 'analysis' / 'analysis.cif' + if analysis_cif_path.is_file(): + return analysis_cif_path + + analysis_cif_path = project_path / 'analysis.cif' + if analysis_cif_path.is_file(): + return analysis_cif_path + return None + + +def _load_project_analysis(project: Project, project_path: pathlib.Path) -> None: + """Restore analysis categories and sidecar state from disk.""" + analysis_cif_path = _resolved_analysis_cif_path(project_path) + if analysis_cif_path is None: + return + + analysis_from_cif(project._analysis, analysis_cif_path.read_text()) + read_analysis_results_sidecar( + analysis=project._analysis, + analysis_dir=analysis_cif_path.parent, + ) + if project._analysis._has_persisted_fit_state(): + project._analysis._restore_live_parameter_state(project._build_parameter_map()) + + class Project(GuardedBase): """ Central API for managing a diffraction data analysis project. @@ -298,57 +361,19 @@ def load(cls, dir_path: str) -> Project: FileNotFoundError If *dir_path* does not exist. """ - from easydiffraction.io.cif.serialize import analysis_from_cif # noqa: PLC0415 - from easydiffraction.io.cif.serialize import project_config_from_cif # noqa: PLC0415 - from easydiffraction.io.results_sidecar import read_analysis_results_sidecar # noqa: PLC0415 - project_path = pathlib.Path(dir_path) if not project_path.is_dir(): msg = f"Project directory not found: '{dir_path}'" raise FileNotFoundError(msg) - # Create a minimal project. - # Use _loading sentinel to skip varname() inside __init__. 
- cls._loading = True - try: - project = cls() - finally: - cls._loading = False + project = _create_loading_project(cls) project._saved = True - # 1. Load project info - project_cif_path = project_path / 'project.cif' - if project_cif_path.is_file(): - cif_text = project_cif_path.read_text() - project_config_from_cif(project, cif_text) - + _load_project_info(project, project_path) project.info.path = project_path - - # 2. Load structures - structures_dir = project_path / 'structures' - if structures_dir.is_dir(): - for cif_file in sorted(structures_dir.glob('*.cif')): - project._structures.add_from_cif_path(str(cif_file)) - - # 3. Load experiments - experiments_dir = project_path / 'experiments' - if experiments_dir.is_dir(): - for cif_file in sorted(experiments_dir.glob('*.cif')): - project._experiments.add_from_cif_path(str(cif_file)) - - # 4. Load analysis - # Check analysis/analysis.cif first (future layout), then - # fall back to analysis.cif at root (current layout). - analysis_cif_path = project_path / 'analysis' / 'analysis.cif' - if not analysis_cif_path.is_file(): - analysis_cif_path = project_path / 'analysis.cif' - if analysis_cif_path.is_file(): - cif_text = analysis_cif_path.read_text() - analysis_from_cif(project._analysis, cif_text) - read_analysis_results_sidecar( - analysis=project._analysis, - analysis_dir=analysis_cif_path.parent, - ) + _load_cif_directory(project_path / 'structures', project._structures.add_from_cif_path) + _load_cif_directory(project_path / 'experiments', project._experiments.add_from_cif_path) + _load_project_analysis(project, project_path) # 5. 
Resolve alias param references project._resolve_alias_references() @@ -403,8 +428,6 @@ def _build_parameter_map(self) -> dict[str, object]: def save(self) -> None: """Save the project into the existing project directory.""" - from easydiffraction.io.results_sidecar import write_analysis_results_sidecar # noqa: PLC0415 - if self.info.path is None: log.error('Project path not specified. Use save_as() to define the path first.') return diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index dde42928..7d54036e 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -123,6 +123,34 @@ def _existing_project_dir(extraction_dir: pathlib.Path) -> pathlib.Path | None: return project_files[0].parent.resolve() +def _download_data_message(data_id: int | str, record: dict) -> str: + """Return the console message for one downloadable data record.""" + description = record.get('description', '') + message = f'Data #{data_id}' + if description: + message += f': {description}' + return message + + +def _download_data_targets( + data_id: int | str, + destination: str, + record: dict, +) -> tuple[str, bool, pathlib.Path, pathlib.Path, pathlib.Path, str]: + """Return URL and filesystem targets for one download request.""" + record_path = _record_path(record) + url = _build_data_url(record_path) + _validate_url(url) + + fname = _filename_for_id_from_path(data_id, record_path) + is_project_archive = record.get('kind') == 'project' and fname.endswith('.zip') + dest_path = resolve_artifact_path(destination) + dest_path.mkdir(parents=True, exist_ok=True) + file_path = dest_path / fname + extraction_dir = dest_path / pathlib.Path(fname).stem + return url, is_project_archive, dest_path, file_path, extraction_dir, fname + + @functools.lru_cache(maxsize=1) def _fetch_tutorials_index() -> dict: """ @@ -172,8 +200,8 @@ def download_data( Numeric dataset id (e.g. 12). 
destination : str, default='data' Directory to save the downloaded file or extracted project into - (created if missing). Relative destinations are resolved - against the configured artifact root when + (created if missing). Relative destinations are resolved against + the configured artifact root when ``EASYDIFFRACTION_ARTIFACT_ROOT`` is set. overwrite : bool, default=False Whether to overwrite the file if it already exists. @@ -201,21 +229,10 @@ def download_data( raise KeyError(msg) record = index[key] - record_path = _record_path(record) - url = _build_data_url(record_path) - _validate_url(url) - fname = _filename_for_id_from_path(id, record_path) - is_project_archive = record.get('kind') == 'project' and fname.endswith('.zip') - - dest_path = resolve_artifact_path(destination) - dest_path.mkdir(parents=True, exist_ok=True) - file_path = dest_path / fname - extraction_dir = dest_path / pathlib.Path(fname).stem - - description = record.get('description', '') - message = f'Data #{id}' - if description: - message += f': {description}' + url, is_project_archive, dest_path, file_path, extraction_dir, fname = _download_data_targets( + id, destination, record + ) + message = _download_data_message(id, record) console.paragraph('Getting data...') console.print(f'{message}') @@ -281,14 +298,12 @@ def list_data() -> None: for data_id in sorted(index, key=lambda value: int(value) if value.isdigit() else value): record = index[data_id] - columns_data.append( - [ - data_id, - pathlib.PurePosixPath(_record_path(record)).name, - record.get('kind', ''), - record.get('description', ''), - ] - ) + columns_data.append([ + data_id, + pathlib.PurePosixPath(_record_path(record)).name, + record.get('kind', ''), + record.get('description', ''), + ]) render_table( columns_headers=columns_headers, diff --git a/tests/integration/fitting/test_bayesian_dream.py b/tests/integration/fitting/test_bayesian_dream.py index d6695973..ef52c1f1 100644 --- 
a/tests/integration/fitting/test_bayesian_dream.py +++ b/tests/integration/fitting/test_bayesian_dream.py @@ -163,7 +163,7 @@ def test_lm_prefit_followed_by_dream_uses_uncertainty_based_bounds(): assert len(results.posterior_parameter_summaries) == 3 -def test_bayesian_fit_results_are_runtime_only_after_save_load(tmp_path): +def test_bayesian_fit_results_reload_from_persisted_fit_state(tmp_path): project = _create_lbco_project() length_a, scale, offset = _dream_parameters(project) for parameter in (length_a, scale, offset): @@ -185,8 +185,13 @@ def test_bayesian_fit_results_are_runtime_only_after_save_load(tmp_path): project.save_as(str(proj_dir)) analysis_cif = proj_dir / 'analysis' / 'analysis.cif' + results_sidecar = proj_dir / 'analysis' / 'results.h5' assert analysis_cif.is_file() - assert 'posterior' not in analysis_cif.read_text().lower() + assert results_sidecar.is_file() loaded = Project.load(str(proj_dir)) - assert loaded.analysis.fit_results is None + loaded_results = loaded.analysis.fit_results + assert loaded_results is not None + assert loaded_results.sampler_completed is True + assert loaded_results.posterior_samples is not None + assert loaded_results.posterior_samples.parameter_samples.ndim == 3 diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_convergence.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_convergence.py new file mode 100644 index 00000000..dbcaf729 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_convergence.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_convergence/.""" + + +def test_bayesian_convergence_factory_create(): + from easydiffraction.analysis.categories.bayesian_convergence.default import ( + BayesianConvergence, + ) + from easydiffraction.analysis.categories.bayesian_convergence.factory import ( + BayesianConvergenceFactory, + ) + + 
convergence = BayesianConvergenceFactory.create('default') + + assert BayesianConvergenceFactory.default_tag() == 'default' + assert isinstance(convergence, BayesianConvergence) diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_distribution_caches.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_distribution_caches.py new file mode 100644 index 00000000..26e91f73 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_distribution_caches.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_distribution_caches/.""" + + +def test_bayesian_distribution_caches_factory_create(): + from easydiffraction.analysis.categories.bayesian_distribution_caches.default import ( + BayesianDistributionCaches, + ) + from easydiffraction.analysis.categories.bayesian_distribution_caches.factory import ( + BayesianDistributionCachesFactory, + ) + + caches = BayesianDistributionCachesFactory.create('default') + + assert BayesianDistributionCachesFactory.default_tag() == 'default' + assert isinstance(caches, BayesianDistributionCaches) diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_pair_caches.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_pair_caches.py new file mode 100644 index 00000000..f4307f2c --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_pair_caches.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_pair_caches/.""" + + +def test_bayesian_pair_caches_factory_create(): + from easydiffraction.analysis.categories.bayesian_pair_caches.default import BayesianPairCaches + from easydiffraction.analysis.categories.bayesian_pair_caches.factory import ( + BayesianPairCachesFactory, + ) + + caches = 
BayesianPairCachesFactory.create('default') + + assert BayesianPairCachesFactory.default_tag() == 'default' + assert isinstance(caches, BayesianPairCaches) diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_parameter_posteriors.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_parameter_posteriors.py new file mode 100644 index 00000000..18aa5f01 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_parameter_posteriors.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_parameter_posteriors/.""" + + +def test_bayesian_parameter_posteriors_factory_create(): + from easydiffraction.analysis.categories.bayesian_parameter_posteriors.default import ( + BayesianParameterPosteriors, + ) + from easydiffraction.analysis.categories.bayesian_parameter_posteriors.factory import ( + BayesianParameterPosteriorsFactory, + ) + + posteriors = BayesianParameterPosteriorsFactory.create('default') + + assert BayesianParameterPosteriorsFactory.default_tag() == 'default' + assert isinstance(posteriors, BayesianParameterPosteriors) diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_predictive_datasets.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_predictive_datasets.py new file mode 100644 index 00000000..93746fb4 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_predictive_datasets.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_predictive_datasets/.""" + + +def test_bayesian_predictive_datasets_factory_create(): + from easydiffraction.analysis.categories.bayesian_predictive_datasets.default import ( + BayesianPredictiveDatasets, + ) + from easydiffraction.analysis.categories.bayesian_predictive_datasets.factory import ( + 
BayesianPredictiveDatasetsFactory, + ) + + datasets = BayesianPredictiveDatasetsFactory.create('default') + + assert BayesianPredictiveDatasetsFactory.default_tag() == 'default' + assert isinstance(datasets, BayesianPredictiveDatasets) diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_result.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_result.py new file mode 100644 index 00000000..e1d98538 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_result.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_result/.""" + + +def test_bayesian_result_factory_create(): + from easydiffraction.analysis.categories.bayesian_result.default import BayesianResult + from easydiffraction.analysis.categories.bayesian_result.factory import BayesianResultFactory + + result = BayesianResultFactory.create('default') + + assert BayesianResultFactory.default_tag() == 'default' + assert isinstance(result, BayesianResult) diff --git a/tests/unit/easydiffraction/analysis/categories/test_bayesian_sampler.py b/tests/unit/easydiffraction/analysis/categories/test_bayesian_sampler.py new file mode 100644 index 00000000..97634176 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_bayesian_sampler.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/bayesian_sampler/.""" + + +def test_bayesian_sampler_factory_create(): + from easydiffraction.analysis.categories.bayesian_sampler.default import BayesianSampler + from easydiffraction.analysis.categories.bayesian_sampler.factory import BayesianSamplerFactory + + sampler = BayesianSamplerFactory.create('default') + + assert BayesianSamplerFactory.default_tag() == 'default' + assert isinstance(sampler, BayesianSampler) diff --git 
a/tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py b/tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py new file mode 100644 index 00000000..7dcb37b7 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/deterministic_parameter_results/.""" + + +def test_deterministic_parameter_results_factory_create(): + from easydiffraction.analysis.categories.deterministic_parameter_results.default import ( + DeterministicParameterResults, + ) + from easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( + DeterministicParameterResultsFactory, + ) + + results = DeterministicParameterResultsFactory.create('default') + + assert DeterministicParameterResultsFactory.default_tag() == 'default' + assert isinstance(results, DeterministicParameterResults) diff --git a/tests/unit/easydiffraction/analysis/categories/test_deterministic_result.py b/tests/unit/easydiffraction/analysis/categories/test_deterministic_result.py new file mode 100644 index 00000000..ed764146 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_deterministic_result.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/deterministic_result/.""" + + +def test_deterministic_result_factory_create(): + from easydiffraction.analysis.categories.deterministic_result.default import ( + DeterministicResult, + ) + from easydiffraction.analysis.categories.deterministic_result.factory import ( + DeterministicResultFactory, + ) + + result = DeterministicResultFactory.create('default') + + assert DeterministicResultFactory.default_tag() == 'default' + assert isinstance(result, DeterministicResult) diff --git 
a/tests/unit/easydiffraction/analysis/categories/test_fit_parameter_correlations.py b/tests/unit/easydiffraction/analysis/categories/test_fit_parameter_correlations.py new file mode 100644 index 00000000..8cb72d57 --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_fit_parameter_correlations.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/fit_parameter_correlations/.""" + + +def test_fit_parameter_correlations_factory_create(): + from easydiffraction.analysis.categories.fit_parameter_correlations.default import ( + FitParameterCorrelations, + ) + from easydiffraction.analysis.categories.fit_parameter_correlations.factory import ( + FitParameterCorrelationsFactory, + ) + + correlations = FitParameterCorrelationsFactory.create('default') + + assert FitParameterCorrelationsFactory.default_tag() == 'default' + assert isinstance(correlations, FitParameterCorrelations) diff --git a/tests/unit/easydiffraction/analysis/categories/test_fit_parameters.py b/tests/unit/easydiffraction/analysis/categories/test_fit_parameters.py new file mode 100644 index 00000000..e3d0cedd --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_fit_parameters.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/fit_parameters/.""" + + +def test_fit_parameters_factory_create(): + from easydiffraction.analysis.categories.fit_parameters.default import FitParameters + from easydiffraction.analysis.categories.fit_parameters.factory import FitParametersFactory + + collection = FitParametersFactory.create('default') + + assert FitParametersFactory.default_tag() == 'default' + assert isinstance(collection, FitParameters) diff --git a/tests/unit/easydiffraction/analysis/categories/test_fit_result.py b/tests/unit/easydiffraction/analysis/categories/test_fit_result.py 
new file mode 100644 index 00000000..4c9d869d --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_fit_result.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for analysis/categories/fit_result/.""" + + +def test_fit_result_factory_create(): + from easydiffraction.analysis.categories.fit_result.default import FitResult + from easydiffraction.analysis.categories.fit_result.factory import FitResultFactory + + fit_result = FitResultFactory.create('default') + + assert FitResultFactory.default_tag() == 'default' + assert isinstance(fit_result, FitResult) diff --git a/tests/unit/easydiffraction/analysis/categories/test_fit_state.py b/tests/unit/easydiffraction/analysis/categories/test_fit_state.py new file mode 100644 index 00000000..236c798a --- /dev/null +++ b/tests/unit/easydiffraction/analysis/categories/test_fit_state.py @@ -0,0 +1,207 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for persisted fit-state analysis categories.""" + +from __future__ import annotations + +import gemmi + + +def test_fit_parameter_collection_is_empty_until_rows_exist(): + from easydiffraction.analysis.categories.fit_parameters.default import FitParameters + + collection = FitParameters() + + assert collection.as_cif == '' + + +def test_fit_parameter_collection_serializes_expected_tags_and_values(): + from easydiffraction.analysis.categories.fit_parameters.default import FitParameters + + collection = FitParameters() + collection.create( + param_unique_name='lbco.cell.length_a', + fit_min=3.88, + fit_max=3.90, + fit_bounds_uncertainty_multiplier=4.0, + start_value=3.89, + start_uncertainty=0.01, + ) + + cif_text = collection.as_cif + + assert '_fit_parameter.param_unique_name' in cif_text + assert '_fit_parameter.fit_bounds_uncertainty_multiplier' in cif_text + assert 'lbco.cell.length_a' in cif_text + + +def 
test_fit_result_serializes_expected_tags_and_enum_value(): + from easydiffraction.analysis.categories.fit_result.default import FitResult + + fit_result = FitResult() + fit_result._set_result_kind('bayesian') + fit_result._set_success(value=True) + fit_result._set_message('Sampler completed') + fit_result._set_iterations(3000) + fit_result._set_fitting_time(82.4) + fit_result._set_reduced_chi_square(1.031) + + cif_text = fit_result.as_cif + + assert '_fit_result.result_kind bayesian' in cif_text + assert '_fit_result.iterations 3000' in cif_text + assert '_fit_result.reduced_chi_square' in cif_text + + +def test_fit_parameter_correlations_normalize_pair_order_and_replace_duplicate_ids(): + from easydiffraction.analysis.categories.fit_parameter_correlations.default import ( + FitParameterCorrelations, + ) + + correlations = FitParameterCorrelations() + correlations.create( + source_kind='posterior', + param_unique_name_i='z.param', + param_unique_name_j='a.param', + correlation=0.87, + id='1', + ) + correlations.create( + source_kind='posterior', + param_unique_name_i='b.param', + param_unique_name_j='c.param', + correlation=0.55, + id='1', + ) + + assert len(correlations) == 1 + assert correlations['1'].param_unique_name_i.value == 'b.param' + assert correlations['1'].param_unique_name_j.value == 'c.param' + + +def test_fit_parameter_correlations_rebuild_index_from_cif(): + from easydiffraction.analysis.categories.fit_parameter_correlations.default import ( + FitParameterCorrelations, + ) + + cif_text = """data_fit_state +loop_ +_fit_parameter_correlation.id +_fit_parameter_correlation.source_kind +_fit_parameter_correlation.param_unique_name_i +_fit_parameter_correlation.param_unique_name_j +_fit_parameter_correlation.correlation +2 posterior hrpt.scale lbco.cell.length_a 0.42 +""" + document = gemmi.cif.read_string(cif_text) + + correlations = FitParameterCorrelations() + correlations.from_cif(document.sole_block()) + + assert correlations.names == ['2'] + assert 
correlations['2'].correlation.value == 0.42 + + +def test_bayesian_cache_manifest_collections_serialize_expected_keys(): + from easydiffraction.analysis.categories.bayesian_distribution_caches.default import ( + BayesianDistributionCaches, + ) + from easydiffraction.analysis.categories.bayesian_pair_caches.default import ( + BayesianPairCachePaths, + BayesianPairCaches, + ) + from easydiffraction.analysis.categories.bayesian_predictive_datasets.default import ( + BayesianPredictiveDatasetPaths, + BayesianPredictiveDatasets, + ) + + distributions = BayesianDistributionCaches() + distributions.create( + param_unique_name='lbco.cell.length_a', + x_path='/posterior/distribution/0/x', + density_path='/posterior/distribution/0/density', + n_grid=256, + n_draws_cached=48000, + ) + pairs = BayesianPairCaches() + pairs.create( + parameter_names=('z.param', 'a.param'), + paths=BayesianPairCachePaths( + x_path='/posterior/pairs/0/x', + y_path='/posterior/pairs/0/y', + density_path='/posterior/pairs/0/density', + contour_level_path='/posterior/pairs/0/contour_levels', + ), + grid_shape=(64, 64), + n_draws_cached=4000, + id='7', + ) + predictive = BayesianPredictiveDatasets() + predictive.create( + experiment_name='hrpt', + x_axis_name='two_theta', + paths=BayesianPredictiveDatasetPaths( + x_path='/predictive/hrpt/x', + best_sample_prediction_path='/predictive/hrpt/best_sample_prediction', + lower_95_path='/predictive/hrpt/lower_95', + upper_95_path='/predictive/hrpt/upper_95', + ), + n_x=2500, + n_draws_cached=0, + ) + + assert '_bayesian_distribution_cache.param_unique_name' in distributions.as_cif + assert pairs['7'].param_unique_name_x.value == 'a.param' + assert pairs['7'].param_unique_name_y.value == 'z.param' + assert '_bayesian_predictive_dataset.experiment_name' in predictive.as_cif + + +def test_bayesian_sampler_and_convergence_use_integer_fields_in_cif(): + from easydiffraction.analysis.categories.bayesian_convergence.default import ( + BayesianConvergence, + ) + 
from easydiffraction.analysis.categories.bayesian_sampler.default import BayesianSampler + + sampler = BayesianSampler() + sampler._set_steps(100) + sampler._set_burn(20) + sampler._set_parallel(0) + sampler._set_random_seed(123) + + convergence = BayesianConvergence() + convergence._set_n_draws(80) + convergence._set_n_chains(4) + convergence._set_n_parameters(3) + + assert '_bayesian_sampler.steps 100' in sampler.as_cif + assert '_bayesian_sampler.parallel 0' in sampler.as_cif + assert '_bayesian_convergence.n_draws 80' in convergence.as_cif + + +def test_bayesian_parameter_posteriors_preserve_row_order_from_cif(): + from easydiffraction.analysis.categories.bayesian_parameter_posteriors.default import ( + BayesianParameterPosteriors, + ) + + cif_text = """data_fit_state +loop_ +_bayesian_parameter_posterior.unique_name +_bayesian_parameter_posterior.display_name +_bayesian_parameter_posterior.best_sample_value +_bayesian_parameter_posterior.median +_bayesian_parameter_posterior.uncertainty +_bayesian_parameter_posterior.interval_68_lower +_bayesian_parameter_posterior.interval_68_upper +_bayesian_parameter_posterior.interval_95_lower +_bayesian_parameter_posterior.interval_95_upper +_bayesian_parameter_posterior.ess_bulk +_bayesian_parameter_posterior.r_hat +second.param second 2.0 2.1 0.2 1.9 2.3 1.8 2.4 20 1.01 +first.param first 1.0 1.1 0.1 0.9 1.3 0.8 1.4 10 1.00 +""" + document = gemmi.cif.read_string(cif_text) + + posteriors = BayesianParameterPosteriors() + posteriors.from_cif(document.sole_block()) + + assert [row.unique_name.value for row in posteriors] == ['second.param', 'first.param'] diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_base.py b/tests/unit/easydiffraction/analysis/minimizers/test_base.py index 3b18f535..57c064b0 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_base.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_base.py @@ -87,7 +87,8 @@ def _compute_residuals( assert minim.synced is True 
assert isinstance(result.parameters, list) assert result.parameters[0].value == 42 - # Fitting time should be a positive float + # Successful fits are finalized by the caller after any post-processing. + minim._stop_tracking() assert minim.tracker.fitting_time is not None assert minim.tracker.fitting_time >= 0.0 diff --git a/tests/unit/easydiffraction/analysis/test_enums.py b/tests/unit/easydiffraction/analysis/test_enums.py index fe854e53..75e1ca3e 100644 --- a/tests/unit/easydiffraction/analysis/test_enums.py +++ b/tests/unit/easydiffraction/analysis/test_enums.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: BSD-3-Clause """Tests for analysis/enums.py.""" +from easydiffraction.analysis.enums import FitCorrelationSourceEnum +from easydiffraction.analysis.enums import FitResultKindEnum from easydiffraction.analysis.enums import FitModeEnum @@ -20,3 +22,15 @@ def test_fit_mode_enum_descriptions(): description = member.description() assert isinstance(description, str) assert description + + +def test_fit_result_kind_enum_members_and_default(): + assert FitResultKindEnum.DETERMINISTIC == 'deterministic' + assert FitResultKindEnum.BAYESIAN == 'bayesian' + assert FitResultKindEnum.default() is FitResultKindEnum.DETERMINISTIC + + +def test_fit_correlation_source_enum_members_and_default(): + assert FitCorrelationSourceEnum.DETERMINISTIC == 'deterministic' + assert FitCorrelationSourceEnum.POSTERIOR == 'posterior' + assert FitCorrelationSourceEnum.default() is FitCorrelationSourceEnum.DETERMINISTIC diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py index 0f7ef88b..c53a6754 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting.py +++ b/tests/unit/easydiffraction/analysis/test_fitting.py @@ -71,7 +71,11 @@ class DummyExperiment: parameters = [] class MockFitResults: - pass + def __init__(self): + self.message = '' + self.iterations = 0 + self.chi_square = None + self.engine_result = object() class 
DummyMin: tracker = type('T', (), {'track': staticmethod(lambda a, b: a)})() @@ -82,6 +86,9 @@ def fit(self, params, obj, verbosity=None, **kwargs): def _sync_result_to_parameters(self, params, engine_params): pass + def _stop_tracking(self): + return None + f = Fitter() f.minimizer = DummyMin() diff --git a/tests/unit/easydiffraction/io/cif/test_serialize.py b/tests/unit/easydiffraction/io/cif/test_serialize.py index 3a24b6ce..375e5aa8 100644 --- a/tests/unit/easydiffraction/io/cif/test_serialize.py +++ b/tests/unit/easydiffraction/io/cif/test_serialize.py @@ -27,7 +27,7 @@ def __init__(self): self.value = 3 p = P() - assert MUT.param_to_cif(p) == '_x.y 3.' + assert MUT.param_to_cif(p) == '_x.y 3' def test_format_param_value_with_uncertainty_uses_two_sig_digits(): diff --git a/tests/unit/easydiffraction/io/cif/test_serialize_more.py b/tests/unit/easydiffraction/io/cif/test_serialize_more.py index f526f21a..5158764e 100644 --- a/tests/unit/easydiffraction/io/cif/test_serialize_more.py +++ b/tests/unit/easydiffraction/io/cif/test_serialize_more.py @@ -34,7 +34,7 @@ def __init__(self): out = MUT.datablock_item_to_cif(DB()) assert out.startswith('data_block1') - assert '_aa 42.' in out + assert '_aa 42' in out assert 'loop_' in out assert '_aa' in out assert '7' in out @@ -80,7 +80,7 @@ def __init__(self): self.empty_coll = CategoryCollection(item_type=Item) out = MUT.datablock_item_to_cif(DB()) - assert out == 'data_block1\n\n_aa 42.\n\nloop_\n_aa\n7.' 
+ assert out == 'data_block1\n\n_aa 42\n\nloop_\n_aa\n7' assert '\n\n\n' not in out @@ -159,7 +159,7 @@ def as_cif(self): out_without = MUT.experiment_to_cif(Exp('')) assert out_without.startswith('data_expA') - assert out_without.endswith('1.') + assert out_without.endswith('1') def test_analysis_to_cif_renders_all_sections(): diff --git a/tests/unit/easydiffraction/io/test_results_sidecar.py b/tests/unit/easydiffraction/io/test_results_sidecar.py new file mode 100644 index 00000000..68021f85 --- /dev/null +++ b/tests/unit/easydiffraction/io/test_results_sidecar.py @@ -0,0 +1,104 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Tests for persisted Bayesian results sidecars.""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace + +import numpy as np + + +def _analysis_with_predictive_sidecar() -> object: + from easydiffraction.analysis.categories.bayesian_predictive_datasets.default import ( + BayesianPredictiveDatasetPaths, + BayesianPredictiveDatasets, + ) + from easydiffraction.analysis.categories.bayesian_result.default import BayesianResult + from easydiffraction.analysis.categories.fit_result.default import FitResult + + fit_result = FitResult() + fit_result._set_result_kind('bayesian') + bayesian_result = BayesianResult() + bayesian_result._set_has_posterior_predictive(value=True) + predictive = BayesianPredictiveDatasets() + predictive.create( + experiment_name='hrpt', + x_axis_name='two_theta', + paths=BayesianPredictiveDatasetPaths( + x_path='/predictive/hrpt/x', + best_sample_prediction_path='/predictive/hrpt/best_sample_prediction', + lower_95_path='/predictive/hrpt/lower_95', + upper_95_path='/predictive/hrpt/upper_95', + ), + n_x=2, + n_draws_cached=0, + ) + return SimpleNamespace( + fit_result=fit_result, + bayesian_result=bayesian_result, + bayesian_convergence=SimpleNamespace( + n_draws=SimpleNamespace(value=0), + 
n_chains=SimpleNamespace(value=0), + n_parameters=SimpleNamespace(value=0), + ), + bayesian_distribution_caches=[], + bayesian_pair_caches=[], + bayesian_predictive_datasets=predictive, + fit_results=SimpleNamespace( + posterior_predictive={ + 'hrpt': SimpleNamespace( + experiment_name='hrpt', + x_axis_name='two_theta', + x=np.asarray([1.0, 2.0]), + best_sample_prediction=np.asarray([3.0, 4.0]), + lower_95=np.asarray([2.5, 3.5]), + upper_95=np.asarray([3.5, 4.5]), + lower_68=None, + upper_68=None, + draws=None, + ) + } + ), + _persisted_fit_state_sidecar={}, + _has_persisted_fit_state=lambda: True, + ) + + +def test_write_and_read_analysis_results_sidecar_round_trip_predictive(tmp_path): + from easydiffraction.io.results_sidecar import read_analysis_results_sidecar + from easydiffraction.io.results_sidecar import write_analysis_results_sidecar + + analysis_dir = Path(tmp_path) + analysis = _analysis_with_predictive_sidecar() + + write_analysis_results_sidecar(analysis=analysis, analysis_dir=analysis_dir) + + sidecar_path = analysis_dir / 'results.h5' + assert sidecar_path.is_file() + + restored = _analysis_with_predictive_sidecar() + restored.fit_results = None + read_analysis_results_sidecar(analysis=restored, analysis_dir=analysis_dir) + + assert 'predictive_datasets' in restored._persisted_fit_state_sidecar + dataset = restored._persisted_fit_state_sidecar['predictive_datasets']['hrpt'] + assert np.allclose(dataset['x'], np.asarray([1.0, 2.0])) + assert np.allclose(dataset['best_sample_prediction'], np.asarray([3.0, 4.0])) + + +def test_read_analysis_results_sidecar_warns_when_expected_file_is_missing(tmp_path, monkeypatch): + from easydiffraction.io import results_sidecar as results_sidecar_mod + + analysis = _analysis_with_predictive_sidecar() + warnings: list[str] = [] + monkeypatch.setattr(results_sidecar_mod.log, 'warning', warnings.append) + + results_sidecar_mod.read_analysis_results_sidecar( + analysis=analysis, + analysis_dir=Path(tmp_path), + ) + + 
assert analysis._persisted_fit_state_sidecar == {} + assert any('Expected Bayesian results sidecar is missing' in warning for warning in warnings) diff --git a/tests/unit/easydiffraction/project/test_display.py b/tests/unit/easydiffraction/project/test_display.py index f7e11e37..f534da49 100644 --- a/tests/unit/easydiffraction/project/test_display.py +++ b/tests/unit/easydiffraction/project/test_display.py @@ -47,10 +47,23 @@ def _recorder(*args, **kwargs): plot_calc=record('plot_calc'), plot_meas_vs_calc=record('plot_meas_vs_calc'), _plot_meas_vs_calc_request=record('_plot_meas_vs_calc_request'), + engine='plotly', + _resolve_x_axis=lambda expt_type, x: ('two_theta', 'two_theta', None, None, None), ) project = SimpleNamespace( - analysis=SimpleNamespace(display=analysis_display), + analysis=SimpleNamespace( + display=analysis_display, + fit_results=SimpleNamespace(posterior_predictive={}), + bayesian_result=SimpleNamespace( + has_pair_cache=SimpleNamespace(value=False), + has_posterior_predictive=SimpleNamespace(value=False), + ), + bayesian_pair_caches=[], + bayesian_predictive_datasets=[], + _persisted_fit_state_sidecar={}, + ), rendering=SimpleNamespace(plotter=plotter), + experiments={'hrpt': SimpleNamespace(type=SimpleNamespace())}, free_parameters=[], verbosity=SimpleNamespace(fit=SimpleNamespace(value='full')), ) @@ -267,6 +280,67 @@ def fake_activity_indicator(label, *, verbosity): ] +def test_posterior_predictive_skips_processing_indicator_for_restored_cache(monkeypatch): + import easydiffraction.project.display as display_mod + + project, calls = _make_project_stub() + project.analysis = SimpleNamespace( + fit_results=object(), + bayesian_result=SimpleNamespace(has_posterior_predictive=SimpleNamespace(value=True)), + bayesian_predictive_datasets=[ + SimpleNamespace( + experiment_name=SimpleNamespace(value='hrpt'), + x_axis_name=SimpleNamespace(value='two_theta'), + draws_path=SimpleNamespace(value=None), + ) + ], + _persisted_fit_state_sidecar={ + 
'predictive_datasets': { + 'hrpt': { + 'x': [1.0, 2.0], + 'best_sample_prediction': [3.0, 4.0], + } + } + }, + ) + project.experiments = {'hrpt': SimpleNamespace(type=SimpleNamespace())} + project.rendering.plotter.engine = 'plotly' + project.rendering.plotter._resolve_x_axis = lambda expt_type, x: ( + 'two_theta', + 'two_theta', + None, + None, + None, + ) + display = ProjectDisplay(project) + indicator_calls: list[tuple[str, VerbosityEnum]] = [] + + @contextmanager + def fake_activity_indicator(label, *, verbosity): + indicator_calls.append((label, verbosity)) + yield object() + + monkeypatch.setattr(display_mod, 'activity_indicator', fake_activity_indicator) + + display.posterior.predictive('hrpt') + + assert calls == [ + ( + 'plot_posterior_predictive', + (), + { + 'expt_name': 'hrpt', + 'style': 'band', + 'x_min': None, + 'x_max': None, + 'show_residual': None, + 'x': None, + }, + ) + ] + assert indicator_calls == [] + + def test_posterior_distribution_without_param_plots_all_free_parameters(): project, calls = _make_project_stub() project.free_parameters = ['a', 'b'] diff --git a/tests/unit/easydiffraction/project/test_project_load.py b/tests/unit/easydiffraction/project/test_project_load.py index e44c1260..8190e2f3 100644 --- a/tests/unit/easydiffraction/project/test_project_load.py +++ b/tests/unit/easydiffraction/project/test_project_load.py @@ -125,6 +125,58 @@ def test_round_trips_constraints(self, tmp_path): assert loaded.analysis.constraints[0].expression.value == 'b_param = a_param' assert loaded.analysis.constraints.enabled is True + def test_round_trips_deterministic_fit_state_and_keeps_live_parameter_values(self, tmp_path): + original = Project(name='fit_state') + original.structures.create(name='lbco') + structure = original.structures['lbco'] + structure.space_group.name_h_m = 'P m -3 m' + structure.cell.length_a = 3.88 + parameter = structure.cell.length_a + parameter.fit_min = 3.8 + parameter.fit_max = 3.9 + 
parameter._set_fit_bounds_uncertainty_multiplier(4.0) + parameter._fit_start_value = 3.87 + parameter._fit_start_uncertainty = 0.02 + + original.analysis.fit_parameters.create( + param_unique_name=parameter.unique_name, + fit_min=parameter.fit_min, + fit_max=parameter.fit_max, + fit_bounds_uncertainty_multiplier=4.0, + start_value=3.87, + start_uncertainty=0.02, + ) + original.analysis.fit_result._set_result_kind('deterministic') + original.analysis.fit_result._set_success(value=True) + original.analysis.fit_result._set_message('Fit converged') + original.analysis.fit_result._set_iterations(37) + original.analysis.fit_result._set_fitting_time(1.82) + original.analysis.fit_result._set_reduced_chi_square(1.031) + original.analysis.deterministic_result._set_optimizer_name('lmfit') + original.analysis.deterministic_result._set_method_name('leastsq') + original.analysis.deterministic_parameter_results.create( + param_unique_name=parameter.unique_name, + final_value=9.99, + final_uncertainty=0.07, + at_lower_bound=False, + at_upper_bound=False, + ) + original.analysis._set_has_persisted_fit_state(value=True) + original.save_as(str(tmp_path / 'proj')) + + loaded = Project.load(str(tmp_path / 'proj')) + loaded_parameter = loaded.structures['lbco'].cell.length_a + + assert loaded.analysis.fit_result.result_kind.value == 'deterministic' + assert loaded.analysis.fit_parameters[parameter.unique_name].fit_min.value == 3.8 + assert loaded_parameter.value == 3.88 + assert loaded_parameter.fit_min == 3.8 + assert loaded_parameter.fit_max == 3.9 + assert loaded_parameter.fit_bounds_uncertainty_multiplier == 4.0 + assert loaded_parameter._fit_start_value == 3.87 + assert loaded_parameter._fit_start_uncertainty == 0.02 + assert loaded_parameter.uncertainty == 0.07 + class TestLoadAnalysisCifFallback: """Load falls back from analysis/analysis.cif to analysis.cif at root.""" diff --git a/tests/unit/easydiffraction/project/test_project_save.py 
b/tests/unit/easydiffraction/project/test_project_save.py index 27212251..ec6a5ca6 100644 --- a/tests/unit/easydiffraction/project/test_project_save.py +++ b/tests/unit/easydiffraction/project/test_project_save.py @@ -62,3 +62,15 @@ def test_project_save_lists_existing_analysis_results_csv(tmp_path, monkeypatch, out = capsys.readouterr().out assert 'analysis.cif' in out assert 'results.csv' in out + + +def test_project_save_omits_empty_fit_state_sections(tmp_path): + from easydiffraction.project.project import Project + + project = Project(name='no_fit_state') + project.save_as(str(tmp_path / 'proj')) + + analysis_cif = (tmp_path / 'proj' / 'analysis' / 'analysis.cif').read_text() + + assert '_fit_parameter.param_unique_name' not in analysis_cif + assert '_fit_result.result_kind' not in analysis_cif diff --git a/tests/unit/easydiffraction/test___main__.py b/tests/unit/easydiffraction/test___main__.py index 62ad8c68..a78710b3 100644 --- a/tests/unit/easydiffraction/test___main__.py +++ b/tests/unit/easydiffraction/test___main__.py @@ -44,6 +44,14 @@ def test_cli_subcommands_call_utils(monkeypatch): import easydiffraction.__main__ as main_mod logs = [] + monkeypatch.setattr(ed, 'list_data', lambda: logs.append('LIST_DATA')) + monkeypatch.setattr( + ed, + 'download_data', + lambda id, destination='data', overwrite=False: logs.append( + f'DATA_{id}_{destination}_{overwrite}' + ), + ) monkeypatch.setattr(ed, 'list_tutorials', lambda: logs.append('LIST')) monkeypatch.setattr( ed, @@ -56,14 +64,25 @@ def test_cli_subcommands_call_utils(monkeypatch): lambda id, destination='tutorials', overwrite=False: logs.append(f'DOWNLOAD_{id}'), ) - res1 = runner.invoke(main_mod.app, ['list-tutorials']) - res2 = runner.invoke(main_mod.app, ['download-all-tutorials']) - res3 = runner.invoke(main_mod.app, ['download-tutorial', '1']) + res0 = runner.invoke(main_mod.app, ['list-data']) + res1 = runner.invoke(main_mod.app, ['download-data', '30', '--destination', 'projects']) + res2 = 
runner.invoke(main_mod.app, ['list-tutorials']) + res3 = runner.invoke(main_mod.app, ['download-all-tutorials']) + res4 = runner.invoke(main_mod.app, ['download-tutorial', '1']) + assert res0.exit_code == 0 assert res1.exit_code == 0 assert res2.exit_code == 0 assert res3.exit_code == 0 - assert logs == ['LIST', 'DOWNLOAD_ALL', 'DOWNLOAD_1'] + assert res4.exit_code == 0 + assert logs == ['LIST_DATA', 'DATA_30_projects_False', 'LIST', 'DOWNLOAD_ALL', 'DOWNLOAD_1'] + + +def test_cli_project_first_argument_normalization_supports_global_data_commands(): + import easydiffraction.__main__ as main_mod + + assert main_mod._normalized_cli_args(['list-data']) == ['list-data'] + assert main_mod._normalized_cli_args(['download-data', '30']) == ['download-data', '30'] def test_cli_fit_loads_and_fits(monkeypatch, tmp_path): From 9d0542c96277ca7c494ed8f5afe79e56a4fe71e3 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 22:14:13 +0200 Subject: [PATCH 36/72] Remove deterministic parameter-result persistence --- .github/copilot-instructions.md | 9 ++ src/easydiffraction/analysis/__init__.py | 9 -- src/easydiffraction/analysis/analysis.py | 63 +------- .../analysis/categories/__init__.py | 6 - .../__init__.py | 12 -- .../default.py | 152 ------------------ .../factory.py | 17 -- src/easydiffraction/io/cif/serialize.py | 10 +- .../test_deterministic_parameter_results.py | 17 -- .../io/cif/test_serialize_more.py | 9 +- .../project/test_project_load.py | 9 +- 11 files changed, 20 insertions(+), 293 deletions(-) delete mode 100644 src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py delete mode 100644 src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py delete mode 100644 src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py delete mode 100644 tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py diff --git a/.github/copilot-instructions.md 
b/.github/copilot-instructions.md index d5f96553..46a066d2 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -160,6 +160,12 @@ Notes: `docs/dev/package-structure/short.md` automatically — never edit those by hand. Don't review auto-fixes; accept and move on. Then `pixi run check` until clean. +- When a check command needs saved output for analysis, capture the log + and preserve the command exit code with a zsh-safe variable name: + `pixi run check > /tmp/easydiffraction-check.log 2>&1; check_exit_code=$?; tail -n 200 /tmp/easydiffraction-check.log; exit $check_exit_code`. + Never assign to `status` in zsh; it is readonly. Use task-specific + names such as `check_exit_code`, `unit_tests_exit_code`, or + `script_tests_exit_code`. - Open issues / design questions / planned improvements live in `docs/dev/issues/open.md` (priority-ordered). On resolution, move to `docs/dev/issues/closed.md` and update the relevant ADR or @@ -204,6 +210,9 @@ When asked to create a plan: files likely to change, decisions already made, open questions, verification commands for Phase 2, and a short suggested commit message or branch name when useful. +- Verification commands in plans must include the zsh-safe log-capture + pattern from **Workflow** whenever saved output is needed for later + analysis. - Before saving a plan, verify that referenced files, directories, scripts, and task names exist locally when that is practical. If a referenced tool is optional or missing, include an available fallback. 
diff --git a/src/easydiffraction/analysis/__init__.py b/src/easydiffraction/analysis/__init__.py index 17d10219..f9d5e94d 100644 --- a/src/easydiffraction/analysis/__init__.py +++ b/src/easydiffraction/analysis/__init__.py @@ -37,15 +37,6 @@ from easydiffraction.analysis.categories.bayesian_result import BayesianResultFactory from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.bayesian_sampler import BayesianSamplerFactory -from easydiffraction.analysis.categories.deterministic_parameter_results import ( - DeterministicParameterResultItem, -) -from easydiffraction.analysis.categories.deterministic_parameter_results import ( - DeterministicParameterResults, -) -from easydiffraction.analysis.categories.deterministic_parameter_results import ( - DeterministicParameterResultsFactory, -) from easydiffraction.analysis.categories.deterministic_result import DeterministicResult from easydiffraction.analysis.categories.deterministic_result import DeterministicResultFactory from easydiffraction.analysis.categories.fit_parameter_correlations import ( diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 352f2d1f..63fe7f80 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -29,9 +29,6 @@ from easydiffraction.analysis.categories.bayesian_result import BayesianResult from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.constraints.factory import ConstraintsFactory -from easydiffraction.analysis.categories.deterministic_parameter_results import ( - DeterministicParameterResults, -) from easydiffraction.analysis.categories.deterministic_result import DeterministicResult from easydiffraction.analysis.categories.fit_parameter_correlations import FitParameterCorrelations from easydiffraction.analysis.categories.fit_parameters import FitParameters 
@@ -53,7 +50,6 @@ from easydiffraction.analysis.fit_helpers.bayesian import PosteriorSamples from easydiffraction.analysis.fit_helpers.reporting import FitResults from easydiffraction.analysis.fitting import Fitter -from easydiffraction.analysis.minimizers.base import BOUNDARY_PROXIMITY_FRACTION from easydiffraction.core.category_owner import CategoryOwner from easydiffraction.core.guard import _apply_help_filter from easydiffraction.core.singleton import ConstraintsHandler @@ -448,11 +444,6 @@ def deterministic_result(self) -> DeterministicResult: """Persisted deterministic fit-result metadata.""" return self._deterministic_result - @property - def deterministic_parameter_results(self) -> DeterministicParameterResults: - """Persisted deterministic parameter-result summaries.""" - return self._deterministic_parameter_results - @property def bayesian_result(self) -> BayesianResult: """Persisted Bayesian fit-result metadata.""" @@ -529,7 +520,6 @@ def __init__(self, project: object) -> None: self._fit_result = FitResult() self._fit_parameter_correlations = FitParameterCorrelations() self._deterministic_result = DeterministicResult() - self._deterministic_parameter_results = DeterministicParameterResults() self._bayesian_result = BayesianResult() self._bayesian_sampler = BayesianSampler() self._bayesian_convergence = BayesianConvergence() @@ -573,10 +563,6 @@ def _ordered_restored_parameter_names(self) -> list[str]: if posterior_rows: return [row.unique_name.value for row in posterior_rows] - deterministic_rows = list(self.deterministic_parameter_results) - if deterministic_rows: - return [row.param_unique_name.value for row in deterministic_rows] - return [row.param_unique_name.value for row in self.fit_parameters] def _restore_live_parameter_state(self, param_map: dict[str, Parameter]) -> None: @@ -598,12 +584,6 @@ def _restore_live_parameter_state(self, param_map: dict[str, Parameter]) -> None parameter._fit_start_value = row.start_value.value 
parameter._fit_start_uncertainty = row.start_uncertainty.value - for row in self.deterministic_parameter_results: - parameter = param_map.get(row.param_unique_name.value) - if parameter is None or row.final_uncertainty.value is None: - continue - parameter.uncertainty = float(row.final_uncertainty.value) - for row in self.bayesian_parameter_posteriors: parameter = param_map.get(row.unique_name.value) if parameter is None or row.uncertainty.value is None: @@ -1110,10 +1090,7 @@ def _fit_state_categories(self) -> list[object]: return categories if result_kind is FitResultKindEnum.DETERMINISTIC: - categories.extend([ - self.deterministic_result, - self.deterministic_parameter_results, - ]) + categories.append(self.deterministic_result) return categories categories.extend([ @@ -1133,7 +1110,6 @@ def _clear_persisted_fit_state(self) -> None: self._fit_result = FitResult() self._fit_parameter_correlations = FitParameterCorrelations() self._deterministic_result = DeterministicResult() - self._deterministic_parameter_results = DeterministicParameterResults() self._bayesian_result = BayesianResult() self._bayesian_sampler = BayesianSampler() self._bayesian_convergence = BayesianConvergence() @@ -1160,28 +1136,6 @@ def _capture_fit_parameter_state(self, parameters: list[Parameter]) -> None: self._set_has_persisted_fit_state(value=True) - @staticmethod - def _parameter_is_at_fit_bound( - param: Parameter, - *, - use_upper_bound: bool, - ) -> bool: - """Return whether a parameter finished near a fit bound.""" - value = param.value - if value is None: - return False - - bound = param.fit_max if use_upper_bound else param.fit_min - if not np.isfinite(bound): - return False - - span = param.fit_max - param.fit_min - if np.isfinite(span) and span > 0: - tolerance = BOUNDARY_PROXIMITY_FRACTION * span - else: - tolerance = BOUNDARY_PROXIMITY_FRACTION * max(abs(bound), 1.0) - return abs(value - bound) <= tolerance - def _selected_parameters_for_fit(self, experiments: list[object]) -> 
list[Parameter]: """ Return unique live parameters involved in the current fit slice. @@ -1337,21 +1291,6 @@ def _store_deterministic_result_projection( self.deterministic_result._set_covariance_available(value=covariance is not None) self.deterministic_result._set_correlation_available(value=correlation_matrix is not None) - for param in fitted_parameters: - self.deterministic_parameter_results.create( - param_unique_name=param.unique_name, - final_value=param.value, - final_uncertainty=param.uncertainty, - at_lower_bound=self._parameter_is_at_fit_bound( - param, - use_upper_bound=False, - ), - at_upper_bound=self._parameter_is_at_fit_bound( - param, - use_upper_bound=True, - ), - ) - if correlation_matrix is not None: self._store_correlation_projection( unique_names=[param.unique_name for param in fitted_parameters], diff --git a/src/easydiffraction/analysis/categories/__init__.py b/src/easydiffraction/analysis/categories/__init__.py index 278f2692..743e0ff0 100644 --- a/src/easydiffraction/analysis/categories/__init__.py +++ b/src/easydiffraction/analysis/categories/__init__.py @@ -28,12 +28,6 @@ from easydiffraction.analysis.categories.bayesian_sampler import BayesianSampler from easydiffraction.analysis.categories.constraints import Constraint from easydiffraction.analysis.categories.constraints import Constraints -from easydiffraction.analysis.categories.deterministic_parameter_results import ( - DeterministicParameterResultItem, -) -from easydiffraction.analysis.categories.deterministic_parameter_results import ( - DeterministicParameterResults, -) from easydiffraction.analysis.categories.deterministic_result import DeterministicResult from easydiffraction.analysis.categories.fit_parameter_correlations import ( FitParameterCorrelationItem, diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py deleted file mode 100644 index 
ea4e4467..00000000 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause - -from easydiffraction.analysis.categories.deterministic_parameter_results.default import ( - DeterministicParameterResultItem, -) -from easydiffraction.analysis.categories.deterministic_parameter_results.default import ( - DeterministicParameterResults, -) -from easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( - DeterministicParameterResultsFactory, -) diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py deleted file mode 100644 index 2f13d759..00000000 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/default.py +++ /dev/null @@ -1,152 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause -"""Deterministic fit parameter-result rows.""" - -from __future__ import annotations - -from easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( - DeterministicParameterResultsFactory, -) -from easydiffraction.core.category import CategoryCollection -from easydiffraction.core.category import CategoryItem -from easydiffraction.core.metadata import TypeInfo -from easydiffraction.core.validation import AttributeSpec -from easydiffraction.core.validation import RegexValidator -from easydiffraction.core.variable import BoolDescriptor -from easydiffraction.core.variable import NumericDescriptor -from easydiffraction.core.variable import StringDescriptor -from easydiffraction.io.cif.handler import CifHandler - - -class DeterministicParameterResultItem(CategoryItem): - """Single persisted deterministic parameter-result row.""" - - _category_code = 'deterministic_parameter_result' 
- _category_entry_name = 'param_unique_name' - - def __init__(self) -> None: - super().__init__() - self._param_unique_name = StringDescriptor( - name='param_unique_name', - description='Unique name of the persisted parameter result row.', - value_spec=AttributeSpec( - default='_', - validator=RegexValidator(pattern=r'^[A-Za-z_][A-Za-z0-9_.]*$'), - ), - cif_handler=CifHandler(names=['_deterministic_parameter_result.param_unique_name']), - ) - self._final_value = NumericDescriptor( - name='final_value', - description='Final fitted value for the persisted parameter result.', - value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler(names=['_deterministic_parameter_result.final_value']), - ) - self._final_uncertainty = NumericDescriptor( - name='final_uncertainty', - description='Final uncertainty for the persisted parameter result.', - value_spec=AttributeSpec(default=None, allow_none=True), - cif_handler=CifHandler(names=['_deterministic_parameter_result.final_uncertainty']), - ) - self._at_lower_bound = BoolDescriptor( - name='at_lower_bound', - description='Whether the parameter finished at the lower fit bound.', - value_spec=AttributeSpec(default=False), - cif_handler=CifHandler(names=['_deterministic_parameter_result.at_lower_bound']), - ) - self._at_upper_bound = BoolDescriptor( - name='at_upper_bound', - description='Whether the parameter finished at the upper fit bound.', - value_spec=AttributeSpec(default=False), - cif_handler=CifHandler(names=['_deterministic_parameter_result.at_upper_bound']), - ) - - @property - def param_unique_name(self) -> StringDescriptor: - """Unique name of the persisted parameter result row.""" - return self._param_unique_name - - def _set_param_unique_name(self, value: str) -> None: - """Set the parameter unique name for internal callers.""" - self._param_unique_name.value = value - - @property - def final_value(self) -> NumericDescriptor: - """Final fitted value for the persisted parameter result.""" - 
return self._final_value - - def _set_final_value(self, value: float | None) -> None: - """Set the final fitted value for internal callers.""" - self._final_value.value = value - - @property - def final_uncertainty(self) -> NumericDescriptor: - """Final uncertainty for the persisted parameter result.""" - return self._final_uncertainty - - def _set_final_uncertainty(self, value: float | None) -> None: - """Set the final uncertainty for internal callers.""" - self._final_uncertainty.value = value - - @property - def at_lower_bound(self) -> BoolDescriptor: - """Whether the parameter finished at the lower fit bound.""" - return self._at_lower_bound - - def _set_at_lower_bound(self, *, value: bool) -> None: - """Set the lower-bound flag for internal callers.""" - self._at_lower_bound.value = value - - @property - def at_upper_bound(self) -> BoolDescriptor: - """Whether the parameter finished at the upper fit bound.""" - return self._at_upper_bound - - def _set_at_upper_bound(self, *, value: bool) -> None: - """Set the upper-bound flag for internal callers.""" - self._at_upper_bound.value = value - - -@DeterministicParameterResultsFactory.register -class DeterministicParameterResults(CategoryCollection): - """Collection of persisted deterministic parameter-result rows.""" - - type_info = TypeInfo( - tag='default', - description='Persisted deterministic parameter-result rows', - ) - - def __init__(self) -> None: - super().__init__(item_type=DeterministicParameterResultItem) - - def create( - self, - *, - param_unique_name: str, - final_value: float | None = None, - final_uncertainty: float | None = None, - at_lower_bound: bool = False, - at_upper_bound: bool = False, - ) -> None: - """ - Create a persisted deterministic parameter-result row. - - Parameters - ---------- - param_unique_name : str - Unique name of the persisted parameter result row. - final_value : float | None, default=None - Final fitted value for the persisted parameter result. 
- final_uncertainty : float | None, default=None - Final uncertainty for the persisted parameter result. - at_lower_bound : bool, default=False - Whether the parameter finished at the lower fit bound. - at_upper_bound : bool, default=False - Whether the parameter finished at the upper fit bound. - """ - item = DeterministicParameterResultItem() - item._set_param_unique_name(param_unique_name) - item._set_final_value(final_value) - item._set_final_uncertainty(final_uncertainty) - item._set_at_lower_bound(value=at_lower_bound) - item._set_at_upper_bound(value=at_upper_bound) - self.add(item) diff --git a/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py b/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py deleted file mode 100644 index ff50b256..00000000 --- a/src/easydiffraction/analysis/categories/deterministic_parameter_results/factory.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause -"""Deterministic-parameter-results factory.""" - -from __future__ import annotations - -from typing import ClassVar - -from easydiffraction.core.factory import FactoryBase - - -class DeterministicParameterResultsFactory(FactoryBase): - """Create deterministic-parameter-result collections by tag.""" - - _default_rules: ClassVar[dict] = { - frozenset(): 'default', - } diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index b2436e06..a47d5ef6 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -341,19 +341,19 @@ def project_info_to_cif(info: object) -> str: title = f'{info.title}' if ' ' in title: - title = f"'{title}'" + title = format_value(info.title) if len(info.description) > _CIF_DESCRIPTION_WRAP_LEN: description = f'\n;\n{info.description}\n;' elif info.description: description = f'{info.description}' if ' ' in description: - description = 
f"'{description}'" + description = format_value(info.description) else: description = '?' - created = f"'{info.created.strftime('%d %b %Y %H:%M:%S')}'" - last_modified = f"'{info.last_modified.strftime('%d %b %Y %H:%M:%S')}'" + created = format_value(info.created.strftime('%d %b %Y %H:%M:%S')) + last_modified = format_value(info.last_modified.strftime('%d %b %Y %H:%M:%S')) return ( f'_project.id {name}\n' @@ -576,7 +576,6 @@ def _has_persisted_fit_state_sections(block: object) -> bool: loop_tags = ( '_fit_parameter.param_unique_name', '_fit_parameter_correlation.param_unique_name_i', - '_deterministic_parameter_result.param_unique_name', '_bayesian_parameter_posterior.unique_name', '_bayesian_distribution_cache.param_unique_name', '_bayesian_pair_cache.param_unique_name_x', @@ -598,7 +597,6 @@ def _restore_common_fit_state(analysis: object, block: object) -> None: def _restore_deterministic_fit_state(analysis: object, block: object) -> None: """Restore deterministic-only persisted fit-state categories.""" analysis.deterministic_result.from_cif(block) - analysis.deterministic_parameter_results.from_cif(block) def _restore_bayesian_fit_state(analysis: object, block: object) -> None: diff --git a/tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py b/tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py deleted file mode 100644 index 7dcb37b7..00000000 --- a/tests/unit/easydiffraction/analysis/categories/test_deterministic_parameter_results.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause -"""Tests for analysis/categories/deterministic_parameter_results/.""" - - -def test_deterministic_parameter_results_factory_create(): - from easydiffraction.analysis.categories.deterministic_parameter_results.default import ( - DeterministicParameterResults, - ) - from 
easydiffraction.analysis.categories.deterministic_parameter_results.factory import ( - DeterministicParameterResultsFactory, - ) - - results = DeterministicParameterResultsFactory.create('default') - - assert DeterministicParameterResultsFactory.default_tag() == 'default' - assert isinstance(results, DeterministicParameterResults) diff --git a/tests/unit/easydiffraction/io/cif/test_serialize_more.py b/tests/unit/easydiffraction/io/cif/test_serialize_more.py index 5158764e..8328f4e1 100644 --- a/tests/unit/easydiffraction/io/cif/test_serialize_more.py +++ b/tests/unit/easydiffraction/io/cif/test_serialize_more.py @@ -107,11 +107,10 @@ def test_project_info_to_cif_contains_core_fields(): info = ProjectInfo(name='p1', title='My Title', description='Some description text') out = MUT.project_info_to_cif(info) assert '_project.id p1' in out - assert '_project.title' in out - assert 'My Title' in out - assert '_project.description' in out - assert '_project.created' in out - assert '_project.last_modified' in out + assert '_project.title "My Title"' in out + assert '_project.description "Some description text"' in out + assert '_project.created "' in out + assert '_project.last_modified "' in out def test_experiment_to_cif_with_and_without_data(): diff --git a/tests/unit/easydiffraction/project/test_project_load.py b/tests/unit/easydiffraction/project/test_project_load.py index 8190e2f3..f60056ed 100644 --- a/tests/unit/easydiffraction/project/test_project_load.py +++ b/tests/unit/easydiffraction/project/test_project_load.py @@ -132,6 +132,8 @@ def test_round_trips_deterministic_fit_state_and_keeps_live_parameter_values(sel structure.space_group.name_h_m = 'P m -3 m' structure.cell.length_a = 3.88 parameter = structure.cell.length_a + parameter.free = True + parameter.uncertainty = 0.07 parameter.fit_min = 3.8 parameter.fit_max = 3.9 parameter._set_fit_bounds_uncertainty_multiplier(4.0) @@ -154,13 +156,6 @@ def 
test_round_trips_deterministic_fit_state_and_keeps_live_parameter_values(sel original.analysis.fit_result._set_reduced_chi_square(1.031) original.analysis.deterministic_result._set_optimizer_name('lmfit') original.analysis.deterministic_result._set_method_name('leastsq') - original.analysis.deterministic_parameter_results.create( - param_unique_name=parameter.unique_name, - final_value=9.99, - final_uncertainty=0.07, - at_lower_bound=False, - at_upper_bound=False, - ) original.analysis._set_has_persisted_fit_state(value=True) original.save_as(str(tmp_path / 'proj')) From 6e741ff75136e804de74c87df5b6cf2dc12a13ec Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 22:55:18 +0200 Subject: [PATCH 37/72] Update data index reference and hash --- src/easydiffraction/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 7d54036e..02115d39 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -31,9 +31,9 @@ _DATA_REPO = 'easyscience/diffraction' _DATA_ROOT = 'data' # commit SHA preferred -_DATA_INDEX_REF = '39dad256ba1faedf4b26fad3e44a361c802fd8e4' +_DATA_INDEX_REF = '0e3a916ab27c36ee1f600889de8aef1fb0fd0d82' # macOS: sha256sum index.json -_DATA_INDEX_HASH = 'sha256:301aaca0f35927cd63715b858a1f03164e4d05d1d39234325a3798d2b4a5f4ea' +_DATA_INDEX_HASH = 'sha256:887ab81f440ed32455994347d34ea66f27314e895ee2fb719eaebb3acb228d5e' def _build_data_url(path: str) -> str: From 4814a59603bc0fd1a02cac81cf0e10b0c235538f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 23:06:10 +0200 Subject: [PATCH 38/72] Consolidate saved project tutorials to download_data --- docs/docs/tutorials/ed-18.ipynb | 39 ++++---------- docs/docs/tutorials/ed-18.py | 7 +-- docs/docs/tutorials/ed-23.ipynb | 50 ++++++------------ docs/docs/tutorials/ed-23.py | 2 +- docs/docs/tutorials/ed-24.ipynb | 93 
++++++++------------------------- docs/docs/tutorials/ed-24.py | 13 ++--- 6 files changed, 56 insertions(+), 148 deletions(-) diff --git a/docs/docs/tutorials/ed-18.ipynb b/docs/docs/tutorials/ed-18.ipynb index c1200010..7473bc6e 100644 --- a/docs/docs/tutorials/ed-18.ipynb +++ b/docs/docs/tutorials/ed-18.ipynb @@ -53,8 +53,7 @@ "outputs": [], "source": [ "from easydiffraction import Project\n", - "from easydiffraction import download_data\n", - "from easydiffraction import extract_project_from_zip" + "from easydiffraction import download_data" ] }, { @@ -62,7 +61,7 @@ "id": "4", "metadata": {}, "source": [ - "## Download Project Archive" + "## Download Saved Project" ] }, { @@ -72,31 +71,13 @@ "metadata": {}, "outputs": [], "source": [ - "zip_path = download_data(id=30, destination='data')" + "project_dir = download_data(id=36, destination='projects')" ] }, { "cell_type": "markdown", "id": "6", "metadata": {}, - "source": [ - "## Extract Project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "project_dir = extract_project_from_zip(zip_path, destination='data')" - ] - }, - { - "cell_type": "markdown", - "id": "8", - "metadata": {}, "source": [ "## Load Project" ] @@ -104,7 +85,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -113,7 +94,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "8", "metadata": {}, "source": [ "## Perform Analysis" @@ -122,7 +103,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -131,7 +112,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "10", "metadata": {}, "source": [ "## Show Results" @@ -140,7 +121,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -150,7 +131,7 @@ { "cell_type": "code", "execution_count": null, - 
"id": "14", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -160,7 +141,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "13", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/docs/tutorials/ed-18.py b/docs/docs/tutorials/ed-18.py index fe44056e..31e903c0 100644 --- a/docs/docs/tutorials/ed-18.py +++ b/docs/docs/tutorials/ed-18.py @@ -19,13 +19,10 @@ from easydiffraction import download_data # %% [markdown] -# ## Download Project -# -# Project archives are extracted automatically, and the returned path -# points to the saved project directory. +# ## Download Saved Project # %% -project_dir = download_data(id=30, destination='projects') +project_dir = download_data(id=36, destination='projects') # %% [markdown] # ## Load Project diff --git a/docs/docs/tutorials/ed-23.ipynb b/docs/docs/tutorials/ed-23.ipynb index 02318d4e..9dd2d831 100644 --- a/docs/docs/tutorials/ed-23.ipynb +++ b/docs/docs/tutorials/ed-23.ipynb @@ -56,10 +56,11 @@ "id": "4", "metadata": {}, "source": [ - "## Download Saved Project Archive\n", + "## Download Saved Project\n", "\n", - "The archive should contain a saved project directory with a partially\n", - "completed sequential fit, including `analysis/results.csv`." + "The returned path points directly to the saved project directory with\n", + "a partially completed sequential fit, including\n", + "`analysis/results.csv`." ] }, { @@ -69,34 +70,13 @@ "metadata": {}, "outputs": [], "source": [ - "zip_path = ed.download_data(id=34, destination='data')" + "project_dir = ed.download_data(id=37, destination='projects')" ] }, { "cell_type": "markdown", "id": "6", "metadata": {}, - "source": [ - "## Extract Project\n", - "\n", - "Extract the saved project directory locally. For a project you\n", - "already have on disk, set `project_dir` directly instead." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "project_dir = ed.extract_project_from_zip(zip_path, destination='projects')" - ] - }, - { - "cell_type": "markdown", - "id": "8", - "metadata": {}, "source": [ "## Load Saved Project" ] @@ -104,7 +84,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -113,7 +93,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "8", "metadata": {}, "source": [ "## Resume Sequential Analysis\n", @@ -127,7 +107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -136,7 +116,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "10", "metadata": {}, "source": [ "## Replay Fitted Datasets\n", @@ -147,7 +127,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -157,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "14", + "id": "12", "metadata": {}, "source": [ "\n", @@ -167,7 +147,7 @@ { "cell_type": "code", "execution_count": null, - "id": "15", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -177,7 +157,7 @@ }, { "cell_type": "markdown", - "id": "16", + "id": "14", "metadata": {}, "source": [ "## Plot Parameter Evolution\n", @@ -190,7 +170,7 @@ { "cell_type": "code", "execution_count": null, - "id": "17", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -200,7 +180,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "16", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/docs/tutorials/ed-23.py b/docs/docs/tutorials/ed-23.py index 6388efcd..8789fece 100644 --- a/docs/docs/tutorials/ed-23.py +++ b/docs/docs/tutorials/ed-23.py @@ -21,7 +21,7 @@ # `analysis/results.csv`. 
# %% -project_dir = ed.download_data(id=34, destination='projects') +project_dir = ed.download_data(id=37, destination='projects') # %% [markdown] # ## Load Saved Project diff --git a/docs/docs/tutorials/ed-24.ipynb b/docs/docs/tutorials/ed-24.ipynb index 1f1112f3..745d962f 100644 --- a/docs/docs/tutorials/ed-24.ipynb +++ b/docs/docs/tutorials/ed-24.ipynb @@ -49,8 +49,6 @@ "metadata": {}, "outputs": [], "source": [ - "from pathlib import Path\n", - "\n", "import easydiffraction as ed" ] }, @@ -59,12 +57,11 @@ "id": "4", "metadata": {}, "source": [ - "## Locate the Saved Project\n", + "## Download Saved Project\n", "\n", - "In the repository, the saved project currently lives under\n", - "`tmp/tutorials/projects/lbco_hrpt_bayesian`. Once a downloadable\n", - "archive is available, replace this path with the extracted project\n", - "directory instead." + "The returned path points directly to the saved project directory with\n", + "the completed Bayesian fit and persisted posterior samples and plot\n", + "caches." 
] }, { @@ -74,20 +71,12 @@ "metadata": {}, "outputs": [], "source": [ - "project_dir = Path('../../../tmp/tutorials/projects/lbco_hrpt_bayesian')" + "project_dir = ed.download_data(id=35, destination='projects')" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", - "id": "7", + "id": "6", "metadata": {}, "source": [ "## Load the Saved Bayesian Project\n", @@ -99,7 +88,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -108,7 +97,7 @@ }, { "cell_type": "markdown", - "id": "9", + "id": "8", "metadata": {}, "source": [ "## Review the Saved Fit Summary\n", @@ -121,7 +110,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -130,19 +119,18 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "10", "metadata": {}, "source": [ - "## Show Correlations and the Fitted Pattern\n", + "## Show Correlations\n", "\n", - "The correlation matrix and measured-vs-calculated pattern are restored\n", - "from the saved project state." + "The correlation matrix is restored from the saved project state." ] }, { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -151,44 +139,7 @@ }, { "cell_type": "markdown", - "id": "13", - "metadata": {}, - "source": [ - "Show the standard measured vs calculated pattern for the full range." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "14", - "metadata": {}, - "outputs": [], - "source": [ - "project.display.pattern(expt_name='hrpt')" - ] - }, - { - "cell_type": "markdown", - "id": "15", - "metadata": {}, - "source": [ - "A zoomed view is useful for checking the fit quality in a narrow\n", - "region of the diffraction pattern." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16", - "metadata": {}, - "outputs": [], - "source": [ - "project.display.pattern(expt_name='hrpt', x_min=92, x_max=93)" - ] - }, - { - "cell_type": "markdown", - "id": "17", + "id": "12", "metadata": {}, "source": [ "## Inspect Posterior Densities and Pair Structure\n", @@ -200,7 +151,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -210,7 +161,7 @@ { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -219,19 +170,21 @@ }, { "cell_type": "markdown", - "id": "20", + "id": "15", "metadata": {}, "source": [ "## Plot Posterior Predictive Checks\n", "\n", "The posterior predictive view reuses the cached predictive summary\n", - "stored in the project rather than recalculating it on first display." + "stored in the project rather than recalculating it on first display.\n", + "It overlays the 95% credible interval propagated from the posterior\n", + "samples." ] }, { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -240,7 +193,7 @@ }, { "cell_type": "markdown", - "id": "22", + "id": "17", "metadata": {}, "source": [ "A zoomed view is useful for checking the propagated uncertainty in a\n", @@ -250,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "18", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/docs/tutorials/ed-24.py b/docs/docs/tutorials/ed-24.py index d5e08ad2..9e5d40f8 100644 --- a/docs/docs/tutorials/ed-24.py +++ b/docs/docs/tutorials/ed-24.py @@ -12,20 +12,17 @@ # ## Import Library # %% -from pathlib import Path - import easydiffraction as ed # %% [markdown] -# ## Locate the Saved Project +# ## Download Saved Project # -# In the repository, the saved project currently lives under -# `tmp/tutorials/projects/lbco_hrpt_bayesian`. 
Once a downloadable -# archive is available, replace this path with the extracted project -# directory instead. +# The returned path points directly to the saved project directory with +# the completed Bayesian fit and persisted posterior samples and plot +# caches. # %% -project_dir = Path('../../../tmp/tutorials/projects/lbco_hrpt_bayesian') +project_dir = ed.download_data(id=35, destination='projects') # %% [markdown] # ## Load the Saved Bayesian Project From dfe3f6cdb182f387094dae75084794f2c58a736b Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 23:06:27 +0200 Subject: [PATCH 39/72] Remove deterministic_parameter_results from docs --- docs/dev/package-structure/full.md | 6 ------ docs/dev/package-structure/short.md | 3 --- 2 files changed, 9 deletions(-) diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index 06c3aa60..d29e21b7 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -80,12 +80,6 @@ │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class ConstraintsFactory │ │ ├── 📁 deterministic_parameter_results -│ │ │ ├── 📄 __init__.py -│ │ │ ├── 📄 default.py -│ │ │ │ ├── 🏷️ class DeterministicParameterResultItem -│ │ │ │ └── 🏷️ class DeterministicParameterResults -│ │ │ └── 📄 factory.py -│ │ │ └── 🏷️ class DeterministicParameterResultsFactory │ │ ├── 📁 deterministic_result │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py diff --git a/docs/dev/package-structure/short.md b/docs/dev/package-structure/short.md index 3faaf97b..508b8599 100644 --- a/docs/dev/package-structure/short.md +++ b/docs/dev/package-structure/short.md @@ -48,9 +48,6 @@ │ │ │ ├── 📄 default.py │ │ │ └── 📄 factory.py │ │ ├── 📁 deterministic_parameter_results -│ │ │ ├── 📄 __init__.py -│ │ │ ├── 📄 default.py -│ │ │ └── 📄 factory.py │ │ ├── 📁 deterministic_result │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py From d98841068174087daec0c2220c4231f4f4353b6f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 
2026 23:14:32 +0200 Subject: [PATCH 40/72] Revise docs for CLI syntax and project loading --- docs/docs/quick-reference/index.md | 17 ++++++-- .../user-guide/analysis-workflow/project.md | 40 +++++++++++-------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/docs/docs/quick-reference/index.md b/docs/docs/quick-reference/index.md index c3aa9f74..9c4d715b 100644 --- a/docs/docs/quick-reference/index.md +++ b/docs/docs/quick-reference/index.md @@ -403,8 +403,16 @@ project = ed.Project.load('lbco_hrpt') Run a saved project from the command line: ```bash -python -m easydiffraction fit lbco_hrpt -python -m easydiffraction fit lbco_hrpt --dry +python -m easydiffraction lbco_hrpt fit +python -m easydiffraction lbco_hrpt fit --dry +python -m easydiffraction lbco_hrpt display +``` + +Load a saved example project straight from `download_data()`: + +```python +saved_project_dir = ed.download_data(id=30, destination='projects') +project = ed.Project.load(saved_project_dir) ``` ## Command-Line Reminders @@ -412,8 +420,11 @@ python -m easydiffraction fit lbco_hrpt --dry ```bash python -m easydiffraction --help python -m easydiffraction --version +python -m easydiffraction list-data +python -m easydiffraction download-data 30 --destination projects python -m easydiffraction list-tutorials python -m easydiffraction download-tutorial 1 --destination tutorials python -m easydiffraction download-all-tutorials --destination tutorials -python -m easydiffraction fit PROJECT_DIR +python -m easydiffraction PROJECT_DIR fit +python -m easydiffraction PROJECT_DIR display ``` diff --git a/docs/docs/user-guide/analysis-workflow/project.md b/docs/docs/user-guide/analysis-workflow/project.md index 5e8e8b41..72f77dc1 100644 --- a/docs/docs/user-guide/analysis-workflow/project.md +++ b/docs/docs/user-guide/analysis-workflow/project.md @@ -14,7 +14,7 @@ contribution from multiple **structures**. 
EasyDiffraction allows you to: - **Manually create** a new project by specifying its metadata. -- **Load an existing project** from a file (**CIF** format). +- **Load an existing saved project** from its project directory. Below are instructions on how to set up a project in EasyDiffraction. It is assumed that you have already imported the `easydiffraction` package, @@ -55,14 +55,15 @@ you can just call the `save`: project.save() ``` -## Loading a Project from CIF +## Loading a Saved Project -If you have an existing project, you can load it directly from a CIF -file. This is useful for reusing previously defined projects or sharing -them with others. +If you have an existing saved project, load it from the project +directory created by `project.save_as()` or `project.save()`. This is +useful for continuing a previous session or reusing a downloaded saved +project. ```python -project.load('data/lbco_hrpt.cif') +project = ed.Project.load('lbco_hrpt') ``` ## Project Structure @@ -82,9 +83,10 @@ The example below illustrates a typical **project structure** for a ├── 📁 experiments - Folder with experiment settings and measured data. │ ├── 📄 hrpt.cif - Instrumental parameters, calculator selection and measured data from HRPT@PSI. │ └── ... -├── 📄 analysis.cif - Settings for data analysis (minimizer, fit mode, etc.). -└── 📁 summary - └── 📄 report.cif - Summary report after structure refinement. +├── 📁 analysis - Analysis settings and optional persisted Bayesian arrays. +│ ├── 📄 analysis.cif - Settings for data analysis (minimizer, fit mode, constraints, persisted fit state). +│ └── 📄 results.h5 - Optional Bayesian sidecar with posterior and predictive arrays. +└── 📄 summary.cif - Summary report after structure refinement. @@ -110,13 +112,16 @@ This file stores project-level metadata and display configuration.
-data_La0.5Ba0.5CoO3
-
+_project.id          lbco_hrpt
 _project.title       "La0.5Ba0.5CoO3 from neutron diffraction at HRPT@PSI"
 _project.description "neutrons, powder, constant wavelength, HRPT@PSI"
 
-_display.chart_engine  asciichartpy
-_display.table_engine   rich
+_project.created     "18 May 2026 10:15:00"
+_project.last_modified "18 May 2026 10:20:00"
+
+_rendering.chart_engine auto
+_rendering.table_engine auto
+_verbosity.fit         full
 
@@ -233,7 +238,7 @@ loop_ -### 4. analysis.cif +### 4. analysis / analysis.cif This file contains settings used for data analysis, including the choice of **calculation** and **fitting** engines, as well as user defined @@ -243,8 +248,8 @@ of **calculation** and **fitting** engines, as well as user defined
-_fit.minimizer_type          "lmfit (leastsq)"
-_fit.mode                    single
+_fitting.mode_type              single
+_fitting.minimizer_type         lmfit
 
 loop_
 _alias.label
@@ -263,6 +268,9 @@ loop_
 
 
 
+When a Bayesian fit stores persisted posterior or predictive arrays, the
+same `analysis/` directory also contains `results.h5`.
+
 
--- From 18a69f2a555b2a729399faa0be0a5ea025d09a19 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 23:43:49 +0200 Subject: [PATCH 41/72] Update undo command to show it's a placeholder --- src/easydiffraction/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index 0945331a..79dd262a 100644 --- a/src/easydiffraction/__main__.py +++ b/src/easydiffraction/__main__.py @@ -233,9 +233,9 @@ def undo( help='Path to the project directory (must contain project.cif).', ), ) -> None: - """Undo the last fit when fit-history support exists.""" + """Undo the last fit when fit-history support exists (not yet implemented).""" _load_project(project_dir) - typer.echo('Undo is not implemented yet. See undo-fit.md ADR.') + typer.echo('Undo is not yet implemented. This command is a placeholder for future fit history support.') raise typer.Exit(code=1) From c117a558a893f11e8a284b3cbadbd937dd2afa94 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Mon, 18 May 2026 23:57:07 +0200 Subject: [PATCH 42/72] Harden fit-state sidecar paths and finalize ADR --- .../adrs/accepted/analysis-cif-fit-state.md | 193 +++++ docs/dev/adrs/index.md | 2 +- .../suggestions/analysis-cif-fit-state.md | 620 --------------- .../fit-output-files-and-data-exports.md | 5 +- docs/dev/adrs/suggestions/undo-fit.md | 138 ++-- docs/dev/plans/analysis-cif-fit-state.md | 715 ------------------ src/easydiffraction/analysis/analysis.py | 3 +- .../analysis/categories/fit_state/__init__.py | 5 - .../analysis/categories/fit_state/default.py | 42 - .../analysis/categories/fit_state/factory.py | 17 - src/easydiffraction/io/results_sidecar.py | 33 +- .../analysis/test_analysis_coverage.py | 105 +++ .../io/test_results_sidecar.py | 34 + 13 files changed, 413 insertions(+), 1499 deletions(-) create mode 100644 docs/dev/adrs/accepted/analysis-cif-fit-state.md delete mode 100644 
docs/dev/adrs/suggestions/analysis-cif-fit-state.md delete mode 100644 docs/dev/plans/analysis-cif-fit-state.md delete mode 100644 src/easydiffraction/analysis/categories/fit_state/__init__.py delete mode 100644 src/easydiffraction/analysis/categories/fit_state/default.py delete mode 100644 src/easydiffraction/analysis/categories/fit_state/factory.py diff --git a/docs/dev/adrs/accepted/analysis-cif-fit-state.md b/docs/dev/adrs/accepted/analysis-cif-fit-state.md new file mode 100644 index 00000000..a38f0e7c --- /dev/null +++ b/docs/dev/adrs/accepted/analysis-cif-fit-state.md @@ -0,0 +1,193 @@ +# ADR: Analysis CIF Fit State + +## Status + +Accepted current design. + +## Date + +2026-05-18 + +## Group + +Analysis and fitting. + +## Context + +`analysis/analysis.cif` already persists analysis configuration such as +`_fitting.minimizer_type`, `_fitting.mode_type`, aliases, constraints, +and active fit-mode settings. That configuration alone is not enough to +reopen a saved project and continue the same fit-result, plotting, and +command-line workflow. + +Analysis-owned fit state needs to persist: + +- fit bounds and bound provenance +- pre-fit scalar snapshots for recovery workflows +- compact status metadata for the latest saved fit projection +- deterministic correlation summaries +- Bayesian summary metadata and manifests for bulk array sidecars +- plot-ready Bayesian caches so restored posterior displays do not need + to recompute on first use + +Committed model parameter values and uncertainties already persist in +structure and experiment CIF files through the accepted free-flag CIF +encoding. Those committed values must remain the source of truth for the +current model state. + +The accepted runtime fit-results ADR keeps backend runtime objects +runtime-only unless a narrower persistence ADR defines a saved +projection. This ADR defines that narrower saved projection. 
+ +## Decision + +Persist analysis-owned fit state as explicit sibling categories in +`analysis/analysis.cif`, with large Bayesian arrays stored in +`analysis/results.h5`. + +Do not add a dedicated `_fit_state` category or +`_fit_state.schema_version`. Persisted fit state is detected from +`_fit_result` and the related fit-state categories. + +### Common fit-state categories + +Persist these common categories for any saved fit projection: + +- `_fit_parameter` +- `_fit_result` +- `_fit_parameter_correlation` + +`_fit_parameter` stores analysis-owned per-parameter fit controls and +pre-fit scalar snapshots: + +- `param_unique_name` +- `fit_min` +- `fit_max` +- `fit_bounds_uncertainty_multiplier` +- `start_value` +- `start_uncertainty` + +`_fit_result` stores the latest saved fit header: + +- `result_kind` +- `success` +- `message` +- `iterations` +- `fitting_time` +- `reduced_chi_square` + +`_fit_parameter_correlation` stores pairwise deterministic or posterior +correlation summaries keyed by a persisted `id`. Only unique parameter +pairs are stored. + +### Deterministic fit projection + +Deterministic fits persist `_deterministic_result` in addition to the +common categories above. + +`_deterministic_result` stores compact optimizer metadata and counts: + +- `optimizer_name` +- `method_name` +- `objective_name` +- `objective_value` +- `n_data_points` +- `n_parameters` +- `n_free_parameters` +- `degrees_of_freedom` +- `covariance_available` +- `correlation_available` + +Do not persist a `_deterministic_parameter_result` category. Final +deterministic parameter values and uncertainties already persist in the +model CIF files, and restored deterministic ordering comes from +`_fit_parameter`. 
+ +### Bayesian fit projection + +Bayesian fits persist these additional categories: + +- `_bayesian_result` +- `_bayesian_sampler` +- `_bayesian_convergence` +- `_bayesian_parameter_posterior` +- `_bayesian_distribution_cache` +- `_bayesian_pair_cache` +- `_bayesian_predictive_dataset` + +`_bayesian_result` stores the saved Bayesian header and sidecar flags, +including `sidecar_file`, `has_posterior_samples`, +`has_distribution_cache`, `has_pair_cache`, and +`has_posterior_predictive`. + +`_bayesian_sampler` stores the resolved sampler settings used for the +run. `parallel` persists the resolved non-negative worker count as an +integer. + +`_bayesian_convergence` stores convergence metadata and posterior array +shape counts. + +`_bayesian_parameter_posterior` stores one summary row per sampled +parameter, including credible intervals, uncertainty, ESS, and R-hat. +Its row order defines the saved posterior parameter order. + +`_bayesian_distribution_cache`, `_bayesian_pair_cache`, and +`_bayesian_predictive_dataset` store manifest rows for plot-ready +posterior caches. Distribution and predictive caches are persisted for +any Bayesian fit with posterior samples, including single-parameter +fits. Pair caches and posterior correlation summaries are only persisted +when more than one parameter was sampled. + +`parameter.posterior` is not part of this accepted design. This ADR +persists analysis-level posterior summaries and caches only. Any future +parameter-level posterior API remains a separate decision. + +### Bayesian sidecar + +Persist large Bayesian arrays in `analysis/results.h5` using `h5py`. +This includes canonical posterior arrays and any saved distribution, +pair, and predictive cache arrays referenced by the CIF manifests. + +The persisted `sidecar_file` value is a local file name only. It must +resolve to a basename inside the project `analysis/` directory. Absolute +paths and traversal paths are rejected and fall back to `results.h5`. 
+ +If the sidecar is missing on load, summary rows in +`analysis/analysis.cif` still restore fit tables and metadata. Features +that require missing bulk arrays must warn clearly instead of failing +silently. + +### Save and restore behavior + +After a fit completes, project save writes the fit-state projection +before the project is considered fully persisted. For Bayesian fits, +that includes the prepared summaries and saved plot caches used by +posterior displays. + +Load order is: + +1. standard analysis configuration +2. common fit-state categories +3. deterministic or Bayesian fit-specific categories according to + `_fit_result.result_kind` +4. Bayesian sidecar arrays when a Bayesian sidecar is expected + +Do not persist backend runtime objects, optimizer instances, or raw +driver payloads anywhere in this design. + +## Consequences + +Saved projects reopen with enough fit-state context to display the last +saved result and rerun fits without rebuilding analysis-owned bounds by +hand. + +Deterministic persistence stays compact because committed parameter +values remain in the model CIF files instead of being duplicated in a +second deterministic per-parameter result loop. + +Bayesian persistence spans CIF metadata and an HDF5 sidecar, so save and +load must validate consistency between manifest rows and bulk datasets. + +The accepted runtime fit-results ADR should now be read as runtime-only +except where this narrower projection explicitly persists fit-state +metadata, summaries, and cache arrays. diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 1ad2c0ae..9f4a2563 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -17,7 +17,7 @@ folders. 
| -------------------- | ---------- | ----------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | | Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | | Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | -| Analysis and fitting | Suggestion | Analysis CIF Fit State | Proposes persisted fit-state categories, result projections, and Bayesian result manifests. | [`analysis-cif-fit-state.md`](suggestions/analysis-cif-fit-state.md) | +| Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/results.h5`. | [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | | Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | | Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | | Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. 
| [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | diff --git a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md b/docs/dev/adrs/suggestions/analysis-cif-fit-state.md deleted file mode 100644 index f493ed95..00000000 --- a/docs/dev/adrs/suggestions/analysis-cif-fit-state.md +++ /dev/null @@ -1,620 +0,0 @@ -# ADR: Analysis CIF Fit State - -**Status:** Proposed **Date:** 2026-05-13 **Updated:** 2026-05-18 - -## Context - -`analysis/analysis.cif` currently persists analysis configuration such -as `_fitting.minimizer_type`, `_fitting.mode_type`, aliases, -constraints, and active mode-specific settings. It does not yet persist -the analysis-owned fit state needed to reopen a saved project and -continue the same command-line or plotting workflow. - -Parameter CIF serialization already carries the committed parameter -`value`, the current `free` state, and the current `uncertainty` via CIF -bracket notation. That data belongs to structure or experiment CIF -files. Analysis-owned fit state should not be duplicated there. - -The missing analysis-owned state includes: - -- fit controls that apply to parameters during fitting but are not model - values -- fit bounds and bound provenance needed by deterministic and Bayesian - minimizers -- pre-fit scalar snapshots needed by fit recovery and undo workflows -- compact status metadata for the latest persisted fit projection -- Bayesian summary metadata and manifests for bulk array sidecars -- plot-ready Bayesian caches that make restored posterior displays - instant rather than recomputing after load - -The accepted `runtime-fit-results.md` ADR keeps full backend runtime -objects runtime-only unless a later ADR narrows the persisted -projection. This ADR defines that narrower persisted projection. It -persists stable metadata, summaries, and canonical/cached numerical -arrays, not backend driver objects. - -This ADR is the canonical storage contract for fit-state persistence. 
-The parameter-level posterior ADR defines only the `parameter.posterior` -API projection and depends on the saved state described here. - -## Decision - -### 1. Every new persisted concept gets an explicit CIF category - -New analysis fit-state data must be represented by named CIF categories. -Do not add loose ad-hoc tags, JSON blobs, or overload existing model -parameter tags for analysis-owned fit state. - -Existing categories remain responsible for existing configuration: - -- `_fitting` stores common fitting configuration. -- `_alias` and `_constraint` store symbolic analysis configuration. -- `_joint_fit`, `_sequential_fit`, and `_sequential_fit_extract` store - active fit-mode settings. - -New common fit-state categories are: - -- `_fit_parameter` -- `_fit_result` -- `_fit_parameter_correlation` - -Deterministic-specific categories are: - -- `_deterministic_result` -- `_deterministic_parameter_result` - -Bayesian-specific categories are: - -- `_bayesian_result` -- `_bayesian_sampler` -- `_bayesian_convergence` -- `_bayesian_parameter_posterior` -- `_bayesian_distribution_cache` -- `_bayesian_pair_cache` -- `_bayesian_predictive_dataset` - -Bulk arrays referenced by Bayesian categories live in -`analysis/results.h5`. - -### 2. Do not add a dedicated `_fit_state` schema category - -Persisted fit state is detected from the presence of `_fit_result` and -the related fit-state loops. Do not add a dedicated `_fit_state` -category or a standalone `schema_version` tag for this feature. - -### 3. 
Add `_fit_parameter` for per-parameter fit controls - -`_fit_parameter` is an analysis-owned loop keyed by live parameter -unique name: - -```cif -loop_ -_fit_parameter.param_unique_name -_fit_parameter.fit_min -_fit_parameter.fit_max -_fit_parameter.fit_bounds_uncertainty_multiplier -_fit_parameter.start_value -_fit_parameter.start_uncertainty -lbco.cell.length_a 3.8895 3.8920 4.0 3.8909 0.0003 -hrpt.peak.broad_gauss_u 0.05 0.11 4.0 0.08 0.007 -``` - -Fields: - -- `param_unique_name` -- `fit_min` -- `fit_max` -- `fit_bounds_uncertainty_multiplier` -- `start_value` -- `start_uncertainty` - -`fit_min` and `fit_max` are required so saved DREAM projects can be -rerun from the CLI without recreating bounds in Python. The -`fit_bounds_uncertainty_multiplier` field preserves how -uncertainty-derived bounds were created. `start_value` and -`start_uncertainty` capture the most recent pre-fit scalar state for -fit-result displays and undo workflows. - -The committed parameter value after a fit remains in structure or -experiment CIF. `_fit_parameter` does not duplicate active values. - -### 4. Add `_fit_result` for common fit status - -`_fit_result` is a single-item category for fields shared across fit -types: - -```cif -_fit_result.result_kind bayesian -_fit_result.success true -_fit_result.message "Sampler completed" -_fit_result.iterations 3000 -_fit_result.fitting_time 82.4 -_fit_result.reduced_chi_square 1.031 -``` - -Fields: - -- `result_kind` -- `success` -- `message` -- `iterations` -- `fitting_time` -- `reduced_chi_square` - -`result_kind` identifies the latest persisted projection, for example -`deterministic` or `bayesian`. Backend runtime objects, optimizer -instances, driver state, and arbitrary engine payloads are not stored in -this category. - -### 5. 
Add `_fit_parameter_correlation` for reusable correlations - -`_fit_parameter_correlation` stores compact pairwise correlation -summaries keyed by a persisted `id`: - -```cif -loop_ -_fit_parameter_correlation.id -_fit_parameter_correlation.source_kind -_fit_parameter_correlation.param_unique_name_i -_fit_parameter_correlation.param_unique_name_j -_fit_parameter_correlation.correlation -1 posterior lbco.cell.length_a hrpt.peak.broad_gauss_u 0.87 -``` - -Fields: - -- `id` -- `source_kind` -- `param_unique_name_i` -- `param_unique_name_j` -- `correlation` - -Rows are keyed by the persisted `id` field so each correlation pair has -stable collection identity in both Python and CIF. When a caller does -not provide an explicit `id`, implementations should generate a simple -sequential numeric identifier such as `1`, `2`, `3`, and so on. - -Only the upper triangle excluding the diagonal is stored. Correlation -heatmaps can be restored from this loop alone. Posterior pair plots -still use the Bayesian pair cache or posterior samples. - -### 6. Store deterministic metadata in dedicated categories - -Deterministic fits use the common `_fit_parameter`, `_fit_result`, and -`_fit_parameter_correlation` categories, plus deterministic-specific -categories for optimizer details and parameter-result display state. - -`_deterministic_result` stores one saved deterministic result header: - -- `optimizer_name` -- `method_name` -- `objective_name` -- `objective_value` -- `n_data_points` -- `n_parameters` -- `n_free_parameters` -- `degrees_of_freedom` -- `covariance_available` -- `correlation_available` - -`_deterministic_parameter_result` stores one row per parameter varied in -the latest deterministic fit: - -- `param_unique_name` -- `final_value` -- `final_uncertainty` -- `at_lower_bound` -- `at_upper_bound` - -Loop order is the display order for restored deterministic parameter -results. 
- -`final_value` and `final_uncertainty` are a result projection for -display and consistency checks. The calculation source of truth remains -the live parameter value and uncertainty restored from structure and -experiment CIF. If the deterministic result projection disagrees with -the live parameter state on load, loaders should warn and prefer the -live parameter state for calculations. - -Pre-fit values and uncertainties are not duplicated in -`_deterministic_parameter_result`; they come from `_fit_parameter`. -Parameter correlations, when available from covariance, are stored in -`_fit_parameter_correlation` with `source_kind deterministic`. - -### 7. Store Bayesian metadata in dedicated categories - -Bayesian persistence extends the common categories with explicit -Bayesian categories in `analysis/analysis.cif`. - -`_bayesian_result` stores one saved Bayesian result header: - -- `sampler_name` -- `point_estimate_name` -- `success` -- `sampler_completed` -- `best_log_posterior` -- `credible_interval_inner` -- `credible_interval_outer` -- `has_posterior_samples` -- `has_distribution_cache` -- `has_pair_cache` -- `has_posterior_predictive` -- `sidecar_file` - -`_bayesian_sampler` stores resolved sampler settings actually used: - -- `steps` -- `burn` -- `thin` -- `pop` -- `parallel` -- `init` -- `random_seed` - -`parallel` stores the resolved non-negative DREAM worker count. `0` -means use all CPUs. - -`_bayesian_convergence` stores top-level diagnostics and shapes: - -- `converged` -- `max_r_hat` -- `min_ess_bulk` -- `n_draws` -- `n_chains` -- `n_parameters` - -`_bayesian_parameter_posterior` stores one posterior summary row per -sampled parameter: - -- `unique_name` -- `display_name` -- `best_sample_value` -- `median` -- `uncertainty` -- `interval_68_lower` -- `interval_68_upper` -- `interval_95_lower` -- `interval_95_upper` -- `ess_bulk` -- `r_hat` - -Loop order defines the parameter column order in posterior sample arrays -stored in the HDF5 sidecar. 
`parameter.posterior` is rebuilt from this -loop on load; posterior summary data is not duplicated in structure or -experiment CIF files. - -### 8. Store plot-ready Bayesian caches in explicit manifest categories - -Bayesian plotting should not require expensive post-load preparation -when the project was saved after a successful Bayesian fit. Plot-ready -caches therefore have their own manifest categories in -`analysis/analysis.cif`, with the actual arrays stored in HDF5. - -`_bayesian_distribution_cache` supports -`project.display.posterior.distribution(...)`: - -- `param_unique_name` -- `x_path` -- `density_path` -- `n_grid` -- `n_draws_cached` - -`_bayesian_pair_cache` supports `project.display.posterior.pairs(...)`: - -- `param_unique_name_x` -- `param_unique_name_y` -- `id` -- `x_path` -- `y_path` -- `density_path` -- `contour_level_path` -- `n_grid_x` -- `n_grid_y` -- `n_draws_cached` - -`_bayesian_pair_cache` rows are keyed by the persisted `id` field so -each cached parameter pair has stable identity in both Python and CIF. -When a caller does not provide an explicit `id`, implementations should -generate a simple sequential numeric identifier such as `1`, `2`, `3`, -and so on. - -`_bayesian_predictive_dataset` supports -`project.display.posterior.predictive(...)`: - -- `experiment_name` -- `x_axis_name` -- `x_path` -- `best_sample_prediction_path` -- `lower_95_path` -- `upper_95_path` -- `lower_68_path` -- `upper_68_path` -- `draws_path` -- `n_x` -- `n_draws_cached` - -`_bayesian_predictive_dataset` is keyed by `experiment_name` in this -schema, with at most one cached predictive dataset per experiment. - -The manifest rows are the source of truth for HDF5 paths. HDF5 group -naming conventions are implementation details and may change as long as -the manifest remains valid. - -### 9. Store bulk Bayesian arrays in `analysis/results.h5` - -`analysis/analysis.cif` remains the text metadata entry point. 
Numerical -arrays large enough to make CIF unwieldy are stored in: - -- `analysis/results.h5` - -The reference implementation uses a direct `h5py` dependency to read and -write this sidecar. - -Required canonical posterior arrays, when available: - -- `/posterior/parameter_samples` -- `/posterior/log_posterior` -- `/posterior/draw_index` - -Expected shapes: - -- `/posterior/parameter_samples`: `(n_draws, n_chains, n_parameters)` -- `/posterior/log_posterior`: `(n_draws, n_chains)` -- `/posterior/draw_index`: `(n_draws,)` - -Recommended plot-cache array layout: - -- `/posterior/distribution//x` -- `/posterior/distribution//density` -- `/posterior/pairs//x` -- `/posterior/pairs//y` -- `/posterior/pairs//density` -- `/posterior/pairs//contour_levels` -- `/predictive//x` -- `/predictive//best_sample_prediction` -- `/predictive//lower_95` -- `/predictive//upper_95` -- `/predictive//lower_68` -- `/predictive//upper_68` -- `/predictive//draws` - -The sidecar is optional for summary-only restore. If it is missing, -`_bayesian_parameter_posterior` can still restore parameter summaries -and fit-result tables, but posterior plots that require arrays or -plot-ready caches must warn clearly or offer recomputation. - -Do not persist backend-specific runtime objects such as DREAM driver -instances, raw engine result objects, or ArviZ `InferenceData`. - -### 10. Prepare Bayesian plot data immediately after sampling - -After DREAM sampling completes, the UX should include an explicit -post-processing step before the fit is considered fully saved: - -```text -Processing Bayesian results... 
-``` - -During this step, EasyDiffraction should prepare: - -- posterior parameter summaries -- convergence diagnostics -- parameter correlation summaries -- distribution density cache arrays -- pair density and contour cache arrays -- posterior predictive bands and cached draws for available experiments -- HDF5 sidecar datasets and CIF manifest rows - -For saved projects, `project.analysis.fit()` already triggers a save at -the end of fitting. In that case the post-processing step should run -before the automatic save writes `analysis/analysis.cif` and -`analysis/results.h5`. For unsaved projects, the same prepared data -remains in memory and is written on the next `project.save_as(...)` or -`project.save()`. - -The display methods should then prefer persisted plot caches: - -```python -project.display.posterior.distribution() -project.display.posterior.pairs() -project.display.posterior.predictive(expt_name='hrpt') -``` - -When valid caches are available, these calls should only load arrays and -render plots. They should not rerun posterior summarization, KDE, -contour preparation, or posterior predictive calculations. - -### 11. Restore order is configuration first, fit state second - -Load order should be: - -1. standard analysis configuration -2. aliases and constraints -3. active mode-specific settings -4. `_fit_parameter` -5. `_fit_result` -6. `_fit_parameter_correlation` -7. deterministic metadata categories when `result_kind` is - `deterministic` -8. Bayesian metadata categories when `result_kind` is `bayesian` -9. Bayesian HDF5 sidecar arrays and plot caches - -This ensures bounds and live parameter references are available before -fit-specific summaries and cached plot data are attached. - -### 12. 
Saved examples use current `_fitting.*` tags - -Suggested deterministic `analysis/analysis.cif` fragment: - -```cif -_fitting.mode_type single -_fitting.minimizer_type "lmfit (leastsq)" - -loop_ -_fit_parameter.param_unique_name -_fit_parameter.fit_min -_fit_parameter.fit_max -_fit_parameter.fit_bounds_uncertainty_multiplier -_fit_parameter.start_value -_fit_parameter.start_uncertainty -lbco.cell.length_a 3.8895 3.8920 4.0 3.8909 0.0003 -hrpt.peak.broad_gauss_u 0.05 0.11 4.0 0.08 0.007 - -_fit_result.result_kind deterministic -_fit_result.success true -_fit_result.message "Fit converged" -_fit_result.iterations 37 -_fit_result.fitting_time 1.82 -_fit_result.reduced_chi_square 1.031 - -_deterministic_result.optimizer_name lmfit -_deterministic_result.method_name leastsq -_deterministic_result.objective_name chi_square -_deterministic_result.objective_value 2568.4 -_deterministic_result.n_data_points 2500 -_deterministic_result.n_parameters 5 -_deterministic_result.n_free_parameters 2 -_deterministic_result.degrees_of_freedom 2498 -_deterministic_result.covariance_available true -_deterministic_result.correlation_available true - -loop_ -_deterministic_parameter_result.param_unique_name -_deterministic_parameter_result.final_value -_deterministic_parameter_result.final_uncertainty -_deterministic_parameter_result.at_lower_bound -_deterministic_parameter_result.at_upper_bound -lbco.cell.length_a 3.89091 0.0003 false false -hrpt.peak.broad_gauss_u 0.08 0.007 false false - -loop_ -_fit_parameter_correlation.id -_fit_parameter_correlation.source_kind -_fit_parameter_correlation.param_unique_name_i -_fit_parameter_correlation.param_unique_name_j -_fit_parameter_correlation.correlation -1 deterministic lbco.cell.length_a hrpt.peak.broad_gauss_u 0.42 -``` - -Suggested Bayesian `analysis/analysis.cif` fragment: - -```cif -_fitting.mode_type single -_fitting.minimizer_type "bumps (dream)" - -loop_ -_fit_parameter.param_unique_name -_fit_parameter.fit_min 
-_fit_parameter.fit_max -_fit_parameter.fit_bounds_uncertainty_multiplier -_fit_parameter.start_value -_fit_parameter.start_uncertainty -lbco.cell.length_a 3.8895 3.8920 4.0 3.8909 0.0003 -hrpt.peak.broad_gauss_u 0.05 0.11 4.0 0.08 0.007 - -_fit_result.result_kind bayesian -_fit_result.success true -_fit_result.message "Sampler completed" -_fit_result.iterations 3000 -_fit_result.fitting_time 82.4 -_fit_result.reduced_chi_square 1.031 - -_bayesian_result.sampler_name dream -_bayesian_result.point_estimate_name best_sample -_bayesian_result.success true -_bayesian_result.sampler_completed true -_bayesian_result.best_log_posterior -1542.77 -_bayesian_result.credible_interval_inner 0.68 -_bayesian_result.credible_interval_outer 0.95 -_bayesian_result.has_posterior_samples true -_bayesian_result.has_distribution_cache true -_bayesian_result.has_pair_cache true -_bayesian_result.has_posterior_predictive true -_bayesian_result.sidecar_file "results.h5" - -_bayesian_sampler.steps 3000 -_bayesian_sampler.burn 600 -_bayesian_sampler.thin 1 -_bayesian_sampler.pop 20 -_bayesian_sampler.parallel 0 -_bayesian_sampler.init lhs -_bayesian_sampler.random_seed 12345 - -_bayesian_convergence.converged true -_bayesian_convergence.max_r_hat 1.01 -_bayesian_convergence.min_ess_bulk 812.4 -_bayesian_convergence.n_draws 2400 -_bayesian_convergence.n_chains 20 -_bayesian_convergence.n_parameters 2 - -loop_ -_bayesian_parameter_posterior.unique_name -_bayesian_parameter_posterior.display_name -_bayesian_parameter_posterior.best_sample_value -_bayesian_parameter_posterior.median -_bayesian_parameter_posterior.uncertainty -_bayesian_parameter_posterior.interval_68_lower -_bayesian_parameter_posterior.interval_68_upper -_bayesian_parameter_posterior.interval_95_lower -_bayesian_parameter_posterior.interval_95_upper -_bayesian_parameter_posterior.ess_bulk -_bayesian_parameter_posterior.r_hat -lbco.cell.length_a "length_a" 3.89091 3.89090 0.0003 3.8906 3.8912 3.8903 3.8915 812.4 1.01 - -loop_ 
-_bayesian_distribution_cache.param_unique_name -_bayesian_distribution_cache.x_path -_bayesian_distribution_cache.density_path -_bayesian_distribution_cache.n_grid -_bayesian_distribution_cache.n_draws_cached -lbco.cell.length_a /posterior/distribution/0/x /posterior/distribution/0/density 256 48000 - -loop_ -_bayesian_predictive_dataset.experiment_name -_bayesian_predictive_dataset.x_axis_name -_bayesian_predictive_dataset.x_path -_bayesian_predictive_dataset.best_sample_prediction_path -_bayesian_predictive_dataset.lower_95_path -_bayesian_predictive_dataset.upper_95_path -_bayesian_predictive_dataset.lower_68_path -_bayesian_predictive_dataset.upper_68_path -_bayesian_predictive_dataset.draws_path -_bayesian_predictive_dataset.n_x -_bayesian_predictive_dataset.n_draws_cached -hrpt ttheta /predictive/hrpt/x /predictive/hrpt/best_sample_prediction /predictive/hrpt/lower_95 /predictive/hrpt/upper_95 /predictive/hrpt/lower_68 /predictive/hrpt/upper_68 /predictive/hrpt/draws 2500 200 -``` - -## Consequences - -### Positive - -- `analysis/analysis.cif` becomes the single text manifest for - analysis-owned fit state. -- Saved DREAM projects have enough fit bounds to run again from the CLI. -- Bayesian save/load separates compact CIF metadata from large HDF5 - arrays. -- Restored posterior displays can render from cached arrays without - expensive recomputation. -- Parameter posterior summaries are rebuilt from analysis-level data - rather than duplicated in model CIF. - -### Trade-offs - -- The runtime fit-results ADR must be read as "runtime-only unless a - narrower persistence ADR defines a saved projection"; this ADR defines - that projection. -- Bayesian persistence now spans CIF and HDF5, so save/load must - validate consistency between manifest rows and sidecar datasets. -- Post-fit processing increases the time between sampler completion and - final saved project state, but makes later display calls much faster. 
-- Cached plot arrays are derived data and must be invalidated when a new - fit runs or when the project changes in ways that make the saved fit - result stale. - -## Deferred Work - -- Exact compression and chunking policy for HDF5 datasets. -- Multiple saved Bayesian runs per project. -- Optional covariance persistence beyond correlation summaries. -- Cache invalidation UX for manual edits after a saved fit. -- Persistence for posterior-capable minimizers beyond DREAM. diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md index c1106cd4..15ccaafa 100644 --- a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -36,8 +36,9 @@ Use `analysis/exports/` for optional user-facing CSV files intended for external plotting and inspection. This naming keeps the fit type out of the filename. The fit type and -schema are recorded in `analysis/analysis.cif` manifests such as -`_fit_result.result_kind` and `_fit_state.schema_version`. +saved fit-state manifests are recorded in `analysis/analysis.cif`, +principally through `_fit_result.result_kind` and the related fit-state +categories. ### 2. Sequential deterministic results stay CSV diff --git a/docs/dev/adrs/suggestions/undo-fit.md b/docs/dev/adrs/suggestions/undo-fit.md index 578d43a3..f394aca9 100644 --- a/docs/dev/adrs/suggestions/undo-fit.md +++ b/docs/dev/adrs/suggestions/undo-fit.md @@ -1,57 +1,53 @@ # ADR: Undo Fit **Status:** Proposed -**Date:** 2026-05-13 +**Date:** 2026-05-18 ## Context -The new `_fit_parameter.start_value` and -`_fit_parameter.start_uncertainty` fields in `analysis/analysis.cif` -capture the last committed pre-fit scalar state for each fitted -parameter. This is useful when a minimization run produces a poor result -and the user wants to return to the state from immediately before the -fit. 
+The accepted fit-state persistence design now stores +`_fit_parameter.start_value` and `_fit_parameter.start_uncertainty` in +`analysis/analysis.cif`. Those fields capture the last committed pre-fit +scalar state for each fitted parameter and are the essential rollback +anchors for any undo feature. -This need is especially important in command-line workflows, where the -user may save a project after a bad fit and reopen it later expecting a -simple way to roll back to the pre-fit state. +This branch also introduced project-first CLI routing and reserved a +top-level `undo` command shape, but the command is still only a +placeholder. The actual rollback semantics are still undecided. -However, these snapshots alone do not define undo semantics. The API -owner, rollback scope, and interaction with fit-derived metadata must be -explicit. +Parameter-level posterior access remains a separate proposal. Undo must +not depend on `parameter.posterior` existing. ## Decision ### 1. Add an analysis-owned `undo_fit()` operation -The rollback operation belongs on `Analysis`, for example: +The rollback operation belongs on `Analysis`: ```python project.analysis.undo_fit() ``` -`Analysis` owns the fit lifecycle, fit metadata, and persisted -`analysis.cif` state, so it is the correct owner for this operation. +`Analysis` owns fit execution, fit metadata, and the persisted fit-state +projection, so it is the correct public owner. -### 2. Initial undo scope is scalar rollback plus posterior clear +### 2. Initial undo scope is scalar rollback plus fit-state clear The first undo implementation restores each fitted parameter's saved -pre-fit scalar state and clears fit state that belongs only to the -discarded fit. - -It does not attempt to restore every possible runtime detail of the -previous fit result. +pre-fit scalar state and clears fit-derived state that belongs only to +the discarded fit. 
After `undo_fit()`: - `parameter.value` is restored from `_fit_parameter.start_value` - `parameter.uncertainty` is restored from `_fit_parameter.start_uncertainty` -- `parameter.posterior` is cleared - `analysis.fit_results` is cleared +- persisted fit-state summaries and Bayesian caches for the discarded + fit are cleared -This gives the user a safe, predictable return to the pre-fit visible -parameter state without pretending to restore a full historical result. +If a future `parameter.posterior` API exists, undo should clear that +projection too. It is not a prerequisite for the initial implementation. If an older saved project lacks `start_uncertainty`, clearing `parameter.uncertainty` remains an acceptable compatibility fallback. @@ -67,51 +63,20 @@ The initial undo operation does not revert: - fit mode - joint-fit weights -These are analysis configuration, not fit output. +These belong to analysis configuration, not fit output. ### 4. Undo is single-level for now -Only the latest saved pre-fit state is addressable. - -The initial API does not create a stack of historical fits. Supporting -multiple undo levels would require a dedicated snapshot history design -and is deferred. - -### 5. Persisted scalar snapshots are the rollback anchors - -The minimum persisted state required for clean cross-session undo is the -pair of `_fit_parameter.start_value` and -`_fit_parameter.start_uncertainty` defined in -`analysis-cif-fit-state.md`. - -If a parameter has no saved `start_value`, `undo_fit()` leaves that -parameter unchanged. - -If a parameter has no saved `start_uncertainty`, `undo_fit()` may clear -that parameter's uncertainty as a compatibility fallback for older saved -projects. - -### 6. Suggested user flow - -```python -project.analysis.fit() - -# Decide that the latest fit should be discarded. -project.analysis.undo_fit() - -# Save the recovered state if desired. -project.save() -``` - -### 7. 
Add a top-level `undo-fit` CLI command +Only the latest saved pre-fit snapshot is addressable. Multi-level undo +and redo require a dedicated snapshot-history design and remain +deferred. -Because command-line recovery is one of the main motivations for this -feature, undo must also be exposed through the existing top-level CLI. +### 5. CLI exposure follows the project-first command style -Suggested command: +The command-line surface should follow the current CLI style: ```bash -python -m easydiffraction undo-fit PROJECT_DIR +python -m easydiffraction PROJECT_DIR undo ``` This command should: @@ -119,44 +84,35 @@ This command should: - load the saved project from `PROJECT_DIR` - execute `project.analysis.undo_fit()` - save the recovered state back to the same project directory by default -- support `--dry` to perform the rollback in memory without overwriting - project files -- emit a clear message describing whether the latest fit snapshot was - successfully discarded - -Suggested dry-run form: - -```bash -python -m easydiffraction undo-fit PROJECT_DIR --dry -``` +- support `--dry` to preview the rollback without overwriting files +- fail with a clear non-zero exit status when no usable undo snapshot is + available -If the project does not contain a usable undo snapshot, the command -should fail with a clear non-zero exit status instead of silently doing -nothing. +Compatibility aliases may remain if the CLI supports them, but the +project-first form is the canonical user-facing syntax. ## Consequences ### Positive -- Users gain a simple recovery path after a poor fit. -- The feature works naturally with saved projects and both Python and - command-line workflows. -- The initial scope stays small and does not require full historical fit - snapshots. +- The accepted fit-state persistence already provides the minimum saved + anchors required for cross-session undo. 
+- Users gain a predictable recovery path after a poor fit without + needing full historical fit snapshots. +- The feature aligns naturally with saved-project workflows in both + Python and the CLI. ### Trade-offs -- Undo restores pre-fit scalar parameter state, not a full historical - `fit_results` object. -- Older saved projects that do not carry `start_uncertainty` may still - fall back to clearing uncertainty. +- Undo restores visible scalar parameter state, not a full historical + runtime result object. +- Older saved projects may still need the uncertainty-clearing fallback. - Multi-level undo remains unsupported. ## Deferred Work -- exact restoration of previous posterior-derived projections beyond the - scalar parameter snapshot -- multi-level undo / redo -- user-facing confirmation or preview APIs -- rollback of fit-type-specific persisted summaries beyond parameter - values +- exact restoration of previous posterior-derived displays beyond the + scalar rollback anchors +- multi-level undo and redo +- confirmation or preview UX beyond `--dry` +- any dependency on a future `parameter.posterior` API diff --git a/docs/dev/plans/analysis-cif-fit-state.md b/docs/dev/plans/analysis-cif-fit-state.md deleted file mode 100644 index a22ead8e..00000000 --- a/docs/dev/plans/analysis-cif-fit-state.md +++ /dev/null @@ -1,715 +0,0 @@ -# Analysis CIF Fit State Implementation Plan - -This plan follows `.github/copilot-instructions.md`. Deliberate -exceptions: none. - -Source ADR: `docs/dev/adrs/suggestions/analysis-cif-fit-state.md`. 
-Related decisions read while preparing this plan: - -- `docs/dev/adrs/accepted/runtime-fit-results.md` -- `docs/dev/adrs/accepted/project-facade-and-persistence.md` -- `docs/dev/adrs/accepted/category-owner-sections.md` -- `docs/dev/adrs/accepted/free-flag-cif-encoding.md` -- `docs/dev/adrs/accepted/loop-category-key-identity.md` -- `docs/dev/adrs/accepted/fit-mode-categories.md` -- `docs/dev/adrs/accepted/test-strategy.md` -- `docs/dev/adrs/suggestions/parameter-correlation-persistence.md` -- `docs/dev/adrs/suggestions/parameter-posterior-summary.md` - -## Goal - -Persist analysis-owned fit state in `analysis/analysis.cif` and, for -large Bayesian arrays, `analysis/results.h5`. Saved projects should be -able to restore fit bounds, pre-fit snapshots, deterministic result -summaries, Bayesian summaries, posterior manifests, and plot-ready cache -metadata without duplicating committed model parameter values in -structure or experiment CIF files. - -## Status Checklist - -- [x] Gather planning context from ADRs, source files, and tests. -- [x] Confirm ADR status: implement from the suggestion for now. -- [x] Confirm HDF5 strategy: add `h5py` as a direct dependency. -- [x] Confirm schema strategy: do not add a dedicated `_fit_state` - category. -- [x] Confirm loop identity strategy: keep persisted `id` columns with - simple autogenerated numeric ids. -- [x] Confirm public surface: expose read-only `Analysis` properties. -- [x] Confirm predictive cache identity: key by `experiment_name`. -- [x] Phase 1 step 1: update the ADR suggestion with clarifications. -- [x] Phase 1 step 2: add common fit-state category models. -- [x] Phase 1 step 3: add deterministic result category models. -- [x] Phase 1 step 4: add Bayesian metadata category models. -- [x] Phase 1 step 5: add Bayesian cache manifest category models. -- [x] Phase 1 step 6: wire analysis CIF save/load for fit state. -- [x] Phase 1 step 7: capture fit projections after fitting. 
-- [x] Phase 1 step 8: add HDF5 sidecar save/load. -- [x] Phase 1 step 9: restore result objects and display cache inputs. -- [ ] Phase 1 review gate: stop for human review. -- [ ] Phase 2 step 1: add unit tests for new categories. -- [ ] Phase 2 step 2: add CIF and project save/load tests. -- [ ] Phase 2 step 3: add display and sidecar behavior tests. -- [ ] Phase 2 step 4: run the verification commands. - -## Clarified Decisions - -These questions were answered on 2026-05-18. - -1. Implement from `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` - for now. Do not move the ADR to `accepted/` as part of this plan. -2. Add `h5py` as a direct dependency for `analysis/results.h5`. -3. Do not add a dedicated `_fit_state` category or `schema_version` - field for persisted analysis fit state. -4. Keep persisted `id` columns where the collection layer needs a single - key, but auto-generate simple numeric ids instead of derived - user-facing composite strings. -5. Expose all new fit-state categories as public read-only properties on - `Analysis`. -6. Key `_bayesian_predictive_dataset` rows by `experiment_name`, keeping - one cached predictive dataset per experiment. - -No remaining required gates are known. If implementation uncovers a new -schema conflict, dependency concern, or public API ambiguity, stop and -ask before changing this plan. - -## Agent Safety Rules - -This plan is written for a less advanced agent. Follow it literally. - -- Work on one numbered Phase 1 step at a time. -- Run `git status --short` before each step. If unrelated dirty files - overlap with the files for that step, stop and ask for guidance. -- Use `apply_patch` for manual edits. Do not write files with shell - redirection, Python scripts, or ad hoc generated output. -- Do not create or run tests during Phase 1 unless the user explicitly - asks. Phase 1 is implementation and documentation only. 
-- After each completed Phase 1 step, inspect the diff for only that - step, stage explicit paths, and commit locally before moving on. -- Use explicit paths with `git add`. Never stage the whole tree. -- Keep each commit atomic and single-purpose. -- If implementation uncovers a missing requirement, dependency problem, - schema conflict, or public API ambiguity, stop and ask. - -Before each implementation step, write down the exact files you expect -to edit. If a file is not listed in the current step and the edit is not -obviously mechanical, stop and ask before changing it. - -For every new category package in Phase 1: - -1. Create `factory.py` with a factory class following the neighboring - analysis category packages. -2. Create `default.py` with concrete `CategoryItem` or - `CategoryCollection` classes. -3. Decorate every concrete top-level category class with - `@Factory.register`. Do not decorate row item classes unless a - neighboring package already does that for the same shape. -4. Create `__init__.py` with explicit imports for the factory and - concrete classes. -5. Update `src/easydiffraction/analysis/categories/__init__.py` and - `src/easydiffraction/analysis/__init__.py` with explicit imports. -6. Keep public descriptors read-only unless the category is meant to be - user-editable. Use private `_set_` helpers for internal restore - when a public setter would create the wrong user-facing contract. -7. If a collection row has read-only public fields, do not rely on the - generic `CategoryCollection.create(**kwargs)` path. Add an explicit - `create(...)` method that builds the row and uses private helpers. - -Complexity guardrails: - -- Steps 7 and 9 are broad. Start with the smallest central hook, then - edit individual minimizers or display helpers only when the required - data is not available through that central hook. 
-- If one step needs more than six source files, more than one new public - class family beyond the planned categories, or a public API change not - named in this plan, stop and ask to split the step. -- When auditing usages or renaming symbols, search code, tests, - tutorials, and docs with `git grep -n` before editing. -- Do not fix unrelated lint, formatting, typing, or test failures while - implementing this plan. Mention them at the review gate instead. - -Required commit discipline for any AI agent following this plan: - -```text -Every completed Phase 1 implementation step must be staged with -explicit paths and committed locally before moving to the next -implementation step or the Phase 1 review gate. -``` - -Suggested branch name: - -```text -feature/analysis-cif-fit-state -``` - -## Verified Repository Facts - -- `Analysis` inherits `CategoryOwner` in - `src/easydiffraction/analysis/analysis.py`. -- `Analysis._serializable_categories()` currently emits fitting, - aliases, constraints, and active fit-mode categories. -- `analysis.as_cif` delegates to `analysis_to_cif()` in - `src/easydiffraction/io/cif/serialize.py`. -- `category_owner_to_cif()` serializes explicit `CategoryItem` and - `CategoryCollection` instances in the order returned by - `_serializable_categories()`. -- `analysis_from_cif()` restores fitting configuration, active fit-mode - sections, aliases, and constraints. -- `Project.save()` writes `analysis/analysis.cif` from - `self.analysis.as_cif` and lists all files already present under the - `analysis/` directory. -- `Project.load()` loads structures and experiments before analysis, - then resolves alias references. -- `GenericParameter` already has `fit_min`, `fit_max`, and - `fit_bounds_uncertainty_multiplier` runtime state. -- `Fitter.fit()` currently captures only `param._fit_start_value` before - fitting. It does not capture pre-fit uncertainty. 
-- `FitResults` and `BayesianFitResults` already contain most scalar - result information needed by the ADR. -- `project.display.posterior.*` currently reads from runtime - `analysis.fit_results`, not persisted caches. -- Existing verification tasks include `pixi run fix`, `pixi run check`, - `pixi run test-structure-check`, `pixi run unit-tests`, - `pixi run integration-tests`, and `pixi run script-tests`. - -## Naming Decisions For Implementation - -Use exact CIF category codes from the ADR. For Python attributes on -`Analysis`, use singular names for single-item categories and plural -names for collections: - -| Python attribute | CIF category | Shape | -| --------------------------------- | --------------------------------- | ----------- | -| `fit_parameters` | `_fit_parameter` | collection | -| `fit_result` | `_fit_result` | single item | -| `fit_parameter_correlations` | `_fit_parameter_correlation` | collection | -| `deterministic_result` | `_deterministic_result` | single item | -| `deterministic_parameter_results` | `_deterministic_parameter_result` | collection | -| `bayesian_result` | `_bayesian_result` | single item | -| `bayesian_sampler` | `_bayesian_sampler` | single item | -| `bayesian_convergence` | `_bayesian_convergence` | single item | -| `bayesian_parameter_posteriors` | `_bayesian_parameter_posterior` | collection | -| `bayesian_distribution_caches` | `_bayesian_distribution_cache` | collection | -| `bayesian_pair_caches` | `_bayesian_pair_cache` | collection | -| `bayesian_predictive_datasets` | `_bayesian_predictive_dataset` | collection | - -If this public surface feels too noisy during implementation, stop and -ask before hiding these properties from `Analysis.help()`. Do not move -the categories under another category; the ADR requires flat analysis -siblings. - -## Phase 1: Implementation - -Phase 1 is code and documentation only. Do not add or run tests here -unless explicitly instructed by the user. 
- -### Step 1: Update The ADR Suggestion With Clarifications - -Files likely to change: - -- `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Keep the ADR in `suggestions/`; do not move it to `accepted/`. -2. Amend the ADR suggestion so loops that need a persisted single-key - identity keep `id` columns, but autogenerated values are simple - numeric strings. At minimum this applies to - `_fit_parameter_correlation` and `_bayesian_pair_cache`. -3. Document that `_bayesian_predictive_dataset` remains keyed by - `experiment_name`. -4. Document that `analysis/results.h5` uses `h5py` as a direct - dependency. -5. Update this plan checklist for Step 1. - -Suggested commit message: - -```text -Clarify analysis fit-state ADR schema -``` - -### Step 2: Add Common Fit-State Categories - -Files likely to change: - -- `src/easydiffraction/analysis/categories/fit_state/` -- `src/easydiffraction/analysis/categories/fit_parameters/` -- `src/easydiffraction/analysis/categories/fit_result/` -- `src/easydiffraction/analysis/categories/fit_parameter_correlations/` -- `src/easydiffraction/analysis/categories/__init__.py` -- `src/easydiffraction/analysis/__init__.py` -- `src/easydiffraction/analysis/enums.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Add `(str, Enum)` classes for closed values: `FitResultKindEnum` with - `deterministic` and `bayesian`, and `FitCorrelationSourceEnum` with - `deterministic` and `posterior`. -2. Add category modules following existing analysis category patterns: - `default.py`, `factory.py`, and `__init__.py` with explicit imports. -3. Add `FitParameterItem` and `FitParameters` for `_fit_parameter`. Use - `_category_entry_name = 'param_unique_name'`. -4. Add `FitResult` for `_fit_result` with `result_kind`, `success`, - `message`, `iterations`, `fitting_time`, and `reduced_chi_square`. -5. 
Add `FitParameterCorrelationItem` and collection for - `_fit_parameter_correlation`. Include persisted - `_fit_parameter_correlation.id` and use - `_category_entry_name = 'id'`. Generate a simple numeric id when - callers do not provide one. -6. Normalize correlation pairs so only upper-triangle rows are stored. -7. Use `StringDescriptor`, `NumericDescriptor`, and `BoolDescriptor` as - appropriate. Avoid raw Python attributes for persisted fields. -8. Do not add JSON fields or loose tags. -9. Update imports in the package `__init__.py` files so concrete classes - are registered and importable. -10. Update this plan checklist for Step 2. - -Implementation notes: - -- The collection `add()` path assumes one key. For categories with a - persisted `id`, set `_category_entry_name = 'id'` on the item and - generate a simple numeric-string `id` before adding the item to the - collection. -- Keep CIF tag names exactly as in the ADR, for example - `_fit_parameter.param_unique_name`. -- If an enum value from CIF is invalid, warn clearly and keep the - default. Do not fail silently. - -Suggested commit message: - -```text -Add common analysis fit-state categories -``` - -### Step 3: Add Deterministic Result Categories - -Files likely to change: - -- `src/easydiffraction/analysis/categories/deterministic_result/` -- `src/easydiffraction/analysis/categories/deterministic_parameter_results/` -- `src/easydiffraction/analysis/categories/__init__.py` -- `src/easydiffraction/analysis/__init__.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Add `DeterministicResult` as a single-item category with the ADR - fields: `optimizer_name`, `method_name`, `objective_name`, - `objective_value`, `n_data_points`, `n_parameters`, - `n_free_parameters`, `degrees_of_freedom`, `covariance_available`, - and `correlation_available`. -2. 
Add `DeterministicParameterResultItem` and collection for - `_deterministic_parameter_result` with `param_unique_name`, - `final_value`, `final_uncertainty`, `at_lower_bound`, and - `at_upper_bound`. -3. Use `_category_entry_name = 'param_unique_name'` for deterministic - parameter result rows. Preserve display order from CIF loop order. -4. Do not duplicate pre-fit values here; those belong to - `_fit_parameter`. -5. Add explicit package imports. -6. Update this plan checklist for Step 3. - -Suggested commit message: - -```text -Add deterministic fit-result categories -``` - -### Step 4: Add Bayesian Metadata Categories - -Files likely to change: - -- `src/easydiffraction/analysis/categories/bayesian_result/` -- `src/easydiffraction/analysis/categories/bayesian_sampler/` -- `src/easydiffraction/analysis/categories/bayesian_convergence/` -- `src/easydiffraction/analysis/categories/bayesian_parameter_posteriors/` -- `src/easydiffraction/analysis/categories/__init__.py` -- `src/easydiffraction/analysis/__init__.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Add `BayesianResult` as a single-item category with all ADR fields. -2. Add `BayesianSampler` as a single-item category with resolved DREAM - sampler settings: `steps`, `burn`, `thin`, `pop`, `parallel`, `init`, - and `random_seed`. Persist `parallel` as the non-negative worker - count; `0` means all CPUs. -3. Add `BayesianConvergence` as a single-item category with `converged`, - `max_r_hat`, `min_ess_bulk`, `n_draws`, `n_chains`, and - `n_parameters`. -4. Add `BayesianParameterPosteriorItem` and collection with all ADR - posterior summary fields except `order_index`. Use - `_category_entry_name = 'unique_name'` and preserve parameter order - from CIF loop order. -5. Preserve the repo naming rule from prior Bayesian work: `best_sample` - and `Best posterior sample` refer to the committed sampled point, not - a continuous MAP estimate. -6. Add explicit package imports. -7. 
Update this plan checklist for Step 4. - -Suggested commit message: - -```text -Add Bayesian fit-result metadata categories -``` - -### Step 5: Add Bayesian Cache Manifest Categories - -Files likely to change: - -- `src/easydiffraction/analysis/categories/bayesian_distribution_caches/` -- `src/easydiffraction/analysis/categories/bayesian_pair_caches/` -- `src/easydiffraction/analysis/categories/bayesian_predictive_datasets/` -- `src/easydiffraction/analysis/categories/__init__.py` -- `src/easydiffraction/analysis/__init__.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Add distribution cache manifest rows keyed by `param_unique_name`. -2. Add pair cache manifest rows with persisted `_bayesian_pair_cache.id` - and `_category_entry_name = 'id'`. Generate a simple numeric id when - callers do not provide one. -3. Add predictive dataset manifest rows keyed by `experiment_name`. If - multiple predictive datasets per experiment become necessary, stop - and ask before changing the ADR schema. -4. Store only HDF5 dataset paths and shape/count metadata in CIF. -5. Do not write numerical arrays into CIF loops. -6. Add explicit package imports. -7. Update this plan checklist for Step 5. - -Suggested commit message: - -```text -Add Bayesian fit-cache manifest categories -``` - -### Step 6: Wire Analysis CIF Save And Load - -Files likely to change: - -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/io/cif/serialize.py` -- `src/easydiffraction/project/project.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Instantiate the new fit-state categories in `Analysis.__init__`. -2. Add read-only properties using the names in this plan. -3. Add `Analysis._has_persisted_fit_state()` or an equivalent helper. -4. Update `Analysis._serializable_categories()` so fit-state categories - are appended only when a fit-state projection exists. -5. 
Keep the order from the ADR: normal analysis configuration first, - then `_fit_parameter`, `_fit_result`, correlations, deterministic - categories, Bayesian categories, and cache manifests. -6. Update `analysis_from_cif()` to restore the new categories after - existing fitting, aliases, constraints, and active mode-specific - configuration. -7. Make missing fit-state categories a no-op for older saved projects. -8. Add a project-level helper to build a `{unique_name: parameter}` map - from structures and experiments. Reuse it for alias and fit-state - reference restoration if practical. -9. Update this plan checklist for Step 6. - -Suggested commit message: - -```text -Wire analysis fit-state CIF restore -``` - -### Step 7: Capture Fit Projections After Fitting - -Files likely to change: - -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/analysis/fitting.py` -- `src/easydiffraction/analysis/minimizers/base.py` -- `src/easydiffraction/analysis/minimizers/bumps.py` -- `src/easydiffraction/analysis/minimizers/bumps_dream.py` -- `src/easydiffraction/analysis/minimizers/lmfit.py` -- `src/easydiffraction/analysis/fit_helpers/reporting.py` -- `src/easydiffraction/analysis/fit_helpers/bayesian.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Add an analysis-owned method such as - `_capture_fit_parameter_state(parameters)` that records - `param_unique_name`, `fit_min`, `fit_max`, - `fit_bounds_uncertainty_multiplier`, `start_value`, and - `start_uncertainty` before the minimizer mutates parameters. -2. Do not rely on `GenericParameter._start_value`; it exists but is not - currently the value used by fit result reporting. -3. Continue supporting existing `_fit_start_value` until a separate - approved refactor replaces it. -4. Add `_store_fit_result_projection(results)` or equivalent on - `Analysis` to fill common, deterministic, and Bayesian categories - from `FitResults` or `BayesianFitResults`. -5. 
Prefer calling the analysis-owned capture and projection methods from - `Fitter.fit()` or the existing `Analysis._fit_*` methods. Only edit - individual minimizer classes when a required result field is missing - from `FitResults` or `BayesianFitResults`. -6. For deterministic fits, prefer live parameter values for calculations - and store final values only as display projections. -7. If deterministic projection values disagree with live parameter state - on load, warn and keep the live parameter state. -8. For Bayesian fits, keep `point_estimate_name = 'best_sample'` unless - the result object says otherwise. -9. Store upper-triangle parameter correlations only. -10. Clear stale fit-state categories at the start of a new fit so old - cache manifests cannot survive a new result. -11. Update this plan checklist for Step 7. - -Suggested commit message: - -```text -Capture persisted fit-state projections -``` - -### Step 8: Add HDF5 Sidecar Save And Load - -The HDF5 dependency decision is approved and already satisfied: -`pyproject.toml` already lists `h5py` directly. - -Files likely to change: - -- `pyproject.toml` -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/io/` -- `src/easydiffraction/project/project.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Confirm `h5py` remains a direct dependency; no edit is needed while - it is already present. -2. Add a small sidecar module for `analysis/results.h5`; keep imports - local if the package is heavy. -3. Write canonical posterior arrays when available: - `/posterior/parameter_samples`, `/posterior/log_posterior`, and - `/posterior/draw_index`. -4. Write cache arrays only when the corresponding manifest rows are - present. -5. Validate that the HDF5 dataset shape matches manifest metadata. -6. Make the sidecar optional for summary-only restore. If it is missing, - warn clearly and keep available CIF summaries. -7. 
Call the sidecar writer from `Project.save()` after `analysis.cif` - data has been prepared and before analysis directory contents are - listed. -8. Call the sidecar reader from `Project.load()` after - `analysis_from_cif()` and before restored display state is used. -9. Do not persist backend runtime objects, DREAM drivers, raw engine - results, or ArviZ `InferenceData`. -10. Update this plan checklist for Step 8. - -Suggested commit message: - -```text -Persist Bayesian fit arrays in results sidecar -``` - -### Step 9: Restore Result Objects And Display Cache Inputs - -Files likely to change: - -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/analysis/fit_helpers/reporting.py` -- `src/easydiffraction/analysis/fit_helpers/bayesian.py` -- `src/easydiffraction/project/display.py` -- `src/easydiffraction/display/plotting.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Actions: - -1. Rebuild a lightweight `FitResults` or `BayesianFitResults` from the - persisted categories after project load. -2. Attach restored live parameter objects where their unique names are - still present. -3. Keep backend runtime fields such as `engine_result` as `None`. -4. Make `analysis.display.fit_results()` work from the restored result - projection. -5. First restore non-plotting result behavior and correlation summaries. - Only then add cache-aware posterior distribution, pair, and - predictive plotting. -6. Update correlation plotting so it can use - `_fit_parameter_correlation` when raw covariance or posterior samples - are not available. -7. Keep correlation heatmaps compact. Do not replace the heatmap path - with many per-cell Plotly traces. -8. Make posterior distribution, pair, and predictive display methods - prefer valid persisted cache arrays when available. -9. If a requested cache is unavailable or invalid, warn clearly and use - the existing recomputation path only when enough runtime data exists. -10. 
Do not make display methods recompute KDE, contours, or predictive - bands when valid cache arrays were restored. -11. If cache-aware display requires a new helper object or cache API not - named in this plan, stop and ask before adding it. -12. Update this plan checklist for Step 9. - -Suggested commit message: - -```text -Restore fit results from saved analysis state -``` - -### Phase 1 Review Gate - -After Step 9, stop. Present the implementation for human review before -creating or running tests. Mention any deviations from this plan and any -open design questions that appeared during implementation. - -Suggested commit message if only the plan checklist changes at the gate: - -```text -Update analysis fit-state plan progress -``` - -## Phase 2: Verification - -Only start Phase 2 after the user approves the Phase 1 implementation. - -### Step 1: Add Category Unit Tests - -Files likely to change: - -- `tests/unit/easydiffraction/analysis/categories/` -- `tests/unit/easydiffraction/analysis/test_enums.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Tests to add: - -1. Each new category has the expected CIF tags. -2. Each descriptor validates basic type and enum constraints. -3. Empty collections serialize to an empty string. -4. Collections rebuild indexes after `from_cif()`. -5. Persisted-id collections reject duplicate ids and normalize duplicate - pair rows. -6. Correlation rows store only the upper triangle excluding the - diagonal. - -### Step 2: Add CIF And Project Save/Load Tests - -Files likely to change: - -- `tests/unit/easydiffraction/io/cif/` -- `tests/unit/easydiffraction/project/test_project_save.py` -- `tests/unit/easydiffraction/project/test_project_load.py` -- `tests/functional/test_fitting_workflow.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Tests to add: - -1. A project with no fit state does not emit empty fit-state loops. -2. Deterministic fit-state categories round-trip through - `analysis/analysis.cif`. -3. 
Fit bounds, bound provenance, start value, and start uncertainty - round-trip by parameter unique name. -4. Live structure or experiment parameter values remain the calculation - source of truth after load. -5. Mismatched deterministic result projections warn and keep live - parameter values. -6. Unknown fit-state schema versions warn clearly. -7. Older projects without fit-state categories still load. - -### Step 3: Add Bayesian Sidecar And Display Tests - -Files likely to change: - -- `tests/unit/easydiffraction/analysis/fit_helpers/` -- `tests/unit/easydiffraction/display/test_plotting.py` -- `tests/unit/easydiffraction/project/test_display.py` -- `tests/unit/easydiffraction/project/test_project_load.py` -- `docs/dev/plans/analysis-cif-fit-state.md` - -Tests to add: - -1. Bayesian summary-only restore works when `analysis/results.h5` is - missing and emits a clear warning. -2. Posterior sample arrays round-trip through `analysis/results.h5`. -3. Manifest rows and HDF5 dataset shapes are validated. -4. Posterior distributions use cache arrays when valid. -5. Posterior pair plots use cache arrays when valid and preserve sample - pairing semantics for contours. -6. Posterior predictive displays use saved predictive arrays when valid. -7. Recompute paths remain available when runtime posterior samples are - present but caches are absent. - -### Step 4: Run Verification Commands - -Run in this order from the repository root: - -```text -pixi run test-structure-check -pixi run fix -pixi run check -pixi run unit-tests -pixi run integration-tests -pixi run script-tests -``` - -Notes: - -- `pixi run fix` may regenerate `docs/dev/package-structure/full.md` and - `docs/dev/package-structure/short.md`. Accept those generated changes - if the command produced them. -- If a command fails for an unrelated existing problem, do not fix - unrelated code. Record the failure and ask for guidance. 
- -## Files Most Likely To Change - -Implementation files: - -- `pyproject.toml` -- `src/easydiffraction/analysis/analysis.py` -- `src/easydiffraction/analysis/enums.py` -- `src/easydiffraction/analysis/fitting.py` -- `src/easydiffraction/analysis/categories/` -- `src/easydiffraction/analysis/fit_helpers/reporting.py` -- `src/easydiffraction/analysis/fit_helpers/bayesian.py` -- `src/easydiffraction/analysis/minimizers/base.py` -- `src/easydiffraction/analysis/minimizers/bumps.py` -- `src/easydiffraction/analysis/minimizers/bumps_dream.py` -- `src/easydiffraction/analysis/minimizers/lmfit.py` -- `src/easydiffraction/io/cif/serialize.py` -- `src/easydiffraction/project/project.py` -- `src/easydiffraction/project/display.py` -- `src/easydiffraction/display/plotting.py` - -Test files: - -- `tests/unit/easydiffraction/analysis/` -- `tests/unit/easydiffraction/io/cif/` -- `tests/unit/easydiffraction/project/` -- `tests/unit/easydiffraction/display/` -- `tests/functional/test_fitting_workflow.py` -- `tests/integration/fitting/` - -Documentation files: - -- `docs/dev/adrs/suggestions/analysis-cif-fit-state.md` -- `docs/dev/plans/analysis-cif-fit-state.md` - -## Do Not Change Without Approval - -- Do not serialize posterior summaries inside structure or experiment - CIF files. -- Do not rename `Project`, `project.cif`, or the existing saved project - layout. -- Do not remove the legacy `analysis.cif` root fallback in - `Project.load()`. -- Do not add a generic posterior-minimizer capability abstraction until - there is a second concrete posterior-capable minimizer. -- Do not change tutorial notebooks directly. Edit tutorial `.py` files - and run notebook preparation only if the user asks for tutorial work. -- Do not persist raw backend result objects, optimizer instances, DREAM - drivers, or ArviZ objects. 
- -## Suggested Pull Request - -Title: Persist analysis fit state in saved projects - -Description: Save fit bounds, result summaries, and Bayesian result -manifests with projects so users can reopen fitted analyses with the -same fit-state and posterior display context available. diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 63fe7f80..9256eb20 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -1673,6 +1673,8 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None posterior_samples = results.posterior_samples if posterior_samples is None: return + + self._store_bayesian_plot_cache_projection(results) if len(posterior_samples.parameter_names) <= 1: return @@ -1683,7 +1685,6 @@ def _store_bayesian_result_projection(self, results: BayesianFitResults) -> None correlation_matrix=correlation_matrix, source_kind=FitCorrelationSourceEnum.POSTERIOR, ) - self._store_bayesian_plot_cache_projection(results) def _store_fit_result_projection( self, diff --git a/src/easydiffraction/analysis/categories/fit_state/__init__.py b/src/easydiffraction/analysis/categories/fit_state/__init__.py deleted file mode 100644 index 43b065a9..00000000 --- a/src/easydiffraction/analysis/categories/fit_state/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause - -from easydiffraction.analysis.categories.fit_state.default import FitState -from easydiffraction.analysis.categories.fit_state.factory import FitStateFactory diff --git a/src/easydiffraction/analysis/categories/fit_state/default.py b/src/easydiffraction/analysis/categories/fit_state/default.py deleted file mode 100644 index 7ec6dac2..00000000 --- a/src/easydiffraction/analysis/categories/fit_state/default.py +++ /dev/null @@ -1,42 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# 
SPDX-License-Identifier: BSD-3-Clause -"""Fit-state schema metadata category.""" - -from __future__ import annotations - -from easydiffraction.analysis.categories.fit_state.factory import FitStateFactory -from easydiffraction.core.category import CategoryItem -from easydiffraction.core.metadata import TypeInfo -from easydiffraction.core.validation import AttributeSpec -from easydiffraction.core.variable import NumericDescriptor -from easydiffraction.io.cif.handler import CifHandler - - -@FitStateFactory.register -class FitState(CategoryItem): - """Persisted fit-state schema metadata.""" - - _category_code = 'fit_state' - - type_info = TypeInfo( - tag='default', - description='Persisted fit-state schema metadata', - ) - - def __init__(self) -> None: - super().__init__() - self._schema_version = NumericDescriptor( - name='schema_version', - description='Persisted fit-state schema version.', - value_spec=AttributeSpec(default=1), - cif_handler=CifHandler(names=['_fit_state.schema_version']), - ) - - @property - def schema_version(self) -> NumericDescriptor: - """Persisted fit-state schema version.""" - return self._schema_version - - def _set_schema_version(self, value: float) -> None: - """Set the fit-state schema version for internal callers.""" - self._schema_version.value = value diff --git a/src/easydiffraction/analysis/categories/fit_state/factory.py b/src/easydiffraction/analysis/categories/fit_state/factory.py deleted file mode 100644 index 1705bcc8..00000000 --- a/src/easydiffraction/analysis/categories/fit_state/factory.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-FileCopyrightText: 2026 EasyScience contributors -# SPDX-License-Identifier: BSD-3-Clause -"""Fit-state factory.""" - -from __future__ import annotations - -from typing import ClassVar - -from easydiffraction.core.factory import FactoryBase - - -class FitStateFactory(FactoryBase): - """Create fit-state categories by tag.""" - - _default_rules: ClassVar[dict] = { - frozenset(): 'default', - } diff 
--git a/src/easydiffraction/io/results_sidecar.py b/src/easydiffraction/io/results_sidecar.py index a8157b7c..36d442c7 100644 --- a/src/easydiffraction/io/results_sidecar.py +++ b/src/easydiffraction/io/results_sidecar.py @@ -11,6 +11,7 @@ from easydiffraction.utils.logging import log +_DEFAULT_SIDECAR_FILE_NAME = 'results.h5' _POSTERIOR_PARAMETER_SAMPLES_PATH = '/posterior/parameter_samples' _POSTERIOR_LOG_POSTERIOR_PATH = '/posterior/log_posterior' _POSTERIOR_DRAW_INDEX_PATH = '/posterior/draw_index' @@ -27,17 +28,39 @@ def _sidecar_file_name(analysis: object) -> str: """Return the configured sidecar file name for an analysis.""" bayesian_result = getattr(analysis, 'bayesian_result', None) if bayesian_result is None: - return 'results.h5' + return _DEFAULT_SIDECAR_FILE_NAME file_name = bayesian_result.sidecar_file.value - if isinstance(file_name, str) and file_name.strip(): - return file_name - return 'results.h5' + if not isinstance(file_name, str) or not file_name.strip(): + return _DEFAULT_SIDECAR_FILE_NAME + + normalized_name = file_name.strip() + normalized_path = Path(normalized_name) + if ( + normalized_path.is_absolute() + or normalized_path.name in {'', '.', '..'} + or normalized_path.name != normalized_name + ): + log.warning( + 'Ignoring Bayesian sidecar file path outside the analysis directory: ' + f'{normalized_name!r}. Using {_DEFAULT_SIDECAR_FILE_NAME!r} instead.' + ) + return _DEFAULT_SIDECAR_FILE_NAME + + return normalized_path.name def _sidecar_path(*, analysis: object, analysis_dir: Path) -> Path: """Return the results sidecar path inside the analysis directory.""" - return analysis_dir / _sidecar_file_name(analysis) + resolved_analysis_dir = analysis_dir.resolve() + sidecar_path = (resolved_analysis_dir / _sidecar_file_name(analysis)).resolve() + if sidecar_path.parent != resolved_analysis_dir: + log.warning( + 'Resolved Bayesian sidecar file path escaped the analysis directory. ' + f'Using {_DEFAULT_SIDECAR_FILE_NAME!r} instead.' 
+ ) + return resolved_analysis_dir / _DEFAULT_SIDECAR_FILE_NAME + return sidecar_path def _should_use_sidecar(analysis: object) -> bool: diff --git a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py index 13072544..8a741fef 100644 --- a/tests/unit/easydiffraction/analysis/test_analysis_coverage.py +++ b/tests/unit/easydiffraction/analysis/test_analysis_coverage.py @@ -2,6 +2,10 @@ # SPDX-License-Identifier: BSD-3-Clause """Additional unit tests for analysis.py to cover patch gaps.""" +from types import SimpleNamespace + +import numpy as np + def _make_project(): class ExpCol: @@ -199,3 +203,104 @@ class FakeResults: assert 'expt1' in a._parameter_snapshots assert a._parameter_snapshots['expt1']['p1']['value'] == 1.23 assert a._parameter_snapshots['expt1']['p1']['uncertainty'] == 0.01 + + +class TestBayesianProjection: + def test_single_parameter_projection_persists_distribution_and_predictive_caches(self): + from easydiffraction.analysis.analysis import Analysis + from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults + from easydiffraction.analysis.fit_helpers.bayesian import PosteriorParameterSummary + from easydiffraction.analysis.fit_helpers.bayesian import PosteriorPredictiveSummary + from easydiffraction.analysis.fit_helpers.bayesian import PosteriorSamples + + class Plotter: + @staticmethod + def _posterior_parameter_bounds(*, fit_results, parameter_name): + del fit_results, parameter_name + return 0.5, 1.5 + + @staticmethod + def _posterior_density_curve(values, *, lower_bound, upper_bound): + del values + return ( + np.asarray([lower_bound, upper_bound], dtype=float), + np.asarray([0.25, 0.75], dtype=float), + ) + + @staticmethod + def _resolve_x_axis(experiment_type, _axis_name): + del experiment_type + return np.asarray([1.0, 2.0], dtype=float), 'two_theta', None, None, None + + @staticmethod + def _build_posterior_predictive_summary( + *, + fit_results, + 
experiment, + expt_name, + x_axis, + include_draws, + ): + del fit_results, experiment, x_axis, include_draws + return PosteriorPredictiveSummary( + experiment_name=expt_name, + x_axis_name='two_theta', + x=np.asarray([1.0, 2.0], dtype=float), + best_sample_prediction=np.asarray([3.0, 4.0], dtype=float), + lower_95=np.asarray([2.5, 3.5], dtype=float), + upper_95=np.asarray([3.5, 4.5], dtype=float), + ) + + class Experiments: + names = ['hrpt'] + + def __getitem__(self, name): + del name + return SimpleNamespace(type='powder') + + project = SimpleNamespace( + experiments=Experiments(), + structures=object(), + rendering=SimpleNamespace(plotter=Plotter()), + _varname='proj', + ) + analysis = Analysis(project=project) + + results = BayesianFitResults( + success=True, + parameters=[], + posterior_samples=PosteriorSamples( + parameter_names=['alpha'], + parameter_samples=np.asarray([[[1.0]], [[1.2]]], dtype=float), + ), + posterior_parameter_summaries=[ + PosteriorParameterSummary( + unique_name='alpha', + display_name='Alpha', + best_sample_value=1.2, + median=1.1, + standard_deviation=0.1, + interval_68=(1.0, 1.2), + interval_95=(0.9, 1.3), + ) + ], + posterior_predictive={}, + sampler_settings={}, + convergence_diagnostics={}, + ) + + analysis._store_bayesian_result_projection(results) + + assert analysis.bayesian_result.has_distribution_cache.value is True + assert analysis.bayesian_result.has_pair_cache.value is False + assert analysis.bayesian_result.has_posterior_predictive.value is True + assert np.allclose( + analysis._persisted_fit_state_sidecar['distribution_caches']['alpha']['x'], + np.asarray([0.5, 1.5], dtype=float), + ) + assert np.allclose( + analysis._persisted_fit_state_sidecar['predictive_datasets']['hrpt'][ + 'best_sample_prediction' + ], + np.asarray([3.0, 4.0], dtype=float), + ) diff --git a/tests/unit/easydiffraction/io/test_results_sidecar.py b/tests/unit/easydiffraction/io/test_results_sidecar.py index 68021f85..556e21e5 100644 --- 
a/tests/unit/easydiffraction/io/test_results_sidecar.py +++ b/tests/unit/easydiffraction/io/test_results_sidecar.py @@ -102,3 +102,37 @@ def test_read_analysis_results_sidecar_warns_when_expected_file_is_missing(tmp_p assert analysis._persisted_fit_state_sidecar == {} assert any('Expected Bayesian results sidecar is missing' in warning for warning in warnings) + + +def test_sidecar_path_traversal_falls_back_to_local_results_file(tmp_path, monkeypatch): + from easydiffraction.io import results_sidecar as results_sidecar_mod + + analysis_dir = Path(tmp_path) / 'analysis' + external_sidecar = Path(tmp_path) / 'outside.h5' + analysis = _analysis_with_predictive_sidecar() + analysis.bayesian_result._set_sidecar_file('../outside.h5') + + warnings: list[str] = [] + monkeypatch.setattr(results_sidecar_mod.log, 'warning', warnings.append) + + results_sidecar_mod.write_analysis_results_sidecar( + analysis=analysis, + analysis_dir=analysis_dir, + ) + + assert (analysis_dir / 'results.h5').is_file() + assert not external_sidecar.exists() + + restored = _analysis_with_predictive_sidecar() + restored.fit_results = None + restored.bayesian_result._set_sidecar_file('../outside.h5') + results_sidecar_mod.read_analysis_results_sidecar( + analysis=restored, + analysis_dir=analysis_dir, + ) + + assert 'predictive_datasets' in restored._persisted_fit_state_sidecar + assert any( + 'Ignoring Bayesian sidecar file path outside the analysis directory' in warning + for warning in warnings + ) From 0ae7d57f2e5826d13cb997029e6ed1a502db2f44 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 00:10:19 +0200 Subject: [PATCH 43/72] Reorder CLI command definitions --- src/easydiffraction/__main__.py | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index 79dd262a..dbaee349 100644 --- a/src/easydiffraction/__main__.py +++ b/src/easydiffraction/__main__.py @@ -123,18 
+123,18 @@ def main( # Otherwise, let the chosen subcommand execute. -@app.command('list-tutorials') -def list_tutorials() -> None: - """List available tutorial notebooks.""" - ed.list_tutorials() - - @app.command('list-data') def list_data() -> None: """List available example data and project archives.""" ed.list_data() +@app.command('list-tutorials') +def list_tutorials() -> None: + """List available tutorial notebooks.""" + ed.list_tutorials() + + @app.command('download-data') def download_data( id: int = typer.Argument(..., help='Data ID to download.'), @@ -194,6 +194,18 @@ def download_all_tutorials( ed.download_all_tutorials(destination=destination, overwrite=overwrite) +@app.command('display') +def display( + project_dir: str = typer.Argument( + ..., + help='Path to the project directory (must contain project.cif).', + ), +) -> None: + """Display the typical outputs for a saved project state.""" + project = _load_project(project_dir) + _display_project_outputs(project) + + @app.command('fit') def fit( project_dir: str = typer.Argument( @@ -214,18 +226,6 @@ def fit( _display_fit_outputs(project) -@app.command('display') -def display( - project_dir: str = typer.Argument( - ..., - help='Path to the project directory (must contain project.cif).', - ), -) -> None: - """Display the typical outputs for a saved project state.""" - project = _load_project(project_dir) - _display_project_outputs(project) - - @app.command('undo') def undo( project_dir: str = typer.Argument( From 9671b863652d039fcca3919cdc93b4f773ef86d0 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 00:35:30 +0200 Subject: [PATCH 44/72] Restore saved DREAM sampler settings on project load --- src/easydiffraction/analysis/analysis.py | 33 +++++++++ src/easydiffraction/analysis/fitting.py | 25 ++----- .../analysis/minimizers/base.py | 29 +++++++- .../analysis/minimizers/bumps_dream.py | 13 ++-- src/easydiffraction/io/cif/serialize.py | 1 + .../fitting/test_bumps_dream_support.py | 5 
+- .../analysis/fit_helpers/test_tracking.py | 30 ++++++++ .../easydiffraction/analysis/test_fitting.py | 74 +++++++++++++++++++ .../project/test_project_load.py | 24 ++++++ 9 files changed, 204 insertions(+), 30 deletions(-) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 9256eb20..24704ed4 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -50,6 +50,7 @@ from easydiffraction.analysis.fit_helpers.bayesian import PosteriorSamples from easydiffraction.analysis.fit_helpers.reporting import FitResults from easydiffraction.analysis.fitting import Fitter +from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.core.category_owner import CategoryOwner from easydiffraction.core.guard import _apply_help_filter from easydiffraction.core.singleton import ConstraintsHandler @@ -590,6 +591,37 @@ def _restore_live_parameter_state(self, param_map: dict[str, Parameter]) -> None continue parameter.uncertainty = float(row.uncertainty.value) + def _sync_live_minimizer_from_persisted_fit_state(self) -> None: + """Apply saved sampler settings to the live minimizer.""" + if not self._has_persisted_fit_state(): + return + + if self.fit_result.result_kind.value != FitResultKindEnum.BAYESIAN.value: + return + + if self.fitting.minimizer_type.value != MinimizerTypeEnum.BUMPS_DREAM.value: + return + + minimizer = self.fitting.minimizer + if minimizer is None: + return + + steps = int(self.bayesian_sampler.steps.value) + thin = int(self.bayesian_sampler.thin.value) + pop = int(self.bayesian_sampler.pop.value) + if steps <= 0 or thin <= 0 or pop <= 0: + return + + minimizer.steps = steps + minimizer.burn = int(self.bayesian_sampler.burn.value) + minimizer.thin = thin + minimizer.pop = pop + minimizer.parallel = int(self.bayesian_sampler.parallel.value) + + init_value = str(self.bayesian_sampler.init.value) + if init_value: + minimizer.init = 
init_value + def _restored_fit_parameters(self, param_map: dict[str, Parameter]) -> list[Parameter]: """Return live parameters in the persisted fit-result order.""" restored_parameters: list[Parameter] = [] @@ -1748,6 +1780,7 @@ def _prepare_fit_run(self) -> tuple[VerbosityEnum, object, object] | None: # Apply constraints before fitting so that user-constrained # parameters are marked and excluded from the free parameter # list built by the fitter. + self._sync_live_minimizer_from_persisted_fit_state() self._update_categories() return verb, structures, experiments diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index f446e858..69f66c27 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -8,14 +8,12 @@ import numpy as np -from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults from easydiffraction.analysis.fit_helpers.metrics import get_reliability_inputs from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import MinimizerFactory from easydiffraction.core.variable import Parameter from easydiffraction.datablocks.experiment.item.base import intensity_category_for from easydiffraction.utils.enums import VerbosityEnum -from easydiffraction.utils.logging import log if TYPE_CHECKING: from easydiffraction.analysis.fit_helpers.reporting import FitResults @@ -121,34 +119,23 @@ def _postprocess_fit_results( analysis: object, experiments: list[ExperimentBase], fitted_parameters: list[Parameter], - ) -> bool: + ) -> None: """Populate result fields and persist fit projections.""" if self.results is None: - return False + return self.results.message = _resolve_fit_result_message(self.results) self.results.iterations = _resolve_fit_result_iterations(self.results) self.results.chi_square = _resolve_fit_result_chi_square(self.results) if analysis is None: - return False - - warn_poorly_mixed = 
False - if isinstance(self.results, BayesianFitResults): - warn_poorly_mixed = not self.results.convergence_diagnostics.get( - 'converged', - True, - ) - self.minimizer.tracker.start_sampler_post_processing( - log_posterior=self.results.best_log_posterior, - ) + return analysis._store_fit_result_projection( self.results, experiments=experiments, fitted_parameters=fitted_parameters, ) - return warn_poorly_mixed def fit( self, @@ -225,12 +212,13 @@ def fit( params, objective_function, verbosity=verbosity, + finalize_tracking=False, use_physical_limits=use_physical_limits, random_seed=random_seed, ) try: - warn_poorly_mixed = self._postprocess_fit_results( + self._postprocess_fit_results( analysis=analysis, experiments=experiments, fitted_parameters=params, @@ -238,9 +226,6 @@ def fit( finally: self.minimizer._stop_tracking() - if warn_poorly_mixed: - log.warning('Convergence diagnostics indicate the posterior may be poorly mixed.') - def _process_fit_results( self, structures: Structures, diff --git a/src/easydiffraction/analysis/minimizers/base.py b/src/easydiffraction/analysis/minimizers/base.py index aecbba02..5579e86c 100644 --- a/src/easydiffraction/analysis/minimizers/base.py +++ b/src/easydiffraction/analysis/minimizers/base.py @@ -42,6 +42,8 @@ def __init__( self._best_iteration: int | None = None self._fitting_time: float | None = None self._resolved_random_seed: int | None = None + self._tracking_active: bool = False + self._deferred_warning_messages: list[str] = [] self.tracker: FitProgressTracker = FitProgressTracker() def _start_tracking( @@ -61,13 +63,34 @@ def _start_tracking( """ self.tracker.reset() self.tracker._verbosity = verbosity + self._tracking_active = True + self._deferred_warning_messages = [] self.tracker.start_tracking(minimizer_name, mode=self._tracking_mode()) self.tracker.start_timer() def _stop_tracking(self) -> None: """Stop timer and finalize tracking.""" + if not self._tracking_active: + self._emit_deferred_warnings() + return + + 
self._tracking_active = False self.tracker.stop_timer() self.tracker.finish_tracking() + self._emit_deferred_warnings() + + def _warn_after_tracking(self, message: str) -> None: + """Log immediately or defer a warning until tracking stops.""" + if self._tracking_active: + self._deferred_warning_messages.append(message) + return + + log.warning(message) + + def _emit_deferred_warnings(self) -> None: + """Flush warnings deferred during live progress display.""" + while self._deferred_warning_messages: + log.warning(self._deferred_warning_messages.pop(0)) @staticmethod def _tracking_mode() -> str: @@ -300,6 +323,7 @@ def fit( objective_function: Callable[..., object], verbosity: VerbosityEnum = VerbosityEnum.FULL, *, + finalize_tracking: bool = True, use_physical_limits: bool = False, random_seed: int | None = None, ) -> FitResults: @@ -315,6 +339,8 @@ def fit( arguments. verbosity : VerbosityEnum, default=VerbosityEnum.FULL Console output verbosity. + finalize_tracking : bool, default=True + Whether to stop and finalize live tracking before returning. 
use_physical_limits : bool, default=False When ``True``, fall back to physical limits from the value spec for parameters whose ``fit_min``/``fit_max`` are @@ -345,7 +371,8 @@ def fit( raw_result = self._run_solver(objective_function, **solver_args) return self._finalize_fit(parameters, raw_result) finally: - self._stop_tracking() + if finalize_tracking: + self._stop_tracking() def _objective_function( self, diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 9d935776..cf28be55 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -30,7 +30,6 @@ from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import MinimizerFactory from easydiffraction.core.metadata import TypeInfo -from easydiffraction.utils.logging import log DEFAULT_METHOD = 'dream' DEFAULT_MAX_ITERATIONS = 3000 @@ -623,6 +622,8 @@ def _run_solver( sampler_completed=False, ) + self.tracker.start_sampler_post_processing() + return self._build_success_result( context=context, raw_state=driver_result.raw_state, @@ -731,7 +732,7 @@ def _build_mapper(self, problem: FitProblem) -> object | None: return None if self._requires_serial_mapper_for_spawn_main_module(): - log.warning( + self._warn_after_tracking( 'DREAM parallel evaluation requires an import-safe main ' 'module on spawn-based multiprocessing; falling back to ' 'serial execution.' @@ -747,7 +748,7 @@ def _build_mapper(self, problem: FitProblem) -> object | None: try: if not can_pickle(problem): - log.warning( + self._warn_after_tracking( 'DREAM parallel evaluation requires a picklable ' 'problem; falling back to serial execution.' 
) @@ -758,7 +759,7 @@ def _build_mapper(self, problem: FitProblem) -> object | None: message = str(error) if 'bootstrapping phase' not in message: raise - log.warning( + self._warn_after_tracking( 'DREAM parallel evaluation requires an import-safe main ' 'module on spawn-based multiprocessing; falling back to ' 'serial execution.' @@ -888,7 +889,9 @@ def _build_success_result( ) convergence_diagnostics = compute_convergence_diagnostics(posterior_samples) if not convergence_diagnostics.get('converged', True): - log.warning('Convergence diagnostics indicate the posterior may be poorly mixed.') + self._warn_after_tracking( + 'Convergence diagnostics indicate the posterior may be poorly mixed.' + ) posterior_parameter_summaries = summarize_posterior_parameters( parameter_names=context.parameter_names, posterior_samples=posterior_samples, diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index a47d5ef6..1bc86a88 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -608,6 +608,7 @@ def _restore_bayesian_fit_state(analysis: object, block: object) -> None: analysis.bayesian_distribution_caches.from_cif(block) analysis.bayesian_pair_caches.from_cif(block) analysis.bayesian_predictive_datasets.from_cif(block) + analysis._sync_live_minimizer_from_persisted_fit_state() def _restore_persisted_fit_state(analysis: object, block: object) -> None: diff --git a/tests/integration/fitting/test_bumps_dream_support.py b/tests/integration/fitting/test_bumps_dream_support.py index c09f0b92..a824347e 100644 --- a/tests/integration/fitting/test_bumps_dream_support.py +++ b/tests/integration/fitting/test_bumps_dream_support.py @@ -277,10 +277,7 @@ def test_build_mapper_falls_back_for_serial_and_unpicklable(monkeypatch): monkeypatch.setattr( 'easydiffraction.analysis.minimizers.bumps_dream.can_pickle', lambda problem: False ) - monkeypatch.setattr( - 
'easydiffraction.analysis.minimizers.bumps_dream.log.warning', - lambda message: warnings.append(message), - ) + monkeypatch.setattr(minimizer, '_warn_after_tracking', warnings.append) assert minimizer._build_mapper('problem') is None assert warnings == [ diff --git a/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py b/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py index 877a46b1..89a9c86b 100644 --- a/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py +++ b/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py @@ -77,3 +77,33 @@ def test_tracker_fit_adds_timed_rows_and_resets_counter(monkeypatch): ] assert tracker._last_progress_time == 12.2 assert tracker._previous_chi2 == 4.0 + + +def test_tracker_sampler_post_processing_adds_final_status_row(): + from easydiffraction.analysis.fit_helpers.tracking import FitProgressTracker + from easydiffraction.analysis.fit_helpers.tracking import SamplerProgressUpdate + + tracker = FitProgressTracker() + tracker.start_tracking('dream', mode='sampling') + tracker.start_timer() + tracker.track_sampler_progress( + SamplerProgressUpdate( + iteration=10, + total_iterations=10, + phase='sampling', + progress_percent=100.0, + log_posterior=-3.0, + reduced_chi2=1.0, + elapsed_time=5.0, + force_report=True, + ) + ) + + tracker.start_sampler_post_processing() + tracker.stop_timer() + tracker.finish_tracking() + + assert tracker._df_rows[-1][0] == '' + assert tracker._df_rows[-1][1] == '' + assert tracker._df_rows[-1][3] == '' + assert tracker._df_rows[-1][4] == 'post-processing' diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py index c53a6754..86e5a079 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting.py +++ b/tests/unit/easydiffraction/analysis/test_fitting.py @@ -1,6 +1,10 @@ # SPDX-FileCopyrightText: 2025 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +from types import 
SimpleNamespace + +from easydiffraction.utils.enums import VerbosityEnum + def test_module_import(): import easydiffraction.analysis.fitting as MUT @@ -108,3 +112,73 @@ def mock_process(*args, **kwargs): 'Use Analysis.show_fit_results() instead.' ) assert f.results is not None, 'Fitter.fit() should still set results' + + +def test_fitter_fit_defers_minimizer_tracking_until_postprocessing(monkeypatch): + from easydiffraction.analysis.fit_helpers.bayesian import BayesianFitResults + from easydiffraction.analysis.fitting import Fitter + + class DummyParam: + value = 1.0 + uncertainty = 0.1 + _fit_start_value = None + + class DummyStructure: + _need_categories_update = False + + def _update_categories(self): + return None + + class DummyStructures: + def __iter__(self): + return iter([DummyStructure()]) + + class DummyExperiment: + parameters = [] + + class DummyMin: + def __init__(self): + self.fit_calls: list[dict[str, object]] = [] + self.stop_calls = 0 + self.tracker = SimpleNamespace(track=lambda residuals, parameters: residuals) + + def fit(self, params, obj, verbosity=None, **kwargs): + del params, obj + self.fit_calls.append({'verbosity': verbosity, **kwargs}) + return BayesianFitResults( + success=True, + reduced_chi_square=1.2, + convergence_diagnostics={'converged': False}, + sampler_settings={'steps': 300}, + best_log_posterior=-10.0, + ) + + def _stop_tracking(self): + self.stop_calls += 1 + + analysis_events: list[str] = [] + analysis = SimpleNamespace( + _capture_fit_parameter_state=lambda params: analysis_events.append('capture'), + _store_fit_result_projection=lambda results, experiments, fitted_parameters: analysis_events.append( + 'store' + ), + ) + + fitter = Fitter() + fitter.minimizer = DummyMin() + monkeypatch.setattr( + fitter, + '_collect_fit_parameters', + lambda structures, experiments: [DummyParam()], + ) + + fitter.fit( + structures=DummyStructures(), + experiments=[DummyExperiment()], + analysis=analysis, + verbosity=VerbosityEnum.FULL, + 
) + + assert fitter.minimizer.fit_calls[0]['finalize_tracking'] is False + assert fitter.minimizer.stop_calls == 1 + assert analysis_events == ['capture', 'store'] diff --git a/tests/unit/easydiffraction/project/test_project_load.py b/tests/unit/easydiffraction/project/test_project_load.py index f60056ed..f1a71b8f 100644 --- a/tests/unit/easydiffraction/project/test_project_load.py +++ b/tests/unit/easydiffraction/project/test_project_load.py @@ -172,6 +172,30 @@ def test_round_trips_deterministic_fit_state_and_keeps_live_parameter_values(sel assert loaded_parameter._fit_start_uncertainty == 0.02 assert loaded_parameter.uncertainty == 0.07 + def test_round_trips_bayesian_sampler_settings_to_live_dream_minimizer(self, tmp_path): + original = Project(name='bayes_state') + original.analysis.fitting.minimizer_type = 'bumps (dream)' + original.analysis.fit_result._set_result_kind('bayesian') + original.analysis.bayesian_sampler._set_steps(300) + original.analysis.bayesian_sampler._set_burn(60) + original.analysis.bayesian_sampler._set_thin(2) + original.analysis.bayesian_sampler._set_pop(8) + original.analysis.bayesian_sampler._set_parallel(0) + original.analysis.bayesian_sampler._set_init('lhs') + original.analysis._set_has_persisted_fit_state(value=True) + original.save_as(str(tmp_path / 'proj')) + + loaded = Project.load(str(tmp_path / 'proj')) + minimizer = loaded.analysis.fitting.minimizer + + assert minimizer is not None + assert minimizer.steps == 300 + assert minimizer.burn == 60 + assert minimizer.thin == 2 + assert minimizer.pop == 8 + assert minimizer.parallel == 0 + assert minimizer.init.value == 'lhs' + class TestLoadAnalysisCifFallback: """Load falls back from analysis/analysis.cif to analysis.cif at root.""" From d1c7f97ca78a723b47aa05ed83173969892dc873 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 01:07:17 +0200 Subject: [PATCH 45/72] Restore legacy DREAM sampler settings before CLI fit --- src/easydiffraction/analysis/analysis.py 
| 15 ++-- src/easydiffraction/io/cif/serialize.py | 72 ++++++++++++------- .../io/cif/test_serialize_more.py | 13 ++++ .../project/test_project_config.py | 32 +++++++++ .../project/test_project_load.py | 18 +++++ 5 files changed, 119 insertions(+), 31 deletions(-) diff --git a/src/easydiffraction/analysis/analysis.py b/src/easydiffraction/analysis/analysis.py index 24704ed4..eccbf28d 100644 --- a/src/easydiffraction/analysis/analysis.py +++ b/src/easydiffraction/analysis/analysis.py @@ -607,15 +607,20 @@ def _sync_live_minimizer_from_persisted_fit_state(self) -> None: return steps = int(self.bayesian_sampler.steps.value) - thin = int(self.bayesian_sampler.thin.value) - pop = int(self.bayesian_sampler.pop.value) - if steps <= 0 or thin <= 0 or pop <= 0: + if steps <= 0: return minimizer.steps = steps minimizer.burn = int(self.bayesian_sampler.burn.value) - minimizer.thin = thin - minimizer.pop = pop + + thin = int(self.bayesian_sampler.thin.value) + if thin > 0: + minimizer.thin = thin + + pop = int(self.bayesian_sampler.pop.value) + if pop > 0: + minimizer.pop = pop + minimizer.parallel = int(self.bayesian_sampler.parallel.value) init_value = str(self.bayesian_sampler.init.value) diff --git a/src/easydiffraction/io/cif/serialize.py b/src/easydiffraction/io/cif/serialize.py index 1bc86a88..ef9fa277 100644 --- a/src/easydiffraction/io/cif/serialize.py +++ b/src/easydiffraction/io/cif/serialize.py @@ -3,6 +3,7 @@ from __future__ import annotations +import textwrap from typing import TYPE_CHECKING from typing import Any @@ -21,15 +22,15 @@ from easydiffraction.core.category import CategoryItem from easydiffraction.core.variable import GenericDescriptorBase -# Maximum CIF description length before using semicolon-delimited block -_CIF_DESCRIPTION_WRAP_LEN = 60 - # Minimum string length to check for surrounding quotes _MIN_QUOTED_LEN = 2 # Number of significant digits kept for CIF uncertainty notation _CIF_UNCERTAINTY_SIG_DIGITS = 2 +# Maximum CIF description length 
before using semicolon-delimited block +_CIF_DESCRIPTION_WRAP_LEN = 60 + def format_value(value: object) -> str: """ @@ -79,6 +80,13 @@ def _strip_optional_quotes(raw: str) -> str: return raw[1:-1] if is_quoted else raw +def _strip_cif_text_field_delimiters(raw: str) -> str: + """Return CIF text-field content without delimiter lines.""" + if raw.startswith(';\n') and raw.endswith('\n;'): + return raw[2:-2].strip() + return raw + + def _parse_bool_cif_value(raw: str) -> bool | str: """Parse CIF boolean tokens, returning the raw token if invalid.""" normalized_value = _strip_optional_quotes(raw).lower() @@ -335,6 +343,26 @@ def datablock_collection_to_cif(collection: object) -> str: return '\n\n'.join([block.as_cif for block in collection.values()]) +def _format_project_description(description: str) -> str: + """Format project descriptions as CIF text.""" + normalized_description = ' '.join(description.split()) + if not normalized_description: + return '?' + + if len(normalized_description) > _CIF_DESCRIPTION_WRAP_LEN: + wrapped_description = '\n'.join( + textwrap.wrap( + normalized_description, + width=_CIF_DESCRIPTION_WRAP_LEN, + break_long_words=False, + break_on_hyphens=False, + ) + ) + return f'\n;\n{wrapped_description}\n;' + + return format_value(normalized_description) + + def project_info_to_cif(info: object) -> str: """Render ProjectInfo to CIF text (id, title, description).""" name = f'{info.name}' @@ -343,14 +371,7 @@ def project_info_to_cif(info: object) -> str: if ' ' in title: title = format_value(info.title) - if len(info.description) > _CIF_DESCRIPTION_WRAP_LEN: - description = f'\n;\n{info.description}\n;' - elif info.description: - description = f'{info.description}' - if ' ' in description: - description = format_value(info.description) - else: - description = '?' 
+ description = _format_project_description(info.description) created = format_value(info.created.strftime('%d %b %Y %H:%M:%S')) last_modified = format_value(info.last_modified.strftime('%d %b %Y %H:%M:%S')) @@ -458,6 +479,13 @@ def _wrap_in_data_block(cif_text: str, block_name: str = '_') -> str: return f'data_{block_name}\n\n{cif_text}' +def _project_block_from_cif_text(cif_text: str) -> gemmi.cif.Block: + """Parse project CIF text.""" + import gemmi # noqa: PLC0415 + + return gemmi.cif.read_string(_wrap_in_data_block(cif_text, 'project')).sole_block() + + def _populate_project_info_from_block( info: object, block: gemmi.cif.Block, @@ -487,9 +515,7 @@ def project_info_from_cif(info: object, cif_text: str) -> None: """ Populate a ProjectInfo instance from CIF text. - Reads ``_project.id``, ``_project.title``, and - ``_project.description`` from the given CIF string and sets them on - the *info* object. + Reads the core project metadata fields from CIF text. Parameters ---------- @@ -498,10 +524,7 @@ def project_info_from_cif(info: object, cif_text: str) -> None: cif_text : str CIF text content of ``project.cif``. """ - import gemmi # noqa: PLC0415 - - doc = gemmi.cif.read_string(_wrap_in_data_block(cif_text, 'project')) - block = doc.sole_block() + block = _project_block_from_cif_text(cif_text) _populate_project_info_from_block(info, block) @@ -510,10 +533,7 @@ def project_config_from_cif(project: object, cif_text: str) -> None: """ Populate project-level configuration from ``project.cif`` text. 
""" - import gemmi # noqa: PLC0415 - - doc = gemmi.cif.read_string(_wrap_in_data_block(cif_text, 'project')) - block = doc.sole_block() + block = _project_block_from_cif_text(cif_text) _populate_project_info_from_block(project.info, block) @@ -767,10 +787,8 @@ def _read(tag: str) -> str | None: # CIF unknown / inapplicable markers if raw in {'?', '.'}: return None - # Strip surrounding quotes - if len(raw) >= _MIN_QUOTED_LEN and raw[0] == raw[-1] and raw[0] in {"'", '"'}: - raw = raw[1:-1] - return raw + raw = _strip_cif_text_field_delimiters(raw) + return _strip_optional_quotes(raw) return _read @@ -859,6 +877,8 @@ def _set_param_from_raw_cif_value( raw : str The raw string from the CIF loop cell. """ + raw = _strip_cif_text_field_delimiters(raw) + # CIF unknown / inapplicable markers → keep default if raw in {'?', '.'}: return diff --git a/tests/unit/easydiffraction/io/cif/test_serialize_more.py b/tests/unit/easydiffraction/io/cif/test_serialize_more.py index 8328f4e1..70d40793 100644 --- a/tests/unit/easydiffraction/io/cif/test_serialize_more.py +++ b/tests/unit/easydiffraction/io/cif/test_serialize_more.py @@ -113,6 +113,19 @@ def test_project_info_to_cif_contains_core_fields(): assert '_project.last_modified "' in out +def test_project_info_to_cif_keeps_long_description_as_scalar_string(): + import easydiffraction.io.cif.serialize as MUT + from easydiffraction.project.project_info import ProjectInfo + + description = ' '.join(['long'] * 20) + info = ProjectInfo(name='p1', title='My Title', description=description) + + out = MUT.project_info_to_cif(info) + + assert f'_project.description "{description}"' in out + assert '\n;\n' not in out + + def test_experiment_to_cif_with_and_without_data(): import easydiffraction.io.cif.serialize as MUT diff --git a/tests/unit/easydiffraction/project/test_project_config.py b/tests/unit/easydiffraction/project/test_project_config.py index 0efd49b3..be6576e9 100644 --- 
a/tests/unit/easydiffraction/project/test_project_config.py +++ b/tests/unit/easydiffraction/project/test_project_config.py @@ -95,3 +95,35 @@ def test_project_save_and_load_keep_project_config_section_format(tmp_path): assert loaded.rendering.chart_engine.value == 'asciichartpy' assert loaded.rendering.table_engine.value == 'rich' assert loaded.verbosity.fit.value == 'full' + + +def test_project_save_wraps_long_description_as_cif_text_field(tmp_path): + from easydiffraction.project.project import Project + + description = ( + 'This is the most minimal example of using EasyDiffraction. ' + 'It shows how to load a previously saved project from a directory ' + 'and run refinement in just a few lines of code.' + ) + project = Project(name='beer', title='Beer title', description=description) + project.save_as(str(tmp_path / 'proj')) + + project_cif = (tmp_path / 'proj' / 'project.cif').read_text() + + assert '_project.description' in project_cif + description_tail = project_cif.split('_project.description', maxsplit=1)[1].lstrip(' ') + assert description_tail.startswith('\n;\n') + assert '\n;\n_project.created' in project_cif + description_block = description_tail.split('\n;\n', maxsplit=1)[1] + description_block = description_block.split('\n;\n_project.created', maxsplit=1)[0] + description_lines = description_block.splitlines() + + assert len(description_lines) > 1 + assert all(not line.startswith(';') for line in description_lines) + assert all(not line.endswith(';') for line in description_lines) + assert description_lines[0].startswith('This is the most minimal example') + assert description_lines[-1].endswith('lines of code.') + + loaded = Project.load(str(tmp_path / 'proj')) + + assert loaded.info.description == description diff --git a/tests/unit/easydiffraction/project/test_project_load.py b/tests/unit/easydiffraction/project/test_project_load.py index f1a71b8f..9481097f 100644 --- a/tests/unit/easydiffraction/project/test_project_load.py +++ 
b/tests/unit/easydiffraction/project/test_project_load.py @@ -196,6 +196,24 @@ def test_round_trips_bayesian_sampler_settings_to_live_dream_minimizer(self, tmp assert minimizer.parallel == 0 assert minimizer.init.value == 'lhs' + def test_round_trips_legacy_bayesian_steps_and_burn_to_live_dream_minimizer(self, tmp_path): + original = Project(name='legacy_bayes_state') + original.analysis.fitting.minimizer_type = 'bumps (dream)' + original.analysis.fit_result._set_result_kind('bayesian') + original.analysis.bayesian_sampler._set_steps(300) + original.analysis.bayesian_sampler._set_burn(60) + original.analysis._set_has_persisted_fit_state(value=True) + original.save_as(str(tmp_path / 'proj')) + + loaded = Project.load(str(tmp_path / 'proj')) + minimizer = loaded.analysis.fitting.minimizer + + assert minimizer is not None + assert minimizer.steps == 300 + assert minimizer.burn == 60 + assert minimizer.thin == 1 + assert minimizer.pop == 4 + class TestLoadAnalysisCifFallback: """Load falls back from analysis/analysis.cif to analysis.cif at root.""" From bbd4bd64bf027cc1be6fba734476e10acaacd7dc Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 01:10:13 +0200 Subject: [PATCH 46/72] Clarify DREAM multiprocessing issue for direct scripts --- docs/dev/issues/open.md | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/docs/dev/issues/open.md b/docs/dev/issues/open.md index 6c96f87e..925e7e6b 100644 --- a/docs/dev/issues/open.md +++ b/docs/dev/issues/open.md @@ -1557,26 +1557,36 @@ threaded because each step's output is the next step's input. --- -## 95. 🟡 Re-Enable DREAM Multiprocessing in CLI Workflows +## 95. 
🟡 Re-Enable DREAM Multiprocessing in Direct Python Scripts -**Type:** Performance / CLI robustness +**Type:** Performance / Script runtime -On macOS and other spawn-based platforms, running Bayesian scripts via -direct CLI entry points such as `python script.py` can fail during BUMPS +On macOS and other spawn-based platforms, direct Bayesian tutorial +execution via `python script.py` or wrappers such as +`pixi run tutorial docs/docs/tutorials/ed-21.py` can fail during BUMPS `MPMapper` startup because worker processes re-import `__main__` and re-execute top-level tutorial code. The current defensive workaround is to fall back to serial execution for these direct-script entry points, -which avoids the crash but disables DREAM multiprocessing in terminal -workflows. +which avoids the crash but disables DREAM multiprocessing and causes a +large performance drop. + +Observed behavior for `ed-21` today: + +- Jupyter execution and `easydiffraction PROJECT_DIR fit` both appear to + use working parallel DREAM and complete `361/361` in about 40 + seconds. +- Direct Python-script execution of the same tutorial runs `361/361` in + about 220 seconds, consistent with the serial fallback path. **Possible solution:** keep the existing tracker-state cleanup before pickling and mapper startup, but replace the blanket serial fallback with an EasyDiffraction-controlled multiprocessing context policy. For -direct CLI script entry points, prefer a `fork` context when available -so workers do not re-import the tutorial top level. Keep the existing -behavior for import-safe module entry points and for platforms where -`fork` is unavailable. Document the tradeoff clearly because `fork` on -macOS is less conservative than `spawn`. +direct Python script entry points, prefer a `fork` context when +available so workers do not re-import the tutorial top level. 
Keep the +existing behavior for import-safe module entry points such as +`easydiffraction PROJECT_DIR fit` and for platforms where `fork` is +unavailable. Document the tradeoff clearly because `fork` on macOS is +less conservative than `spawn`. **Depends on:** related to issue 89, but independent. @@ -1770,7 +1780,7 @@ sampler progress displays — any fix should keep their visuals consistent | 87 | Redesign tutorial grouping/categorisation | 🟢 Low | Documentation | | 88 | Fix Dataset 26 description (47 not 57) | 🟢 Low | Data | | 89 | Parallel independent fits for single mode | 🟡 Med | Performance | -| 95 | Re-enable DREAM multiprocessing in CLI workflows | 🟡 Med | Performance / CLI robustness | +| 95 | Re-enable DREAM multiprocessing in direct scripts | 🟡 Med | Performance / Script runtime | | 90 | Show experiment number during sequential fitting | 🟢 Low | UX | | 91 | Disable TODO checks in CodeFactor PRs | 🟢 Low | CI / Tooling | | 92 | Make `save()` respect verbosity | 🟢 Low | UX | From e37e2dea4aa919a1cd1b43004ea94aa02ba53138 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 01:14:45 +0200 Subject: [PATCH 47/72] Update data index ref and hash --- src/easydiffraction/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 02115d39..94cc39fa 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -31,9 +31,9 @@ _DATA_REPO = 'easyscience/diffraction' _DATA_ROOT = 'data' # commit SHA preferred -_DATA_INDEX_REF = '0e3a916ab27c36ee1f600889de8aef1fb0fd0d82' +_DATA_INDEX_REF = 'c6315ccc38748069c4753a7c76d166baefb5ad00' # macOS: sha256sum index.json -_DATA_INDEX_HASH = 'sha256:887ab81f440ed32455994347d34ea66f27314e895ee2fb719eaebb3acb228d5e' +_DATA_INDEX_HASH = 'sha256:3a58fe90361916a391e0214e2d009c7603b920c64fb0851874b3ee36e2151942' def _build_data_url(path: str) -> str: From 
4b4826d2c7cf1a035994414db957b2aceec7271f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 01:17:12 +0200 Subject: [PATCH 48/72] Add DREAM multiprocessing issue #95 --- docs/dev/issues/open.md | 175 ++++++++++++++++++++-------------------- 1 file changed, 87 insertions(+), 88 deletions(-) diff --git a/docs/dev/issues/open.md b/docs/dev/issues/open.md index 925e7e6b..04d97ab8 100644 --- a/docs/dev/issues/open.md +++ b/docs/dev/issues/open.md @@ -1573,8 +1573,7 @@ large performance drop. Observed behavior for `ed-21` today: - Jupyter execution and `easydiffraction PROJECT_DIR fit` both appear to - use working parallel DREAM and complete `361/361` in about 40 - seconds. + use working parallel DREAM and complete `361/361` in about 40 seconds. - Direct Python-script execution of the same tutorial runs `361/361` in about 220 seconds, consistent with the serial fallback path. @@ -1698,90 +1697,90 @@ sampler progress displays — any fix should keep their visuals consistent ## Summary -| # | Issue | Severity | Type | -| --- | ------------------------------------------------ | -------- | ---------------------------- | -| 3 | Rebuild joint-fit weights | 🟡 Med | Fragility | -| 5 | `Analysis` as `DatablockItem` | 🟡 Med | Consistency | -| 8 | Explicit `create()` signatures | 🟡 Med | API safety | -| 9 | Future enum extensions | 🟢 Low | Design | -| 10 | Unify update orchestration | 🟢 Low | Maintainability | -| 11 | Document `_update` contract | 🟢 Low | Maintainability | -| 13 | Suppress redundant dirty-flag sets | 🟢 Low | Performance | -| 14 | Finer-grained change tracking | 🟢 Low | Performance | -| 15 | Validate joint-fit weights | 🟡 Med | Correctness | -| 17 | Use PDF-specific CIF names | 🟢 Low | Naming | -| 18 | Move CIF v2→v1 conversion out of calculator | 🟢 Low | Maintainability | -| 19 | Debug-mode logging for calculator imports | 🟢 Low | Diagnostics | -| 20 | Redirect/suppress CrysPy stderr | 🟢 Low | UX | -| 21 | Clarify CrysPy TOF background CIF tags | 🟡 
Med | Correctness | -| 22 | Check SC instrument mapping in CrysPy | 🟢 Low | Correctness | -| 23 | Investigate PyCrysFML pattern length discrepancy | 🟢 Low | Correctness | -| 24 | Process defaults on experiment creation | 🟢 Low | Design | -| 25 | Refactor data `_update` methods | 🟡 Med | Maintainability | -| 26 | Clarify `dtype` usage in data arrays | 🟢 Low | Cleanup | -| 27 | Handle zero uncertainty in Bragg PD | 🟢 Low | Correctness | -| 28 | Clarify Bragg PD data collection description | 🟢 Low | Cleanup | -| 29 | Standardise CIF ID validator pattern | 🟡 Med | Consistency | -| 30 | Make `refinement_status` default an Enum | 🟢 Low | Design | -| 31 | Rename PD data point mixins | 🟢 Low | Naming | -| 32 | Move common methods to `DatablockCollection` | 🟡 Med | Maintainability | -| 33 | Make `_update_categories` abstract | 🟡 Med | Design | -| 34 | Auto-extract `PeakProfileTypeEnum` | 🟢 Low | Design | -| 35 | Rename `BeamModeEnum` members to CWL/TOF | 🟢 Low | Naming | -| 36 | Common `EnumBase` class | 🟢 Low | Design | -| 37 | Rename experiment `.type` property | 🟢 Low | Naming | -| 38 | Fix `@typechecked`/gemmi in factories | 🟡 Med | Bug | -| 39 | Improve `_update_priority` handling | 🟢 Low | Design | -| 40 | Reset `.user_constrained` to `False` | 🟢 Low | Feature | -| 41 | Check `_mark_dirty` in `_set_value` | 🟢 Low | Cleanup | -| 42 | MkDocs type unpacking in validation | 🟢 Low | Docs | -| 43 | Fix summary display inconsistencies | 🟢 Low | UX | -| 44 | Merge parameter record construction | 🟢 Low | Cleanup | -| 45 | Decide alias/constraint descriptor default | 🟢 Low | Design | -| 46 | Improve `JointFitItem` descriptions | 🟢 Low | Naming | -| 47 | Improve error handling in crystallography | 🟢 Low | Diagnostics | -| 48 | Fix CrysPy TOF instrument default | 🟢 Low | Bug workaround | -| 49 | Automate space group CIF name variants | 🟢 Low | Maintainability | -| 50 | Clarify `Cell._update` minimizer param | 🟢 Low | Cleanup | -| 51 | Access space group for Wyckoff letters | 🟢 
Low | Design | -| 52 | Rename line-segment `y` to `intensity` | 🟢 Low | Naming | -| 53 | Move `show()` to `CategoryCollection` | 🟢 Low | Maintainability | -| 54 | Add `point_id` to excluded regions | 🟢 Low | Completeness | -| 55 | Fix Jupyter scroll disabling for MkDocs | 🟢 Low | Docs / UX | -| 56 | Make ASCII plot width configurable | 🟢 Low | UX | -| 57 | Clean up CIF deserialisation helpers | 🟢 Low | Maintainability | -| 58 | Move `ProjectInfo` CIF methods to `serialize` | 🟢 Low | Maintainability | -| 59 | Add CIF name validation in parse | 🟢 Low | Robustness | -| 60 | Unify `mkdir` usage | 🟢 Low | Cleanup | -| 61 | Clarify logger default reaction mode | 🟢 Low | Design | -| 62 | Complete `render_table` → `TableRenderer` | 🟢 Low | Cleanup | -| 63 | Fix calculator `calculate_pattern` signature | 🟢 Low | Design | -| 64 | Check unused-if-loading-from-CIF code | 🟢 Low | Cleanup | -| 65 | Replace all bare `print()` with logging | 🟡 Med | Code quality | -| 66 | Error-handling strategy: `log.error` vs `raise` | 🟡 Med | Design | -| 67 | Custom validation for params and category types | 🟡 Med | Design | -| 68 | `@typechecked` on all public methods? 
| 🟢 Low | Design | -| 69 | Shorter public API names via `__init__` | 🟢 Low | API ergonomics | -| 70 | Standardise class member ordering + headers | 🟡 Med | Code style | -| 71 | `_update_priority` reference table | 🟢 Low | Documentation | -| 72 | Warn on all switchable-category type changes | 🟡 Med | UX | -| 73 | Unify setter parameter naming | 🟢 Low | Code style | -| 74 | Sync property type hints + custom lint rules | 🟡 Med | Tooling | -| 75 | `show_supported_calculators()` on Analysis | 🟢 Low | API completeness | -| 76 | Consistent `_type` suffix in switchable APIs | 🟡 Med | Naming | -| 79 | Verify analysis CIF serialisation completeness | 🟢 Low | Correctness | -| 80 | Resolve `Any` vs `object` annotation policy | 🟢 Low | Code style | -| 81 | Enforce docstrings on all public methods | 🟡 Med | Code quality | -| 82 | Document `param-docstring-fix` workflow | 🟢 Low | Documentation | -| 83 | Remove redundant parameter listing | 🟢 Low | Cleanup | -| 84 | Serialise `None` as `.` in CIF output | 🟡 Med | Correctness | -| 85 | Retain per-experiment fitted params for plotting | 🟡 Med | Correctness | -| 86 | Auto-resolve `plot_param` x-axis + add units | 🟢 Low | UX | -| 87 | Redesign tutorial grouping/categorisation | 🟢 Low | Documentation | -| 88 | Fix Dataset 26 description (47 not 57) | 🟢 Low | Data | -| 89 | Parallel independent fits for single mode | 🟡 Med | Performance | +| # | Issue | Severity | Type | +| --- | ------------------------------------------------- | -------- | ---------------------------- | +| 3 | Rebuild joint-fit weights | 🟡 Med | Fragility | +| 5 | `Analysis` as `DatablockItem` | 🟡 Med | Consistency | +| 8 | Explicit `create()` signatures | 🟡 Med | API safety | +| 9 | Future enum extensions | 🟢 Low | Design | +| 10 | Unify update orchestration | 🟢 Low | Maintainability | +| 11 | Document `_update` contract | 🟢 Low | Maintainability | +| 13 | Suppress redundant dirty-flag sets | 🟢 Low | Performance | +| 14 | Finer-grained change tracking | 🟢 Low | 
Performance | +| 15 | Validate joint-fit weights | 🟡 Med | Correctness | +| 17 | Use PDF-specific CIF names | 🟢 Low | Naming | +| 18 | Move CIF v2→v1 conversion out of calculator | 🟢 Low | Maintainability | +| 19 | Debug-mode logging for calculator imports | 🟢 Low | Diagnostics | +| 20 | Redirect/suppress CrysPy stderr | 🟢 Low | UX | +| 21 | Clarify CrysPy TOF background CIF tags | 🟡 Med | Correctness | +| 22 | Check SC instrument mapping in CrysPy | 🟢 Low | Correctness | +| 23 | Investigate PyCrysFML pattern length discrepancy | 🟢 Low | Correctness | +| 24 | Process defaults on experiment creation | 🟢 Low | Design | +| 25 | Refactor data `_update` methods | 🟡 Med | Maintainability | +| 26 | Clarify `dtype` usage in data arrays | 🟢 Low | Cleanup | +| 27 | Handle zero uncertainty in Bragg PD | 🟢 Low | Correctness | +| 28 | Clarify Bragg PD data collection description | 🟢 Low | Cleanup | +| 29 | Standardise CIF ID validator pattern | 🟡 Med | Consistency | +| 30 | Make `refinement_status` default an Enum | 🟢 Low | Design | +| 31 | Rename PD data point mixins | 🟢 Low | Naming | +| 32 | Move common methods to `DatablockCollection` | 🟡 Med | Maintainability | +| 33 | Make `_update_categories` abstract | 🟡 Med | Design | +| 34 | Auto-extract `PeakProfileTypeEnum` | 🟢 Low | Design | +| 35 | Rename `BeamModeEnum` members to CWL/TOF | 🟢 Low | Naming | +| 36 | Common `EnumBase` class | 🟢 Low | Design | +| 37 | Rename experiment `.type` property | 🟢 Low | Naming | +| 38 | Fix `@typechecked`/gemmi in factories | 🟡 Med | Bug | +| 39 | Improve `_update_priority` handling | 🟢 Low | Design | +| 40 | Reset `.user_constrained` to `False` | 🟢 Low | Feature | +| 41 | Check `_mark_dirty` in `_set_value` | 🟢 Low | Cleanup | +| 42 | MkDocs type unpacking in validation | 🟢 Low | Docs | +| 43 | Fix summary display inconsistencies | 🟢 Low | UX | +| 44 | Merge parameter record construction | 🟢 Low | Cleanup | +| 45 | Decide alias/constraint descriptor default | 🟢 Low | Design | +| 46 | 
Improve `JointFitItem` descriptions | 🟢 Low | Naming | +| 47 | Improve error handling in crystallography | 🟢 Low | Diagnostics | +| 48 | Fix CrysPy TOF instrument default | 🟢 Low | Bug workaround | +| 49 | Automate space group CIF name variants | 🟢 Low | Maintainability | +| 50 | Clarify `Cell._update` minimizer param | 🟢 Low | Cleanup | +| 51 | Access space group for Wyckoff letters | 🟢 Low | Design | +| 52 | Rename line-segment `y` to `intensity` | 🟢 Low | Naming | +| 53 | Move `show()` to `CategoryCollection` | 🟢 Low | Maintainability | +| 54 | Add `point_id` to excluded regions | 🟢 Low | Completeness | +| 55 | Fix Jupyter scroll disabling for MkDocs | 🟢 Low | Docs / UX | +| 56 | Make ASCII plot width configurable | 🟢 Low | UX | +| 57 | Clean up CIF deserialisation helpers | 🟢 Low | Maintainability | +| 58 | Move `ProjectInfo` CIF methods to `serialize` | 🟢 Low | Maintainability | +| 59 | Add CIF name validation in parse | 🟢 Low | Robustness | +| 60 | Unify `mkdir` usage | 🟢 Low | Cleanup | +| 61 | Clarify logger default reaction mode | 🟢 Low | Design | +| 62 | Complete `render_table` → `TableRenderer` | 🟢 Low | Cleanup | +| 63 | Fix calculator `calculate_pattern` signature | 🟢 Low | Design | +| 64 | Check unused-if-loading-from-CIF code | 🟢 Low | Cleanup | +| 65 | Replace all bare `print()` with logging | 🟡 Med | Code quality | +| 66 | Error-handling strategy: `log.error` vs `raise` | 🟡 Med | Design | +| 67 | Custom validation for params and category types | 🟡 Med | Design | +| 68 | `@typechecked` on all public methods? 
| 🟢 Low | Design | +| 69 | Shorter public API names via `__init__` | 🟢 Low | API ergonomics | +| 70 | Standardise class member ordering + headers | 🟡 Med | Code style | +| 71 | `_update_priority` reference table | 🟢 Low | Documentation | +| 72 | Warn on all switchable-category type changes | 🟡 Med | UX | +| 73 | Unify setter parameter naming | 🟢 Low | Code style | +| 74 | Sync property type hints + custom lint rules | 🟡 Med | Tooling | +| 75 | `show_supported_calculators()` on Analysis | 🟢 Low | API completeness | +| 76 | Consistent `_type` suffix in switchable APIs | 🟡 Med | Naming | +| 79 | Verify analysis CIF serialisation completeness | 🟢 Low | Correctness | +| 80 | Resolve `Any` vs `object` annotation policy | 🟢 Low | Code style | +| 81 | Enforce docstrings on all public methods | 🟡 Med | Code quality | +| 82 | Document `param-docstring-fix` workflow | 🟢 Low | Documentation | +| 83 | Remove redundant parameter listing | 🟢 Low | Cleanup | +| 84 | Serialise `None` as `.` in CIF output | 🟡 Med | Correctness | +| 85 | Retain per-experiment fitted params for plotting | 🟡 Med | Correctness | +| 86 | Auto-resolve `plot_param` x-axis + add units | 🟢 Low | UX | +| 87 | Redesign tutorial grouping/categorisation | 🟢 Low | Documentation | +| 88 | Fix Dataset 26 description (47 not 57) | 🟢 Low | Data | +| 89 | Parallel independent fits for single mode | 🟡 Med | Performance | | 95 | Re-enable DREAM multiprocessing in direct scripts | 🟡 Med | Performance / Script runtime | -| 90 | Show experiment number during sequential fitting | 🟢 Low | UX | -| 91 | Disable TODO checks in CodeFactor PRs | 🟢 Low | CI / Tooling | -| 92 | Make `save()` respect verbosity | 🟢 Low | UX | -| 93 | Eliminate flicker in live progress tables | 🟡 Med | UX | +| 90 | Show experiment number during sequential fitting | 🟢 Low | UX | +| 91 | Disable TODO checks in CodeFactor PRs | 🟢 Low | CI / Tooling | +| 92 | Make `save()` respect verbosity | 🟢 Low | UX | +| 93 | Eliminate flicker in live progress tables 
| 🟡 Med | UX | From 265752e2b80d3efe0a8bd17859f605e65b4c5e28 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 01:17:34 +0200 Subject: [PATCH 49/72] Remove obsolete package references from docs --- docs/dev/package-structure/full.md | 6 ------ docs/dev/package-structure/short.md | 4 ---- src/easydiffraction/__main__.py | 6 ++++-- tests/unit/easydiffraction/analysis/test_fitting.py | 4 ++-- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index d29e21b7..f9643f88 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -79,7 +79,6 @@ │ │ │ │ └── 🏷️ class Constraints │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class ConstraintsFactory -│ │ ├── 📁 deterministic_parameter_results │ │ ├── 📁 deterministic_result │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -107,11 +106,6 @@ │ │ │ └── 📄 factory.py │ │ │ └── 🏷️ class FitResultFactory │ │ ├── 📁 fit_state -│ │ │ ├── 📄 __init__.py -│ │ │ ├── 📄 default.py -│ │ │ │ └── 🏷️ class FitState -│ │ │ └── 📄 factory.py -│ │ │ └── 🏷️ class FitStateFactory │ │ ├── 📁 fitting │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py diff --git a/docs/dev/package-structure/short.md b/docs/dev/package-structure/short.md index 508b8599..367a36a5 100644 --- a/docs/dev/package-structure/short.md +++ b/docs/dev/package-structure/short.md @@ -47,7 +47,6 @@ │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py │ │ │ └── 📄 factory.py -│ │ ├── 📁 deterministic_parameter_results │ │ ├── 📁 deterministic_result │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py @@ -65,9 +64,6 @@ │ │ │ ├── 📄 default.py │ │ │ └── 📄 factory.py │ │ ├── 📁 fit_state -│ │ │ ├── 📄 __init__.py -│ │ │ ├── 📄 default.py -│ │ │ └── 📄 factory.py │ │ ├── 📁 fitting │ │ │ ├── 📄 __init__.py │ │ │ ├── 📄 default.py diff --git a/src/easydiffraction/__main__.py b/src/easydiffraction/__main__.py index dbaee349..a66d0d8f 100644 --- a/src/easydiffraction/__main__.py +++ 
b/src/easydiffraction/__main__.py @@ -233,9 +233,11 @@ def undo( help='Path to the project directory (must contain project.cif).', ), ) -> None: - """Undo the last fit when fit-history support exists (not yet implemented).""" + """ + Undo the last fit when fit-history support exists (not implemented). + """ _load_project(project_dir) - typer.echo('Undo is not yet implemented. This command is a placeholder for future fit history support.') + typer.echo('Undo is not yet implemented.') raise typer.Exit(code=1) diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py index 86e5a079..21e87b1b 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting.py +++ b/tests/unit/easydiffraction/analysis/test_fitting.py @@ -159,8 +159,8 @@ def _stop_tracking(self): analysis_events: list[str] = [] analysis = SimpleNamespace( _capture_fit_parameter_state=lambda params: analysis_events.append('capture'), - _store_fit_result_projection=lambda results, experiments, fitted_parameters: analysis_events.append( - 'store' + _store_fit_result_projection=lambda results, experiments, fitted_parameters: ( + analysis_events.append('store') ), ) From ba32f26fe7611b66e713e5007a291daf27a7c56e Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 01:26:16 +0200 Subject: [PATCH 50/72] Update CIF serializer test for long descriptions --- tests/unit/easydiffraction/io/cif/test_serialize_more.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/unit/easydiffraction/io/cif/test_serialize_more.py b/tests/unit/easydiffraction/io/cif/test_serialize_more.py index 70d40793..3c9db1bc 100644 --- a/tests/unit/easydiffraction/io/cif/test_serialize_more.py +++ b/tests/unit/easydiffraction/io/cif/test_serialize_more.py @@ -113,7 +113,7 @@ def test_project_info_to_cif_contains_core_fields(): assert '_project.last_modified "' in out -def test_project_info_to_cif_keeps_long_description_as_scalar_string(): +def 
test_project_info_to_cif_wraps_long_description_as_text_field(): import easydiffraction.io.cif.serialize as MUT from easydiffraction.project.project_info import ProjectInfo @@ -122,8 +122,9 @@ def test_project_info_to_cif_keeps_long_description_as_scalar_string(): out = MUT.project_info_to_cif(info) - assert f'_project.description "{description}"' in out - assert '\n;\n' not in out + assert '_project.description ' in out + assert '\n;\n' in out + assert 'long long long long long long long long long long long long' in out def test_experiment_to_cif_with_and_without_data(): From 8176617f1be2e607de5db9a6631956f4dc6a54b7 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 07:52:32 +0200 Subject: [PATCH 51/72] Update ADR suggestions for implemented fit-state work --- docs/dev/adrs/index.md | 8 +- .../fit-output-files-and-data-exports.md | 90 ++++++++++++------- .../parameter-correlation-persistence.md | 18 +++- .../parameter-posterior-summary.md | 25 +++++- docs/dev/adrs/suggestions/undo-fit.md | 8 ++ src/easydiffraction/utils/utils.py | 4 +- 6 files changed, 108 insertions(+), 45 deletions(-) diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 9f4a2563..7e8816eb 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -18,10 +18,10 @@ folders. | Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | | Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. | [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | | Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/results.h5`. 
| [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | -| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Proposes role-based filenames for fit results, data archives, and external plotting exports. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | -| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Proposes persisting deterministic and posterior correlation summaries. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | -| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Proposes the `parameter.posterior` API as a projection of analysis-level Bayesian state. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | -| Analysis and fitting | Suggestion | Undo Fit | Proposes an analysis-owned rollback operation for the latest pre-fit scalar state. | [`undo-fit.md`](suggestions/undo-fit.md) | +| Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Narrows remaining archive/export questions after adopting `results.csv` and `results.h5`. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | +| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Historical proposal whose core `_fit_parameter_correlation` design is already accepted. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | +| Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Narrows the still-open `parameter.posterior` API after analysis-level posterior summaries were accepted. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | +| Analysis and fitting | Suggestion | Undo Fit | Builds rollback semantics and CLI behavior on already-persisted pre-fit scalar snapshots. 
| [`undo-fit.md`](suggestions/undo-fit.md) | | Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. | [`category-owner-sections.md`](accepted/category-owner-sections.md) | | Core model | Accepted | Enum-Backed Closed Value Sets | Requires finite option sets to use `(str, Enum)` classes for validation and dispatch. | [`enum-backed-closed-values.md`](accepted/enum-backed-closed-values.md) | | Core model | Accepted | Guarded Public Properties | Uses property setters as the public writability contract for guarded objects. | [`guarded-public-properties.md`](accepted/guarded-public-properties.md) | diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md index 15ccaafa..3946f1a6 100644 --- a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -1,13 +1,27 @@ # ADR: Fit Output Files and Data Exports -**Status:** Proposed **Date:** 2026-05-18 +**Status:** Proposed +**Date:** 2026-05-18 + +## Status Note + +The current branch already adopts two pieces of this naming scheme: + +- sequential deterministic results stay in `analysis/results.csv` +- Bayesian arrays and plot caches use `analysis/results.h5` + +Those decisions now live in +[Analysis CIF Fit State](../accepted/analysis-cif-fit-state.md). This +proposal is therefore narrowed to the still-open roles for +`analysis/data.h5`, `analysis/exports/`, and any extra deterministic +convenience exports. 
## Context -Different fit modes produce different kinds of reusable output: +Different fit modes still produce different kinds of reusable output: - sequential deterministic fits produce a rectangular parameter - evolution table, currently saved as `analysis/results.csv` + evolution table, already saved as `analysis/results.csv` - Bayesian fits produce posterior samples, diagnostics, predictive arrays, and plot caches, which are too large and structured for CIF or CSV @@ -15,6 +29,12 @@ Different fit modes produce different kinds of reusable output: calculated data, reflection tables, residuals, and optional covariance/correlation summaries +The accepted fit-state ADR already standardizes the canonical saved fit +projection in `analysis/analysis.cif` plus `analysis/results.h5` for +Bayesian sidecars. What remains open here is whether project save should +also produce optional archives or user-facing export files beyond that +accepted baseline. + The project should keep naming consistent and avoid making users extract ordinary plotting data from CIF when a clearer CSV export is possible. At the same time, CIF remains the canonical model/configuration format, @@ -23,24 +43,31 @@ and large numerical arrays should not be embedded in ## Decision -### 1. Separate results, data archives, and exports +### 1. Keep the implemented results baseline -Use three file roles under `analysis/`: +The accepted baseline is: -- `analysis/results.csv` for flat tabular fit results. -- `analysis/results.h5` for large or structured result arrays and - result-derived plot caches. -- `analysis/data.h5` for optional archived input or measured data. +- `analysis/results.csv` for sequential deterministic fit tables +- `analysis/results.h5` for large Bayesian arrays and result-derived + caches + +Any future change to those canonical filenames would need a follow-up +ADR. + +### 2. 
Reserve separate roles for archives and exports -Use `analysis/exports/` for optional user-facing CSV files intended for -external plotting and inspection. +If extra persisted files are added under `analysis/`, keep their roles +separate: + +- `analysis/data.h5` for optional archived input or measured data. +- `analysis/exports/` for optional user-facing CSV files intended for + external plotting and inspection. -This naming keeps the fit type out of the filename. The fit type and -saved fit-state manifests are recorded in `analysis/analysis.cif`, -principally through `_fit_result.result_kind` and the related fit-state -categories. +The fit type and saved fit-state manifests stay recorded in +`analysis/analysis.cif`, principally through `_fit_result.result_kind` +and the related fit-state categories. -### 2. Sequential deterministic results stay CSV +### 3. Sequential deterministic results stay CSV Sequential deterministic fitting should keep `analysis/results.csv` as the canonical table for parameter evolution and extracted metadata. @@ -55,18 +82,18 @@ Sequential measured input data may optionally be archived in `analysis/data.h5`, but that archive is data, not results. It must not replace `analysis/results.csv`. -### 3. Bayesian arrays use `analysis/results.h5` +### 4. Bayesian arrays use `analysis/results.h5` Single Bayesian fits should store posterior samples, log posterior arrays, predictive arrays, and prepared plot caches in `analysis/results.h5`. -The previous candidate name `analysis/bayesian_data.h5` is avoided -because it mixes fit type with file role and blurs result arrays with -input data. Bayesian-specific meaning belongs in the CIF manifest and -HDF5 groups, not the sidecar filename. +The previous candidate name `analysis/bayesian_data.h5` remains +rejected because it mixes fit type with file role and blurs result +arrays with input data. Bayesian-specific meaning belongs in the CIF +manifest and HDF5 groups, not the sidecar filename. -### 4. 
Deterministic single and joint fits may gain CSV exports +### 5. Deterministic single and joint fits may gain CSV exports For single, joint, and sequential deterministic fits, EasyDiffraction should consider optional CSV exports for ordinary plotting data: @@ -97,12 +124,12 @@ analysis/ ## Fit-Type Mapping -| Fit type | Canonical fit state | Tabular results | Large arrays / caches | Optional data archive | Optional exports | -| ------------------------ | -------------------------------- | ---------------------------- | --------------------- | --------------------- | ------------------------------- | -| single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | -| joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | -| sequential deterministic | `analysis/analysis.cif` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | -| single Bayesian | `analysis/analysis.cif` manifest | optional summary export only | `analysis/results.h5` | none initially | optional summary/predictive CSV | +| Fit type | Canonical fit state | Tabular results | Large arrays / caches | Optional data archive | Optional exports | +| ------------------------ | ------------------------------------------------ | ---------------------------- | --------------------- | --------------------- | ------------------------------- | +| single deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | +| joint deterministic | `analysis/analysis.cif` | open question | none initially | none initially | `analysis/exports/*.csv` | +| sequential deterministic | `analysis/analysis.cif` + `analysis/results.csv` | `analysis/results.csv` | none initially | `analysis/data.h5` | `analysis/exports/*.csv` | +| single Bayesian | `analysis/analysis.cif` + `analysis/results.h5` | optional 
summary export only | `analysis/results.h5` | none initially | optional summary/predictive CSV | ## Open Questions @@ -118,10 +145,9 @@ analysis/ - Should sequential measured data archival in `analysis/data.h5` be opt-in, automatic below a size threshold, or always disabled unless requested? -- What size threshold and compression policy should control - `analysis/data.h5` and `analysis/results.h5`? -- Should `analysis/results.h5` store only the latest fit, or eventually - support multiple saved runs? +- What size threshold and compression policy should control the + optional `analysis/data.h5`, and does `analysis/results.h5` need a + matching convention? - Should external CSV exports be regenerated from canonical CIF/HDF5 on demand rather than stored persistently? diff --git a/docs/dev/adrs/suggestions/parameter-correlation-persistence.md b/docs/dev/adrs/suggestions/parameter-correlation-persistence.md index 9c494908..d406698b 100644 --- a/docs/dev/adrs/suggestions/parameter-correlation-persistence.md +++ b/docs/dev/adrs/suggestions/parameter-correlation-persistence.md @@ -3,6 +3,15 @@ **Status:** Proposed **Date:** 2026-05-13 +## Status Note + +The core of this proposal is now accepted and implemented. +[Analysis CIF Fit State](../accepted/analysis-cif-fit-state.md) +adopts `_fit_parameter_correlation` as a common fit-state category, and +current code stores both deterministic and posterior upper-triangle +correlation summaries. This document now mainly captures the rationale +and deferred extensions beyond that accepted baseline. + ## Context `plot_param_correlations()` can currently visualize either: @@ -21,7 +30,7 @@ experiment CIF files. ## Decision -### 1. Add a `_fit_parameter_correlation` loop category +### 1. Use a `_fit_parameter_correlation` loop category Persist pairwise parameter correlations in a new analysis-owned loop: @@ -73,7 +82,7 @@ The same loop category is used for both deterministic and Bayesian fit results. 
The distinction is carried by `source_kind`, not by separate loop names. -### 5. Suggested restore behavior +### 5. Restore behavior On load: @@ -82,7 +91,7 @@ On load: - if it is absent, correlation plots fall back to whatever live runtime information is available -### 6. Suggested user-facing behavior +### 6. User-facing behavior ```python # Restored from analysis.cif when available @@ -96,7 +105,8 @@ correlation heatmap without needing raw posterior samples. ### Positive -- Deterministic and Bayesian correlation summaries survive reload. +- Deterministic and Bayesian correlation summaries survive reload on + the current branch. - Correlation heatmaps no longer depend entirely on runtime-only data. - The schema is compact and fit-type-agnostic. diff --git a/docs/dev/adrs/suggestions/parameter-posterior-summary.md b/docs/dev/adrs/suggestions/parameter-posterior-summary.md index ccfd4a98..981c573a 100644 --- a/docs/dev/adrs/suggestions/parameter-posterior-summary.md +++ b/docs/dev/adrs/suggestions/parameter-posterior-summary.md @@ -1,6 +1,18 @@ # ADR: Parameter-Level Posterior Projection -**Status:** Proposed **Date:** 2026-05-13 +**Status:** Proposed +**Date:** 2026-05-13 + +## Status Note + +This proposal is narrower than its original draft. The accepted +[Analysis CIF Fit State](../accepted/analysis-cif-fit-state.md) ADR now +persists analysis-level posterior summaries in +`_bayesian_parameter_posterior`, and current Bayesian fits already +commit the best posterior sample to `parameter.value`. What remains +undecided is whether parameters should also expose a convenience +`parameter.posterior` projection and dedicated internal helpers for +updating fit-derived metadata atomically. ## Context @@ -12,8 +24,9 @@ Bayesian DREAM currently keeps posterior state only on `analysis.fit_results` via `BayesianFitResults`, including `posterior_samples`, `posterior_parameter_summaries`, `posterior_predictive`, diagnostics, and sampler settings. 
The accepted -runtime-fit-results ADR describes this state as runtime-only and not -serialized unless a narrower persistence ADR defines a saved projection. +runtime-fit-results ADR originally described this state as runtime-only, +but the accepted fit-state ADR now persists analysis-level posterior +summaries and cache manifests as a narrower saved projection. `analysis.fit_results` already changes by analysis type: deterministic fits use `FitResults`, while posterior-capable fits such as DREAM use @@ -191,6 +204,8 @@ while internal fit application installs fresh metadata atomically. After a posterior-capable fit, `parameter.value` is committed from the best posterior sample. +Current DREAM support already follows this rule. + The best posterior sample is chosen because it is a coherent joint point estimate across all free parameters. Marginal medians remain available on `parameter.posterior`, but they are summary data rather than the @@ -216,6 +231,10 @@ Canonical Bayesian state is owned by `analysis.fit_results`, not by individual parameters. The saved fit-state format and restore order are defined in `analysis-cif-fit-state.md`. +The accepted fit-state ADR currently rebuilds analysis-level posterior +state only. This proposal would add a parameter-level convenience +projection on top of that restored analysis state. + `parameter.posterior` is never serialized as a per-parameter property. It is rebuilt from the analysis-level saved result projection when that projection is available. diff --git a/docs/dev/adrs/suggestions/undo-fit.md b/docs/dev/adrs/suggestions/undo-fit.md index f394aca9..cd658aba 100644 --- a/docs/dev/adrs/suggestions/undo-fit.md +++ b/docs/dev/adrs/suggestions/undo-fit.md @@ -3,6 +3,14 @@ **Status:** Proposed **Date:** 2026-05-18 +## Status Note + +The rollback anchors described here are already persisted and restored. 
+Current code saves `_fit_parameter.start_value` and +`_fit_parameter.start_uncertainty` in `analysis/analysis.cif`, and the +CLI already reserves `PROJECT_DIR undo`, but no rollback operation is +implemented yet. + ## Context The accepted fit-state persistence design now stores diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 94cc39fa..5294ceb9 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -31,9 +31,9 @@ _DATA_REPO = 'easyscience/diffraction' _DATA_ROOT = 'data' # commit SHA preferred -_DATA_INDEX_REF = 'c6315ccc38748069c4753a7c76d166baefb5ad00' +_DATA_INDEX_REF = '56cf18e6c451b44355fd7ae70f02ce5c44dc440f' # macOS: sha256sum index.json -_DATA_INDEX_HASH = 'sha256:3a58fe90361916a391e0214e2d009c7603b920c64fb0851874b3ee36e2151942' +_DATA_INDEX_HASH = 'sha256:c7af8b1cc3a8f91a1ec8736f3307a11534e1f6b9ea30e0972063a2c2cff497c6' def _build_data_url(path: str) -> str: From e828edcf8209d57430dffaf67e001fb805d9302e Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 08:08:38 +0200 Subject: [PATCH 52/72] Accept parameter correlation persistence ADR --- .../parameter-correlation-persistence.md | 104 +++++++++++++++ docs/dev/adrs/index.md | 2 +- .../parameter-correlation-persistence.md | 124 ------------------ .../project/test_project_load.py | 74 +++++++++++ 4 files changed, 179 insertions(+), 125 deletions(-) create mode 100644 docs/dev/adrs/accepted/parameter-correlation-persistence.md delete mode 100644 docs/dev/adrs/suggestions/parameter-correlation-persistence.md diff --git a/docs/dev/adrs/accepted/parameter-correlation-persistence.md b/docs/dev/adrs/accepted/parameter-correlation-persistence.md new file mode 100644 index 00000000..4c7742dc --- /dev/null +++ b/docs/dev/adrs/accepted/parameter-correlation-persistence.md @@ -0,0 +1,104 @@ +# ADR: Parameter Correlation Persistence + +## Status + +Accepted current design. 
+ +## Date + +2026-05-19 + +## Group + +Analysis and fitting. + +## Context + +`plot_param_correlations()` can visualize either deterministic +parameter correlations derived from engine covariance or Bayesian +correlations derived from posterior samples. + +Reloaded projects still need correlation heatmaps even when the raw +runtime covariance or posterior arrays are unavailable. The broader +fit-state layout is defined in `analysis-cif-fit-state.md`; this ADR +records the narrower persisted correlation-summary projection within +that accepted design. + +Correlation data is analysis-owned derived state, not model state. It +therefore belongs in `analysis/analysis.cif`, not in structure or +experiment CIF files. + +## Decision + +Persist pairwise parameter correlations in `_fit_parameter_correlation` +rows inside `analysis/analysis.cif`. + +### Correlation summary schema + +Store one row per unique parameter pair with these fields: + +- `id` +- `source_kind` +- `param_unique_name_i` +- `param_unique_name_j` +- `correlation` + +Example: + +```cif +loop_ +_fit_parameter_correlation.id +_fit_parameter_correlation.source_kind +_fit_parameter_correlation.param_unique_name_i +_fit_parameter_correlation.param_unique_name_j +_fit_parameter_correlation.correlation +1 posterior cosio.atom_site.Co1.adp_iso cosio.atom_site.Co2.adp_iso 0.87 +``` + +Normalize each row to the upper triangle excluding the diagonal. +`param_unique_name_i` and `param_unique_name_j` use a stable ordering so +only one unordered pair is stored. The diagonal is omitted because it is +always `1.0` and can be rebuilt on load. + +Use the same loop for deterministic and Bayesian projections. The source +is carried by `source_kind`, currently `deterministic` or `posterior`. + +### Summary-only role + +`_fit_parameter_correlation` is a persisted summary. It does not replace +posterior samples, posterior pair densities, or covariance matrices. + +This summary is enough to restore correlation heatmaps. 
It is not enough +to restore richer pair-plot density surfaces or covariance-specific +workflows. + +### Restore behavior + +On load, restore `_fit_parameter_correlation` rows into an +analysis-owned correlation collection. + +When runtime covariance or posterior samples are unavailable, +`project.display.plotter.plot_param_correlations()` may rebuild a square +correlation matrix from the persisted rows and the restored fit-result +parameter ordering. + +### Relation to the broader fit-state ADR + +`analysis-cif-fit-state.md` remains the source of truth for the full +fit-state projection, save/load ordering, and Bayesian sidecar layout. +This ADR records the implemented correlation-specific piece of that +accepted design. + +## Consequences + +- Deterministic and Bayesian correlation summaries survive reload. +- Correlation heatmaps no longer depend entirely on runtime-only data. +- The schema stays compact and fit-type-agnostic. +- Posterior pair plots and covariance-specific workflows still need + richer runtime or persisted data. + +## Deferred Work + +- optional storage of covariance matrices in addition to correlations +- multiple named correlation sources for the same saved project +- full correlation restoration for pair-plot density surfaces diff --git a/docs/dev/adrs/index.md b/docs/dev/adrs/index.md index 7e8816eb..50a287db 100644 --- a/docs/dev/adrs/index.md +++ b/docs/dev/adrs/index.md @@ -18,8 +18,8 @@ folders. | Analysis and fitting | Accepted | Fit Mode Categories and Fit Execution API | Splits fitting configuration from execution and defines active sibling fit-mode categories. | [`fit-mode-categories.md`](accepted/fit-mode-categories.md) | | Analysis and fitting | Accepted | Runtime Fit Results | Keeps full fit outputs runtime-only in the current design unless a narrower persistence ADR is accepted. 
| [`runtime-fit-results.md`](accepted/runtime-fit-results.md) | | Analysis and fitting | Accepted | Analysis CIF Fit State | Defines the persisted fit-state projection in `analysis/analysis.cif` and `analysis/results.h5`. | [`analysis-cif-fit-state.md`](accepted/analysis-cif-fit-state.md) | +| Analysis and fitting | Accepted | Parameter Correlation Persistence | Persists deterministic and posterior correlation summaries in `_fit_parameter_correlation`. | [`parameter-correlation-persistence.md`](accepted/parameter-correlation-persistence.md) | | Analysis and fitting | Suggestion | Fit Output Files and Data Exports | Narrows remaining archive/export questions after adopting `results.csv` and `results.h5`. | [`fit-output-files-and-data-exports.md`](suggestions/fit-output-files-and-data-exports.md) | -| Analysis and fitting | Suggestion | Parameter Correlation Persistence | Historical proposal whose core `_fit_parameter_correlation` design is already accepted. | [`parameter-correlation-persistence.md`](suggestions/parameter-correlation-persistence.md) | | Analysis and fitting | Suggestion | Parameter-Level Posterior Projection | Narrows the still-open `parameter.posterior` API after analysis-level posterior summaries were accepted. | [`parameter-posterior-summary.md`](suggestions/parameter-posterior-summary.md) | | Analysis and fitting | Suggestion | Undo Fit | Builds rollback semantics and CLI behavior on already-persisted pre-fit scalar snapshots. | [`undo-fit.md`](suggestions/undo-fit.md) | | Core model | Accepted | Category Owners and Real Datablocks | Introduces `CategoryOwner` so singleton sections do not pretend to be real CIF datablocks. 
| [`category-owner-sections.md`](accepted/category-owner-sections.md) | diff --git a/docs/dev/adrs/suggestions/parameter-correlation-persistence.md b/docs/dev/adrs/suggestions/parameter-correlation-persistence.md deleted file mode 100644 index d406698b..00000000 --- a/docs/dev/adrs/suggestions/parameter-correlation-persistence.md +++ /dev/null @@ -1,124 +0,0 @@ -# ADR: Parameter Correlation Persistence - -**Status:** Proposed -**Date:** 2026-05-13 - -## Status Note - -The core of this proposal is now accepted and implemented. -[Analysis CIF Fit State](../accepted/analysis-cif-fit-state.md) -adopts `_fit_parameter_correlation` as a common fit-state category, and -current code stores both deterministic and posterior upper-triangle -correlation summaries. This document now mainly captures the rationale -and deferred extensions beyond that accepted baseline. - -## Context - -`plot_param_correlations()` can currently visualize either: - -- deterministic parameter correlations derived from engine covariance -- Bayesian correlations derived from posterior samples - -After project reload, this correlation information is not available -unless the underlying runtime objects are rebuilt. For Bayesian fits, -full posterior samples may not always be restored. For deterministic -fits, engine covariance is typically not persisted at all. - -The correlation matrix is an analysis-owned summary, not model state. It -therefore belongs in `analysis/analysis.cif`, not in structure or -experiment CIF files. - -## Decision - -### 1. 
Use a `_fit_parameter_correlation` loop category - -Persist pairwise parameter correlations in a new analysis-owned loop: - -- `source_kind` -- `param_unique_name_i` -- `param_unique_name_j` -- `correlation` - -Suggested example: - -```cif -loop_ -_fit_parameter_correlation.source_kind -_fit_parameter_correlation.param_unique_name_i -_fit_parameter_correlation.param_unique_name_j -_fit_parameter_correlation.correlation -posterior cosio.atom_site.Co1.adp_iso cosio.atom_site.Co2.adp_iso 0.87 -``` - -`source_kind` records how the correlation was obtained, for example: - -- `deterministic` -- `posterior` - -### 2. Store only the upper triangle excluding the diagonal - -Each row stores one unordered parameter pair with -`param_unique_name_i < param_unique_name_j` in a stable ordering. - -The diagonal is omitted because it is always 1.0 and can be rebuilt on -load. - -This keeps the CIF loop compact while remaining lossless for the -correlation matrix. - -### 3. Treat the loop as a summary, not a replacement for raw samples - -For Bayesian fits, `_fit_parameter_correlation` is a persisted summary. -It does not replace posterior samples or posterior pair data. - -This means: - -- correlation heatmaps can be restored from the loop alone -- posterior pair plots still require posterior samples - -### 4. Deterministic and Bayesian fits share the same loop schema - -The same loop category is used for both deterministic and Bayesian fit -results. The distinction is carried by `source_kind`, not by separate -loop names. - -### 5. Restore behavior - -On load: - -- if `_fit_parameter_correlation` is present, correlation summaries are - restored into a lightweight analysis-owned correlation structure -- if it is absent, correlation plots fall back to whatever live runtime - information is available - -### 6. 
User-facing behavior - -```python -# Restored from analysis.cif when available -project.display.plotter.plot_param_correlations() -``` - -If only the correlation loop is restored, the user still gets the -correlation heatmap without needing raw posterior samples. - -## Consequences - -### Positive - -- Deterministic and Bayesian correlation summaries survive reload on - the current branch. -- Correlation heatmaps no longer depend entirely on runtime-only data. -- The schema is compact and fit-type-agnostic. - -### Trade-offs - -- The correlation loop is a derived summary, so it must be kept in sync - with the latest fit result. -- Restored correlation data is not enough for posterior pair plots or - predictive summaries. - -## Deferred Work - -- optional storage of covariance matrices in addition to correlations -- multiple named correlation sources for the same saved project -- full correlation restoration for pair-plot density surfaces diff --git a/tests/unit/easydiffraction/project/test_project_load.py b/tests/unit/easydiffraction/project/test_project_load.py index 9481097f..f19b4392 100644 --- a/tests/unit/easydiffraction/project/test_project_load.py +++ b/tests/unit/easydiffraction/project/test_project_load.py @@ -172,6 +172,80 @@ def test_round_trips_deterministic_fit_state_and_keeps_live_parameter_values(sel assert loaded_parameter._fit_start_uncertainty == 0.02 assert loaded_parameter.uncertainty == 0.07 + def test_round_trips_persisted_deterministic_correlation_summary_for_reloaded_display( + self, + tmp_path, + ): + from easydiffraction.display.plotting import Plotter + + original = Project(name='fit_correlation_state') + original.structures.create(name='lbco') + structure = original.structures['lbco'] + structure.space_group.name_h_m = 'P m -3 m' + structure.cell.length_a = 3.88 + structure.cell.length_b = 3.89 + + parameter_a = structure.cell.length_a + parameter_b = structure.cell.length_b + for parameter, start_value in ( + (parameter_a, 3.87), + 
(parameter_b, 3.88), + ): + parameter.free = True + parameter.uncertainty = 0.05 + parameter.fit_min = 3.8 + parameter.fit_max = 3.9 + parameter._set_fit_bounds_uncertainty_multiplier(4.0) + parameter._fit_start_value = start_value + parameter._fit_start_uncertainty = 0.02 + original.analysis.fit_parameters.create( + param_unique_name=parameter.unique_name, + fit_min=parameter.fit_min, + fit_max=parameter.fit_max, + fit_bounds_uncertainty_multiplier=4.0, + start_value=start_value, + start_uncertainty=0.02, + ) + + original.analysis.fit_result._set_result_kind('deterministic') + original.analysis.fit_result._set_success(value=True) + original.analysis.fit_result._set_message('Fit converged') + original.analysis.fit_result._set_iterations(21) + original.analysis.fit_result._set_fitting_time(0.74) + original.analysis.fit_result._set_reduced_chi_square(1.031) + original.analysis.deterministic_result._set_optimizer_name('lmfit') + original.analysis.deterministic_result._set_method_name('leastsq') + original.analysis.deterministic_result._set_objective_name('chi-square') + original.analysis.deterministic_result._set_objective_value(1.031) + original.analysis.deterministic_result._set_n_data_points(120) + original.analysis.deterministic_result._set_n_parameters(2) + original.analysis.deterministic_result._set_n_free_parameters(2) + original.analysis.deterministic_result._set_degrees_of_freedom(118) + original.analysis.deterministic_result._set_covariance_available(value=False) + original.analysis.deterministic_result._set_correlation_available(value=True) + original.analysis.fit_parameter_correlations.create( + source_kind='deterministic', + param_unique_name_i=parameter_b.unique_name, + param_unique_name_j=parameter_a.unique_name, + correlation=0.42, + ) + original.analysis._set_has_persisted_fit_state(value=True) + original.save_as(str(tmp_path / 'proj')) + + loaded = Project.load(str(tmp_path / 'proj')) + plotter = Plotter() + plotter._set_project(loaded) + + corr_df = 
plotter._get_param_correlation_dataframe() + + assert corr_df is not None + assert list(corr_df.index) == [parameter_a.unique_name, parameter_b.unique_name] + assert list(corr_df.columns) == [parameter_a.unique_name, parameter_b.unique_name] + assert corr_df.loc[parameter_a.unique_name, parameter_a.unique_name] == pytest.approx(1.0) + assert corr_df.loc[parameter_b.unique_name, parameter_b.unique_name] == pytest.approx(1.0) + assert corr_df.loc[parameter_a.unique_name, parameter_b.unique_name] == pytest.approx(0.42) + assert corr_df.loc[parameter_b.unique_name, parameter_a.unique_name] == pytest.approx(0.42) + def test_round_trips_bayesian_sampler_settings_to_live_dream_minimizer(self, tmp_path): original = Project(name='bayes_state') original.analysis.fitting.minimizer_type = 'bumps (dream)' From a850eea1d9ab0bdb58cf8bb4a025c872d6afa9c4 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 08:33:08 +0200 Subject: [PATCH 53/72] Rename sequential fit result CSV columns --- src/easydiffraction/analysis/sequential.py | 51 +++++++------ src/easydiffraction/display/plotting.py | 67 +++++++++++++---- tests/integration/fitting/test_sequential.py | 18 ++++- .../analysis/test_sequential.py | 71 ++++++++---------- .../easydiffraction/display/test_plotting.py | 73 +++++++++++++++++++ 5 files changed, 197 insertions(+), 83 deletions(-) diff --git a/src/easydiffraction/analysis/sequential.py b/src/easydiffraction/analysis/sequential.py index 5889aaa7..1fc17ea5 100644 --- a/src/easydiffraction/analysis/sequential.py +++ b/src/easydiffraction/analysis/sequential.py @@ -90,9 +90,9 @@ def _fit_worker( Returns ------- dict[str, Any] - Result dict with keys: ``file_path``, ``fit_success``, - ``chi_squared``, ``reduced_chi_squared``, ``n_iterations``, and - per-parameter ``{unique_name}`` / ``{unique_name}.uncertainty``. 
+ Result dict with keys: ``file_path``, ``success``, + ``reduced_chi_square``, ``iterations``, and per-parameter + ``{unique_name}`` / ``{unique_name}.uncertainty``. """ # Lazy import to avoid circular dependencies and keep the module # importable without heavy imports at top level. @@ -163,10 +163,9 @@ def _fit_worker( IndexError, OSError, ) as exc: - result['fit_success'] = False - result['chi_squared'] = None - result['reduced_chi_squared'] = None - result['n_iterations'] = 0 + result['success'] = False + result['reduced_chi_square'] = None + result['iterations'] = 0 result['error'] = str(exc) return result @@ -352,17 +351,17 @@ def _collect_results( result: dict[str, Any] = {} fit_results = project.analysis.fit_results + tracker = getattr(project.analysis.fitter.minimizer, 'tracker', None) + best_iteration = getattr(tracker, 'best_iteration', None) if fit_results is not None: - result['fit_success'] = fit_results.success - result['chi_squared'] = fit_results.chi_square - result['reduced_chi_squared'] = fit_results.reduced_chi_square - result['n_iterations'] = project.analysis.fitter.minimizer.tracker.best_iteration or 0 + result['fit_result.success'] = fit_results.success + result['fit_result.reduced_chi_square'] = fit_results.reduced_chi_square + result['fit_result.iterations'] = fit_results.iterations or best_iteration or 0 else: - result['fit_success'] = False - result['chi_squared'] = None - result['reduced_chi_squared'] = None - result['n_iterations'] = 0 + result['fit_result.success'] = False + result['fit_result.reduced_chi_square'] = None + result['fit_result.iterations'] = best_iteration or 0 # Collect all free parameter values and uncertainties all_params = project.structures.parameters + project.experiments.parameters @@ -383,10 +382,9 @@ def _collect_results( _META_COLUMNS = [ 'file_path', - 'chi_squared', - 'reduced_chi_squared', - 'fit_success', - 'n_iterations', + 'fit_result.reduced_chi_square', + 'fit_result.success', + 'fit_result.iterations', 
] @@ -551,7 +549,8 @@ def _read_csv_for_recovery( file_path = row.get('file_path', '') if file_path: fitted.add(_resolve_csv_file_path(csv_path, file_path)) - if row.get('fit_success', '').lower() == 'true': + success_value = row.get('fit_result.success', row.get('success', '')) + if success_value.lower() == 'true': params = _extract_params_from_row(row) if params: last_params = params @@ -725,9 +724,9 @@ class SequentialRunPlan: def _summarize_chunk_results(results: list[dict[str, Any]]) -> tuple[str, str]: """Return average reduced chi-square and status for a chunk.""" num_files = len(results) - successful = [r for r in results if r.get('fit_success')] + successful = [r for r in results if r.get('fit_result.success')] if successful: - avg_chi2 = sum(r['reduced_chi_squared'] for r in successful) / len(successful) + avg_chi2 = sum(r['fit_result.reduced_chi_square'] for r in successful) / len(successful) chi2_str = f'{avg_chi2:.2f}' else: chi2_str = '—' @@ -798,10 +797,10 @@ def _build_file_progress_rows( rows: list[list[str]] = [] time_str = _format_elapsed_seconds(elapsed_time) for index, result in enumerate(results, start=1): - reduced_chi2 = result.get('reduced_chi_squared') + reduced_chi2 = result.get('fit_result.reduced_chi_square') chi2_str = f'{reduced_chi2:.2f}' if reduced_chi2 is not None else '—' - iterations = str(result.get('n_iterations') or 0) - status = '✅' if result.get('fit_success') else '❌' + iterations = str(result.get('fit_result.iterations') or 0) + status = '✅' if result.get('fit_result.success') else '❌' rows.append([ Path(result['file_path']).name, _format_progress_percent(completed_files_before + index, total_files), @@ -1260,7 +1259,7 @@ def _run_fit_loop( def _find_last_successful(results: list[dict[str, Any]]) -> dict[str, Any] | None: """Return the last successful result dict, or None.""" for r in reversed(results): - if r.get('fit_success') and r.get('params'): + if r.get('fit_result.success') and r.get('params'): return r return None 
diff --git a/src/easydiffraction/display/plotting.py b/src/easydiffraction/display/plotting.py index 7d85a36f..25a93b82 100644 --- a/src/easydiffraction/display/plotting.py +++ b/src/easydiffraction/display/plotting.py @@ -744,7 +744,7 @@ def plot_param_series( Parameters ---------- param : object - Parameter descriptor whose ``unique_name`` identifies the + Descriptor whose ``unique_name`` or ``name`` identifies the values to plot. versus : str | None, default=None Persisted diffrn path (e.g. @@ -752,7 +752,10 @@ def plot_param_series( column is used as the x-axis. When ``None``, the experiment sequence number is used instead. """ - unique_name = param.unique_name + column_names = self._series_column_names(param) + if not column_names: + log.warning('Series plot target does not expose a CSV column name.') + return # Try CSV first (produced by fit_sequential or future fit) csv_path = None @@ -764,19 +767,51 @@ def plot_param_series( if csv_path is not None: self._plot_param_series_from_csv( csv_path=csv_path, - unique_name=unique_name, + column_names=column_names, param_descriptor=param, versus_path=versus, ) else: # Fallback: in-memory snapshots from fit() single mode self.plot_param_series_from_snapshots( - unique_name, + column_names[0], versus, self._project.experiments, self._project.analysis._parameter_snapshots, ) + @staticmethod + def _series_column_names(param: object) -> list[str]: + """Return candidate CSV column names for one plotted series.""" + names: list[str] = [] + + unique_name = getattr(param, 'unique_name', None) + if isinstance(unique_name, str) and unique_name: + names.append(unique_name) + + name = getattr(param, 'name', None) + if isinstance(name, str) and name and name not in names: + names.append(name) + + return names + + @staticmethod + def _numeric_series_values(values: object) -> list[float]: + """Return one CSV column normalized to numeric plot values.""" + series = pd.Series(values) + if series.dtype == bool: + return 
series.astype(float).tolist() + + normalized = series.replace( + { + 'True': 1.0, + 'False': 0.0, + 'true': 1.0, + 'false': 0.0, + } + ) + return pd.to_numeric(normalized, errors='raise').tolist() + def plot_all_param_series( self, versus: str | None = None, @@ -5742,7 +5777,7 @@ def _bragg_tick_d_spacing( def _plot_param_series_from_csv( self, csv_path: str, - unique_name: str, + column_names: list[str], param_descriptor: object, versus_path: str | None = None, ) -> None: @@ -5750,8 +5785,9 @@ def _plot_param_series_from_csv( Plot a parameter's value across sequential fit results. Reads data from the CSV file at *csv_path*. The y-axis values - come from the column named *unique_name*, uncertainties from - ``{unique_name}.uncertainty``. When *versus_path* is provided, + come from the first matching column named in *column_names*, + with uncertainties from ``{column_name}.uncertainty``. When + *versus_path* is provided, the x-axis uses the corresponding ``diffrn.*`` CSV column; otherwise the row index is used. @@ -5763,8 +5799,8 @@ def _plot_param_series_from_csv( ---------- csv_path : str Path to the ``results.csv`` file. - unique_name : str - Unique name of the parameter to plot (CSV column key). + column_names : list[str] + Candidate CSV column keys to plot. param_descriptor : object The live parameter descriptor (for axis label / units). versus_path : str | None, default=None @@ -5773,16 +5809,17 @@ def _plot_param_series_from_csv( """ df = pd.read_csv(csv_path) - if unique_name not in df.columns: + column_name = next((name for name in column_names if name in df.columns), None) + if column_name is None: log.warning( - f"Parameter '{unique_name}' not found in CSV columns. " + f"Parameter '{column_names[0]}' not found in CSV columns. 
" f'Available: {list(df.columns)}' ) return - y = df[unique_name].astype(float).tolist() - uncert_col = f'{unique_name}.uncertainty' - sy = df[uncert_col].astype(float).tolist() if uncert_col in df.columns else [0.0] * len(y) + y = self._numeric_series_values(df[column_name]) + uncert_col = f'{column_name}.uncertainty' + sy = self._numeric_series_values(df[uncert_col]) if uncert_col in df.columns else [0.0] * len(y) # X-axis: diffrn column or row index diffrn_col = versus_path @@ -5799,7 +5836,7 @@ def _plot_param_series_from_csv( param_units = getattr(param_descriptor, 'units', '') y_label = f'Parameter value ({param_units})' if param_units else 'Parameter value' - title = f"Parameter '{unique_name}' across fit results" + title = f"Parameter '{column_name}' across fit results" self._backend.plot_scatter( x=x, diff --git a/tests/integration/fitting/test_sequential.py b/tests/integration/fitting/test_sequential.py index da4c2877..73c068b3 100644 --- a/tests/integration/fitting/test_sequential.py +++ b/tests/integration/fitting/test_sequential.py @@ -165,9 +165,18 @@ def test_fit_sequential_produces_csv(tmp_path) -> None: assert len(rows) == 3, f'Expected 3 rows, got {len(rows)}' - # Each row should have fit_success + # Each row should have fit_result.success for row in rows: - assert row['fit_success'] == 'True', f'Fit failed for {row["file_path"]}' + assert row['fit_result.success'] == 'True', f'Fit failed for {row["file_path"]}' + assert int(row['fit_result.iterations']) > 0, ( + f'Expected non-zero iterations for {row["file_path"]}' + ) + + assert 'fit_result.reduced_chi_square' in rows[0] + assert 'fit_result.iterations' in rows[0] + assert 'success' not in rows[0] + assert 'reduced_chi_square' not in rows[0] + assert 'iterations' not in rows[0] # Each row should have parameter values assert 'lbco.cell.length_a' in rows[0] @@ -316,7 +325,10 @@ def test_fit_sequential_parallel(tmp_path) -> None: assert len(rows) == 3, f'Expected 3 rows, got {len(rows)}' for row 
in rows: - assert row['fit_success'] == 'True', f'Fit failed for {row["file_path"]}' + assert row['fit_result.success'] == 'True', f'Fit failed for {row["file_path"]}' + assert int(row['fit_result.iterations']) > 0, ( + f'Expected non-zero iterations for {row["file_path"]}' + ) # Parameter values should be present and reasonable assert 'lbco.cell.length_a' in rows[0] diff --git a/tests/unit/easydiffraction/analysis/test_sequential.py b/tests/unit/easydiffraction/analysis/test_sequential.py index a8294ec5..dc837570 100644 --- a/tests/unit/easydiffraction/analysis/test_sequential.py +++ b/tests/unit/easydiffraction/analysis/test_sequential.py @@ -198,7 +198,7 @@ def test_full_header_order(self): class TestCsvWriteAndAppend: def test_write_creates_file_with_header(self, tmp_path): csv_path = tmp_path / 'results.csv' - header = ['file_path', 'chi_squared', 'param_a'] + header = ['file_path', 'fit_result.reduced_chi_square', 'param_a'] _write_csv_header(csv_path, header) with csv_path.open() as f: @@ -336,19 +336,17 @@ def test_returns_fitted_file_paths(self, tmp_path): [ { 'file_path': str(project_dir / 'experiments' / 'a.dat'), - 'fit_success': 'True', - 'chi_squared': '5.0', - 'reduced_chi_squared': '2.5', - 'n_iterations': '10', + 'fit_result.success': 'True', + 'fit_result.reduced_chi_square': '2.5', + 'fit_result.iterations': '10', 'cell.a': '3.89', 'cell.a.uncertainty': '0.01', }, { 'file_path': str(project_dir / 'experiments' / 'b.dat'), - 'fit_success': 'False', - 'chi_squared': '', - 'reduced_chi_squared': '', - 'n_iterations': '0', + 'fit_result.success': 'False', + 'fit_result.reduced_chi_square': '', + 'fit_result.iterations': '0', 'cell.a': '', 'cell.a.uncertainty': '', }, @@ -374,10 +372,9 @@ def test_resolves_legacy_repo_relative_paths(self, tmp_path, monkeypatch): writer.writeheader() writer.writerow({ 'file_path': 'projects/cosio/experiments/d20_scan/scan_001.dat', - 'fit_success': 'True', - 'chi_squared': '5.0', - 'reduced_chi_squared': '2.5', - 
'n_iterations': '10', + 'fit_result.success': 'True', + 'fit_result.reduced_chi_square': '2.5', + 'fit_result.iterations': '10', 'cell.a': '3.89', 'cell.a.uncertainty': '0.01', }) @@ -397,19 +394,17 @@ def test_returns_last_successful_params(self, tmp_path): [ { 'file_path': 'a.dat', - 'fit_success': 'True', - 'chi_squared': '5.0', - 'reduced_chi_squared': '2.5', - 'n_iterations': '10', + 'fit_result.success': 'True', + 'fit_result.reduced_chi_square': '2.5', + 'fit_result.iterations': '10', 'cell.a': '3.89', 'cell.a.uncertainty': '0.01', }, { 'file_path': 'b.dat', - 'fit_success': 'True', - 'chi_squared': '4.0', - 'reduced_chi_squared': '2.0', - 'n_iterations': '8', + 'fit_result.success': 'True', + 'fit_result.reduced_chi_square': '2.0', + 'fit_result.iterations': '8', 'cell.a': '3.90', 'cell.a.uncertainty': '0.02', }, @@ -436,10 +431,9 @@ def test_skips_meta_columns_and_diffrn_and_uncertainty(self, tmp_path): [ { 'file_path': 'a.dat', - 'fit_success': 'True', - 'chi_squared': '5.0', - 'reduced_chi_squared': '2.5', - 'n_iterations': '10', + 'fit_result.success': 'True', + 'fit_result.reduced_chi_square': '2.5', + 'fit_result.iterations': '10', 'diffrn.temp': '300', 'cell.a': '3.89', 'cell.a.uncertainty': '0.01', @@ -452,7 +446,7 @@ def test_skips_meta_columns_and_diffrn_and_uncertainty(self, tmp_path): assert 'cell.a' in params # Meta columns, diffrn, and uncertainty should be excluded assert 'file_path' not in params - assert 'fit_success' not in params + assert 'fit_result.success' not in params assert 'diffrn.temp' not in params assert 'cell.a.uncertainty' not in params @@ -466,10 +460,9 @@ def test_returns_none_params_when_no_successful_rows(self, tmp_path): [ { 'file_path': 'a.dat', - 'fit_success': 'False', - 'chi_squared': '', - 'reduced_chi_squared': '', - 'n_iterations': '0', + 'fit_result.success': 'False', + 'fit_result.reduced_chi_square': '', + 'fit_result.iterations': '0', 'cell.a': '', 'cell.a.uncertainty': '', }, @@ -537,15 +530,15 @@ def 
update(self, *, label=None, content=None): [ { 'file_path': _TEST_SCAN_001, - 'fit_success': True, - 'reduced_chi_squared': 4.0, - 'n_iterations': 11, + 'fit_result.success': True, + 'fit_result.reduced_chi_square': 4.0, + 'fit_result.iterations': 11, }, { 'file_path': _TEST_SCAN_002, - 'fit_success': False, - 'reduced_chi_squared': None, - 'n_iterations': 0, + 'fit_result.success': False, + 'fit_result.reduced_chi_square': None, + 'fit_result.iterations': 0, }, ], progress, @@ -631,9 +624,9 @@ def map(self, func, templates, paths): for path in paths: yield { 'file_path': path, - 'fit_success': True, - 'reduced_chi_squared': 1.0, - 'n_iterations': 5, + 'fit_result.success': True, + 'fit_result.reduced_chi_square': 1.0, + 'fit_result.iterations': 5, 'params': {'cell.a': 4.0}, } diff --git a/tests/unit/easydiffraction/display/test_plotting.py b/tests/unit/easydiffraction/display/test_plotting.py index 461e85ce..d5b6ae76 100644 --- a/tests/unit/easydiffraction/display/test_plotting.py +++ b/tests/unit/easydiffraction/display/test_plotting.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2026 EasyScience contributors # SPDX-License-Identifier: BSD-3-Clause +import csv import re from types import MethodType from types import SimpleNamespace @@ -66,6 +67,78 @@ def test_plotter_factory_supported_and_unsupported(): PlotterFactory.create('nope') +@pytest.mark.parametrize( + ('descriptor_name', 'column_values', 'expected_y'), + [ + ('reduced_chi_square', ['1.5', '2.5'], [1.5, 2.5]), + ('iterations', ['5', '8'], [5.0, 8.0]), + ('success', ['True', 'False'], [1.0, 0.0]), + ], +) +def test_plot_param_series_reads_fit_result_columns_from_csv( + monkeypatch, + tmp_path, + descriptor_name, + column_values, + expected_y, +): + from easydiffraction.display.plotting import Plotter + from easydiffraction.project.project import Project + + project = Project(name='series') + project.info.path = tmp_path + + analysis_dir = tmp_path / 'analysis' + analysis_dir.mkdir(parents=True) + csv_path = 
analysis_dir / 'results.csv' + with csv_path.open('w', newline='', encoding='utf-8') as handle: + writer = csv.DictWriter( + handle, + fieldnames=[ + 'file_path', + 'fit_result.reduced_chi_square', + 'fit_result.success', + 'fit_result.iterations', + ], + ) + writer.writeheader() + writer.writerow({ + 'file_path': 'a.dat', + 'fit_result.reduced_chi_square': column_values[0], + 'fit_result.success': column_values[0] if descriptor_name == 'success' else 'True', + 'fit_result.iterations': column_values[0] if descriptor_name == 'iterations' else '5', + }) + writer.writerow({ + 'file_path': 'b.dat', + 'fit_result.reduced_chi_square': column_values[1], + 'fit_result.success': column_values[1] if descriptor_name == 'success' else 'False', + 'fit_result.iterations': column_values[1] if descriptor_name == 'iterations' else '8', + }) + + captured: dict[str, object] = {} + + plotter = Plotter() + plotter._set_project(project) + + def fake_plot_scatter(*, x, y, sy, axes_labels, title, height): + captured['x'] = x + captured['y'] = y + captured['sy'] = sy + captured['axes_labels'] = axes_labels + captured['title'] = title + captured['height'] = height + + monkeypatch.setattr(plotter._backend, 'plot_scatter', fake_plot_scatter) + + plotter.plot_param_series(getattr(project.analysis.fit_result, descriptor_name)) + + assert captured['x'] == [1, 2] + assert captured['y'] == expected_y + assert captured['sy'] == [0.0, 0.0] + assert captured['axes_labels'] == ['Experiment No.', 'Parameter value'] + assert captured['title'] == f"Parameter 'fit_result.{descriptor_name}' across fit results" + + def test_plotter_error_paths_and_filtering(capsys, monkeypatch): from easydiffraction.datablocks.experiment.item.enums import BeamModeEnum from easydiffraction.datablocks.experiment.item.enums import SampleFormEnum From a573b6ba6cc2de98c010a2449a62dd61fc0af650 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 08:46:30 +0200 Subject: [PATCH 54/72] Add overwrite option to 
Project.save_as --- src/easydiffraction/project/project.py | 24 ++++++++-- .../project/test_project_save.py | 47 +++++++++++++++++++ 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 4a58eda3..3e100992 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -5,6 +5,7 @@ from __future__ import annotations import pathlib +import shutil import tempfile from typing import TYPE_CHECKING from typing import ClassVar @@ -501,14 +502,29 @@ def save_as( dir_path: str, *, temporary: bool = False, + overwrite: bool = True, ) -> None: - """Save the project into a new directory.""" + """Save the project into a directory. + + Parameters + ---------- + dir_path : str + Destination directory for the saved project. + temporary : bool, default=False + Whether to save beneath the system temporary directory. + overwrite : bool, default=True + Whether to remove an existing target directory before saving. 
+ """ if temporary: tmp: str = tempfile.gettempdir() - dir_path = pathlib.Path(tmp) / dir_path + project_dir = pathlib.Path(tmp) / dir_path else: - dir_path = resolve_artifact_path(dir_path) - self.info.path = dir_path + project_dir = resolve_artifact_path(dir_path) + + if overwrite and project_dir.is_dir(): + shutil.rmtree(project_dir) + + self.info.path = project_dir self.save() def apply_params_from_csv(self, row_index: int) -> None: diff --git a/tests/unit/easydiffraction/project/test_project_save.py b/tests/unit/easydiffraction/project/test_project_save.py index ec6a5ca6..ff8a9b9b 100644 --- a/tests/unit/easydiffraction/project/test_project_save.py +++ b/tests/unit/easydiffraction/project/test_project_save.py @@ -64,6 +64,53 @@ def test_project_save_lists_existing_analysis_results_csv(tmp_path, monkeypatch, assert 'results.csv' in out +def test_project_save_as_overwrites_existing_directory_by_default(tmp_path, monkeypatch): + from easydiffraction.analysis.analysis import Analysis + from easydiffraction.project.project import Project + from easydiffraction.project.project_info import ProjectInfo + from easydiffraction.summary.summary import Summary + + monkeypatch.setattr(ProjectInfo, 'as_cif', property(lambda self: 'info')) + monkeypatch.setattr(Analysis, 'as_cif', property(lambda self: 'analysis')) + monkeypatch.setattr(Summary, 'as_cif', lambda self: 'summary') + + target = tmp_path / 'proj_dir' + stale_file = target / 'stale.txt' + target.mkdir() + stale_file.write_text('stale') + + project = Project(name='p1') + project.save_as(str(target)) + + assert not stale_file.exists() + assert (target / 'project.cif').is_file() + + +def test_project_save_as_preserves_existing_directory_when_disabled(tmp_path, monkeypatch): + from easydiffraction.analysis.analysis import Analysis + from easydiffraction.project.project import Project + from easydiffraction.project.project_info import ProjectInfo + from easydiffraction.summary.summary import Summary + + 
monkeypatch.setattr(ProjectInfo, 'as_cif', property(lambda self: 'info')) + monkeypatch.setattr(Analysis, 'as_cif', property(lambda self: 'analysis')) + monkeypatch.setattr(Summary, 'as_cif', lambda self: 'summary') + + target = tmp_path / 'proj_dir' + stale_file = target / 'stale.txt' + target.mkdir() + stale_file.write_text('stale') + + project = Project(name='p1') + project.save_as( + str(target), + overwrite=False, + ) + + assert stale_file.exists() + assert (target / 'project.cif').is_file() + + def test_project_save_omits_empty_fit_state_sections(tmp_path): from easydiffraction.project.project import Project From 2f12067a8d2d0b56451209c0e3d3bd8997f80228 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 08:51:38 +0200 Subject: [PATCH 55/72] Improve code formatting and docstrings --- .../parameter-correlation-persistence.md | 6 ++--- .../fit-output-files-and-data-exports.md | 17 ++++++------- .../parameter-posterior-summary.md | 3 +-- src/easydiffraction/display/plotting.py | 25 ++++++++++--------- src/easydiffraction/project/project.py | 6 +++-- src/easydiffraction/utils/utils.py | 4 +-- 6 files changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/dev/adrs/accepted/parameter-correlation-persistence.md b/docs/dev/adrs/accepted/parameter-correlation-persistence.md index 4c7742dc..9b51c036 100644 --- a/docs/dev/adrs/accepted/parameter-correlation-persistence.md +++ b/docs/dev/adrs/accepted/parameter-correlation-persistence.md @@ -14,9 +14,9 @@ Analysis and fitting. ## Context -`plot_param_correlations()` can visualize either deterministic -parameter correlations derived from engine covariance or Bayesian -correlations derived from posterior samples. +`plot_param_correlations()` can visualize either deterministic parameter +correlations derived from engine covariance or Bayesian correlations +derived from posterior samples. 
Reloaded projects still need correlation heatmaps even when the raw runtime covariance or posterior arrays are unavailable. The broader diff --git a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md index 3946f1a6..8705fe35 100644 --- a/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md +++ b/docs/dev/adrs/suggestions/fit-output-files-and-data-exports.md @@ -1,7 +1,6 @@ # ADR: Fit Output Files and Data Exports -**Status:** Proposed -**Date:** 2026-05-18 +**Status:** Proposed **Date:** 2026-05-18 ## Status Note @@ -88,10 +87,10 @@ Single Bayesian fits should store posterior samples, log posterior arrays, predictive arrays, and prepared plot caches in `analysis/results.h5`. -The previous candidate name `analysis/bayesian_data.h5` remains -rejected because it mixes fit type with file role and blurs result -arrays with input data. Bayesian-specific meaning belongs in the CIF -manifest and HDF5 groups, not the sidecar filename. +The previous candidate name `analysis/bayesian_data.h5` remains rejected +because it mixes fit type with file role and blurs result arrays with +input data. Bayesian-specific meaning belongs in the CIF manifest and +HDF5 groups, not the sidecar filename. ### 5. Deterministic single and joint fits may gain CSV exports @@ -145,9 +144,9 @@ analysis/ - Should sequential measured data archival in `analysis/data.h5` be opt-in, automatic below a size threshold, or always disabled unless requested? -- What size threshold and compression policy should control the - optional `analysis/data.h5`, and does `analysis/results.h5` need a - matching convention? +- What size threshold and compression policy should control the optional + `analysis/data.h5`, and does `analysis/results.h5` need a matching + convention? - Should external CSV exports be regenerated from canonical CIF/HDF5 on demand rather than stored persistently? 
diff --git a/docs/dev/adrs/suggestions/parameter-posterior-summary.md b/docs/dev/adrs/suggestions/parameter-posterior-summary.md index 981c573a..36c0b979 100644 --- a/docs/dev/adrs/suggestions/parameter-posterior-summary.md +++ b/docs/dev/adrs/suggestions/parameter-posterior-summary.md @@ -1,7 +1,6 @@ # ADR: Parameter-Level Posterior Projection -**Status:** Proposed -**Date:** 2026-05-13 +**Status:** Proposed **Date:** 2026-05-13 ## Status Note diff --git a/src/easydiffraction/display/plotting.py b/src/easydiffraction/display/plotting.py index 25a93b82..f8ca0fe7 100644 --- a/src/easydiffraction/display/plotting.py +++ b/src/easydiffraction/display/plotting.py @@ -802,14 +802,12 @@ def _numeric_series_values(values: object) -> list[float]: if series.dtype == bool: return series.astype(float).tolist() - normalized = series.replace( - { - 'True': 1.0, - 'False': 0.0, - 'true': 1.0, - 'false': 0.0, - } - ) + normalized = series.replace({ + 'True': 1.0, + 'False': 0.0, + 'true': 1.0, + 'false': 0.0, + }) return pd.to_numeric(normalized, errors='raise').tolist() def plot_all_param_series( @@ -5787,9 +5785,8 @@ def _plot_param_series_from_csv( Reads data from the CSV file at *csv_path*. The y-axis values come from the first matching column named in *column_names*, with uncertainties from ``{column_name}.uncertainty``. When - *versus_path* is provided, - the x-axis uses the corresponding ``diffrn.*`` CSV column; - otherwise the row index is used. + *versus_path* is provided, the x-axis uses the corresponding + ``diffrn.*`` CSV column; otherwise the row index is used. 
Axis labels use the live parameter descriptor and, when available, a template diffrn descriptor resolved from @@ -5819,7 +5816,11 @@ def _plot_param_series_from_csv( y = self._numeric_series_values(df[column_name]) uncert_col = f'{column_name}.uncertainty' - sy = self._numeric_series_values(df[uncert_col]) if uncert_col in df.columns else [0.0] * len(y) + sy = ( + self._numeric_series_values(df[uncert_col]) + if uncert_col in df.columns + else [0.0] * len(y) + ) # X-axis: diffrn column or row index diffrn_col = versus_path diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index 3e100992..a27cb900 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -504,7 +504,8 @@ def save_as( temporary: bool = False, overwrite: bool = True, ) -> None: - """Save the project into a directory. + """ + Save the project into a directory. Parameters ---------- @@ -513,7 +514,8 @@ def save_as( temporary : bool, default=False Whether to save beneath the system temporary directory. overwrite : bool, default=True - Whether to remove an existing target directory before saving. + Whether to remove an existing target directory before + saving. 
""" if temporary: tmp: str = tempfile.gettempdir() diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 5294ceb9..0376f7ad 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -279,7 +279,7 @@ def download_data( console.print(f"✅ Data #{id} downloaded and extracted to '{project_dir}'") return str(project_dir) - console.print(f"✅ Data #{id} downloaded to '{file_path}'") + console.print(f"✅ Data #{id} downloaded to:\n'{file_path}'") return str(file_path) @@ -560,7 +560,7 @@ def download_tutorial( with _safe_urlopen(url) as resp: file_path.write_bytes(resp.read()) - console.print(f"✅ Tutorial #{id} downloaded to '{file_path}'") + console.print(f"✅ Tutorial #{id} downloaded to:\n'{file_path}'") return str(file_path) From de92f6e3bf53c70f9a2fa01f851d9c94c6e21b60 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 09:09:15 +0200 Subject: [PATCH 56/72] Update data index reference and hash --- src/easydiffraction/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 0376f7ad..418c155e 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -31,9 +31,9 @@ _DATA_REPO = 'easyscience/diffraction' _DATA_ROOT = 'data' # commit SHA preferred -_DATA_INDEX_REF = '56cf18e6c451b44355fd7ae70f02ce5c44dc440f' +_DATA_INDEX_REF = 'dbe92a87e0106c4742eee0ff9a8e32bdb8b483cb' # macOS: sha256sum index.json -_DATA_INDEX_HASH = 'sha256:c7af8b1cc3a8f91a1ec8736f3307a11534e1f6b9ea30e0972063a2c2cff497c6' +_DATA_INDEX_HASH = 'sha256:9e7bbaf2cb650f4126572e85157c63bc76f201408856fe4af566bee55dcdfbb4' def _build_data_url(path: str) -> str: From 33456bfc7efdc6585cef5cf6014b7704415154bb Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 09:09:33 +0200 Subject: [PATCH 57/72] Simplify tutorial with API shortcuts --- docs/docs/tutorials/ed-17.ipynb | 241 
++++++++++++++++---------------- docs/docs/tutorials/ed-17.py | 174 +++++++++++------------ 2 files changed, 201 insertions(+), 214 deletions(-) diff --git a/docs/docs/tutorials/ed-17.ipynb b/docs/docs/tutorials/ed-17.ipynb index cdc6d761..a8216c62 100644 --- a/docs/docs/tutorials/ed-17.ipynb +++ b/docs/docs/tutorials/ed-17.ipynb @@ -58,7 +58,8 @@ "source": [ "## Step 1: Define Project\n", "\n", - "The project object manages structures, experiments, and analysis." + "The project object manages structures, experiments, analysis, display,\n", + "and other related components." ] }, { @@ -68,7 +69,9 @@ "metadata": {}, "outputs": [], "source": [ - "project = ed.Project()" + "project = ed.Project(name='cosio_d20')\n", + "analysis = project.analysis\n", + "display = project.display" ] }, { @@ -87,7 +90,7 @@ "metadata": {}, "outputs": [], "source": [ - "project.save_as(dir_path='projects/cosio', temporary=False)" + "project.save_as(dir_path='projects/cosio_d20')" ] }, { @@ -111,7 +114,7 @@ "outputs": [], "source": [ "project.structures.create(name='cosio')\n", - "structure = project.structures['cosio']" + "struct = project.structures['cosio']" ] }, { @@ -129,8 +132,8 @@ "metadata": {}, "outputs": [], "source": [ - "structure.space_group.name_h_m = 'P n m a'\n", - "structure.space_group.it_coordinate_system_code = 'abc'" + "struct.space_group.name_h_m = 'P n m a'\n", + "struct.space_group.it_coordinate_system_code = 'abc'" ] }, { @@ -148,9 +151,9 @@ "metadata": {}, "outputs": [], "source": [ - "structure.cell.length_a = 10.31\n", - "structure.cell.length_b = 6.0\n", - "structure.cell.length_c = 4.79" + "struct.cell.length_a = 10.31\n", + "struct.cell.length_b = 6.0\n", + "struct.cell.length_c = 4.79" ] }, { @@ -168,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "structure.atom_sites.create(\n", + "struct.atom_sites.create(\n", " label='Co1',\n", " type_symbol='Co',\n", " fract_x=0,\n", @@ -177,7 +180,7 @@ " wyckoff_letter='a',\n", " adp_iso=0.3,\n", ")\n", - 
"structure.atom_sites.create(\n", + "struct.atom_sites.create(\n", " label='Co2',\n", " type_symbol='Co',\n", " fract_x=0.279,\n", @@ -186,7 +189,7 @@ " wyckoff_letter='c',\n", " adp_iso=0.3,\n", ")\n", - "structure.atom_sites.create(\n", + "struct.atom_sites.create(\n", " label='Si',\n", " type_symbol='Si',\n", " fract_x=0.094,\n", @@ -195,7 +198,7 @@ " wyckoff_letter='c',\n", " adp_iso=0.34,\n", ")\n", - "structure.atom_sites.create(\n", + "struct.atom_sites.create(\n", " label='O1',\n", " type_symbol='O',\n", " fract_x=0.091,\n", @@ -204,7 +207,7 @@ " wyckoff_letter='c',\n", " adp_iso=0.63,\n", ")\n", - "structure.atom_sites.create(\n", + "struct.atom_sites.create(\n", " label='O2',\n", " type_symbol='O',\n", " fract_x=0.448,\n", @@ -213,7 +216,7 @@ " wyckoff_letter='c',\n", " adp_iso=0.59,\n", ")\n", - "structure.atom_sites.create(\n", + "struct.atom_sites.create(\n", " label='O3',\n", " type_symbol='O',\n", " fract_x=0.164,\n", @@ -427,28 +430,28 @@ "metadata": {}, "outputs": [], "source": [ - "structure.cell.length_a.free = True\n", - "structure.cell.length_b.free = True\n", - "structure.cell.length_c.free = True\n", + "struct.cell.length_a.free = True\n", + "struct.cell.length_b.free = True\n", + "struct.cell.length_c.free = True\n", "\n", - "structure.atom_sites['Co2'].fract_x.free = True\n", - "structure.atom_sites['Co2'].fract_z.free = True\n", - "structure.atom_sites['Si'].fract_x.free = True\n", - "structure.atom_sites['Si'].fract_z.free = True\n", - "structure.atom_sites['O1'].fract_x.free = True\n", - "structure.atom_sites['O1'].fract_z.free = True\n", - "structure.atom_sites['O2'].fract_x.free = True\n", - "structure.atom_sites['O2'].fract_z.free = True\n", - "structure.atom_sites['O3'].fract_x.free = True\n", - "structure.atom_sites['O3'].fract_y.free = True\n", - "structure.atom_sites['O3'].fract_z.free = True\n", + "struct.atom_sites['Co2'].fract_x.free = True\n", + "struct.atom_sites['Co2'].fract_z.free = True\n", + 
"struct.atom_sites['Si'].fract_x.free = True\n", + "struct.atom_sites['Si'].fract_z.free = True\n", + "struct.atom_sites['O1'].fract_x.free = True\n", + "struct.atom_sites['O1'].fract_z.free = True\n", + "struct.atom_sites['O2'].fract_x.free = True\n", + "struct.atom_sites['O2'].fract_z.free = True\n", + "struct.atom_sites['O3'].fract_x.free = True\n", + "struct.atom_sites['O3'].fract_y.free = True\n", + "struct.atom_sites['O3'].fract_z.free = True\n", "\n", - "structure.atom_sites['Co1'].adp_iso.free = True\n", - "structure.atom_sites['Co2'].adp_iso.free = True\n", - "structure.atom_sites['Si'].adp_iso.free = True\n", - "structure.atom_sites['O1'].adp_iso.free = True\n", - "structure.atom_sites['O2'].adp_iso.free = True\n", - "structure.atom_sites['O3'].adp_iso.free = True" + "struct.atom_sites['Co1'].adp_iso.free = True\n", + "struct.atom_sites['Co2'].adp_iso.free = True\n", + "struct.atom_sites['Si'].adp_iso.free = True\n", + "struct.atom_sites['O1'].adp_iso.free = True\n", + "struct.atom_sites['O2'].adp_iso.free = True\n", + "struct.atom_sites['O3'].adp_iso.free = True" ] }, { @@ -488,13 +491,13 @@ "metadata": {}, "outputs": [], "source": [ - "project.analysis.aliases.create(\n", + "analysis.aliases.create(\n", " label='biso_Co1',\n", - " param=structure.atom_sites['Co1'].adp_iso,\n", + " param=struct.atom_sites['Co1'].adp_iso,\n", ")\n", - "project.analysis.aliases.create(\n", + "analysis.aliases.create(\n", " label='biso_Co2',\n", - " param=structure.atom_sites['Co2'].adp_iso,\n", + " param=struct.atom_sites['Co2'].adp_iso,\n", ")" ] }, @@ -513,7 +516,7 @@ "metadata": {}, "outputs": [], "source": [ - "project.analysis.constraints.create(expression='biso_Co2 = biso_Co1')" + "analysis.constraints.create(expression='biso_Co2 = biso_Co1')" ] }, { @@ -531,7 +534,7 @@ "metadata": {}, "outputs": [], "source": [ - "project.analysis.fitting.minimizer_type = 'bumps (lm)'" + "analysis.fitting.minimizer_type = 'bumps (lm)'" ] }, { @@ -554,13 +557,23 @@ "metadata": {}, 
"outputs": [], "source": [ - "project.analysis.fit()" + "analysis.fit()" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "44", "metadata": {}, + "outputs": [], + "source": [ + "display.fit.results()" + ] + }, + { + "cell_type": "markdown", + "id": "45", + "metadata": {}, "source": [ "#### Show parameter correlations" ] @@ -568,16 +581,16 @@ { "cell_type": "code", "execution_count": null, - "id": "45", + "id": "46", "metadata": {}, "outputs": [], "source": [ - "project.display.fit.correlations()" + "display.fit.correlations()" ] }, { "cell_type": "markdown", - "id": "46", + "id": "47", "metadata": {}, "source": [ "#### Compare measured and calculated patterns for the first fit." @@ -586,16 +599,16 @@ { "cell_type": "code", "execution_count": null, - "id": "47", + "id": "48", "metadata": {}, "outputs": [], "source": [ - "project.display.pattern(expt_name='d20')" + "display.pattern(expt_name='d20')" ] }, { "cell_type": "markdown", - "id": "48", + "id": "49", "metadata": {}, "source": [ "#### Run Sequential Fitting\n", @@ -607,7 +620,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49", + "id": "50", "metadata": {}, "outputs": [], "source": [ @@ -616,7 +629,7 @@ }, { "cell_type": "markdown", - "id": "50", + "id": "51", "metadata": { "lines_to_next_cell": 2 }, @@ -629,7 +642,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51", + "id": "52", "metadata": {}, "outputs": [], "source": [ @@ -639,11 +652,11 @@ { "cell_type": "code", "execution_count": null, - "id": "52", + "id": "53", "metadata": {}, "outputs": [], "source": [ - "project.analysis.sequential_fit_extract.create(\n", + "analysis.sequential_fit_extract.create(\n", " id='temperature',\n", " target=temperature,\n", " pattern=r'^TEMP\\s+([0-9.]+)',\n", @@ -653,7 +666,7 @@ }, { "cell_type": "markdown", - "id": "53", + "id": "54", "metadata": {}, "source": [ "Set the sequential fitting parameters." 
@@ -662,19 +675,19 @@ { "cell_type": "code", "execution_count": null, - "id": "54", + "id": "55", "metadata": {}, "outputs": [], "source": [ - "project.analysis.fitting_mode_type = 'sequential'\n", - "project.analysis.sequential_fit.data_dir = scan_data_dir\n", - "project.analysis.sequential_fit.max_workers = 'auto'\n", - "project.analysis.sequential_fit.reverse = True" + "analysis.fitting_mode_type = 'sequential'\n", + "analysis.sequential_fit.data_dir = scan_data_dir\n", + "analysis.sequential_fit.max_workers = 'auto'\n", + "analysis.sequential_fit.reverse = True" ] }, { "cell_type": "markdown", - "id": "55", + "id": "56", "metadata": {}, "source": [ "Run the sequential fit over all data files in the scan directory." @@ -683,16 +696,16 @@ { "cell_type": "code", "execution_count": null, - "id": "56", + "id": "57", "metadata": {}, "outputs": [], "source": [ - "project.analysis.fit()" + "analysis.fit()" ] }, { "cell_type": "markdown", - "id": "57", + "id": "58", "metadata": {}, "source": [ "#### Replay a Dataset\n", @@ -703,17 +716,17 @@ { "cell_type": "code", "execution_count": null, - "id": "58", + "id": "59", "metadata": {}, "outputs": [], "source": [ "project.apply_params_from_csv(row_index=0)\n", - "project.display.pattern(expt_name='d20')" + "display.pattern(expt_name='d20')" ] }, { "cell_type": "markdown", - "id": "59", + "id": "60", "metadata": {}, "source": [ "\n", @@ -723,17 +736,17 @@ { "cell_type": "code", "execution_count": null, - "id": "60", + "id": "61", "metadata": {}, "outputs": [], "source": [ "project.apply_params_from_csv(row_index=-1)\n", - "project.display.pattern(expt_name='d20')" + "display.pattern(expt_name='d20')" ] }, { "cell_type": "markdown", - "id": "61", + "id": "62", "metadata": {}, "source": [ "#### Plot Parameter Evolution\n", @@ -743,7 +756,27 @@ }, { "cell_type": "markdown", - "id": "62", + "id": "63", + "metadata": {}, + "source": [ + "Plot fit quality metrics vs. temperature." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64", + "metadata": {}, + "outputs": [], + "source": [ + "display.fit.series(analysis.fit_result.success, versus=temperature)\n", + "display.fit.series(analysis.fit_result.reduced_chi_square, versus=temperature)\n", + "display.fit.series(analysis.fit_result.iterations, versus=temperature)" + ] + }, + { + "cell_type": "markdown", + "id": "65", "metadata": {}, "source": [ "Plot unit cell parameters vs. temperature." @@ -752,18 +785,18 @@ { "cell_type": "code", "execution_count": null, - "id": "63", + "id": "66", "metadata": {}, "outputs": [], "source": [ - "project.display.fit.series(structure.cell.length_a, versus=temperature)\n", - "project.display.fit.series(structure.cell.length_b, versus=temperature)\n", - "project.display.fit.series(structure.cell.length_c, versus=temperature)" + "display.fit.series(struct.cell.length_a, versus=temperature)\n", + "display.fit.series(struct.cell.length_b, versus=temperature)\n", + "display.fit.series(struct.cell.length_c, versus=temperature)" ] }, { "cell_type": "markdown", - "id": "64", + "id": "67", "metadata": {}, "source": [ "Plot isotropic displacement parameters vs. temperature." 
@@ -772,35 +805,20 @@ { "cell_type": "code", "execution_count": null, - "id": "65", + "id": "68", "metadata": {}, "outputs": [], "source": [ - "project.display.fit.series(\n", - " structure.atom_sites['Co1'].adp_iso,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['Si'].adp_iso,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['O1'].adp_iso,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['O2'].adp_iso,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['O3'].adp_iso,\n", - " versus=temperature,\n", - ")" + "display.fit.series(struct.atom_sites['Co1'].adp_iso, versus=temperature)\n", + "display.fit.series(struct.atom_sites['Si'].adp_iso, versus=temperature)\n", + "display.fit.series(struct.atom_sites['O1'].adp_iso, versus=temperature)\n", + "display.fit.series(struct.atom_sites['O2'].adp_iso, versus=temperature)\n", + "display.fit.series(struct.atom_sites['O3'].adp_iso, versus=temperature)" ] }, { "cell_type": "markdown", - "id": "66", + "id": "69", "metadata": {}, "source": [ "Plot selected fractional coordinates vs. temperature." 
@@ -809,30 +827,15 @@ { "cell_type": "code", "execution_count": null, - "id": "67", + "id": "70", "metadata": {}, "outputs": [], "source": [ - "project.display.fit.series(\n", - " structure.atom_sites['Co2'].fract_x,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['Co2'].fract_z,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['O1'].fract_z,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['O2'].fract_z,\n", - " versus=temperature,\n", - ")\n", - "project.display.fit.series(\n", - " structure.atom_sites['O3'].fract_z,\n", - " versus=temperature,\n", - ")" + "display.fit.series(struct.atom_sites['Co2'].fract_x, versus=temperature)\n", + "display.fit.series(struct.atom_sites['Co2'].fract_z, versus=temperature)\n", + "display.fit.series(struct.atom_sites['O1'].fract_z, versus=temperature)\n", + "display.fit.series(struct.atom_sites['O2'].fract_z, versus=temperature)\n", + "display.fit.series(struct.atom_sites['O3'].fract_z, versus=temperature)" ] } ], diff --git a/docs/docs/tutorials/ed-17.py b/docs/docs/tutorials/ed-17.py index 24e648c5..a2a5e272 100644 --- a/docs/docs/tutorials/ed-17.py +++ b/docs/docs/tutorials/ed-17.py @@ -16,17 +16,20 @@ # %% [markdown] # ## Step 1: Define Project # -# The project object manages structures, experiments, and analysis. +# The project object manages structures, experiments, analysis, display, +# and other related components. # %% -project = ed.Project() +project = ed.Project(name='cosio_d20') +analysis = project.analysis +display = project.display # %% [markdown] # The project must be saved before running sequential fitting, so that # results can be written to `analysis/results.csv`. 
# %% -project.save_as(dir_path='projects/cosio', temporary=False) +project.save_as(dir_path='projects/cosio_d20') # %% [markdown] # ## Step 2: Define Crystal Structure @@ -38,28 +41,28 @@ # %% project.structures.create(name='cosio') -structure = project.structures['cosio'] +struct = project.structures['cosio'] # %% [markdown] # #### Set Space Group # %% -structure.space_group.name_h_m = 'P n m a' -structure.space_group.it_coordinate_system_code = 'abc' +struct.space_group.name_h_m = 'P n m a' +struct.space_group.it_coordinate_system_code = 'abc' # %% [markdown] # #### Set Unit Cell # %% -structure.cell.length_a = 10.31 -structure.cell.length_b = 6.0 -structure.cell.length_c = 4.79 +struct.cell.length_a = 10.31 +struct.cell.length_b = 6.0 +struct.cell.length_c = 4.79 # %% [markdown] # #### Set Atom Sites # %% -structure.atom_sites.create( +struct.atom_sites.create( label='Co1', type_symbol='Co', fract_x=0, @@ -68,7 +71,7 @@ wyckoff_letter='a', adp_iso=0.3, ) -structure.atom_sites.create( +struct.atom_sites.create( label='Co2', type_symbol='Co', fract_x=0.279, @@ -77,7 +80,7 @@ wyckoff_letter='c', adp_iso=0.3, ) -structure.atom_sites.create( +struct.atom_sites.create( label='Si', type_symbol='Si', fract_x=0.094, @@ -86,7 +89,7 @@ wyckoff_letter='c', adp_iso=0.34, ) -structure.atom_sites.create( +struct.atom_sites.create( label='O1', type_symbol='O', fract_x=0.091, @@ -95,7 +98,7 @@ wyckoff_letter='c', adp_iso=0.63, ) -structure.atom_sites.create( +struct.atom_sites.create( label='O2', type_symbol='O', fract_x=0.448, @@ -104,7 +107,7 @@ wyckoff_letter='c', adp_iso=0.59, ) -structure.atom_sites.create( +struct.atom_sites.create( label='O3', type_symbol='O', fract_x=0.164, @@ -205,28 +208,28 @@ # #### Set Free Parameters # %% -structure.cell.length_a.free = True -structure.cell.length_b.free = True -structure.cell.length_c.free = True +struct.cell.length_a.free = True +struct.cell.length_b.free = True +struct.cell.length_c.free = True 
-structure.atom_sites['Co2'].fract_x.free = True -structure.atom_sites['Co2'].fract_z.free = True -structure.atom_sites['Si'].fract_x.free = True -structure.atom_sites['Si'].fract_z.free = True -structure.atom_sites['O1'].fract_x.free = True -structure.atom_sites['O1'].fract_z.free = True -structure.atom_sites['O2'].fract_x.free = True -structure.atom_sites['O2'].fract_z.free = True -structure.atom_sites['O3'].fract_x.free = True -structure.atom_sites['O3'].fract_y.free = True -structure.atom_sites['O3'].fract_z.free = True +struct.atom_sites['Co2'].fract_x.free = True +struct.atom_sites['Co2'].fract_z.free = True +struct.atom_sites['Si'].fract_x.free = True +struct.atom_sites['Si'].fract_z.free = True +struct.atom_sites['O1'].fract_x.free = True +struct.atom_sites['O1'].fract_z.free = True +struct.atom_sites['O2'].fract_x.free = True +struct.atom_sites['O2'].fract_z.free = True +struct.atom_sites['O3'].fract_x.free = True +struct.atom_sites['O3'].fract_y.free = True +struct.atom_sites['O3'].fract_z.free = True -structure.atom_sites['Co1'].adp_iso.free = True -structure.atom_sites['Co2'].adp_iso.free = True -structure.atom_sites['Si'].adp_iso.free = True -structure.atom_sites['O1'].adp_iso.free = True -structure.atom_sites['O2'].adp_iso.free = True -structure.atom_sites['O3'].adp_iso.free = True +struct.atom_sites['Co1'].adp_iso.free = True +struct.atom_sites['Co2'].adp_iso.free = True +struct.atom_sites['Si'].adp_iso.free = True +struct.atom_sites['O1'].adp_iso.free = True +struct.atom_sites['O2'].adp_iso.free = True +struct.atom_sites['O3'].adp_iso.free = True # %% expt.linked_phases['cosio'].scale.free = True @@ -247,26 +250,26 @@ # Set aliases for parameters. 
# %% -project.analysis.aliases.create( +analysis.aliases.create( label='biso_Co1', - param=structure.atom_sites['Co1'].adp_iso, + param=struct.atom_sites['Co1'].adp_iso, ) -project.analysis.aliases.create( +analysis.aliases.create( label='biso_Co2', - param=structure.atom_sites['Co2'].adp_iso, + param=struct.atom_sites['Co2'].adp_iso, ) # %% [markdown] # Set constraints. # %% -project.analysis.constraints.create(expression='biso_Co2 = biso_Co1') +analysis.constraints.create(expression='biso_Co2 = biso_Co1') # %% [markdown] # #### Set Minimizer # %% -project.analysis.fitting.minimizer_type = 'bumps (lm)' +analysis.fitting.minimizer_type = 'bumps (lm)' # %% [markdown] # #### Run Single Fitting @@ -277,19 +280,22 @@ # if the initial parameters are far from optimal. # %% -project.analysis.fit() +analysis.fit() + +# %% +display.fit.results() # %% [markdown] # #### Show parameter correlations # %% -project.display.fit.correlations() +display.fit.correlations() # %% [markdown] # #### Compare measured and calculated patterns for the first fit. # %% -project.display.pattern(expt_name='d20') +display.pattern(expt_name='d20') # %% [markdown] # #### Run Sequential Fitting @@ -310,7 +316,7 @@ temperature = 'diffrn.ambient_temperature' # %% -project.analysis.sequential_fit_extract.create( +analysis.sequential_fit_extract.create( id='temperature', target=temperature, pattern=r'^TEMP\s+([0-9.]+)', @@ -321,16 +327,16 @@ # Set the sequential fitting parameters. # %% -project.analysis.fitting_mode_type = 'sequential' -project.analysis.sequential_fit.data_dir = scan_data_dir -project.analysis.sequential_fit.max_workers = 'auto' -project.analysis.sequential_fit.reverse = True +analysis.fitting_mode_type = 'sequential' +analysis.sequential_fit.data_dir = scan_data_dir +analysis.sequential_fit.max_workers = 'auto' +analysis.sequential_fit.reverse = True # %% [markdown] # Run the sequential fit over all data files in the scan directory. 
# %% -project.analysis.fit() +analysis.fit() # %% [markdown] # #### Replay a Dataset @@ -339,7 +345,7 @@ # %% project.apply_params_from_csv(row_index=0) -project.display.pattern(expt_name='d20') +display.pattern(expt_name='d20') # %% [markdown] # @@ -347,67 +353,45 @@ # %% project.apply_params_from_csv(row_index=-1) -project.display.pattern(expt_name='d20') +display.pattern(expt_name='d20') # %% [markdown] # #### Plot Parameter Evolution # # Reuse the extracted diffrn path as the x-axis in the following plots. +# %% [markdown] +# Plot fit quality metrics vs. temperature. + +# %% +display.fit.series(analysis.fit_result.success, versus=temperature) +display.fit.series(analysis.fit_result.reduced_chi_square, versus=temperature) +display.fit.series(analysis.fit_result.iterations, versus=temperature) + # %% [markdown] # Plot unit cell parameters vs. temperature. # %% -project.display.fit.series(structure.cell.length_a, versus=temperature) -project.display.fit.series(structure.cell.length_b, versus=temperature) -project.display.fit.series(structure.cell.length_c, versus=temperature) +display.fit.series(struct.cell.length_a, versus=temperature) +display.fit.series(struct.cell.length_b, versus=temperature) +display.fit.series(struct.cell.length_c, versus=temperature) # %% [markdown] # Plot isotropic displacement parameters vs. temperature. 
# %% -project.display.fit.series( - structure.atom_sites['Co1'].adp_iso, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['Si'].adp_iso, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['O1'].adp_iso, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['O2'].adp_iso, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['O3'].adp_iso, - versus=temperature, -) +display.fit.series(struct.atom_sites['Co1'].adp_iso, versus=temperature) +display.fit.series(struct.atom_sites['Si'].adp_iso, versus=temperature) +display.fit.series(struct.atom_sites['O1'].adp_iso, versus=temperature) +display.fit.series(struct.atom_sites['O2'].adp_iso, versus=temperature) +display.fit.series(struct.atom_sites['O3'].adp_iso, versus=temperature) # %% [markdown] # Plot selected fractional coordinates vs. temperature. # %% -project.display.fit.series( - structure.atom_sites['Co2'].fract_x, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['Co2'].fract_z, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['O1'].fract_z, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['O2'].fract_z, - versus=temperature, -) -project.display.fit.series( - structure.atom_sites['O3'].fract_z, - versus=temperature, -) +display.fit.series(struct.atom_sites['Co2'].fract_x, versus=temperature) +display.fit.series(struct.atom_sites['Co2'].fract_z, versus=temperature) +display.fit.series(struct.atom_sites['O1'].fract_z, versus=temperature) +display.fit.series(struct.atom_sites['O2'].fract_z, versus=temperature) +display.fit.series(struct.atom_sites['O3'].fract_z, versus=temperature) From 4dc0a1bf3b51cf110fffbde8a94632418916725b Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 09:15:28 +0200 Subject: [PATCH 58/72] Improve tutorial index descriptions --- docs/docs/tutorials/index.md | 15 +++++++-------- 1 file changed, 7 
insertions(+), 8 deletions(-) diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index 8aeffb4d..2a6dae67 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -42,15 +42,14 @@ The tutorials are organized into the following categories: - [LBCO Standard, continuation](ed-18.ipynb) – The most minimal example showing how to load a previously saved project from a directory and - run refinement. Useful when a project has already been set up and - saved in a prior session. -- [Co2SiO4 Sequential, resumed](ed-23.ipynb) – Continue a saved - sequential refinement of Co2SiO4 from an existing - `analysis/results.csv` after an incomplete previous run. + continue working with it. +- [Co2SiO4 Sequential, resumed](ed-23.ipynb) – Resumes a sequential + refinement from an existing `analysis/results.csv` after an incomplete + previous run. - [LBCO Bayesian, display](ed-24.ipynb) – Shows how to load the saved - Bayesian LBCO project created in the previous tutorial and inspect the - persisted fit summary, correlation matrix, posterior plots, and - predictive checks without rerunning DREAM. + project after a Bayesian analysis and inspect the persisted fit + summary, correlation matrix, posterior plots, and predictive checks + without rerunning MCMC sampling. 
## Powder Diffraction From e796e6eeb5d287ca6c5f7af5d3594a6e0420bd3d Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 09:24:32 +0200 Subject: [PATCH 59/72] Accept str or list for column_names --- src/easydiffraction/display/plotting.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/easydiffraction/display/plotting.py b/src/easydiffraction/display/plotting.py index f8ca0fe7..3bfbd5ec 100644 --- a/src/easydiffraction/display/plotting.py +++ b/src/easydiffraction/display/plotting.py @@ -5775,7 +5775,7 @@ def _bragg_tick_d_spacing( def _plot_param_series_from_csv( self, csv_path: str, - column_names: list[str], + column_names: str | list[str], param_descriptor: object, versus_path: str | None = None, ) -> None: @@ -5796,7 +5796,7 @@ def _plot_param_series_from_csv( ---------- csv_path : str Path to the ``results.csv`` file. - column_names : list[str] + column_names : str | list[str] Candidate CSV column keys to plot. param_descriptor : object The live parameter descriptor (for axis label / units). @@ -5806,10 +5806,12 @@ def _plot_param_series_from_csv( """ df = pd.read_csv(csv_path) - column_name = next((name for name in column_names if name in df.columns), None) + column_candidates = [column_names] if isinstance(column_names, str) else column_names + + column_name = next((name for name in column_candidates if name in df.columns), None) if column_name is None: log.warning( - f"Parameter '{column_names[0]}' not found in CSV columns. " + f"Parameter '{column_candidates[0]}' not found in CSV columns. 
" f'Available: {list(df.columns)}' ) return From 7b078fd03b4ca47b64ec168541ed383863476848 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 09:28:58 +0200 Subject: [PATCH 60/72] Fix fit tracking finalization cleanup --- src/easydiffraction/analysis/fitting.py | 20 +++---- .../analysis/minimizers/base.py | 2 + .../analysis/minimizers/test_base.py | 60 ++++++++++++++++++- .../easydiffraction/analysis/test_fitting.py | 53 ++++++++++++++++ 4 files changed, 123 insertions(+), 12 deletions(-) diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index 69f66c27..59b9d926 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -207,17 +207,17 @@ def fit( analysis=analysis, ) - # Perform fitting - self.results = self.minimizer.fit( - params, - objective_function, - verbosity=verbosity, - finalize_tracking=False, - use_physical_limits=use_physical_limits, - random_seed=random_seed, - ) - try: + # Keep tracker finalization in this layer so post-processing + # can run before the live display is closed. 
+ self.results = self.minimizer.fit( + params, + objective_function, + verbosity=verbosity, + finalize_tracking=False, + use_physical_limits=use_physical_limits, + random_seed=random_seed, + ) self._postprocess_fit_results( analysis=analysis, experiments=experiments, diff --git a/src/easydiffraction/analysis/minimizers/base.py b/src/easydiffraction/analysis/minimizers/base.py index 5579e86c..a14a4677 100644 --- a/src/easydiffraction/analysis/minimizers/base.py +++ b/src/easydiffraction/analysis/minimizers/base.py @@ -77,6 +77,8 @@ def _stop_tracking(self) -> None: self._tracking_active = False self.tracker.stop_timer() self.tracker.finish_tracking() + if self.result is not None: + self.result.fitting_time = self.tracker.fitting_time self._emit_deferred_warnings() def _warn_after_tracking(self, message: str) -> None: diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_base.py b/tests/unit/easydiffraction/analysis/minimizers/test_base.py index 57c064b0..45328ad2 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_base.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_base.py @@ -87,8 +87,8 @@ def _compute_residuals( assert minim.synced is True assert isinstance(result.parameters, list) assert result.parameters[0].value == 42 - # Successful fits are finalized by the caller after any post-processing. 
- minim._stop_tracking() + assert result.fitting_time is not None + assert result.fitting_time >= 0.0 assert minim.tracker.fitting_time is not None assert minim.tracker.fitting_time >= 0.0 @@ -202,3 +202,59 @@ def _check_success(self, raw_result): with pytest.raises(ValueError, match='prep failed'): minimizer.fit(parameters=[_DummyParam(1.0)], objective_function=lambda _: np.array([0.0])) + + +def test_minimizer_base_stop_tracking_backfills_result_fitting_time(): + from easydiffraction.analysis.minimizers.base import MinimizerBase + + class DummyResult: + success = True + + class DummyMinimizer(MinimizerBase): + def __init__(self): + super().__init__(name='dummy', method='m', max_iterations=5) + + def _prepare_solver_args(self, parameters): + del parameters + return {'engine_parameters': {'ok': True}} + + def _run_solver(self, objective_function, **kwargs): + residuals = objective_function(kwargs.get('engine_parameters')) + self.tracker.track(residuals=np.array(residuals), parameters=[1]) + return DummyResult() + + def _sync_result_to_parameters(self, parameters, raw_result): + del parameters, raw_result + + def _check_success(self, raw_result): + del raw_result + return True + + def _compute_residuals( + self, engine_params, parameters, structures, experiments, calculator + ): + del parameters, structures, experiments, calculator + assert engine_params == {'ok': True} + return np.array([0.0]) + + minimizer = DummyMinimizer() + params = [_DummyParam(1.0)] + objective = minimizer._create_objective_function( + parameters=params, + structures=None, + experiments=None, + calculator=None, + ) + + result = minimizer.fit( + parameters=params, + objective_function=objective, + finalize_tracking=False, + ) + + assert result.fitting_time is None + + minimizer._stop_tracking() + + assert result.fitting_time is not None + assert result.fitting_time >= 0.0 diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py 
index 21e87b1b..bf9f6523 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting.py +++ b/tests/unit/easydiffraction/analysis/test_fitting.py @@ -182,3 +182,56 @@ def _stop_tracking(self): assert fitter.minimizer.fit_calls[0]['finalize_tracking'] is False assert fitter.minimizer.stop_calls == 1 assert analysis_events == ['capture', 'store'] + + +def test_fitter_fit_stops_tracking_when_minimizer_fit_raises(monkeypatch): + import pytest + + from easydiffraction.analysis.fitting import Fitter + + class DummyParam: + value = 1.0 + _fit_start_value = None + + class DummyStructure: + _need_categories_update = False + + def _update_categories(self): + return None + + class DummyStructures: + def __iter__(self): + return iter([DummyStructure()]) + + class DummyExperiment: + parameters = [] + + class DummyMin: + def __init__(self): + self.stop_calls = 0 + self.tracker = SimpleNamespace(track=lambda residuals, parameters: residuals) + + def fit(self, params, obj, verbosity=None, **kwargs): + del params, obj, verbosity, kwargs + msg = 'fit failed' + raise RuntimeError(msg) + + def _stop_tracking(self): + self.stop_calls += 1 + + fitter = Fitter() + fitter.minimizer = DummyMin() + monkeypatch.setattr( + fitter, + '_collect_fit_parameters', + lambda structures, experiments: [DummyParam()], + ) + + with pytest.raises(RuntimeError, match='fit failed'): + fitter.fit( + structures=DummyStructures(), + experiments=[DummyExperiment()], + verbosity=VerbosityEnum.FULL, + ) + + assert fitter.minimizer.stop_calls == 1 From 37fc39081b55fc0fc2370fcae26a1e586b0544ec Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 10:07:55 +0200 Subject: [PATCH 61/72] Add tutorial benchmark runner and pixi task --- pixi.toml | 1 + tools/benchmark_tutorials.py | 209 +++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 tools/benchmark_tutorials.py diff --git a/pixi.toml b/pixi.toml index 71ac80fd..f02a04ba 100644 --- a/pixi.toml +++ b/pixi.toml @@ 
-186,6 +186,7 @@ cov = { depends-on = [ python = { cmd = 'python', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } tutorial = { cmd = 'python', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } +tutorial-benchmarks = { cmd = 'python tools/benchmark_tutorials.py', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } jupyter = { cmd = 'jupyter', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } notebook-convert = 'jupytext docs/docs/tutorials/*.py --from py:percent --to ipynb' diff --git a/tools/benchmark_tutorials.py b/tools/benchmark_tutorials.py new file mode 100644 index 00000000..a59fd26e --- /dev/null +++ b/tools/benchmark_tutorials.py @@ -0,0 +1,209 @@ +"""Benchmark tutorial scripts and save timing results.""" + +from __future__ import annotations + +import argparse +import csv +import os +import platform +import subprocess # noqa: S404 +import sys +import time +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from pathlib import PurePosixPath + +ROOT = Path(__file__).resolve().parents[1] +SRC_ROOT = ROOT / 'src' +DEFAULT_TUTORIAL_DIR = ROOT / 'docs' / 'docs' / 'tutorials' +DEFAULT_OUTPUT_DIR = ROOT / 'docs' / 'dev' / 'benchmarking' +CHECKPOINT_DIR_NAME = '.ipynb_checkpoints' + + +@dataclass(frozen=True) +class TutorialBenchmarkResult: + """Store timing data for one tutorial run.""" + + tutorial_name: str + elapsed_seconds: float + status: str + return_code: int + + +def _relative_display_path(path: Path, start_path: Path) -> str: + try: + return path.relative_to(start_path).as_posix() + except ValueError: + return path.as_posix() + + +def _slugify(value: str) -> str: + return value.lower().replace(' ', '-').replace('/', '-') + + +def _build_env() -> dict[str, str]: + env = os.environ.copy() + if SRC_ROOT.exists(): + existing_pythonpath = env.get('PYTHONPATH', '') + env['PYTHONPATH'] = ( + str(SRC_ROOT) + if not existing_pythonpath + else str(SRC_ROOT) + os.pathsep + 
existing_pythonpath + ) + return env + + +def _discover_tutorials(tutorial_dir: Path) -> list[Path]: + return [ + path + for path in sorted(tutorial_dir.rglob('*.py')) + if CHECKPOINT_DIR_NAME not in path.parts + ] + + +def _matches_requested_patterns( + script_path: Path, + tutorial_dir: Path, + patterns: list[str], +) -> bool: + if not patterns: + return True + + rel_path = PurePosixPath(_relative_display_path(script_path, tutorial_dir)) + return any(rel_path.match(pattern) or script_path.name == pattern for pattern in patterns) + + +def _run_tutorial( + script_path: Path, + tutorial_dir: Path, + env: dict[str, str], +) -> TutorialBenchmarkResult: + tutorial_name = _relative_display_path(script_path, tutorial_dir) + start_time = time.perf_counter() + result = subprocess.run( # noqa: S603 + [sys.executable, str(script_path)], + cwd=str(ROOT), + env=env, + capture_output=True, + text=True, + encoding='utf-8', + ) + elapsed_seconds = time.perf_counter() - start_time + status = 'ok' if result.returncode == 0 else 'failed' + + if result.returncode == 0: + print(f'OK {tutorial_name} ({elapsed_seconds:.3f}s)') + else: + print(f'FAILED {tutorial_name} ({elapsed_seconds:.3f}s)', file=sys.stderr) + details = ((result.stdout or '') + (result.stderr or '')).strip() + if details: + print(details, file=sys.stderr) + + return TutorialBenchmarkResult( + tutorial_name=tutorial_name, + elapsed_seconds=elapsed_seconds, + status=status, + return_code=result.returncode, + ) + + +def _build_output_path(output_dir: Path) -> Path: + timestamp = datetime.now().astimezone().strftime('%Y%m%d-%H%M%S') + system_name = _slugify(platform.system()) + machine_name = _slugify(platform.machine()) + python_name = f'py{sys.version_info.major}{sys.version_info.minor}' + file_name = ( + f'{timestamp}_{system_name}-{machine_name}_' + f'{python_name}_tutorial-benchmarks.csv' + ) + return output_dir / file_name + + +def _write_results(output_path: Path, results: list[TutorialBenchmarkResult]) -> None: + 
output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open('w', encoding='utf-8', newline='') as handle: + writer = csv.writer(handle) + writer.writerow(['tutorial_name', 'elapsed_seconds', 'status', 'return_code']) + for result in results: + writer.writerow( + [ + result.tutorial_name, + f'{result.elapsed_seconds:.3f}', + result.status, + result.return_code, + ] + ) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description='Run tutorial scripts sequentially and record timings.', + ) + parser.add_argument( + '--tutorial-dir', + type=Path, + default=DEFAULT_TUTORIAL_DIR, + help='Directory containing tutorial scripts to benchmark.', + ) + parser.add_argument( + '--output-dir', + type=Path, + default=DEFAULT_OUTPUT_DIR, + help='Directory where the benchmark CSV should be written.', + ) + parser.add_argument( + '--pattern', + action='append', + default=[], + help=( + 'Glob for tutorial paths relative to the tutorial directory. ' + 'Pass multiple times to benchmark a subset.' 
+ ), + ) + return parser + + +def main() -> int: + args = build_parser().parse_args() + tutorial_dir = args.tutorial_dir.resolve() + output_dir = args.output_dir.resolve() + + if not tutorial_dir.is_dir(): + print(f'Tutorial directory not found: {tutorial_dir}', file=sys.stderr) + return 1 + + tutorials = [ + path + for path in _discover_tutorials(tutorial_dir) + if _matches_requested_patterns(path, tutorial_dir, args.pattern) + ] + if not tutorials: + print('No tutorial scripts matched the requested pattern(s).', file=sys.stderr) + return 1 + + env = _build_env() + results: list[TutorialBenchmarkResult] = [] + for index, tutorial_path in enumerate(tutorials, start=1): + tutorial_name = _relative_display_path(tutorial_path, tutorial_dir) + print(f'[{index}/{len(tutorials)}] Running {tutorial_name}') + results.append(_run_tutorial(tutorial_path, tutorial_dir, env)) + + output_path = _build_output_path(output_dir) + _write_results(output_path, results) + + total_elapsed = sum(result.elapsed_seconds for result in results) + failure_count = sum(result.status == 'failed' for result in results) + + print(f'Wrote benchmark results to {_relative_display_path(output_path, ROOT)}') + print(f'Total elapsed time: {total_elapsed:.3f}s') + + if failure_count: + print(f'Failed tutorials: {failure_count}', file=sys.stderr) + return 1 + + return 0 + + +if __name__ == '__main__': + raise SystemExit(main()) \ No newline at end of file From a7b70167004b3309282702b679cfcde59a2e48c1 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 10:40:15 +0200 Subject: [PATCH 62/72] Write benchmark results incrementally --- tests/unit/test_benchmark_tutorials.py | 133 +++++++++++++++++++++++++ tools/benchmark_tutorials.py | 43 ++++---- 2 files changed, 158 insertions(+), 18 deletions(-) create mode 100644 tests/unit/test_benchmark_tutorials.py diff --git a/tests/unit/test_benchmark_tutorials.py b/tests/unit/test_benchmark_tutorials.py new file mode 100644 index 00000000..3fb9d154 
--- /dev/null +++ b/tests/unit/test_benchmark_tutorials.py @@ -0,0 +1,133 @@ +# SPDX-FileCopyrightText: 2026 EasyScience contributors +# SPDX-License-Identifier: BSD-3-Clause +"""Unit tests for tutorial benchmark CSV persistence.""" + +from __future__ import annotations + +import csv +import importlib.util +import sys +from argparse import Namespace +from pathlib import Path +from types import SimpleNamespace + + +def _load_module(): + module_path = Path(__file__).resolve().parents[2] / 'tools' / 'benchmark_tutorials.py' + spec = importlib.util.spec_from_file_location('benchmark_tutorials', module_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +MUT = _load_module() + + +def test_append_result_writes_one_row(tmp_path): + output_path = tmp_path / 'benchmark.csv' + + MUT._write_csv_header(output_path) + MUT._append_result( + output_path, + MUT.TutorialBenchmarkResult( + tutorial_name='ed-21.py', + elapsed_seconds=12.3456, + status='ok', + return_code=0, + ), + ) + + with output_path.open(encoding='utf-8', newline='') as handle: + rows = list(csv.DictReader(handle)) + + assert rows == [ + { + 'tutorial_name': 'ed-21.py', + 'elapsed_seconds': '12.346', + 'status': 'ok', + 'return_code': '0', + } + ] + + +def test_main_appends_first_result_before_second_tutorial_starts(monkeypatch, tmp_path): + tutorial_dir = tmp_path / 'tutorials' + tutorial_dir.mkdir() + first_tutorial = tutorial_dir / 'ed-01.py' + second_tutorial = tutorial_dir / 'ed-02.py' + first_tutorial.write_text('print("first")\n', encoding='utf-8') + second_tutorial.write_text('print("second")\n', encoding='utf-8') + + output_path = tmp_path / 'benchmarking' / 'results.csv' + args = Namespace( + tutorial_dir=tutorial_dir, + output_dir=tmp_path / 'benchmarking', + pattern=[], + ) + + monkeypatch.setattr( + MUT, + 'build_parser', + lambda: 
SimpleNamespace(parse_args=lambda: args), + ) + monkeypatch.setattr(MUT, '_build_output_path', lambda output_dir: output_path) + monkeypatch.setattr(MUT, '_build_env', lambda: {}) + + def fake_run_tutorial( + script_path: Path, + tutorial_dir_path: Path, + env: dict[str, str], + ) -> MUT.TutorialBenchmarkResult: + del tutorial_dir_path, env + if script_path == first_tutorial: + with output_path.open(encoding='utf-8', newline='') as handle: + rows = list(csv.reader(handle)) + assert rows == [MUT.CSV_HEADER] + return MUT.TutorialBenchmarkResult( + tutorial_name='ed-01.py', + elapsed_seconds=1.0, + status='ok', + return_code=0, + ) + + with output_path.open(encoding='utf-8', newline='') as handle: + rows = list(csv.DictReader(handle)) + assert rows == [ + { + 'tutorial_name': 'ed-01.py', + 'elapsed_seconds': '1.000', + 'status': 'ok', + 'return_code': '0', + } + ] + return MUT.TutorialBenchmarkResult( + tutorial_name='ed-02.py', + elapsed_seconds=2.0, + status='ok', + return_code=0, + ) + + monkeypatch.setattr(MUT, '_run_tutorial', fake_run_tutorial) + + exit_code = MUT.main() + + assert exit_code == 0 + with output_path.open(encoding='utf-8', newline='') as handle: + rows = list(csv.DictReader(handle)) + assert rows == [ + { + 'tutorial_name': 'ed-01.py', + 'elapsed_seconds': '1.000', + 'status': 'ok', + 'return_code': '0', + }, + { + 'tutorial_name': 'ed-02.py', + 'elapsed_seconds': '2.000', + 'status': 'ok', + 'return_code': '0', + }, + ] \ No newline at end of file diff --git a/tools/benchmark_tutorials.py b/tools/benchmark_tutorials.py index a59fd26e..e28d77e9 100644 --- a/tools/benchmark_tutorials.py +++ b/tools/benchmark_tutorials.py @@ -19,6 +19,7 @@ DEFAULT_TUTORIAL_DIR = ROOT / 'docs' / 'docs' / 'tutorials' DEFAULT_OUTPUT_DIR = ROOT / 'docs' / 'dev' / 'benchmarking' CHECKPOINT_DIR_NAME = '.ipynb_checkpoints' +CSV_HEADER = ['tutorial_name', 'elapsed_seconds', 'status', 'return_code'] @dataclass(frozen=True) @@ -93,9 +94,9 @@ def _run_tutorial( status = 'ok' 
if result.returncode == 0 else 'failed' if result.returncode == 0: - print(f'OK {tutorial_name} ({elapsed_seconds:.3f}s)') + print(f' OK {elapsed_seconds:.1f}s') else: - print(f'FAILED {tutorial_name} ({elapsed_seconds:.3f}s)', file=sys.stderr) + print(f' FAILED {elapsed_seconds:.1f}s', file=sys.stderr) details = ((result.stdout or '') + (result.stderr or '')).strip() if details: print(details, file=sys.stderr) @@ -120,20 +121,24 @@ def _build_output_path(output_dir: Path) -> Path: return output_dir / file_name -def _write_results(output_path: Path, results: list[TutorialBenchmarkResult]) -> None: +def _write_csv_header(output_path: Path) -> None: output_path.parent.mkdir(parents=True, exist_ok=True) with output_path.open('w', encoding='utf-8', newline='') as handle: writer = csv.writer(handle) - writer.writerow(['tutorial_name', 'elapsed_seconds', 'status', 'return_code']) - for result in results: - writer.writerow( - [ - result.tutorial_name, - f'{result.elapsed_seconds:.3f}', - result.status, - result.return_code, - ] - ) + writer.writerow(CSV_HEADER) + + +def _append_result(output_path: Path, result: TutorialBenchmarkResult) -> None: + with output_path.open('a', encoding='utf-8', newline='') as handle: + writer = csv.writer(handle) + writer.writerow( + [ + result.tutorial_name, + f'{result.elapsed_seconds:.3f}', + result.status, + result.return_code, + ] + ) def build_parser() -> argparse.ArgumentParser: @@ -182,15 +187,17 @@ def main() -> int: print('No tutorial scripts matched the requested pattern(s).', file=sys.stderr) return 1 + output_path = _build_output_path(output_dir) + _write_csv_header(output_path) + env = _build_env() results: list[TutorialBenchmarkResult] = [] for index, tutorial_path in enumerate(tutorials, start=1): tutorial_name = _relative_display_path(tutorial_path, tutorial_dir) - print(f'[{index}/{len(tutorials)}] Running {tutorial_name}') - results.append(_run_tutorial(tutorial_path, tutorial_dir, env)) - - output_path = 
_build_output_path(output_dir) - _write_results(output_path, results) + print(f'[{index:2}/{len(tutorials)}] Running {tutorial_name}') + result = _run_tutorial(tutorial_path, tutorial_dir, env) + results.append(result) + _append_result(output_path, result) total_elapsed = sum(result.elapsed_seconds for result in results) failure_count = sum(result.status == 'failed' for result in results) From d2d35c4fc56a902ddfc5fccec34f07a7a1eea2cc Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 10:41:02 +0200 Subject: [PATCH 63/72] Reduce tutorial computation time and fix data ID --- docs/docs/tutorials/ed-15.py | 4 ++++ docs/docs/tutorials/ed-17.py | 2 +- docs/docs/tutorials/ed-21.py | 3 ++- docs/docs/tutorials/ed-22.py | 3 ++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/docs/tutorials/ed-15.py b/docs/docs/tutorials/ed-15.py index 7359b712..6f6b45b2 100644 --- a/docs/docs/tutorials/ed-15.py +++ b/docs/docs/tutorials/ed-15.py @@ -79,6 +79,10 @@ # %% project.analysis.fitting.minimizer_type = 'bumps' +# %% +# Limit number of iterations to prevent long calculation time in this tutorial. +project.analysis.fitting.minimizer.max_iterations = 1500 + # %% # Start refinement. All parameters, which have standard uncertainties # in the input CIF files, are refined by default. 
diff --git a/docs/docs/tutorials/ed-17.py b/docs/docs/tutorials/ed-17.py index a2a5e272..93f691fb 100644 --- a/docs/docs/tutorials/ed-17.py +++ b/docs/docs/tutorials/ed-17.py @@ -128,7 +128,7 @@ # #### Download Measured Data # %% -zip_path = ed.download_data(id=27, destination='data') +zip_path = ed.download_data(id=25, destination='data') # %% [markdown] # #### Extract Data Files diff --git a/docs/docs/tutorials/ed-21.py b/docs/docs/tutorials/ed-21.py index b6a52791..df32bcab 100644 --- a/docs/docs/tutorials/ed-21.py +++ b/docs/docs/tutorials/ed-21.py @@ -297,7 +297,8 @@ project.analysis.fitting.minimizer_type = 'bumps (dream)' # %% -project.analysis.fitting.minimizer.steps = 300 # lower than the default 3000 +project.analysis.fitting.minimizer.steps = 100 # lower than the default 3000 +project.analysis.fitting.minimizer.burn = 20 # lower than the default 600 # %% project.analysis.fit() diff --git a/docs/docs/tutorials/ed-22.py b/docs/docs/tutorials/ed-22.py index da43c6d5..cbc08f74 100644 --- a/docs/docs/tutorials/ed-22.py +++ b/docs/docs/tutorials/ed-22.py @@ -219,7 +219,8 @@ project.analysis.fitting.minimizer_type = 'bumps (dream)' # %% -project.analysis.fitting.minimizer.steps = 500 # lower than the default 3000 +project.analysis.fitting.minimizer.steps = 100 # lower than the default 3000 +project.analysis.fitting.minimizer.burn = 20 # lower than the default 600 # %% project.analysis.fit() From d809c04dcdd87351c5fe6568c2148a33aea26cf2 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 10:41:42 +0200 Subject: [PATCH 64/72] Simplify tutorial titles for consistency --- docs/docs/tutorials/index.md | 6 +++--- docs/mkdocs.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index 2a6dae67..ac7378fd 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -40,13 +40,13 @@ The tutorials are organized into the following categories: ## Load 
Project -- [LBCO Standard, continuation](ed-18.ipynb) – The most minimal example +- [LBCO Single Fit](ed-18.ipynb) – The most minimal example showing how to load a previously saved project from a directory and continue working with it. -- [Co2SiO4 Sequential, resumed](ed-23.ipynb) – Resumes a sequential +- [Co2SiO4 Sequential Fit](ed-23.ipynb) – Resumes a sequential refinement from an existing `analysis/results.csv` after an incomplete previous run. -- [LBCO Bayesian, display](ed-24.ipynb) – Shows how to load the saved +- [LBCO Bayesian Display](ed-24.ipynb) – Shows how to load the saved project after a Bayesian analysis and inspect the persisted fit summary, correlation matrix, posterior plots, and predictive checks without rerunning MCMC sampling. diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index fafb563f..02f86202 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -197,9 +197,9 @@ nav: - LBCO basic load: tutorials/ed-1.ipynb - LBCO complete: tutorials/ed-3.ipynb - Load Project: - - LBCO Standard continuation: tutorials/ed-18.ipynb - - Co2SiO4 Sequential resumed: tutorials/ed-23.ipynb - - LBCO Bayesian display: tutorials/ed-24.ipynb + - LBCO Single: tutorials/ed-18.ipynb + - Co2SiO4 Sequential: tutorials/ed-23.ipynb + - LBCO Bayesian: tutorials/ed-24.ipynb - Powder Diffraction: - Co2SiO4 pd-neut-cwl: tutorials/ed-5.ipynb - HS pd-neut-cwl: tutorials/ed-6.ipynb From b8c91226eacee3dfcafb50199b3592603a3464f1 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 12:14:47 +0200 Subject: [PATCH 65/72] Honor max_iterations in BUMPS fits --- docs/docs/tutorials/ed-15.py | 2 +- .../analysis/fit_helpers/tracking.py | 21 +- src/easydiffraction/analysis/fitting.py | 5 +- .../analysis/minimizers/base.py | 15 +- .../analysis/minimizers/bumps.py | 187 +++++++++++++++++- .../analysis/minimizers/bumps_dream.py | 17 +- src/easydiffraction/project/project.py | 11 +- .../analysis/fit_helpers/test_tracking.py | 17 ++ .../analysis/minimizers/test_base.py | 31 +++ 
.../analysis/minimizers/test_bumps.py | 121 ++++++++++++ .../analysis/minimizers/test_bumps_dream.py | 22 +++ .../analysis/minimizers/test_dfols.py | 4 + .../analysis/minimizers/test_lmfit.py | 35 ++++ .../easydiffraction/analysis/test_fitting.py | 48 +++++ 14 files changed, 519 insertions(+), 17 deletions(-) diff --git a/docs/docs/tutorials/ed-15.py b/docs/docs/tutorials/ed-15.py index 6f6b45b2..03dc35fa 100644 --- a/docs/docs/tutorials/ed-15.py +++ b/docs/docs/tutorials/ed-15.py @@ -81,7 +81,7 @@ # %% # Limit number of iterations to prevent long calculation time in this tutorial. -project.analysis.fitting.minimizer.max_iterations = 1500 +project.analysis.fitting.minimizer.max_iterations = 500 # %% # Start refinement. All parameters, which have standard uncertainties diff --git a/src/easydiffraction/analysis/fit_helpers/tracking.py b/src/easydiffraction/analysis/fit_helpers/tracking.py index 151512b7..c24f9435 100644 --- a/src/easydiffraction/analysis/fit_helpers/tracking.py +++ b/src/easydiffraction/analysis/fit_helpers/tracking.py @@ -154,8 +154,25 @@ def track( self._last_chi2 = reduced_chi2 return residuals + self.track_fit_progress( + iteration=self._iteration, + reduced_chi2=reduced_chi2, + elapsed_time=self._current_elapsed_time(), + ) + + return residuals + + def track_fit_progress( + self, + *, + iteration: int, + reduced_chi2: float, + elapsed_time: float, + ) -> None: + """Update fit progress from a backend iteration callback.""" + self._iteration = max(1, iteration) + row: list[str] = [] - elapsed_time = self._current_elapsed_time() if self._previous_chi2 is None: self._previous_chi2 = reduced_chi2 @@ -203,8 +220,6 @@ def track( self._last_chi2 = reduced_chi2 self._last_iteration = self._iteration - return residuals - def track_sampler_progress(self, update: SamplerProgressUpdate) -> None: """ Update progress from a sampler monitor. 
diff --git a/src/easydiffraction/analysis/fitting.py b/src/easydiffraction/analysis/fitting.py index 59b9d926..72d89a6c 100644 --- a/src/easydiffraction/analysis/fitting.py +++ b/src/easydiffraction/analysis/fitting.py @@ -345,4 +345,7 @@ def _residual_function( # Append the residuals for this experiment residuals.extend(diff) - return self.minimizer.tracker.track(np.array(residuals), parameters) + residual_array = np.array(residuals) + if getattr(self.minimizer, '_tracks_progress_via_solver_monitor', lambda: False)(): + return residual_array + return self.minimizer.tracker.track(residual_array, parameters) diff --git a/src/easydiffraction/analysis/minimizers/base.py b/src/easydiffraction/analysis/minimizers/base.py index a14a4677..4e1b4845 100644 --- a/src/easydiffraction/analysis/minimizers/base.py +++ b/src/easydiffraction/analysis/minimizers/base.py @@ -34,7 +34,7 @@ def __init__( ) -> None: self.name: str | None = name self.method: str | None = method - self.max_iterations: int | None = max_iterations + self._max_iterations: int | None = max_iterations self.result: FitResults | None = None self._previous_chi2: float | None = None self._iteration: int | None = None @@ -46,6 +46,15 @@ def __init__( self._deferred_warning_messages: list[str] = [] self.tracker: FitProgressTracker = FitProgressTracker() + @property + def max_iterations(self) -> int | None: + """User-facing iteration limit for the current minimizer.""" + return self._max_iterations + + @max_iterations.setter + def max_iterations(self, value: int | None) -> None: + self._max_iterations = value + def _start_tracking( self, minimizer_name: str, @@ -99,6 +108,10 @@ def _tracking_mode() -> str: """Return the tracker mode for the current minimizer.""" return 'fit' + def _tracks_progress_via_solver_monitor(self) -> bool: + """Return whether live progress comes from solver callbacks.""" + return False + @abstractmethod def _prepare_solver_args(self, parameters: list[Any]) -> dict[str, Any]: """ diff --git 
a/src/easydiffraction/analysis/minimizers/bumps.py b/src/easydiffraction/analysis/minimizers/bumps.py index 10926234..98d5fbfd 100644 --- a/src/easydiffraction/analysis/minimizers/bumps.py +++ b/src/easydiffraction/analysis/minimizers/bumps.py @@ -8,6 +8,7 @@ from bumps.fitproblem import FitProblem from bumps.fitters import FITTERS from bumps.fitters import FitDriver +from bumps.fitters import monitor as bumps_monitor from bumps.parameter import Parameter as BumpsParameter from scipy.optimize import OptimizeResult @@ -20,6 +21,22 @@ DEFAULT_MAX_ITERATIONS = 1000 +class _BumpsEvaluationLimitReached(RuntimeError): + """Raised when the BUMPS residual-evaluation budget is exhausted.""" + + def __init__( + self, + *, + evaluation_count: int, + parameter_values: np.ndarray, + residuals: np.ndarray | None, + ) -> None: + super().__init__('maximum number of residual evaluations reached') + self.evaluation_count = evaluation_count + self.parameter_values = parameter_values + self.residuals = residuals + + class _EasyDiffractionFitness: """ Adaptor wrapping an EasyDiffraction objective into bumps Fitness. 
@@ -29,10 +46,16 @@ def __init__( self, bumps_params: list[BumpsParameter], objective_function: object, + max_evaluations: int | None = None, ) -> None: self._bumps_params = bumps_params self._objective_function = objective_function + self._max_evaluations = max_evaluations self._numpoints = 0 + self._evaluation_count = 0 + self._count_evaluations = True + self._last_parameter_values: np.ndarray | None = None + self._last_residuals: np.ndarray | None = None def parameters(self) -> dict[str, BumpsParameter]: """Return bumps parameters as a name-keyed dictionary.""" @@ -43,9 +66,27 @@ def update(self) -> None: def residuals(self) -> np.ndarray: """Compute residuals using current bumps parameter values.""" + if ( + self._count_evaluations + and self._max_evaluations is not None + and self._evaluation_count >= self._max_evaluations + ): + last_parameter_values = self._last_parameter_values + if last_parameter_values is None: + last_parameter_values = np.array([p.value for p in self._bumps_params]) + raise _BumpsEvaluationLimitReached( + evaluation_count=self._evaluation_count, + parameter_values=last_parameter_values, + residuals=self._last_residuals, + ) + values = np.array([p.value for p in self._bumps_params]) - r = self._objective_function(values) + r = np.asarray(self._objective_function(values), dtype=float) self._numpoints = len(r) + self._last_parameter_values = values.copy() + self._last_residuals = r.copy() + if self._count_evaluations: + self._evaluation_count += 1 return r def nllf(self) -> float: @@ -59,6 +100,95 @@ def numpoints(self) -> int: """Return the number of data points.""" return self._numpoints + @property + def evaluation_count(self) -> int: + """Return the number of residual evaluations during the live fit.""" + return self._evaluation_count + + def reset_evaluation_count(self) -> None: + """Reset the residual-evaluation counter before the live fit starts.""" + self._evaluation_count = 0 + + def stop_counting_evaluations(self) -> None: + 
"""Freeze residual-evaluation counting after the live fit ends.""" + self._count_evaluations = False + + @property + def last_residuals(self) -> np.ndarray | None: + """Return the residual vector from the last successful evaluation.""" + return self._last_residuals + + def last_reduced_chi_square(self, *, n_parameters: int) -> float | None: + """Return the reduced chi-square from the last residual vector.""" + if self._last_residuals is None: + return None + + chi_square = float(np.sum(self._last_residuals**2)) + dof = len(self._last_residuals) - n_parameters + if dof <= 0: + return chi_square + return chi_square / dof + + +class _BumpsProgressMonitor(bumps_monitor.Monitor): + """Progress monitor reporting live BUMPS fit evaluation counts.""" + + def __init__( + self, + *, + tracker: object, + fitness: _EasyDiffractionFitness, + n_points: int, + n_parameters: int, + ) -> None: + self._tracker = tracker + self._fitness = fitness + self._n_points = n_points + self._n_parameters = n_parameters + + @staticmethod + def config_history(history: object) -> None: + """Declare the history fields needed for deterministic progress.""" + history.requires(time=1, step=1, value=1) + + def __call__(self, history: object) -> None: + """Forward deterministic BUMPS progress to the fit tracker.""" + if not history.time or not history.value: + return + + self._tracker.track_fit_progress( + iteration=self._reported_iteration(history), + reduced_chi2=self._reduced_chi_square_from_nllf(float(history.value[0])), + elapsed_time=float(history.time[0]), + ) + + def final(self, history: object, best: dict[str, object]) -> None: + """Record the final BUMPS state in the fit tracker.""" + if not history.time or best.get('value') is None: + return + + self._tracker.track_fit_progress( + iteration=self._reported_iteration(history), + reduced_chi2=self._reduced_chi_square_from_nllf(float(best['value'])), + elapsed_time=float(history.time[0]), + ) + + def _reported_iteration(self, history: object) -> 
int: + """Return the live fit evaluation count shown in progress.""" + if self._fitness.evaluation_count > 0: + return self._fitness.evaluation_count + + step = int(history.step[0]) if history.step else 0 + return max(1, step) + + def _reduced_chi_square_from_nllf(self, nllf: float) -> float: + """Convert negative log-likelihood to reduced chi-square.""" + dof = self._n_points - self._n_parameters + chi_square = 2.0 * nllf + if dof <= 0: + return chi_square + return chi_square / dof + @MinimizerFactory.register class BumpsMinimizer(MinimizerBase): @@ -81,6 +211,10 @@ def __init__( max_iterations=max_iterations, ) + def _tracks_progress_via_solver_monitor(self) -> bool: + """Use BUMPS monitor callbacks for live deterministic progress.""" + return True + def _prepare_solver_args( # noqa: PLR6301 self, parameters: list[object], @@ -135,21 +269,58 @@ def _run_solver( A scipy OptimizeResult with the optimized values. """ bumps_params = kwargs.get('bumps_params') - fitness = _EasyDiffractionFitness(bumps_params, objective_function) + fitness = _EasyDiffractionFitness( + bumps_params, + objective_function, + max_evaluations=self.max_iterations, + ) fitness.nllf() # pre-compute so numpoints() is valid + fitness.reset_evaluation_count() problem = FitProblem(fitness) + progress_monitor = _BumpsProgressMonitor( + tracker=self.tracker, + fitness=fitness, + n_points=fitness.numpoints(), + n_parameters=len(bumps_params), + ) fitclass = next(cls for cls in FITTERS if cls.id == self.method) driver = FitDriver( fitclass=fitclass, problem=problem, - monitors=[], + monitors=[progress_monitor], steps=self.max_iterations, ) driver.clip() - x, fx = driver.fit() - - success = x is not None + evaluation_limit_reached = False + evaluation_limit_message = 'successful termination' + try: + x, fx = driver.fit() + except _BumpsEvaluationLimitReached as exc: + evaluation_limit_reached = True + evaluation_limit_message = str(exc) + x = exc.parameter_values.copy() + fx = None + reduced_chi2 = 
None + if exc.residuals is not None: + chi_square = float(np.sum(exc.residuals**2)) + dof = len(exc.residuals) - len(bumps_params) + reduced_chi2 = chi_square if dof <= 0 else chi_square / dof + elif fitness.last_residuals is not None: + reduced_chi2 = fitness.last_reduced_chi_square( + n_parameters=len(bumps_params) + ) + if reduced_chi2 is not None: + elapsed_time = self.tracker._current_elapsed_time() + self.tracker.track_fit_progress( + iteration=exc.evaluation_count, + reduced_chi2=reduced_chi2, + elapsed_time=0.0 if elapsed_time is None else elapsed_time, + ) + finally: + fitness.stop_counting_evaluations() + + success = x is not None and not evaluation_limit_reached if success: problem.setp(x) @@ -168,8 +339,8 @@ def _run_solver( dx=stderr, fun=fx, success=success, - status=0 if success else -1, - message='successful termination' if success else 'fit failed', + status=0 if success else 5 if evaluation_limit_reached else -1, + message='successful termination' if success else evaluation_limit_message, covar=covar, var_names=var_names, ) diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index cf28be55..00ced906 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -29,6 +29,7 @@ from easydiffraction.analysis.minimizers.enums import DreamPopulationInitializationEnum from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import MinimizerFactory +from easydiffraction.utils.logging import log from easydiffraction.core.metadata import TypeInfo DEFAULT_METHOD = 'dream' @@ -303,14 +304,26 @@ def __init__( self._parallel: int = DEFAULT_PARALLEL self._init: DreamPopulationInitializationEnum = DEFAULT_INIT + @property + def max_iterations(self) -> int: + """DREAM exposes sampler length through ``steps`` instead.""" + msg = "DREAM sampler uses 'steps' 
instead of 'max_iterations'." + raise AttributeError(msg) + + @max_iterations.setter + def max_iterations(self, value: int) -> None: + del value + msg = "DREAM sampler uses 'steps' instead of 'max_iterations'." + raise AttributeError(msg) + @property def steps(self) -> int: """Number of DREAM generations retained after burn-in.""" - return self._validated_positive_integer('steps', self.max_iterations) + return self._validated_positive_integer('steps', self._max_iterations) @steps.setter def steps(self, value: int) -> None: - self.max_iterations = self._validated_positive_integer('steps', value) + self._max_iterations = self._validated_positive_integer('steps', value) @property def burn(self) -> int | None: diff --git a/src/easydiffraction/project/project.py b/src/easydiffraction/project/project.py index a27cb900..9b89eb50 100644 --- a/src/easydiffraction/project/project.py +++ b/src/easydiffraction/project/project.py @@ -524,7 +524,16 @@ def save_as( project_dir = resolve_artifact_path(dir_path) if overwrite and project_dir.is_dir(): - shutil.rmtree(project_dir) + current_working_directory = pathlib.Path.cwd().resolve() + resolved_project_dir = project_dir.resolve() + if resolved_project_dir == current_working_directory: + for child_path in resolved_project_dir.iterdir(): + if child_path.is_dir(): + shutil.rmtree(child_path) + else: + child_path.unlink() + else: + shutil.rmtree(project_dir) self.info.path = project_dir self.save() diff --git a/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py b/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py index 89a9c86b..ff9df328 100644 --- a/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py +++ b/tests/unit/easydiffraction/analysis/fit_helpers/test_tracking.py @@ -79,6 +79,23 @@ def test_tracker_fit_adds_timed_rows_and_resets_counter(monkeypatch): assert tracker._previous_chi2 == 4.0 +def test_tracker_fit_progress_uses_backend_iterations_for_display(): + from 
easydiffraction.analysis.fit_helpers.tracking import FitProgressTracker + + tracker = FitProgressTracker() + + tracker.track_fit_progress(iteration=1, reduced_chi2=10.0, elapsed_time=0.1) + tracker.track_fit_progress(iteration=63, reduced_chi2=5.0, elapsed_time=1.0) + tracker.track_fit_progress(iteration=122, reduced_chi2=4.0, elapsed_time=2.0) + + assert tracker._df_rows == [ + ['1', '0.10', '10.00', ''], + ['63', '1.00', '5.00', '50.0% ↓'], + ['122', '2.00', '4.00', '20.0% ↓'], + ] + assert tracker.best_iteration == 122 + + def test_tracker_sampler_post_processing_adds_final_status_row(): from easydiffraction.analysis.fit_helpers.tracking import FitProgressTracker from easydiffraction.analysis.fit_helpers.tracking import SamplerProgressUpdate diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_base.py b/tests/unit/easydiffraction/analysis/minimizers/test_base.py index 45328ad2..df6741c7 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_base.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_base.py @@ -123,6 +123,37 @@ def _compute_residuals( assert np.allclose(out, np.array([1.0, 2.0, 3.0])) +def test_max_iterations_property_updates_internal_value(): + from easydiffraction.analysis.minimizers.base import MinimizerBase + + class M(MinimizerBase): + def __init__(self): + super().__init__(name='dummy', method='m', max_iterations=5) + + def _prepare_solver_args(self, parameters): + del parameters + return {} + + def _run_solver(self, objective_function, **kwargs): + del objective_function, kwargs + return None + + def _sync_result_to_parameters(self, parameters, raw_result): + del parameters, raw_result + + def _check_success(self, raw_result): + del raw_result + return True + + minimizer = M() + + assert minimizer.max_iterations == 5 + + minimizer.max_iterations = 200 + + assert minimizer.max_iterations == 200 + + def test_minimizer_base_fit_stops_tracking_when_solver_prep_fails(): from easydiffraction.analysis.minimizers.base 
import MinimizerBase diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py index 1a73fdbc..242fb4b1 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py @@ -189,6 +189,80 @@ def test_fitness_numpoints_after_nllf(): assert fitness.numpoints() == 3 +def test_fitness_evaluation_count_can_be_reset_and_stopped(): + from bumps.parameter import Parameter as BumpsParameter + + from easydiffraction.analysis.minimizers.bumps import _EasyDiffractionFitness + + bp = BumpsParameter(value=0.0, name='a') + fitness = _EasyDiffractionFitness([bp], lambda values: np.array([values[0]])) + + fitness.residuals() + fitness.residuals() + assert fitness.evaluation_count == 2 + + fitness.reset_evaluation_count() + assert fitness.evaluation_count == 0 + + fitness.stop_counting_evaluations() + fitness.residuals() + assert fitness.evaluation_count == 0 + + +def test_fitness_raises_when_max_evaluations_is_reached(): + from bumps.parameter import Parameter as BumpsParameter + + from easydiffraction.analysis.minimizers.bumps import _BumpsEvaluationLimitReached + from easydiffraction.analysis.minimizers.bumps import _EasyDiffractionFitness + + bp = BumpsParameter(value=2.0, name='a') + fitness = _EasyDiffractionFitness( + [bp], + lambda values: np.array([values[0]]), + max_evaluations=2, + ) + + fitness.residuals() + fitness.residuals() + + with pytest.raises(_BumpsEvaluationLimitReached) as exc_info: + fitness.residuals() + + assert exc_info.value.evaluation_count == 2 + np.testing.assert_array_equal(exc_info.value.parameter_values, np.array([2.0])) + np.testing.assert_array_equal(exc_info.value.residuals, np.array([2.0])) + + +def test_bumps_progress_monitor_reports_evaluation_count(): + from easydiffraction.analysis.minimizers.bumps import _EasyDiffractionFitness + from easydiffraction.analysis.minimizers.bumps import 
_BumpsProgressMonitor + + tracker = MagicMock() + fitness = _EasyDiffractionFitness([], lambda values: np.array([])) + fitness.reset_evaluation_count() + fitness._evaluation_count = 63 + monitor = _BumpsProgressMonitor( + tracker=tracker, + fitness=fitness, + n_points=20, + n_parameters=4, + ) + + monitor( + types.SimpleNamespace( + step=[7], + value=[8.0], + time=[1.5], + ) + ) + + tracker.track_fit_progress.assert_called_once_with( + iteration=63, + reduced_chi2=pytest.approx(1.0), + elapsed_time=1.5, + ) + + def test_fitness_update_is_noop(): from easydiffraction.analysis.minimizers.bumps import _EasyDiffractionFitness @@ -232,6 +306,8 @@ def test_run_solver_returns_optimize_result(): bumps_params=[bp1, bp2], ) + assert len(mock_driver_cls.call_args.kwargs['monitors']) == 1 + assert isinstance(res, OptimizeResult) assert res.success is True np.testing.assert_array_almost_equal(res.x, [1.5, 2.5]) @@ -265,6 +341,51 @@ def test_run_solver_failure(): assert res.status == -1 +def test_run_solver_stops_at_max_evaluations(): + from easydiffraction.analysis.minimizers.bumps import BumpsMinimizer + from easydiffraction.analysis.minimizers.bumps import _BumpsEvaluationLimitReached + + m = BumpsMinimizer(max_iterations=50) + m.tracker = MagicMock() + m.tracker._current_elapsed_time.return_value = 1.25 + + fake_fitter = types.SimpleNamespace(id='lm') + limit_error = _BumpsEvaluationLimitReached( + evaluation_count=50, + parameter_values=np.array([1.5]), + residuals=np.array([2.0, 2.0]), + ) + + with ( + patch('easydiffraction.analysis.minimizers.bumps.FitDriver') as mock_driver_cls, + patch('easydiffraction.analysis.minimizers.bumps.FitProblem'), + patch('easydiffraction.analysis.minimizers.bumps.FITTERS', [fake_fitter]), + patch.object(m, '_compute_covariance', return_value=(None, None)) as mock_covariance, + ): + driver_instance = mock_driver_cls.return_value + driver_instance.fit.side_effect = limit_error + driver_instance.clip = MagicMock() + + from bumps.parameter 
import Parameter as BumpsParameter + + bp = BumpsParameter(value=1.5, name='a') + result = m._run_solver( + lambda values: np.array([values[0], values[0]]), + bumps_params=[bp], + ) + + assert result.success is False + assert result.status == 5 + assert result.message == 'maximum number of residual evaluations reached' + np.testing.assert_array_equal(result.x, np.array([1.5])) + mock_covariance.assert_not_called() + m.tracker.track_fit_progress.assert_called_once_with( + iteration=50, + reduced_chi2=pytest.approx(8.0), + elapsed_time=1.25, + ) + + # -- _sync_result_to_parameters tests ----------------------------------------- diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py index d17dce4f..3a23f557 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps_dream.py @@ -53,6 +53,28 @@ def test_type_info_and_default_init(): assert minimizer.steps == 3000 +def test_dream_uses_steps_instead_of_max_iterations(): + from easydiffraction.analysis.minimizers.bumps_dream import BumpsDreamMinimizer + + minimizer = BumpsDreamMinimizer() + + with pytest.raises( + AttributeError, + match=r"DREAM sampler uses 'steps' instead of 'max_iterations'\.", + ): + _ = minimizer.max_iterations + + with pytest.raises( + AttributeError, + match=r"DREAM sampler uses 'steps' instead of 'max_iterations'\.", + ): + minimizer.max_iterations = 300 + + minimizer.steps = 300 + + assert minimizer.steps == 300 + + def test_dream_progress_monitor_allocates_rows_by_phase_ratio(): from easydiffraction.analysis.minimizers.bumps_dream import _DreamProgressMonitor diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_dfols.py b/tests/unit/easydiffraction/analysis/minimizers/test_dfols.py index 7d94bd04..68437dd2 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_dfols.py +++ 
b/tests/unit/easydiffraction/analysis/minimizers/test_dfols.py @@ -43,10 +43,12 @@ def __init__(self): def fake_solve(fun, x0, bounds, maxfun): # Verify we pass reasonable arguments + del fun assert isinstance(x0, np.ndarray) assert x0.shape[0] == 2 assert isinstance(bounds, tuple) assert all(isinstance(b, np.ndarray) for b in bounds) + assert maxfun == 10 return FakeRes() monkeypatch.setattr(mod, 'solve', fake_solve) @@ -63,3 +65,5 @@ def fake_solve(fun, x0, bounds, maxfun): assert params[0].uncertainty is None assert params[1].uncertainty is None assert minim._check_success(res) is True + assert minim.max_iterations == 10 + assert not hasattr(minim, 'steps') diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_lmfit.py b/tests/unit/easydiffraction/analysis/minimizers/test_lmfit.py index f41e9896..7091fdcd 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_lmfit.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_lmfit.py @@ -77,3 +77,38 @@ def __init__(self): assert params[1].value == 20.0 assert params[1].uncertainty == 1.0 assert minim._check_success(res) is True + + +def test_lmfit_max_iterations_is_user_facing_iteration_setting(monkeypatch): + from easydiffraction.analysis.minimizers.lmfit import LmfitMinimizer + + import easydiffraction.analysis.minimizers.lmfit as lm + + observed_max_nfev = {} + + def fake_minimize( + objective_function, + *, + params, + method, + nan_policy, + max_nfev, + ): + del objective_function, params, method, nan_policy + observed_max_nfev['value'] = max_nfev + return types.SimpleNamespace(success=True, params={}) + + monkeypatch.setattr( + lm, + 'lmfit', + types.SimpleNamespace(Parameters=lm.lmfit.Parameters, minimize=fake_minimize), + ) + + minimizer = LmfitMinimizer() + + minimizer.max_iterations = 300 + minimizer._run_solver(lambda *args, **kwargs: np.array([0.0]), engine_parameters=object()) + + assert minimizer.max_iterations == 300 + assert observed_max_nfev['value'] == 300 + assert not 
hasattr(minimizer, 'steps') diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py index bf9f6523..f72b6ba6 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting.py +++ b/tests/unit/easydiffraction/analysis/test_fitting.py @@ -235,3 +235,51 @@ def _stop_tracking(self): ) assert fitter.minimizer.stop_calls == 1 + + +def test_residual_function_skips_tracker_for_solver_monitored_minimizer(monkeypatch): + import numpy as np + + from easydiffraction.analysis.fitting import Fitter + + class DummyExperiment: + def _update_categories(self, called_by_minimizer=False): + del called_by_minimizer + return None + + class DummyMin: + def __init__(self): + self.tracker = SimpleNamespace( + track=lambda residuals, parameters: (_ for _ in ()).throw( + AssertionError('tracker.track should not be called') + ) + ) + + def _sync_result_to_parameters(self, parameters, engine_params): + del parameters, engine_params + + def _tracks_progress_via_solver_monitor(self): + return True + + fitter = Fitter() + fitter.minimizer = DummyMin() + + monkeypatch.setattr( + 'easydiffraction.analysis.fitting.intensity_category_for', + lambda experiment: SimpleNamespace( + intensity_calc=np.array([1.0]), + intensity_meas=np.array([2.0]), + intensity_meas_su=np.array([1.0]), + ), + ) + + residuals = fitter._residual_function( + engine_params={}, + parameters=[], + structures=[], + experiments=[DummyExperiment()], + weights=None, + analysis=None, + ) + + np.testing.assert_allclose(residuals, np.array([1.0])) From 60359acfc8322c0cbd55ccba8627dd1ed200bf7c Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 12:31:26 +0200 Subject: [PATCH 66/72] Add tutorial benchmark results --- ...darwin-arm64_py314_tutorial-benchmarks.csv | 24 +++++++++++++++++++ ...darwin-arm64_py314_tutorial-benchmarks.csv | 24 +++++++++++++++++++ ...darwin-arm64_py314_tutorial-benchmarks.csv | 24 +++++++++++++++++++ 3 files changed, 72 insertions(+) 
create mode 100644 docs/dev/benchmarking/20260519-103251_darwin-arm64_py314_tutorial-benchmarks.csv create mode 100644 docs/dev/benchmarking/20260519-103500_darwin-arm64_py314_tutorial-benchmarks.csv create mode 100644 docs/dev/benchmarking/20260519-121524_darwin-arm64_py314_tutorial-benchmarks.csv diff --git a/docs/dev/benchmarking/20260519-103251_darwin-arm64_py314_tutorial-benchmarks.csv b/docs/dev/benchmarking/20260519-103251_darwin-arm64_py314_tutorial-benchmarks.csv new file mode 100644 index 00000000..edb8e3f6 --- /dev/null +++ b/docs/dev/benchmarking/20260519-103251_darwin-arm64_py314_tutorial-benchmarks.csv @@ -0,0 +1,24 @@ +tutorial_name,elapsed_seconds,status,return_code +ed-1.py,13.789,ok,0 +ed-10.py,39.098,ok,0 +ed-11.py,10.343,ok,0 +ed-12.py,8.331,ok,0 +ed-13.py,21.745,ok,0 +ed-14.py,6.158,ok,0 +ed-15.py,240.151,ok,0 +ed-16.py,58.481,ok,0 +ed-17.py,152.214,ok,0 +ed-18.py,6.114,ok,0 +ed-2.py,18.322,ok,0 +ed-20.py,36.422,ok,0 +ed-21.py,197.610,ok,0 +ed-22.py,194.351,ok,0 +ed-23.py,6.060,ok,0 +ed-24.py,4.749,ok,0 +ed-3.py,19.050,ok,0 +ed-4.py,4.480,ok,0 +ed-5.py,36.605,ok,0 +ed-6.py,61.846,ok,0 +ed-7.py,115.147,ok,0 +ed-8.py,101.442,ok,0 +ed-9.py,9.214,ok,0 diff --git a/docs/dev/benchmarking/20260519-103500_darwin-arm64_py314_tutorial-benchmarks.csv b/docs/dev/benchmarking/20260519-103500_darwin-arm64_py314_tutorial-benchmarks.csv new file mode 100644 index 00000000..608944cb --- /dev/null +++ b/docs/dev/benchmarking/20260519-103500_darwin-arm64_py314_tutorial-benchmarks.csv @@ -0,0 +1,24 @@ +tutorial_name,elapsed_seconds,status,return_code +ed-1.py,13.979,ok,0 +ed-10.py,38.764,ok,0 +ed-11.py,10.606,ok,0 +ed-12.py,9.044,ok,0 +ed-13.py,23.157,ok,0 +ed-14.py,6.585,ok,0 +ed-15.py,258.188,ok,0 +ed-16.py,60.097,ok,0 +ed-17.py,69.418,ok,0 +ed-18.py,6.181,ok,0 +ed-2.py,18.844,ok,0 +ed-20.py,39.157,ok,0 +ed-21.py,96.730,ok,0 +ed-22.py,73.480,ok,0 +ed-23.py,5.984,ok,0 +ed-24.py,4.942,ok,0 +ed-3.py,20.782,ok,0 +ed-4.py,5.780,ok,0 +ed-5.py,37.716,ok,0 
+ed-6.py,66.911,ok,0 +ed-7.py,119.645,ok,0 +ed-8.py,103.887,ok,0 +ed-9.py,8.891,ok,0 diff --git a/docs/dev/benchmarking/20260519-121524_darwin-arm64_py314_tutorial-benchmarks.csv b/docs/dev/benchmarking/20260519-121524_darwin-arm64_py314_tutorial-benchmarks.csv new file mode 100644 index 00000000..ff8eda19 --- /dev/null +++ b/docs/dev/benchmarking/20260519-121524_darwin-arm64_py314_tutorial-benchmarks.csv @@ -0,0 +1,24 @@ +tutorial_name,elapsed_seconds,status,return_code +ed-1.py,15.557,ok,0 +ed-10.py,40.860,ok,0 +ed-11.py,10.823,ok,0 +ed-12.py,8.861,ok,0 +ed-13.py,24.128,ok,0 +ed-14.py,6.722,ok,0 +ed-15.py,28.243,ok,0 +ed-16.py,59.218,ok,0 +ed-17.py,70.816,ok,0 +ed-18.py,6.944,ok,0 +ed-2.py,20.385,ok,0 +ed-20.py,39.513,ok,0 +ed-21.py,96.953,ok,0 +ed-22.py,75.390,ok,0 +ed-23.py,6.115,ok,0 +ed-24.py,5.159,ok,0 +ed-3.py,34.082,ok,0 +ed-4.py,8.215,ok,0 +ed-5.py,61.949,ok,0 +ed-6.py,83.857,ok,0 +ed-7.py,120.332,ok,0 +ed-8.py,103.831,ok,0 +ed-9.py,9.270,ok,0 From cec16f68db2f36b848a98bbe3423f3bc2c5a1e4f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 12:31:58 +0200 Subject: [PATCH 67/72] Refactor BumpsMinimizer evaluation limit handling --- docs/dev/package-structure/full.md | 2 + docs/docs/tutorials/index.md | 6 +- .../analysis/minimizers/base.py | 3 +- .../analysis/minimizers/bumps.py | 152 +++++++++++------- .../analysis/minimizers/bumps_dream.py | 7 +- .../analysis/minimizers/test_base.py | 2 +- .../analysis/minimizers/test_bumps.py | 8 +- .../easydiffraction/analysis/test_fitting.py | 4 +- tests/unit/test_benchmark_tutorials.py | 4 +- 9 files changed, 118 insertions(+), 70 deletions(-) diff --git a/docs/dev/package-structure/full.md b/docs/dev/package-structure/full.md index f9643f88..677f5851 100644 --- a/docs/dev/package-structure/full.md +++ b/docs/dev/package-structure/full.md @@ -151,7 +151,9 @@ │ │ ├── 📄 base.py │ │ │ └── 🏷️ class MinimizerBase │ │ ├── 📄 bumps.py +│ │ │ ├── 🏷️ class _BumpsEvaluationLimitError │ │ │ ├── 🏷️ class 
_EasyDiffractionFitness +│ │ │ ├── 🏷️ class _BumpsProgressMonitor │ │ │ └── 🏷️ class BumpsMinimizer │ │ ├── 📄 bumps_amoeba.py │ │ │ └── 🏷️ class BumpsAmoebaMinimizer diff --git a/docs/docs/tutorials/index.md b/docs/docs/tutorials/index.md index ac7378fd..b789645d 100644 --- a/docs/docs/tutorials/index.md +++ b/docs/docs/tutorials/index.md @@ -40,9 +40,9 @@ The tutorials are organized into the following categories: ## Load Project -- [LBCO Single Fit](ed-18.ipynb) – The most minimal example - showing how to load a previously saved project from a directory and - continue working with it. +- [LBCO Single Fit](ed-18.ipynb) – The most minimal example showing how + to load a previously saved project from a directory and continue + working with it. - [Co2SiO4 Sequential Fit](ed-23.ipynb) – Resumes a sequential refinement from an existing `analysis/results.csv` after an incomplete previous run. diff --git a/src/easydiffraction/analysis/minimizers/base.py b/src/easydiffraction/analysis/minimizers/base.py index 4e1b4845..2d8bc83f 100644 --- a/src/easydiffraction/analysis/minimizers/base.py +++ b/src/easydiffraction/analysis/minimizers/base.py @@ -108,7 +108,8 @@ def _tracking_mode() -> str: """Return the tracker mode for the current minimizer.""" return 'fit' - def _tracks_progress_via_solver_monitor(self) -> bool: + @staticmethod + def _tracks_progress_via_solver_monitor() -> bool: """Return whether live progress comes from solver callbacks.""" return False diff --git a/src/easydiffraction/analysis/minimizers/bumps.py b/src/easydiffraction/analysis/minimizers/bumps.py index 98d5fbfd..f80446cf 100644 --- a/src/easydiffraction/analysis/minimizers/bumps.py +++ b/src/easydiffraction/analysis/minimizers/bumps.py @@ -21,7 +21,7 @@ DEFAULT_MAX_ITERATIONS = 1000 -class _BumpsEvaluationLimitReached(RuntimeError): +class _BumpsEvaluationLimitError(RuntimeError): """Raised when the BUMPS residual-evaluation budget is exhausted.""" def __init__( @@ -38,9 +38,7 @@ def __init__( class 
_EasyDiffractionFitness: - """ - Adaptor wrapping an EasyDiffraction objective into bumps Fitness. - """ + """Wrap an EasyDiffraction objective in the BUMPS fitness API.""" def __init__( self, @@ -58,14 +56,14 @@ def __init__( self._last_residuals: np.ndarray | None = None def parameters(self) -> dict[str, BumpsParameter]: - """Return bumps parameters as a name-keyed dictionary.""" + """Return BUMPS parameters as a name-keyed dictionary.""" return {p.name: p for p in self._bumps_params} def update(self) -> None: - """Signal that parameters have changed (no-op).""" + """Signal that parameters have changed.""" def residuals(self) -> np.ndarray: - """Compute residuals using current bumps parameter values.""" + """Compute residuals for the current BUMPS parameter values.""" if ( self._count_evaluations and self._max_evaluations is not None @@ -74,7 +72,7 @@ def residuals(self) -> np.ndarray: last_parameter_values = self._last_parameter_values if last_parameter_values is None: last_parameter_values = np.array([p.value for p in self._bumps_params]) - raise _BumpsEvaluationLimitReached( + raise _BumpsEvaluationLimitError( evaluation_count=self._evaluation_count, parameter_values=last_parameter_values, residuals=self._last_residuals, @@ -90,9 +88,7 @@ def residuals(self) -> np.ndarray: return r def nllf(self) -> float: - """ - Negative log-likelihood as half the sum of squared residuals. 
- """ + """Return half the sum of squared residuals.""" r = self.residuals() return 0.5 * np.sum(r**2) @@ -102,24 +98,24 @@ def numpoints(self) -> int: @property def evaluation_count(self) -> int: - """Return the number of residual evaluations during the live fit.""" + """Return the live residual-evaluation count.""" return self._evaluation_count def reset_evaluation_count(self) -> None: - """Reset the residual-evaluation counter before the live fit starts.""" + """Reset the residual-evaluation counter.""" self._evaluation_count = 0 def stop_counting_evaluations(self) -> None: - """Freeze residual-evaluation counting after the live fit ends.""" + """Freeze residual-evaluation counting.""" self._count_evaluations = False @property def last_residuals(self) -> np.ndarray | None: - """Return the residual vector from the last successful evaluation.""" + """Return the last successful residual vector.""" return self._last_residuals def last_reduced_chi_square(self, *, n_parameters: int) -> float | None: - """Return the reduced chi-square from the last residual vector.""" + """Return reduced chi-square for the last residual vector.""" if self._last_residuals is None: return None @@ -131,7 +127,7 @@ def last_reduced_chi_square(self, *, n_parameters: int) -> float | None: class _BumpsProgressMonitor(bumps_monitor.Monitor): - """Progress monitor reporting live BUMPS fit evaluation counts.""" + """Report live BUMPS fit evaluation counts.""" def __init__( self, @@ -148,7 +144,9 @@ def __init__( @staticmethod def config_history(history: object) -> None: - """Declare the history fields needed for deterministic progress.""" + """ + Declare the history fields needed for deterministic progress. 
+ """ history.requires(time=1, step=1, value=1) def __call__(self, history: object) -> None: @@ -192,7 +190,7 @@ def _reduced_chi_square_from_nllf(self, nllf: float) -> float: @MinimizerFactory.register class BumpsMinimizer(MinimizerBase): - """Minimizer using the bumps package.""" + """Minimizer using the BUMPS package.""" type_info = TypeInfo( tag=MinimizerTypeEnum.BUMPS, @@ -211,8 +209,11 @@ def __init__( max_iterations=max_iterations, ) - def _tracks_progress_via_solver_monitor(self) -> bool: - """Use BUMPS monitor callbacks for live deterministic progress.""" + @staticmethod + def _tracks_progress_via_solver_monitor() -> bool: + """ + Use BUMPS monitor callbacks for live deterministic progress. + """ return True def _prepare_solver_args( # noqa: PLR6301 @@ -220,7 +221,7 @@ def _prepare_solver_args( # noqa: PLR6301 parameters: list[object], ) -> dict[str, object]: """ - Prepare bumps parameters from EasyDiffraction parameters. + Prepare BUMPS parameters from EasyDiffraction parameters. Parameters ---------- @@ -250,7 +251,7 @@ def _run_solver( **kwargs: object, ) -> object: """ - Run the bumps solver. + Run the BUMPS solver. 
Uses FitDriver directly instead of bumps.fitters.fit() to skip the expensive post-fit stderr/Jacobian computation that would @@ -292,31 +293,17 @@ def _run_solver( steps=self.max_iterations, ) driver.clip() - evaluation_limit_reached = False - evaluation_limit_message = 'successful termination' try: x, fx = driver.fit() - except _BumpsEvaluationLimitReached as exc: + evaluation_limit_reached = False + evaluation_limit_message = 'successful termination' + except _BumpsEvaluationLimitError as exc: + x, fx, evaluation_limit_message = self._handle_evaluation_limit( + exc=exc, + fitness=fitness, + n_parameters=len(bumps_params), + ) evaluation_limit_reached = True - evaluation_limit_message = str(exc) - x = exc.parameter_values.copy() - fx = None - reduced_chi2 = None - if exc.residuals is not None: - chi_square = float(np.sum(exc.residuals**2)) - dof = len(exc.residuals) - len(bumps_params) - reduced_chi2 = chi_square if dof <= 0 else chi_square / dof - elif fitness.last_residuals is not None: - reduced_chi2 = fitness.last_reduced_chi_square( - n_parameters=len(bumps_params) - ) - if reduced_chi2 is not None: - elapsed_time = self.tracker._current_elapsed_time() - self.tracker.track_fit_progress( - iteration=exc.evaluation_count, - reduced_chi2=reduced_chi2, - elapsed_time=0.0 if elapsed_time is None else elapsed_time, - ) finally: fitness.stop_counting_evaluations() @@ -324,25 +311,82 @@ def _run_solver( if success: problem.setp(x) + return self._build_optimize_result( + bumps_params=bumps_params, + fitness=fitness, + success=success, + evaluation_limit_reached=evaluation_limit_reached, + evaluation_limit_message=evaluation_limit_message, + function_value=fx, + ) + + def _handle_evaluation_limit( + self, + *, + exc: _BumpsEvaluationLimitError, + fitness: _EasyDiffractionFitness, + n_parameters: int, + ) -> tuple[np.ndarray, None, str]: + """ + Build a partial result when the evaluation budget is exhausted. 
+ """ + reduced_chi2 = self._reduced_chi_square_from_limit( + exc=exc, + fitness=fitness, + n_parameters=n_parameters, + ) + if reduced_chi2 is not None: + elapsed_time = self.tracker._current_elapsed_time() + self.tracker.track_fit_progress( + iteration=exc.evaluation_count, + reduced_chi2=reduced_chi2, + elapsed_time=0.0 if elapsed_time is None else elapsed_time, + ) + return exc.parameter_values.copy(), None, str(exc) + + @staticmethod + def _reduced_chi_square_from_limit( + *, + exc: _BumpsEvaluationLimitError, + fitness: _EasyDiffractionFitness, + n_parameters: int, + ) -> float | None: + """Return reduced chi-square at the evaluation cutoff.""" + if exc.residuals is not None: + chi_square = float(np.sum(exc.residuals**2)) + dof = len(exc.residuals) - n_parameters + return chi_square if dof <= 0 else chi_square / dof + if fitness.last_residuals is None: + return None + return fitness.last_reduced_chi_square(n_parameters=n_parameters) + + def _build_optimize_result( + self, + *, + bumps_params: list[BumpsParameter], + fitness: _EasyDiffractionFitness, + success: bool, + evaluation_limit_reached: bool, + evaluation_limit_message: str, + function_value: float | None, + ) -> OptimizeResult: + """Convert the BUMPS solver outcome into an OptimizeResult.""" # Read values back from bumps Parameters in our original order. # FitProblem sorts parameters alphabetically, so x from # driver.fit() uses that sorted order — not ours. 
result_x = np.array([p.value for p in bumps_params]) - - covar, stderr = ( + covariance, stderr = ( self._compute_covariance(bumps_params, fitness) if success else (None, None) ) - var_names = [p.name for p in bumps_params] - return OptimizeResult( x=result_x, dx=stderr, - fun=fx, + fun=function_value, success=success, status=0 if success else 5 if evaluation_limit_reached else -1, message='successful termination' if success else evaluation_limit_message, - covar=covar, - var_names=var_names, + covar=covariance, + var_names=[p.name for p in bumps_params], ) def _compute_covariance( # noqa: PLR6301 @@ -396,7 +440,7 @@ def _sync_result_to_parameters( # noqa: PLR6301 raw_result: object, ) -> None: """ - Synchronize the result from the solver to the parameters. + Synchronize the solver result back to the parameters. Parameters ---------- @@ -422,7 +466,7 @@ def _sync_result_to_parameters( # noqa: PLR6301 def _check_success(self, raw_result: object) -> bool: # noqa: PLR6301 """ - Determine success from bumps OptimizeResult. + Determine success from a BUMPS OptimizeResult. Parameters ---------- diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 00ced906..29e44643 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -29,7 +29,6 @@ from easydiffraction.analysis.minimizers.enums import DreamPopulationInitializationEnum from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import MinimizerFactory -from easydiffraction.utils.logging import log from easydiffraction.core.metadata import TypeInfo DEFAULT_METHOD = 'dream' @@ -307,13 +306,15 @@ def __init__( @property def max_iterations(self) -> int: """DREAM exposes sampler length through ``steps`` instead.""" - msg = "DREAM sampler uses 'steps' instead of 'max_iterations'." 
+ sampler_name = self.type_info.description.partition('with ')[2].split()[0] + msg = f"{sampler_name} sampler uses 'steps' instead of 'max_iterations'." raise AttributeError(msg) @max_iterations.setter def max_iterations(self, value: int) -> None: del value - msg = "DREAM sampler uses 'steps' instead of 'max_iterations'." + sampler_name = self.type_info.description.partition('with ')[2].split()[0] + msg = f"{sampler_name} sampler uses 'steps' instead of 'max_iterations'." raise AttributeError(msg) @property diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_base.py b/tests/unit/easydiffraction/analysis/minimizers/test_base.py index df6741c7..4ee99f1d 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_base.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_base.py @@ -136,7 +136,7 @@ def _prepare_solver_args(self, parameters): def _run_solver(self, objective_function, **kwargs): del objective_function, kwargs - return None + return def _sync_result_to_parameters(self, parameters, raw_result): del parameters, raw_result diff --git a/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py b/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py index 242fb4b1..830a4f64 100644 --- a/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py +++ b/tests/unit/easydiffraction/analysis/minimizers/test_bumps.py @@ -212,7 +212,7 @@ def test_fitness_evaluation_count_can_be_reset_and_stopped(): def test_fitness_raises_when_max_evaluations_is_reached(): from bumps.parameter import Parameter as BumpsParameter - from easydiffraction.analysis.minimizers.bumps import _BumpsEvaluationLimitReached + from easydiffraction.analysis.minimizers.bumps import _BumpsEvaluationLimitError from easydiffraction.analysis.minimizers.bumps import _EasyDiffractionFitness bp = BumpsParameter(value=2.0, name='a') @@ -225,7 +225,7 @@ def test_fitness_raises_when_max_evaluations_is_reached(): fitness.residuals() fitness.residuals() - with 
pytest.raises(_BumpsEvaluationLimitReached) as exc_info: + with pytest.raises(_BumpsEvaluationLimitError) as exc_info: fitness.residuals() assert exc_info.value.evaluation_count == 2 @@ -343,14 +343,14 @@ def test_run_solver_failure(): def test_run_solver_stops_at_max_evaluations(): from easydiffraction.analysis.minimizers.bumps import BumpsMinimizer - from easydiffraction.analysis.minimizers.bumps import _BumpsEvaluationLimitReached + from easydiffraction.analysis.minimizers.bumps import _BumpsEvaluationLimitError m = BumpsMinimizer(max_iterations=50) m.tracker = MagicMock() m.tracker._current_elapsed_time.return_value = 1.25 fake_fitter = types.SimpleNamespace(id='lm') - limit_error = _BumpsEvaluationLimitReached( + limit_error = _BumpsEvaluationLimitError( evaluation_count=50, parameter_values=np.array([1.5]), residuals=np.array([2.0, 2.0]), diff --git a/tests/unit/easydiffraction/analysis/test_fitting.py b/tests/unit/easydiffraction/analysis/test_fitting.py index f72b6ba6..83272f9f 100644 --- a/tests/unit/easydiffraction/analysis/test_fitting.py +++ b/tests/unit/easydiffraction/analysis/test_fitting.py @@ -243,9 +243,9 @@ def test_residual_function_skips_tracker_for_solver_monitored_minimizer(monkeypa from easydiffraction.analysis.fitting import Fitter class DummyExperiment: - def _update_categories(self, called_by_minimizer=False): + def _update_categories(self, *, called_by_minimizer=False): del called_by_minimizer - return None + return class DummyMin: def __init__(self): diff --git a/tests/unit/test_benchmark_tutorials.py b/tests/unit/test_benchmark_tutorials.py index 3fb9d154..c0b390ef 100644 --- a/tests/unit/test_benchmark_tutorials.py +++ b/tests/unit/test_benchmark_tutorials.py @@ -74,7 +74,7 @@ def test_main_appends_first_result_before_second_tutorial_starts(monkeypatch, tm lambda: SimpleNamespace(parse_args=lambda: args), ) monkeypatch.setattr(MUT, '_build_output_path', lambda output_dir: output_path) - monkeypatch.setattr(MUT, '_build_env', lambda: 
{}) + monkeypatch.setattr(MUT, '_build_env', dict) def fake_run_tutorial( script_path: Path, @@ -130,4 +130,4 @@ def fake_run_tutorial( 'status': 'ok', 'return_code': '0', }, - ] \ No newline at end of file + ] From 10f206cae1f1a686dac9f6fbf3db4d2cdc900177 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 13:04:03 +0200 Subject: [PATCH 68/72] Plot fit quality vs temperature --- docs/docs/tutorials/ed-23.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/docs/tutorials/ed-23.py b/docs/docs/tutorials/ed-23.py index 8789fece..a4dbc8ed 100644 --- a/docs/docs/tutorials/ed-23.py +++ b/docs/docs/tutorials/ed-23.py @@ -61,11 +61,30 @@ # ## Plot Parameter Evolution # # Use the same persisted diffrn path stored in `analysis/results.csv` -# for the x-axis. Omitting `param` plots every fitted parameter one -# after another. +# for the x-axis. # %% temperature = 'diffrn.ambient_temperature' +# %% [markdown] +# Plot fit quality metrics vs. temperature. + +# %% +project.display.fit.series( + project.analysis.fit_result.success, + versus=temperature, +) +project.display.fit.series( + project.analysis.fit_result.reduced_chi_square, + versus=temperature, +) +project.display.fit.series( + project.analysis.fit_result.iterations, + versus=temperature, +) + +# %% [markdown] +# Omitting `param` plots every fitted parameter one after another. 
+ # %% project.display.fit.series(versus=temperature) From e7247b34f5faabd5250d95ceca449a5efc7beba5 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 13:04:17 +0200 Subject: [PATCH 69/72] Improve output formatting and add logging --- src/easydiffraction/analysis/minimizers/bumps_dream.py | 3 +++ src/easydiffraction/analysis/sequential.py | 2 +- src/easydiffraction/utils/utils.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/easydiffraction/analysis/minimizers/bumps_dream.py b/src/easydiffraction/analysis/minimizers/bumps_dream.py index 29e44643..225c7ea0 100644 --- a/src/easydiffraction/analysis/minimizers/bumps_dream.py +++ b/src/easydiffraction/analysis/minimizers/bumps_dream.py @@ -30,6 +30,9 @@ from easydiffraction.analysis.minimizers.enums import MinimizerTypeEnum from easydiffraction.analysis.minimizers.factory import MinimizerFactory from easydiffraction.core.metadata import TypeInfo +from easydiffraction.utils.logging import log + +_BUMPS_DREAM_LOG = log DEFAULT_METHOD = 'dream' DEFAULT_MAX_ITERATIONS = 3000 diff --git a/src/easydiffraction/analysis/sequential.py b/src/easydiffraction/analysis/sequential.py index 1fc17ea5..a43b537a 100644 --- a/src/easydiffraction/analysis/sequential.py +++ b/src/easydiffraction/analysis/sequential.py @@ -896,7 +896,7 @@ def _print_sequential_completion( return console.print(f'✅ Sequential fitting complete: {processed_count} files processed.') - console.print(f'📄 Results saved to: {csv_path}') + console.print(f'📄 Results saved to:\n{csv_path}') def _prepare_sequential_run( diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py index 418c155e..6dba4301 100644 --- a/src/easydiffraction/utils/utils.py +++ b/src/easydiffraction/utils/utils.py @@ -276,7 +276,7 @@ def download_data( if is_project_archive: project_dir = extract_project_from_zip(file_path, destination=extraction_dir) file_path.unlink() - console.print(f"✅ Data #{id} downloaded and extracted 
to '{project_dir}'") + console.print(f"✅ Data #{id} downloaded and extracted to\n'{project_dir}'") return str(project_dir) console.print(f"✅ Data #{id} downloaded to:\n'{file_path}'") From f220fcbfedda4266157f4bd41315d5271957902f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 13:09:31 +0200 Subject: [PATCH 70/72] Optimize tutorial execution time --- .github/workflows/docs.yml | 2 +- docs/docs/tutorials/ed-13.ipynb | 2 +- docs/docs/tutorials/ed-15.ipynb | 39 +++++++++++++++++++----------- docs/docs/tutorials/ed-17.ipynb | 2 +- docs/docs/tutorials/ed-21.ipynb | 3 ++- docs/docs/tutorials/ed-22.ipynb | 3 ++- docs/docs/tutorials/ed-23.ipynb | 42 ++++++++++++++++++++++++++++++--- pixi.toml | 1 + 8 files changed, 72 insertions(+), 22 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index def30827..ea6bdad1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -119,7 +119,7 @@ jobs: # Uses multiple cores for parallel execution to speed up the process. 
- name: Run notebooks # if: false # Temporarily disabled to speed up the docs build - run: pixi run notebook-exec + run: pixi run notebook-exec-ci # Build the static files for the documentation site for local inspection # Input: docs/ directory containing the Markdown files diff --git a/docs/docs/tutorials/ed-13.ipynb b/docs/docs/tutorials/ed-13.ipynb index 616c536c..ca636d82 100644 --- a/docs/docs/tutorials/ed-13.ipynb +++ b/docs/docs/tutorials/ed-13.ipynb @@ -2657,7 +2657,7 @@ ], "metadata": { "jupytext": { - "cell_metadata_filter": "tags,title,-all", + "cell_metadata_filter": "title,tags,-all", "main_language": "python", "notebook_metadata_filter": "-all" } diff --git a/docs/docs/tutorials/ed-15.ipynb b/docs/docs/tutorials/ed-15.ipynb index b6d82a31..e79bdd87 100644 --- a/docs/docs/tutorials/ed-15.ipynb +++ b/docs/docs/tutorials/ed-15.ipynb @@ -248,6 +248,17 @@ "id": "23", "metadata": {}, "outputs": [], + "source": [ + "# Limit number of iterations to prevent long calculation time in this tutorial.\n", + "project.analysis.fitting.minimizer.max_iterations = 500" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], "source": [ "# Start refinement. 
All parameters, which have standard uncertainties\n", "# in the input CIF files, are refined by default.\n", @@ -257,7 +268,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -268,7 +279,7 @@ { "cell_type": "code", "execution_count": null, - "id": "25", + "id": "26", "metadata": {}, "outputs": [], "source": [ @@ -278,7 +289,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -288,7 +299,7 @@ { "cell_type": "code", "execution_count": null, - "id": "27", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -297,7 +308,7 @@ }, { "cell_type": "markdown", - "id": "28", + "id": "29", "metadata": {}, "source": [ "## Step 5: Perform Analysis (ADP aniso)" @@ -306,7 +317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -317,7 +328,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -330,7 +341,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -340,7 +351,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -350,7 +361,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "34", "metadata": {}, "outputs": [], "source": [ @@ -360,7 +371,7 @@ { "cell_type": "code", "execution_count": null, - "id": "34", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -370,7 +381,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -380,7 +391,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "37", "metadata": {}, "outputs": [], "source": [ @@ -390,7 +401,7 @@ { "cell_type": "code", "execution_count": null, - 
"id": "37", + "id": "38", "metadata": {}, "outputs": [], "source": [ diff --git a/docs/docs/tutorials/ed-17.ipynb b/docs/docs/tutorials/ed-17.ipynb index a8216c62..e471d99a 100644 --- a/docs/docs/tutorials/ed-17.ipynb +++ b/docs/docs/tutorials/ed-17.ipynb @@ -249,7 +249,7 @@ "metadata": {}, "outputs": [], "source": [ - "zip_path = ed.download_data(id=27, destination='data')" + "zip_path = ed.download_data(id=25, destination='data')" ] }, { diff --git a/docs/docs/tutorials/ed-21.ipynb b/docs/docs/tutorials/ed-21.ipynb index 4722b50d..689c3d57 100644 --- a/docs/docs/tutorials/ed-21.ipynb +++ b/docs/docs/tutorials/ed-21.ipynb @@ -617,7 +617,8 @@ "metadata": {}, "outputs": [], "source": [ - "project.analysis.fitting.minimizer.steps = 300 # lower than the default 3000" + "project.analysis.fitting.minimizer.steps = 100 # lower than the default 3000\n", + "project.analysis.fitting.minimizer.burn = 20 # lower than the default 600" ] }, { diff --git a/docs/docs/tutorials/ed-22.ipynb b/docs/docs/tutorials/ed-22.ipynb index bacb7c77..a8d51113 100644 --- a/docs/docs/tutorials/ed-22.ipynb +++ b/docs/docs/tutorials/ed-22.ipynb @@ -482,7 +482,8 @@ "metadata": {}, "outputs": [], "source": [ - "project.analysis.fitting.minimizer.steps = 500 # lower than the default 3000" + "project.analysis.fitting.minimizer.steps = 100 # lower than the default 3000\n", + "project.analysis.fitting.minimizer.burn = 20 # lower than the default 600" ] }, { diff --git a/docs/docs/tutorials/ed-23.ipynb b/docs/docs/tutorials/ed-23.ipynb index 9dd2d831..11a28c79 100644 --- a/docs/docs/tutorials/ed-23.ipynb +++ b/docs/docs/tutorials/ed-23.ipynb @@ -163,8 +163,7 @@ "## Plot Parameter Evolution\n", "\n", "Use the same persisted diffrn path stored in `analysis/results.csv`\n", - "for the x-axis. Omitting `param` plots every fitted parameter one\n", - "after another." + "for the x-axis." 
] }, { @@ -177,10 +176,47 @@ "temperature = 'diffrn.ambient_temperature'" ] }, + { + "cell_type": "markdown", + "id": "16", + "metadata": {}, + "source": [ + "Plot fit quality metrics vs. temperature." + ] + }, { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "project.display.fit.series(\n", + " project.analysis.fit_result.success,\n", + " versus=temperature,\n", + ")\n", + "project.display.fit.series(\n", + " project.analysis.fit_result.reduced_chi_square,\n", + " versus=temperature,\n", + ")\n", + "project.display.fit.series(\n", + " project.analysis.fit_result.iterations,\n", + " versus=temperature,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "18", + "metadata": {}, + "source": [ + "Omitting `param` plots every fitted parameter one after another." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", "metadata": {}, "outputs": [], "source": [ diff --git a/pixi.toml b/pixi.toml index f02a04ba..3f8012e6 100644 --- a/pixi.toml +++ b/pixi.toml @@ -193,6 +193,7 @@ notebook-convert = 'jupytext docs/docs/tutorials/*.py --from py:percent --to ipy notebook-strip = 'nbstripout docs/docs/tutorials/*.ipynb' notebook-tweak = 'python tools/tweak_notebooks.py docs/docs/tutorials/' notebook-exec = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } +notebook-exec-ci = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v' } notebook-prepare = { depends-on = [ 'notebook-convert', From dda5c94618e6659ba1398c1462b3404439d2972f Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 13:22:43 +0200 Subject: [PATCH 71/72] Add newline before saved path --- tests/unit/easydiffraction/analysis/test_sequential.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/unit/easydiffraction/analysis/test_sequential.py b/tests/unit/easydiffraction/analysis/test_sequential.py index dc837570..2be4c19f 100644 --- a/tests/unit/easydiffraction/analysis/test_sequential.py +++ b/tests/unit/easydiffraction/analysis/test_sequential.py @@ -601,7 +601,7 @@ def test_fit_sequential_non_silent_starts_indicator_with_progress_table( assert events[8] == ('stop',) assert events[9:] == [ ('console_print', ('✅ Sequential fitting complete: 1 files processed.',), {}), - ('console_print', (f'📄 Results saved to: {tmp_path / "results.csv"}',), {}), + ('console_print', (f'📄 Results saved to:\n{tmp_path / "results.csv"}',), {}), ] From 208a3e7d2c6a02de1f7ba566d71de7f40d2af390 Mon Sep 17 00:00:00 2001 From: Andrew Sazonov Date: Tue, 19 May 2026 14:37:12 +0200 Subject: [PATCH 72/72] Remove notebook timeout, add CI artifact root --- pixi.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixi.toml b/pixi.toml index 3f8012e6..70a8a310 100644 --- a/pixi.toml +++ b/pixi.toml @@ -192,8 +192,8 @@ jupyter = { cmd = 'jupyter', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutori notebook-convert = 'jupytext docs/docs/tutorials/*.py --from py:percent --to ipynb' notebook-strip = 'nbstripout docs/docs/tutorials/*.ipynb' notebook-tweak = 'python tools/tweak_notebooks.py docs/docs/tutorials/' -notebook-exec = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } -notebook-exec-ci = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v' } +notebook-exec = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --overwrite --color=yes -n auto -v', env = { EASYDIFFRACTION_ARTIFACT_ROOT = 'tmp/tutorials' } } +notebook-exec-ci = { cmd = 'python -m pytest --nbmake docs/docs/tutorials/ --nbmake-timeout=1200 --overwrite --color=yes -n auto -v', env = { 
EASYDIFFRACTION_ARTIFACT_ROOT = '.' } } notebook-prepare = { depends-on = [ 'notebook-convert',