From ec1ab56e6886990312f8d78066afd7d88f2588a5 Mon Sep 17 00:00:00 2001 From: frankfeifan Date: Wed, 24 Jun 2026 21:42:02 -0700 Subject: [PATCH] added scripts to automatically generate reaction system .hpp in configuration --- scripts/generate_reaction_system.py | 623 +++++++++++++++++++ scripts/validate_reaction_json.py | 235 +++++++ src/CMakeLists.txt | 74 +++ src/reactions/data/CarbonateMixed.json | 56 ++ src/reactions/data/CarbonatePureKinetic.json | 26 + src/reactions/data/README.md | 148 +++++ 6 files changed, 1162 insertions(+) create mode 100755 scripts/generate_reaction_system.py create mode 100755 scripts/validate_reaction_json.py create mode 100644 src/reactions/data/CarbonateMixed.json create mode 100644 src/reactions/data/CarbonatePureKinetic.json create mode 100644 src/reactions/data/README.md diff --git a/scripts/generate_reaction_system.py b/scripts/generate_reaction_system.py new file mode 100755 index 0000000..cacd963 --- /dev/null +++ b/scripts/generate_reaction_system.py @@ -0,0 +1,623 @@ +#!/usr/bin/env python3 +""" +Build-time code generator for HPCReact geochemistry reaction systems. +Generates constexpr C++ code from JSON input for GPU efficiency. +""" + +import argparse +import json +import os +from typing import List, Dict +from datetime import datetime + + +class ReactionSystemGenerator: + def __init__(self, config: Dict): + self.system_name = config['systemName'] + self.namespace = config['namespace'] + + # Primary species (always required) + self.primary_species = config['primarySpecies'] + + # Secondary species - can be provided or auto-inferred + if 'secondarySpecies' in config: + # Explicitly provided + self.secondary_species = config['secondarySpecies'] + self.num_equilibrium_reactions = len(self.secondary_species) + elif 'numEquilibriumReactions' in config and config['numEquilibriumReactions'] > 0: + # Auto-infer from first N reactions + num_eq = config['numEquilibriumReactions'] + print(f" Info: Auto-inferring {num_eq} secondary species from equilibrium reactions") + self.secondary_species = [] + self.num_equilibrium_reactions = num_eq + # Will be populated after parsing reactions + self._auto_infer_secondary = True + else: + # Pure kinetic system (no equilibrium reactions) + self.secondary_species = [] + self.num_equilibrium_reactions = 0 + self._auto_infer_secondary = False + + # Parse reactions first (before building species list if auto-inferring) + self.reactions = [] + for rxn_data in config['reactions']: + if 'equation' in rxn_data: + # Parse equation format - but we need species list + # For auto-infer, we'll do this in two passes + rxn = rxn_data # Store raw for now + else: + # Use stoichiometry array directly + rxn = rxn_data + self.reactions.append(rxn) + + # If auto-inferring, extract secondary species from first N reactions + if hasattr(self, '_auto_infer_secondary') and self._auto_infer_secondary: + self._infer_secondary_species_from_reactions() + + # Now build combined species list + if self.secondary_species: + self.species = self.secondary_species + self.primary_species + else: + self.species = self.primary_species + print(" Info: Pure kinetic system (no secondary species)") + + # Parse equations now that we have the species list + for i, rxn in enumerate(self.reactions): + if isinstance(rxn, dict) and 'equation' in rxn and 'stoichiometry' not in rxn: + self.reactions[i] = self._parse_equation(rxn) + + # Validate: must have one reaction per secondary species + kinetic reactions + if self.secondary_species and len(self.reactions) < len(self.secondary_species): + raise ValueError(f"Need at least {len(self.secondary_species)} reactions (one per secondary species), got {len(self.reactions)}") + + def _infer_secondary_species_from_reactions(self): + """Auto-infer secondary species from equilibrium reactions (first N reactions)""" + import re + + for i in range(self.num_equilibrium_reactions): + if i >= len(self.reactions): + raise ValueError(f"Cannot infer: only {len(self.reactions)} reactions but need {self.num_equilibrium_reactions}") + + rxn = self.reactions[i] + if 'equation' not in rxn: + raise ValueError(f"Cannot auto-infer from reaction {i+1}: missing 'equation' field") + + equation = rxn['equation'] + + # Extract left side (reactants) + if '=' not in equation: + raise ValueError(f"Reaction {i+1}: equation must contain '='") + + left, _ = equation.split('=', 1) + left = left.strip() + + # Split by + (with spaces) and get terms + terms = re.split(r'\s+\+\s+', left) + + # Find the secondary species (first non-water, non-solid term on left) + secondary_species = None + for term in terms: + term = term.strip() + if not term or term in ['H2O', 'H₂O']: + continue + + # Remove leading coefficient + if term[0].isdigit(): + while term and term[0].isdigit(): + term = term[1:] + term = term.strip() + + # First species on left side that's not water is the secondary species + # Check if it's in primary species (if so, skip) + if term not in self.primary_species: + secondary_species = term + break + + if not secondary_species: + raise ValueError(f"Reaction {i+1}: Could not identify secondary species on left side of '{equation}'") + + self.secondary_species.append(secondary_species) + print(f" Inferred secondary species {i+1}: {secondary_species}") + + def _parse_equation(self, rxn_data: Dict) -> Dict: + """Parse reaction equation string into stoichiometry""" + equation = rxn_data['equation'] + + # Split into left (reactants) and right (products) + if '=' not in equation: + raise ValueError(f"Equation missing '=': {equation}") + + left, right = equation.split('=') + left = left.strip() + right = right.strip() + + # Initialize stoichiometry array + stoich = [0] * len(self.species) + + # Parse left side (negative coefficients) + self._parse_side(left, stoich, sign=-1) + + # Parse right side (positive coefficients) + self._parse_side(right, stoich, sign=1) + + return { + 'stoichiometry': stoich, + 'comment': equation, # Use equation as comment + 'equilibriumConstant': rxn_data['equilibriumConstant'], + 'forwardRate': rxn_data.get('forwardRate', 1.0e10), + 'reverseRate': rxn_data.get('reverseRate', 1.0e10), + 'mobileFlag': rxn_data.get('mobileFlag', 1), + 'isEquilibrium': rxn_data.get('isEquilibrium', True) + } + + def _parse_side(self, side: str, stoich: List[int], sign: int): + """Parse one side of equation""" + import re + + # Skip if empty + if not side: + return + + # Split by + surrounded by spaces (to avoid splitting H+ or +2 charges) + # Match: " + " but not "+" in "H+" or "Ca+2" + terms = re.split(r'\s+\+\s+', side) + + for term in terms: + term = term.strip() + if not term: + continue + + # Skip water (common spectator species) + if term in ['H2O', 'H₂O']: + continue + + # Try to find coefficient only if term starts with digit followed by space or capital letter + # Examples: "2 Cl-" -> coeff=2, species="Cl-" + # "2Cl-" -> coeff=2, species="Cl-" + # "Cl-" -> coeff=1, species="Cl-" + # "CaCl2" -> coeff=1, species="CaCl2" (no leading digit) + + # First check: does it start with digit(s) followed by space or uppercase? + if term[0].isdigit(): + # Find where digits end + i = 0 + while i < len(term) and term[i].isdigit(): + i += 1 + coeff = int(term[:i]) + species = term[i:].strip() + else: + coeff = 1 + species = term + + # Find species index + if species in self.species: + idx = self.species.index(species) + stoich[idx] += sign * coeff + else: + # Species not in primary or secondary list + # Could be a solid/mineral (e.g., CaCO3) or water - skip it + print(f" Info: Skipping species '{species}' (likely solid/mineral or solvent)") + + def format_scientific(self, value: float) -> str: + """Format float in scientific notation: 1.23E+04""" + if value == 0.0: + return "0.0" + s = f"{value:.2E}" + return s.replace('e', 'E').replace('E+0', 'E+').replace('E-0', 'E-') + + def generate_hpp(self) -> str: + """Generate complete .hpp file with constexpr arrays""" + num_reactions = len(self.reactions) + num_species = len(self.species) + + # Determine number of equilibrium reactions + if self.num_equilibrium_reactions is not None: + # Explicitly specified + num_equilibrium = self.num_equilibrium_reactions + else: + # Auto-detect: count reactions with "isEquilibrium": true + # or fall back to forwardRate >= 1e9 + num_equilibrium = sum(1 for r in self.reactions + if r.get('isEquilibrium', False) or + (r.get('forwardRate', 1e10) >= 1e9 and + r.get('reverseRate', 1e10) >= 1e9)) + + year = datetime.now().year + max_species_len = max(len(s) for s in self.species) + + hpp = f"""/* + * ------------------------------------------------------------------------------------------------------------ + * SPDX-License-Identifier: (BSD-3-Clause) + * + * Copyright (c) {year}- Lawrence Livermore National Security LLC + * All rights reserved + * + * See top level LICENSE files for details. + * ------------------------------------------------------------------------------------------------------------ + */ + +#pragma once + +#include "../reactionsSystems/Parameters.hpp" + +namespace hpcReact +{{ + +namespace geochemistry +{{ +// turn off uncrustify to allow for better readability of the parameters +// *****UNCRUSTIFY-OFF****** + +// ################################## {self.system_name} rxn set ################################## +namespace {self.namespace} +{{ + +constexpr CArrayWrapper stoichMatrix = +{{ // """ + + # Species header + for i, sp in enumerate(self.species): + hpp += f"{sp:>{max_species_len+2}}" + hpp += " \n" + + # Stoichiometry matrix rows + for i, rxn in enumerate(self.reactions): + hpp += " { " + for j, coeff in enumerate(rxn['stoichiometry']): + hpp += f"{coeff:>{max_species_len+2}}" + if j < num_species - 1: + hpp += "," + hpp += " }" + if i < num_reactions - 1: + hpp += "," + comment = rxn.get('comment', '') + if comment: + hpp += f" // {comment}" + hpp += "\n" + + hpp += " };\n\n" + + # Equilibrium constants + hpp += f"constexpr CArrayWrapper equilibriumConstants = \n {{ \n" + for i, rxn in enumerate(self.reactions): + val = self.format_scientific(rxn['equilibriumConstant']) + hpp += f" {val:>12}" + if i < num_reactions - 1: + hpp += "," + comment = rxn.get('comment', '') + if comment: + hpp += f" // {comment}" + hpp += "\n" + hpp += " };\n\n" + + # Forward rates + hpp += f"constexpr CArrayWrapper forwardRates = \n {{ \n" + for i, rxn in enumerate(self.reactions): + val = self.format_scientific(rxn.get('forwardRate', 1.0e10)) + hpp += f" {val:>12}" + if i < num_reactions - 1: + hpp += "," + comment = rxn.get('comment', '') + if comment: + hpp += f" // {comment}" + hpp += "\n" + hpp += " };\n\n" + + # Reverse rates + hpp += f"constexpr CArrayWrapper reverseRates = \n {{ \n" + for i, rxn in enumerate(self.reactions): + val = self.format_scientific(rxn.get('reverseRate', 1.0e10)) + hpp += f" {val:>12}" + if i < num_reactions - 1: + hpp += "," + comment = rxn.get('comment', '') + if comment: + hpp += f" // {comment}" + hpp += "\n" + hpp += " };\n\n" + + # Mobile species flags + hpp += f"constexpr CArrayWrapper mobileSpeciesFlag = \n {{ \n" + for i, rxn in enumerate(self.reactions): + flag = rxn.get('mobileFlag', 1) + hpp += f" {flag}" + if i < num_reactions - 1: + hpp += "," + comment = rxn.get('comment', '') + if comment: + hpp += f" // {comment}" + hpp += "\n" + hpp += " };\n" + + hpp += f"""}} + + using {self.namespace}SystemType = reactionsSystems::MixedReactionsParameters< double, int, signed char, {num_species}, {num_reactions}, {num_equilibrium} >; + + constexpr {self.namespace}SystemType {self.namespace}System( {self.namespace}::stoichMatrix, {self.namespace}::equilibriumConstants, {self.namespace}::forwardRates, {self.namespace}::reverseRates, {self.namespace}::mobileSpeciesFlag ); + +// *****UNCRUSTIFY-ON****** +}} // namespace geochemistry +}} // namespace hpcReact +""" + + return hpp + + +def scan_reaction_json_files(reaction_dir: str) -> List[str]: + """Find all .json files in reactions/data/ directory""" + data_dir = os.path.join(reaction_dir, 'data') + if not os.path.exists(data_dir): + return [] + + return [ + os.path.join(data_dir, f) + for f in os.listdir(data_dir) + if f.endswith('.json') + ] + + +def add_system_to_registry(registry_file: str, system_hpp: str, system_type: str): + """Add a new system to existing GeochemicalSystems.hpp without regenerating""" + + if not os.path.exists(registry_file): + print(f"Error: {registry_file} not found") + return False + + with open(registry_file, 'r') as f: + lines = f.readlines() + + # Check if system already exists + content = ''.join(lines) + if f'#include "{system_hpp}"' in content: + print(f"System {system_hpp} already in registry") + return False + + if system_type in content: + print(f"System type {system_type} already in registry") + return False + + # Find where to insert the include (before #include ) + include_insert_idx = None + for i, line in enumerate(lines): + if line.strip().startswith('#include') and '.hpp"' in line and '' not in line: + include_insert_idx = i + 1 + elif line.strip() == '#include ': + break + + if include_insert_idx is None: + print("Error: Could not find include section") + return False + + # Insert the new include + lines.insert(include_insert_idx, f'#include "{system_hpp}"\n') + + # Find the variant closing (look for the line with >; that ends the variant) + variant_end_idx = None + in_variant = False + for i, line in enumerate(lines): + if 'using systemTypes = std::variant<' in line: + in_variant = True + elif in_variant and '>;' in line: + variant_end_idx = i + break + + if variant_end_idx is None: + print("Error: Could not find variant closing") + return False + + # Find the last type line (line before >;) + last_type_idx = variant_end_idx - 1 + while last_type_idx >= 0 and not lines[last_type_idx].strip(): + last_type_idx -= 1 + + # Check if last type ends with comma, if not add it + if not lines[last_type_idx].rstrip().endswith(','): + lines[last_type_idx] = lines[last_type_idx].rstrip() + ',\n' + + # Insert new type with same indentation as previous line + last_line = lines[last_type_idx] + indent = len(last_line) - len(last_line.lstrip()) + indent_str = last_line[:indent] + lines.insert(last_type_idx + 1, f'{indent_str}{system_type},\n') + + # Write back + with open(registry_file, 'w') as f: + f.writelines(lines) + + print(f"Added {system_type} to {registry_file}") + return True + + +def generate_registry_header(reaction_dir: str, output_file: str): + """Auto-generate GeochemicalSystems.hpp with all detected systems""" + + # Find all .hpp files (both manual and generated) + geochemistry_dir = os.path.join(reaction_dir, 'geochemistry') + if not os.path.exists(geochemistry_dir): + print(f"Warning: {geochemistry_dir} not found") + return + + hpp_files = [ + f for f in os.listdir(geochemistry_dir) + if f.endswith('.hpp') and f not in ['GeochemicalSystems.hpp', 'Parameters.hpp'] + ] + + # Extract namespace names from JSON files or .hpp files + systems = [] + for hpp_file in hpp_files: + basename = os.path.splitext(hpp_file)[0] + # Convert filename to namespace (e.g., "Ultramafics.hpp" -> "ultramafics") + namespace = basename.lower() + systems.append({ + 'file': hpp_file, + 'basename': basename, + 'namespace': namespace, + 'typeName': f'{namespace}SystemType' + }) + + year = datetime.now().year + registry = f"""/* + * ------------------------------------------------------------------------------------------------------------ + * SPDX-License-Identifier: (BSD-3-Clause) + * + * Copyright (c) {year}- Lawrence Livermore National Security LLC + * All rights reserved + * + * See top level LICENSE files for details. + * ------------------------------------------------------------------------------------------------------------ + */ + +#pragma once + +""" + + # Include all system headers + for sys in systems: + registry += f'#include "{sys["file"]}"\n' + + registry += """ +#include + +namespace hpcReact +{ + +namespace geochemistry +{ +using systemTypes = std::variant< """ + + # Add all system types to variant + registry += ",\n ".join(s['typeName'] for s in systems) + + registry += """ >; + +} // namespace geochemistry +} // namespace hpcReact +""" + + with open(output_file, 'w') as f: + f.write(registry) + + print(f"Generated {output_file} with {len(systems)} systems:") + for sys in systems: + print(f" - {sys['basename']}") + + +def main(): + parser = argparse.ArgumentParser( + description='Generate constexpr C++ reaction system from JSON', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +JSON Input Format (Recommended - Separate Primary/Secondary Species): +{ + "systemName": "Carbonate System", + "namespace": "carbonate", + "secondarySpecies": ["OH-", "CO2", "CO3-2", "CaHCO3+"], + "primarySpecies": ["H+", "HCO3-", "Ca+2"], + "numEquilibriumReactions": 3, + "reactions": [ + { + "comment": "OH- + H+ = H2O", + "stoichiometry": [-1, 0, 0, 0, -1, 0, 0], + "equilibriumConstant": 9.89e13, + "forwardRate": 1.0e10, + "reverseRate": 1.0e10, + "mobileFlag": 1, + "isEquilibrium": true + } + ] +} + +Notes: +- Species order in stoichiometry: [secondary..., primary...] +- isEquilibrium: true for fast equilibrium reactions, false for kinetic +- numEquilibriumReactions: number of equilibrium reactions (optional, auto-detected) +- mobileFlag: 1 for mobile species, 0 for immobile +- forwardRate and reverseRate: reaction rate constants + +Build System Integration: +Add to CMakeLists.txt to auto-generate systems at build time. + """ + ) + + parser.add_argument('input', nargs='?', help='Input JSON file (or use --scan)') + parser.add_argument('-o', '--output', help='Output .hpp file') + parser.add_argument('--scan', metavar='DIR', + help='Scan reactions/data/ directory and generate all systems') + parser.add_argument('--generate-registry', metavar='REACTION_DIR', + help='Generate GeochemicalSystems.hpp registry file') + parser.add_argument('--add-to-registry', action='store_true', + help='Add new system to existing GeochemicalSystems.hpp instead of regenerating') + + args = parser.parse_args() + + if args.generate_registry: + output = os.path.join(args.generate_registry, 'geochemistry', 'GeochemicalSystems.hpp') + generate_registry_header(args.generate_registry, output) + return + + if args.scan: + json_files = scan_reaction_json_files(args.scan) + if not json_files: + print(f"No .json files found in {args.scan}/data/") + return + + print(f"Found {len(json_files)} reaction JSON files:") + for json_file in json_files: + print(f" Processing: {json_file}") + with open(json_file, 'r') as f: + config = json.load(f) + + gen = ReactionSystemGenerator(config) + + # Output to geochemistry/ directory + basename = config['namespace'].capitalize() + output_file = os.path.join(args.scan, 'geochemistry', f'{basename}.hpp') + + hpp_content = gen.generate_hpp() + with open(output_file, 'w') as f: + f.write(hpp_content) + + print(f" -> Generated: {output_file}") + print(f" Species: {len(gen.species)}, Reactions: {len(gen.reactions)}") + + # Auto-generate registry + print("\nGenerating registry header...") + generate_registry_header(args.scan, + os.path.join(args.scan, 'geochemistry', 'GeochemicalSystems.hpp')) + + elif args.input: + if not args.output: + parser.error("--output required when processing single file") + + with open(args.input, 'r') as f: + config = json.load(f) + + gen = ReactionSystemGenerator(config) + hpp_content = gen.generate_hpp() + + with open(args.output, 'w') as f: + f.write(hpp_content) + + print(f"Generated {args.output}") + print(f" Species: {len(gen.species)}") + print(f" Reactions: {len(gen.reactions)}") + + # If --add-to-registry flag is set, add to GeochemicalSystems.hpp + if args.add_to_registry: + # Determine registry file location + # Assume output is in geochemistry/ directory + output_dir = os.path.dirname(args.output) + registry_file = os.path.join(output_dir, 'GeochemicalSystems.hpp') + + basename = os.path.basename(args.output) + namespace = config['namespace'] + type_name = f'{namespace}SystemType' + + add_system_to_registry(registry_file, basename, type_name) + + else: + parser.error("Either INPUT file or --scan DIR required") + + +if __name__ == '__main__': + main() diff --git a/scripts/validate_reaction_json.py b/scripts/validate_reaction_json.py new file mode 100755 index 0000000..272b498 --- /dev/null +++ b/scripts/validate_reaction_json.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +""" +Validate reaction system JSON files before generating C++ code +""" + +import argparse +import json +import sys +from typing import Dict, List + + +class ReactionValidator: + def __init__(self, config: Dict): + self.config = config + self.errors = [] + self.warnings = [] + + def validate(self) -> bool: + """Run all validation checks""" + self._validate_required_fields() + self._validate_species() + self._validate_reactions() + self._validate_namespace() + + return len(self.errors) == 0 + + def _validate_required_fields(self): + """Check all required fields are present""" + required = ['systemName', 'namespace', 'primarySpecies', 'secondarySpecies', 'reactions'] + for field in required: + if field not in self.config: + self.errors.append(f"Missing required field: '{field}'") + + def _validate_species(self): + """Validate species lists""" + if 'primarySpecies' not in self.config or 'secondarySpecies' not in self.config: + return + + primary = self.config['primarySpecies'] + secondary = self.config['secondarySpecies'] + + if not isinstance(primary, list) or len(primary) == 0: + self.errors.append("primarySpecies must be a non-empty list") + + if not isinstance(secondary, list): + self.errors.append("secondarySpecies must be a list (can be empty for pure kinetic systems)") + + # If no secondary species, this is a pure kinetic system + if len(secondary) == 0: + self.warnings.append("Pure kinetic system detected (no secondary species = no equilibrium reactions)") + + # Check for duplicates + all_species = primary + secondary + if len(all_species) != len(set(all_species)): + duplicates = [s for s in all_species if all_species.count(s) > 1] + self.errors.append(f"Duplicate species: {', '.join(set(duplicates))}") + + # Check for common naming issues + for sp in all_species: + if ' ' in sp: + self.warnings.append(f"Species '{sp}' contains space - this may cause parsing issues") + + def _validate_reactions(self): + """Validate reaction definitions""" + if 'reactions' not in self.config: + return + + reactions = self.config['reactions'] + if not isinstance(reactions, list) or len(reactions) == 0: + self.errors.append("reactions must be a non-empty list") + return + + num_secondary = len(self.config.get('secondarySpecies', [])) + + if num_secondary > 0 and len(reactions) < num_secondary: + self.errors.append( + f"Need at least {num_secondary} reactions (one per secondary species), " + f"got {len(reactions)}" + ) + + # Validate each reaction + for i, rxn in enumerate(reactions): + self._validate_reaction(i, rxn, num_secondary) + + def _validate_reaction(self, idx: int, rxn: Dict, num_secondary: int): + """Validate a single reaction""" + rxn_label = f"Reaction {idx+1}" + + # Check required fields + if 'equation' not in rxn: + self.errors.append(f"{rxn_label}: Missing 'equation' field") + return + + if 'equilibriumConstant' not in rxn: + self.errors.append(f"{rxn_label}: Missing 'equilibriumConstant' field") + + # Check equation format + equation = rxn['equation'] + if '=' not in equation: + self.errors.append(f"{rxn_label}: Equation must contain '=' : {equation}") + + # Parse and validate species + try: + left, right = equation.split('=') + all_species = self.config.get('primarySpecies', []) + self.config.get('secondarySpecies', []) + + for side in [left, right]: + # Split by + surrounded by spaces to avoid splitting charges + import re + terms = re.split(r'\s+\+\s+', side) + for term in terms: + term = term.strip() + if not term or term in ['H2O', 'H₂O']: + continue + + # Remove leading coefficients (digits before capital letter) + match = re.match(r'^\d+\s*([A-Z].*)$', term) + if match: + term = match.group(1).strip() + + # Check if species is known or likely a solid + if term not in all_species: + # Likely a solid/mineral - this is OK + if idx >= num_secondary: + # Kinetic reaction - solids expected + pass + else: + self.warnings.append( + f"{rxn_label}: Species '{term}' not in primary/secondary lists " + f"(OK if it's a solid/mineral)" + ) + except Exception as e: + self.errors.append(f"{rxn_label}: Failed to parse equation: {e}") + + # Check rate constants + forward = rxn.get('forwardRate', 1.0e10) + reverse = rxn.get('reverseRate', 1.0e10) + + if forward <= 0: + self.errors.append(f"{rxn_label}: forwardRate must be positive") + if reverse <= 0: + self.errors.append(f"{rxn_label}: reverseRate must be positive") + + # Warn about equilibrium/kinetic classification + is_equilibrium = (idx < num_secondary) + if is_equilibrium: + # Equilibrium reactions should be fast + if forward < 1e6 or reverse < 1e6: + self.warnings.append( + f"{rxn_label}: Equilibrium reaction but has slow rates " + f"(forward={forward:.2e}, reverse={reverse:.2e})" + ) + else: + # Kinetic reactions should be slower + if forward >= 1e9 and reverse >= 1e9: + self.warnings.append( + f"{rxn_label}: Kinetic reaction but has very fast rates " + f"(forward={forward:.2e}, reverse={reverse:.2e}). " + f"Consider moving to equilibrium reactions." + ) + + def _validate_namespace(self): + """Validate C++ namespace""" + if 'namespace' not in self.config: + return + + namespace = self.config['namespace'] + + # Must be valid C++ identifier + if not namespace.replace('_', '').isalnum(): + self.errors.append(f"namespace '{namespace}' is not a valid C++ identifier") + + if namespace[0].isdigit(): + self.errors.append(f"namespace '{namespace}' cannot start with a digit") + + if not namespace.islower(): + self.warnings.append(f"namespace '{namespace}' should be lowercase") + + def print_results(self): + """Print validation results""" + if self.errors: + print("❌ ERRORS:") + for error in self.errors: + print(f" - {error}") + print() + + if self.warnings: + print("⚠️ WARNINGS:") + for warning in self.warnings: + print(f" - {warning}") + print() + + if not self.errors and not self.warnings: + print("✅ Validation passed! No errors or warnings.") + elif not self.errors: + print("✅ Validation passed with warnings.") + else: + print("❌ Validation failed.") + + +def main(): + parser = argparse.ArgumentParser( + description='Validate reaction system JSON files' + ) + parser.add_argument('json_file', help='JSON file to validate') + parser.add_argument('--strict', action='store_true', + help='Treat warnings as errors') + + args = parser.parse_args() + + try: + with open(args.json_file, 'r') as f: + config = json.load(f) + except FileNotFoundError: + print(f"❌ Error: File not found: {args.json_file}") + sys.exit(1) + except json.JSONDecodeError as e: + print(f"❌ Error: Invalid JSON: {e}") + sys.exit(1) + + validator = ReactionValidator(config) + is_valid = validator.validate() + validator.print_results() + + if not is_valid: + sys.exit(1) + + if args.strict and validator.warnings: + sys.exit(1) + + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87f92bf..4f02dd7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,76 @@ +# ============================================================================ +# Auto-generate reaction system headers from JSON definitions +# ============================================================================ +set(REACTION_DATA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/reactions/data") +set(HPCREACT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/..") +set(REACTION_GEN_SCRIPT "${HPCREACT_ROOT}/scripts/generate_reaction_system.py") +set(REACTION_VALIDATE_SCRIPT "${HPCREACT_ROOT}/scripts/validate_reaction_json.py") +set(GEOCHEMISTRY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/reactions/geochemistry") + +# Find all JSON reaction definition files +file(GLOB REACTION_JSON_FILES "${REACTION_DATA_DIR}/*.json") + +# Filter out example files (optional - remove this if you want to generate examples too) +# Commented out to generate all JSON files including examples +# list(FILTER REACTION_JSON_FILES EXCLUDE REGEX "example_.*\\.json$") + +set(GENERATED_REACTION_HEADERS) + +# Generate C++ header for each JSON file during CMake configuration +foreach(JSON_FILE ${REACTION_JSON_FILES}) + get_filename_component(BASENAME ${JSON_FILE} NAME_WE) + + # Convert filename to CamelCase for class name + # e.g., my_system.json -> Mysystem.hpp + string(SUBSTRING ${BASENAME} 0 1 FIRST_CHAR) + string(TOUPPER ${FIRST_CHAR} FIRST_CHAR_UPPER) + string(REGEX REPLACE "^.(.*)" "${FIRST_CHAR_UPPER}\\1" CLASSNAME ${BASENAME}) + + set(HPP_FILE "${GEOCHEMISTRY_DIR}/${CLASSNAME}.hpp") + + # Step 1: Validate JSON first + execute_process( + COMMAND python3 ${REACTION_VALIDATE_SCRIPT} ${JSON_FILE} + RESULT_VARIABLE VALIDATE_RESULT + OUTPUT_QUIET + ERROR_QUIET + ) + + if(NOT VALIDATE_RESULT EQUAL 0) + message(WARNING "${BASENAME}.json validation failed - skipping generation") + continue() + endif() + + # Step 2: Generate C++ header (only if validation passed) + message(STATUS "Generating reaction system: ${CLASSNAME}.hpp from ${BASENAME}.json") + execute_process( + COMMAND python3 ${REACTION_GEN_SCRIPT} ${JSON_FILE} -o ${HPP_FILE} --add-to-registry + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE GEN_RESULT + OUTPUT_VARIABLE GEN_OUTPUT + ERROR_VARIABLE GEN_ERROR + ) + + if(NOT GEN_RESULT EQUAL 0) + message(WARNING "Failed to generate ${CLASSNAME}.hpp: ${GEN_ERROR}") + else() + message(STATUS " -> Generated: ${HPP_FILE}") + list(APPEND GENERATED_REACTION_HEADERS ${HPP_FILE}) + endif() +endforeach() + +# Note: GeochemicalSystems.hpp is now updated incrementally via --add-to-registry flag above +# If you need to regenerate it from scratch, manually run: +# python3 scripts/generate_reaction_system.py --generate-registry src/reactions +set(REGISTRY_HEADER "${GEOCHEMISTRY_DIR}/GeochemicalSystems.hpp") +if(EXISTS ${REGISTRY_HEADER}) + list(APPEND GENERATED_REACTION_HEADERS ${REGISTRY_HEADER}) +endif() + +# No need for custom target since files are generated during configuration +# The generated headers are already included in hpcReact_headers below + +# ============================================================================ set( hpcReact_headers common/macros.hpp @@ -19,6 +92,7 @@ set( hpcReact_headers reactions/unitTestUtilities/equilibriumReactionsTestUtilities.hpp reactions/unitTestUtilities/kineticReactionsTestUtilities.hpp reactions/unitTestUtilities/mixedReactionsTestUtilities.hpp + ${GENERATED_REACTION_HEADERS} ) set( hpcReact_sources) diff --git a/src/reactions/data/CarbonateMixed.json b/src/reactions/data/CarbonateMixed.json new file mode 100644 index 0000000..0a84a2f --- /dev/null +++ b/src/reactions/data/CarbonateMixed.json @@ -0,0 +1,56 @@ +{ + "systemName": "Example Carbonate System", + "namespace": "carbonatemixed", + "secondarySpecies": [ + "OH-", + "CO2", + "CO3-2", + "CaHCO3+", + "CaCl+" + ], + "primarySpecies": [ + "H+", + "HCO3-", + "Ca+2", + "Cl-" + ], + "reactions": [ + { + "equation": "OH- + H+ = H2O", + "equilibriumConstant": 9.89e13, + "forwardRate": 1.4e11, + "reverseRate": 1.43e-3 + }, + { + "equation": "CO2 + H2O = H+ + HCO3-", + "equilibriumConstant": 4.42e-7, + "forwardRate": 0.039, + "reverseRate": 8.92e4 + }, + { + "equation": "CO3-2 + H+ = HCO3-", + "equilibriumConstant": 2.21e10, + "forwardRate": 1.0e10, + "reverseRate": 4.67e-1 + }, + { + "equation": "CaHCO3+ = Ca+2 + HCO3-", + "equilibriumConstant": 6.0e-2, + "forwardRate": 1.5e6, + "reverseRate": 1.85e7 + }, + { + "equation": "CaCl+ = Ca+2 + Cl-", + "equilibriumConstant": 2.0e-1, + "forwardRate": 1.0e8, + "reverseRate": 2.14e7 + }, + { + "equation": "CaCO3 + H+ = Ca+2 + HCO3-", + "equilibriumConstant": 5.16e1, + "forwardRate": 1.55e-6, + "reverseRate": 3.0e-8, + "isEquilibrium": false + } + ] +} diff --git a/src/reactions/data/CarbonatePureKinetic.json b/src/reactions/data/CarbonatePureKinetic.json new file mode 100644 index 0000000..98f966f --- /dev/null +++ b/src/reactions/data/CarbonatePureKinetic.json @@ -0,0 +1,26 @@ +{ + "systemName": "Pure Kinetic System Example", + "namespace": "carbonatepurekinetic", + "secondarySpecies": [], + "primarySpecies": [ + "H+", + "Ca+2", + "HCO3-", + "Mg+2", + "SiO2(aq)" + ], + "reactions": [ + { + "equation": "CaCO3 + H+ = Ca+2 + HCO3-", + "equilibriumConstant": 5.16e1, + "forwardRate": 1.55e-6, + "reverseRate": 3.0e-8 + }, + { + "equation": "Mg2SiO4 + 4H+ = 2Mg+2 + SiO2(aq) + 2H2O", + "equilibriumConstant": 1.4e28, + "forwardRate": 2.29e-11, + "reverseRate": 1.65e-39 + } + ] +} \ No newline at end of file diff --git a/src/reactions/data/README.md b/src/reactions/data/README.md new file mode 100644 index 0000000..fd9a639 --- /dev/null +++ b/src/reactions/data/README.md @@ -0,0 +1,148 @@ +# Automated Reaction System Generation + +## Overview + +Define geochemical reaction systems in JSON instead of manually writing C++ matrices. Files are automatically generated during CMake configuration. + +## Quick Start + +**1. Create JSON file** (e.g., `my_system.json`): +```json +{ + "systemName": "My Carbonate System", + "namespace": "mycarbonate", + "secondarySpecies": ["OH-", "CO2"], + "primarySpecies": ["H+", "HCO3-", "Ca+2"], + "reactions": [ + { + "equation": "OH- + H+ = H2O", + "equilibriumConstant": 9.89e13, + "forwardRate": 1.4e11, + "reverseRate": 1.43e-3 + }, + { + "equation": "CO2 + H2O = H+ + HCO3-", + "equilibriumConstant": 4.42e-7, + "forwardRate": 0.039, + "reverseRate": 8.92e4 + }, + { + "equation": "CaCO3 + H+ = Ca+2 + HCO3-", + "equilibriumConstant": 5.16e1, + "forwardRate": 1.55e-6, + "reverseRate": 3.0e-8 + } + ] +} +``` + +This example has 2 equilibrium reactions (first 2) + 1 kinetic reaction (mineral dissolution). + +**2. Run configuration:** +```bash +python3 scripts/config-build.py -hc hostconfigs/your-config.cmake -bt Release +``` + +JSON is automatically validated, then `My_system.hpp` is generated. + +**3. Build:** +```bash +cd build-- # e.g., build-macOC_arm-release +make +``` + +Done! Your reaction system is now compiled into the HPCReact library. + +## JSON Format + +### Required Fields +```json +{ + "systemName": "Human-readable name", + "namespace": "lowercasecppname", + "secondarySpecies": ["Species", "from", "equilibrium"], + "primarySpecies": ["Basis", "species"], + "reactions": [...] +} +``` + +### Reaction Format +```json +{ + "equation": "A + B = C + D", + "equilibriumConstant": 1.23e10, + "forwardRate": 1.0e10, + "reverseRate": 1.0e-3 +} +``` + +### Key Rules + +- **Number of equilibrium reactions = Number of secondary species** +- First N reactions (N = # secondary) are equilibrium (fast) +- Remaining reactions are kinetic (time-dependent) +- Species order in equation: `[secondary..., primary...]` +- **Pure kinetic systems:** `secondarySpecies: []` → all reactions are kinetic + +### Equation Syntax + +- Separator: ` + ` (with spaces) +- Coefficients: `2Cl-` or `2 Cl-` +- Water: `H2O` (auto-skipped) +- Solids: Not in species lists (auto-skipped, e.g., `CaCO3` in kinetic reactions) +- Charges: Part of name (`H+`, `Ca+2`, `SO4-2`) + +## Examples + +- **[example_simple.json](example_simple.json)** - Minimal (1 equilibrium reaction) +- **[example_carbonate.json](example_carbonate.json)** - Mixed system (5 equilibrium + 1 kinetic) +- **[example_pure_kinetic.json](example_pure_kinetic.json)** - Pure kinetic (2 kinetic only) + +## Tools + +**Validate before configuring:** +```bash +python3 ../../../scripts/validate_reaction_json.py my_system.json +``` + +**Manual generation (for testing):** +```bash +python3 ../../../scripts/generate_reaction_system.py \ + my_system.json -o ../geochemistry/My_system.hpp +``` + +## File Locations + +``` +HPCReact/ +├── scripts/ +│ ├── generate_reaction_system.py # Generator +│ └── validate_reaction_json.py # Validator +└── src/reactions/ + ├── data/ # JSON definitions (you edit) + │ ├── example_simple.json + │ └── example_carbonate.json + └── geochemistry/ # Generated .hpp (auto-created) + ├── GeochemicalSystems.hpp # Registry (auto-updated) + ├── Example_carbonate.hpp # Auto-generated + └── Ultramafics.hpp # Manual (existing) +``` + +## Workflow + +``` +Edit JSON → config-build.py → .hpp generated → make → done +``` + +Every time you: +- Add new JSON → re-run `config-build.py` → new `.hpp` created +- Edit existing JSON → re-run `config-build.py` → `.hpp` updated +- `GeochemicalSystems.hpp` registry always auto-updated + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| "Unknown species 'X'" | Check spelling/charges: `Ca+2` not `Ca2+` | +| "Need at least N reactions" | Need ≥ N reactions for N secondary species | +| Changes not reflected | Re-run `config-build.py` (not just `make`) | \ No newline at end of file