From ec1ab56e6886990312f8d78066afd7d88f2588a5 Mon Sep 17 00:00:00 2001
From: frankfeifan <fei2@llnl.gov>
Date: Wed, 24 Jun 2026 21:42:02 -0700
Subject: [PATCH] added scripts to automatically generate reaction system .hpp
 in configuration

---
 scripts/generate_reaction_system.py          | 623 +++++++++++++++++++
 scripts/validate_reaction_json.py            | 235 +++++++
 src/CMakeLists.txt                           |  74 +++
 src/reactions/data/CarbonateMixed.json       |  56 ++
 src/reactions/data/CarbonatePureKinetic.json |  26 +
 src/reactions/data/README.md                 | 148 +++++
 6 files changed, 1162 insertions(+)
 create mode 100755 scripts/generate_reaction_system.py
 create mode 100755 scripts/validate_reaction_json.py
 create mode 100644 src/reactions/data/CarbonateMixed.json
 create mode 100644 src/reactions/data/CarbonatePureKinetic.json
 create mode 100644 src/reactions/data/README.md

diff --git a/scripts/generate_reaction_system.py b/scripts/generate_reaction_system.py
new file mode 100755
index 0000000..cacd963
--- /dev/null
+++ b/scripts/generate_reaction_system.py
@@ -0,0 +1,623 @@
+#!/usr/bin/env python3
+"""
+Build-time code generator for HPCReact geochemistry reaction systems.
+Generates constexpr C++ code from JSON input for GPU efficiency.
+"""
+
+import argparse
+import json
+import os
+from typing import List, Dict
+from datetime import datetime
+
+
+class ReactionSystemGenerator:
+    def __init__(self, config: Dict):
+        self.system_name = config['systemName']
+        self.namespace = config['namespace']
+
+        # Primary species (always required)
+        self.primary_species = config['primarySpecies']
+
+        # Secondary species - can be provided or auto-inferred
+        if 'secondarySpecies' in config:
+            # Explicitly provided
+            self.secondary_species = config['secondarySpecies']
+            self.num_equilibrium_reactions = len(self.secondary_species)
+        elif 'numEquilibriumReactions' in config and config['numEquilibriumReactions'] > 0:
+            # Auto-infer from first N reactions
+            num_eq = config['numEquilibriumReactions']
+            print(f"  Info: Auto-inferring {num_eq} secondary species from equilibrium reactions")
+            self.secondary_species = []
+            self.num_equilibrium_reactions = num_eq
+            # Will be populated after parsing reactions
+            self._auto_infer_secondary = True
+        else:
+            # Pure kinetic system (no equilibrium reactions)
+            self.secondary_species = []
+            self.num_equilibrium_reactions = 0
+            self._auto_infer_secondary = False
+
+        # Parse reactions first (before building species list if auto-inferring)
+        self.reactions = []
+        for rxn_data in config['reactions']:
+            if 'equation' in rxn_data:
+                # Parse equation format - but we need species list
+                # For auto-infer, we'll do this in two passes
+                rxn = rxn_data  # Store raw for now
+            else:
+                # Use stoichiometry array directly
+                rxn = rxn_data
+            self.reactions.append(rxn)
+
+        # If auto-inferring, extract secondary species from first N reactions
+        if hasattr(self, '_auto_infer_secondary') and self._auto_infer_secondary:
+            self._infer_secondary_species_from_reactions()
+
+        # Now build combined species list
+        if self.secondary_species:
+            self.species = self.secondary_species + self.primary_species
+        else:
+            self.species = self.primary_species
+            print("  Info: Pure kinetic system (no secondary species)")
+
+        # Parse equations now that we have the species list
+        for i, rxn in enumerate(self.reactions):
+            if isinstance(rxn, dict) and 'equation' in rxn and 'stoichiometry' not in rxn:
+                self.reactions[i] = self._parse_equation(rxn)
+
+        # Validate: must have one reaction per secondary species + kinetic reactions
+        if self.secondary_species and len(self.reactions) < len(self.secondary_species):
+            raise ValueError(f"Need at least {len(self.secondary_species)} reactions (one per secondary species), got {len(self.reactions)}")
+
+    def _infer_secondary_species_from_reactions(self):
+        """Auto-infer secondary species from equilibrium reactions (first N reactions)"""
+        import re
+
+        for i in range(self.num_equilibrium_reactions):
+            if i >= len(self.reactions):
+                raise ValueError(f"Cannot infer: only {len(self.reactions)} reactions but need {self.num_equilibrium_reactions}")
+
+            rxn = self.reactions[i]
+            if 'equation' not in rxn:
+                raise ValueError(f"Cannot auto-infer from reaction {i+1}: missing 'equation' field")
+
+            equation = rxn['equation']
+
+            # Extract left side (reactants)
+            if '=' not in equation:
+                raise ValueError(f"Reaction {i+1}: equation must contain '='")
+
+            left, _ = equation.split('=', 1)
+            left = left.strip()
+
+            # Split by + (with spaces) and get terms
+            terms = re.split(r'\s+\+\s+', left)
+
+            # Find the secondary species (first non-water, non-solid term on left)
+            secondary_species = None
+            for term in terms:
+                term = term.strip()
+                if not term or term in ['H2O', 'H₂O']:
+                    continue
+
+                # Remove leading coefficient
+                if term[0].isdigit():
+                    while term and term[0].isdigit():
+                        term = term[1:]
+                    term = term.strip()
+
+                # First species on left side that's not water is the secondary species
+                # Check if it's in primary species (if so, skip)
+                if term not in self.primary_species:
+                    secondary_species = term
+                    break
+
+            if not secondary_species:
+                raise ValueError(f"Reaction {i+1}: Could not identify secondary species on left side of '{equation}'")
+
+            self.secondary_species.append(secondary_species)
+            print(f"    Inferred secondary species {i+1}: {secondary_species}")
+
+    def _parse_equation(self, rxn_data: Dict) -> Dict:
+        """Parse reaction equation string into stoichiometry"""
+        equation = rxn_data['equation']
+
+        # Split into left (reactants) and right (products)
+        if '=' not in equation:
+            raise ValueError(f"Equation missing '=': {equation}")
+
+        left, right = equation.split('=')
+        left = left.strip()
+        right = right.strip()
+
+        # Initialize stoichiometry array
+        stoich = [0] * len(self.species)
+
+        # Parse left side (negative coefficients)
+        self._parse_side(left, stoich, sign=-1)
+
+        # Parse right side (positive coefficients)
+        self._parse_side(right, stoich, sign=1)
+
+        return {
+            'stoichiometry': stoich,
+            'comment': equation,  # Use equation as comment
+            'equilibriumConstant': rxn_data['equilibriumConstant'],
+            'forwardRate': rxn_data.get('forwardRate', 1.0e10),
+            'reverseRate': rxn_data.get('reverseRate', 1.0e10),
+            'mobileFlag': rxn_data.get('mobileFlag', 1),
+            'isEquilibrium': rxn_data.get('isEquilibrium', True)
+        }
+
+    def _parse_side(self, side: str, stoich: List[int], sign: int):
+        """Parse one side of equation"""
+        import re
+
+        # Skip if empty
+        if not side:
+            return
+
+        # Split by + surrounded by spaces (to avoid splitting H+ or +2 charges)
+        # Match: " + " but not "+" in "H+" or "Ca+2"
+        terms = re.split(r'\s+\+\s+', side)
+
+        for term in terms:
+            term = term.strip()
+            if not term:
+                continue
+
+            # Skip water (common spectator species)
+            if term in ['H2O', 'H₂O']:
+                continue
+
+            # Try to find coefficient only if term starts with digit followed by space or capital letter
+            # Examples: "2 Cl-" -> coeff=2, species="Cl-"
+            #           "2Cl-" -> coeff=2, species="Cl-"
+            #           "Cl-" -> coeff=1, species="Cl-"
+            #           "CaCl2" -> coeff=1, species="CaCl2" (no leading digit)
+
+            # First check: does it start with digit(s) followed by space or uppercase?
+            if term[0].isdigit():
+                # Find where digits end
+                i = 0
+                while i < len(term) and term[i].isdigit():
+                    i += 1
+                coeff = int(term[:i])
+                species = term[i:].strip()
+            else:
+                coeff = 1
+                species = term
+
+            # Find species index
+            if species in self.species:
+                idx = self.species.index(species)
+                stoich[idx] += sign * coeff
+            else:
+                # Species not in primary or secondary list
+                # Could be a solid/mineral (e.g., CaCO3) or water - skip it
+                print(f"  Info: Skipping species '{species}' (likely solid/mineral or solvent)")
+
+    def format_scientific(self, value: float) -> str:
+        """Format float in scientific notation: 1.23E+04"""
+        if value == 0.0:
+            return "0.0"
+        s = f"{value:.2E}"
+        return s.replace('e', 'E').replace('E+0', 'E+').replace('E-0', 'E-')
+
+    def generate_hpp(self) -> str:
+        """Generate complete .hpp file with constexpr arrays"""
+        num_reactions = len(self.reactions)
+        num_species = len(self.species)
+
+        # Determine number of equilibrium reactions
+        if self.num_equilibrium_reactions is not None:
+            # Explicitly specified
+            num_equilibrium = self.num_equilibrium_reactions
+        else:
+            # Auto-detect: count reactions with "isEquilibrium": true
+            # or fall back to forwardRate >= 1e9
+            num_equilibrium = sum(1 for r in self.reactions
+                                if r.get('isEquilibrium', False) or
+                                   (r.get('forwardRate', 1e10) >= 1e9 and
+                                    r.get('reverseRate', 1e10) >= 1e9))
+
+        year = datetime.now().year
+        max_species_len = max(len(s) for s in self.species)
+
+        hpp = f"""/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: (BSD-3-Clause)
+ *
+ * Copyright (c) {year}- Lawrence Livermore National Security LLC
+ * All rights reserved
+ *
+ * See top level LICENSE files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+#pragma once
+
+#include "../reactionsSystems/Parameters.hpp"
+
+namespace hpcReact
+{{
+
+namespace geochemistry
+{{
+// turn off uncrustify to allow for better readability of the parameters
+// *****UNCRUSTIFY-OFF******
+
+// ################################## {self.system_name} rxn set ##################################
+namespace {self.namespace}
+{{
+
+constexpr CArrayWrapper<signed char, {num_reactions}, {num_species}> stoichMatrix =
+{{ //  """
+
+        # Species header
+        for i, sp in enumerate(self.species):
+            hpp += f"{sp:>{max_species_len+2}}"
+        hpp += "  \n"
+
+        # Stoichiometry matrix rows
+        for i, rxn in enumerate(self.reactions):
+            hpp += "    {  "
+            for j, coeff in enumerate(rxn['stoichiometry']):
+                hpp += f"{coeff:>{max_species_len+2}}"
+                if j < num_species - 1:
+                    hpp += ","
+            hpp += "   }"
+            if i < num_reactions - 1:
+                hpp += ","
+            comment = rxn.get('comment', '')
+            if comment:
+                hpp += f"  //  {comment}"
+            hpp += "\n"
+
+        hpp += "  };\n\n"
+
+        # Equilibrium constants
+        hpp += f"constexpr CArrayWrapper<double, {num_reactions}> equilibriumConstants = \n  {{ \n"
+        for i, rxn in enumerate(self.reactions):
+            val = self.format_scientific(rxn['equilibriumConstant'])
+            hpp += f"    {val:>12}"
+            if i < num_reactions - 1:
+                hpp += ","
+            comment = rxn.get('comment', '')
+            if comment:
+                hpp += f"   //  {comment}"
+            hpp += "\n"
+        hpp += "  };\n\n"
+
+        # Forward rates
+        hpp += f"constexpr CArrayWrapper<double, {num_reactions}> forwardRates = \n  {{ \n"
+        for i, rxn in enumerate(self.reactions):
+            val = self.format_scientific(rxn.get('forwardRate', 1.0e10))
+            hpp += f"    {val:>12}"
+            if i < num_reactions - 1:
+                hpp += ","
+            comment = rxn.get('comment', '')
+            if comment:
+                hpp += f"   //  {comment}"
+            hpp += "\n"
+        hpp += "  };\n\n"
+
+        # Reverse rates
+        hpp += f"constexpr CArrayWrapper<double, {num_reactions}> reverseRates = \n  {{ \n"
+        for i, rxn in enumerate(self.reactions):
+            val = self.format_scientific(rxn.get('reverseRate', 1.0e10))
+            hpp += f"    {val:>12}"
+            if i < num_reactions - 1:
+                hpp += ","
+            comment = rxn.get('comment', '')
+            if comment:
+                hpp += f"   //  {comment}"
+            hpp += "\n"
+        hpp += "  };\n\n"
+
+        # Mobile species flags
+        hpp += f"constexpr CArrayWrapper<int, {num_reactions}> mobileSpeciesFlag = \n  {{ \n"
+        for i, rxn in enumerate(self.reactions):
+            flag = rxn.get('mobileFlag', 1)
+            hpp += f"    {flag}"
+            if i < num_reactions - 1:
+                hpp += ","
+            comment = rxn.get('comment', '')
+            if comment:
+                hpp += f"   //  {comment}"
+            hpp += "\n"
+        hpp += "  };\n"
+
+        hpp += f"""}}
+
+  using {self.namespace}SystemType = reactionsSystems::MixedReactionsParameters< double, int, signed char, {num_species}, {num_reactions}, {num_equilibrium} >;
+
+  constexpr {self.namespace}SystemType {self.namespace}System( {self.namespace}::stoichMatrix, {self.namespace}::equilibriumConstants, {self.namespace}::forwardRates, {self.namespace}::reverseRates, {self.namespace}::mobileSpeciesFlag );
+
+// *****UNCRUSTIFY-ON******
+}} // namespace geochemistry
+}} // namespace hpcReact
+"""
+
+        return hpp
+
+
+def scan_reaction_json_files(reaction_dir: str) -> List[str]:
+    """Find all .json files in reactions/data/ directory"""
+    data_dir = os.path.join(reaction_dir, 'data')
+    if not os.path.exists(data_dir):
+        return []
+
+    return [
+        os.path.join(data_dir, f)
+        for f in os.listdir(data_dir)
+        if f.endswith('.json')
+    ]
+
+
+def add_system_to_registry(registry_file: str, system_hpp: str, system_type: str):
+    """Add a new system to existing GeochemicalSystems.hpp without regenerating"""
+
+    if not os.path.exists(registry_file):
+        print(f"Error: {registry_file} not found")
+        return False
+
+    with open(registry_file, 'r') as f:
+        lines = f.readlines()
+
+    # Check if system already exists
+    content = ''.join(lines)
+    if f'#include "{system_hpp}"' in content:
+        print(f"System {system_hpp} already in registry")
+        return False
+
+    if system_type in content:
+        print(f"System type {system_type} already in registry")
+        return False
+
+    # Find where to insert the include (before #include <variant>)
+    include_insert_idx = None
+    for i, line in enumerate(lines):
+        if line.strip().startswith('#include') and '.hpp"' in line and '<variant>' not in line:
+            include_insert_idx = i + 1
+        elif line.strip() == '#include <variant>':
+            break
+
+    if include_insert_idx is None:
+        print("Error: Could not find include section")
+        return False
+
+    # Insert the new include
+    lines.insert(include_insert_idx, f'#include "{system_hpp}"\n')
+
+    # Find the variant closing (look for the line with >; that ends the variant)
+    variant_end_idx = None
+    in_variant = False
+    for i, line in enumerate(lines):
+        if 'using systemTypes = std::variant<' in line:
+            in_variant = True
+        elif in_variant and '>;' in line:
+            variant_end_idx = i
+            break
+
+    if variant_end_idx is None:
+        print("Error: Could not find variant closing")
+        return False
+
+    # Find the last type line (line before >;)
+    last_type_idx = variant_end_idx - 1
+    while last_type_idx >= 0 and not lines[last_type_idx].strip():
+        last_type_idx -= 1
+
+    # Check if last type ends with comma, if not add it
+    if not lines[last_type_idx].rstrip().endswith(','):
+        lines[last_type_idx] = lines[last_type_idx].rstrip() + ',\n'
+
+    # Insert new type with same indentation as previous line
+    last_line = lines[last_type_idx]
+    indent = len(last_line) - len(last_line.lstrip())
+    indent_str = last_line[:indent]
+    lines.insert(last_type_idx + 1, f'{indent_str}{system_type},\n')
+
+    # Write back
+    with open(registry_file, 'w') as f:
+        f.writelines(lines)
+
+    print(f"Added {system_type} to {registry_file}")
+    return True
+
+
+def generate_registry_header(reaction_dir: str, output_file: str):
+    """Auto-generate GeochemicalSystems.hpp with all detected systems"""
+
+    # Find all .hpp files (both manual and generated)
+    geochemistry_dir = os.path.join(reaction_dir, 'geochemistry')
+    if not os.path.exists(geochemistry_dir):
+        print(f"Warning: {geochemistry_dir} not found")
+        return
+
+    hpp_files = [
+        f for f in os.listdir(geochemistry_dir)
+        if f.endswith('.hpp') and f not in ['GeochemicalSystems.hpp', 'Parameters.hpp']
+    ]
+
+    # Extract namespace names from JSON files or .hpp files
+    systems = []
+    for hpp_file in hpp_files:
+        basename = os.path.splitext(hpp_file)[0]
+        # Convert filename to namespace (e.g., "Ultramafics.hpp" -> "ultramafics")
+        namespace = basename.lower()
+        systems.append({
+            'file': hpp_file,
+            'basename': basename,
+            'namespace': namespace,
+            'typeName': f'{namespace}SystemType'
+        })
+
+    year = datetime.now().year
+    registry = f"""/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: (BSD-3-Clause)
+ *
+ * Copyright (c) {year}- Lawrence Livermore National Security LLC
+ * All rights reserved
+ *
+ * See top level LICENSE files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+#pragma once
+
+"""
+
+    # Include all system headers
+    for sys in systems:
+        registry += f'#include "{sys["file"]}"\n'
+
+    registry += """
+#include <variant>
+
+namespace hpcReact
+{
+
+namespace geochemistry
+{
+using systemTypes = std::variant< """
+
+    # Add all system types to variant
+    registry += ",\n                                  ".join(s['typeName'] for s in systems)
+
+    registry += """ >;
+
+} // namespace geochemistry
+} // namespace hpcReact
+"""
+
+    with open(output_file, 'w') as f:
+        f.write(registry)
+
+    print(f"Generated {output_file} with {len(systems)} systems:")
+    for sys in systems:
+        print(f"  - {sys['basename']}")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Generate constexpr C++ reaction system from JSON',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+JSON Input Format (Recommended - Separate Primary/Secondary Species):
+{
+  "systemName": "Carbonate System",
+  "namespace": "carbonate",
+  "secondarySpecies": ["OH-", "CO2", "CO3-2", "CaHCO3+"],
+  "primarySpecies": ["H+", "HCO3-", "Ca+2"],
+  "numEquilibriumReactions": 3,
+  "reactions": [
+    {
+      "comment": "OH- + H+ = H2O",
+      "stoichiometry": [-1, 0, 0, 0, -1, 0, 0],
+      "equilibriumConstant": 9.89e13,
+      "forwardRate": 1.0e10,
+      "reverseRate": 1.0e10,
+      "mobileFlag": 1,
+      "isEquilibrium": true
+    }
+  ]
+}
+
+Notes:
+- Species order in stoichiometry: [secondary..., primary...]
+- isEquilibrium: true for fast equilibrium reactions, false for kinetic
+- numEquilibriumReactions: number of equilibrium reactions (optional, auto-detected)
+- mobileFlag: 1 for mobile species, 0 for immobile
+- forwardRate and reverseRate: reaction rate constants
+
+Build System Integration:
+Add to CMakeLists.txt to auto-generate systems at build time.
+        """
+    )
+
+    parser.add_argument('input', nargs='?', help='Input JSON file (or use --scan)')
+    parser.add_argument('-o', '--output', help='Output .hpp file')
+    parser.add_argument('--scan', metavar='DIR',
+                       help='Scan reactions/data/ directory and generate all systems')
+    parser.add_argument('--generate-registry', metavar='REACTION_DIR',
+                       help='Generate GeochemicalSystems.hpp registry file')
+    parser.add_argument('--add-to-registry', action='store_true',
+                       help='Add new system to existing GeochemicalSystems.hpp instead of regenerating')
+
+    args = parser.parse_args()
+
+    if args.generate_registry:
+        output = os.path.join(args.generate_registry, 'geochemistry', 'GeochemicalSystems.hpp')
+        generate_registry_header(args.generate_registry, output)
+        return
+
+    if args.scan:
+        json_files = scan_reaction_json_files(args.scan)
+        if not json_files:
+            print(f"No .json files found in {args.scan}/data/")
+            return
+
+        print(f"Found {len(json_files)} reaction JSON files:")
+        for json_file in json_files:
+            print(f"  Processing: {json_file}")
+            with open(json_file, 'r') as f:
+                config = json.load(f)
+
+            gen = ReactionSystemGenerator(config)
+
+            # Output to geochemistry/ directory
+            basename = config['namespace'].capitalize()
+            output_file = os.path.join(args.scan, 'geochemistry', f'{basename}.hpp')
+
+            hpp_content = gen.generate_hpp()
+            with open(output_file, 'w') as f:
+                f.write(hpp_content)
+
+            print(f"    -> Generated: {output_file}")
+            print(f"       Species: {len(gen.species)}, Reactions: {len(gen.reactions)}")
+
+        # Auto-generate registry
+        print("\nGenerating registry header...")
+        generate_registry_header(args.scan,
+                                os.path.join(args.scan, 'geochemistry', 'GeochemicalSystems.hpp'))
+
+    elif args.input:
+        if not args.output:
+            parser.error("--output required when processing single file")
+
+        with open(args.input, 'r') as f:
+            config = json.load(f)
+
+        gen = ReactionSystemGenerator(config)
+        hpp_content = gen.generate_hpp()
+
+        with open(args.output, 'w') as f:
+            f.write(hpp_content)
+
+        print(f"Generated {args.output}")
+        print(f"  Species: {len(gen.species)}")
+        print(f"  Reactions: {len(gen.reactions)}")
+
+        # If --add-to-registry flag is set, add to GeochemicalSystems.hpp
+        if args.add_to_registry:
+            # Determine registry file location
+            # Assume output is in geochemistry/ directory
+            output_dir = os.path.dirname(args.output)
+            registry_file = os.path.join(output_dir, 'GeochemicalSystems.hpp')
+
+            basename = os.path.basename(args.output)
+            namespace = config['namespace']
+            type_name = f'{namespace}SystemType'
+
+            add_system_to_registry(registry_file, basename, type_name)
+
+    else:
+        parser.error("Either INPUT file or --scan DIR required")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/validate_reaction_json.py b/scripts/validate_reaction_json.py
new file mode 100755
index 0000000..272b498
--- /dev/null
+++ b/scripts/validate_reaction_json.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+Validate reaction system JSON files before generating C++ code
+"""
+
+import argparse
+import json
+import sys
+from typing import Dict, List
+
+
+class ReactionValidator:
+    def __init__(self, config: Dict):
+        self.config = config
+        self.errors = []
+        self.warnings = []
+
+    def validate(self) -> bool:
+        """Run all validation checks"""
+        self._validate_required_fields()
+        self._validate_species()
+        self._validate_reactions()
+        self._validate_namespace()
+
+        return len(self.errors) == 0
+
+    def _validate_required_fields(self):
+        """Check all required fields are present"""
+        required = ['systemName', 'namespace', 'primarySpecies', 'secondarySpecies', 'reactions']
+        for field in required:
+            if field not in self.config:
+                self.errors.append(f"Missing required field: '{field}'")
+
+    def _validate_species(self):
+        """Validate species lists"""
+        if 'primarySpecies' not in self.config or 'secondarySpecies' not in self.config:
+            return
+
+        primary = self.config['primarySpecies']
+        secondary = self.config['secondarySpecies']
+
+        if not isinstance(primary, list) or len(primary) == 0:
+            self.errors.append("primarySpecies must be a non-empty list")
+
+        if not isinstance(secondary, list):
+            self.errors.append("secondarySpecies must be a list (can be empty for pure kinetic systems)")
+
+        # If no secondary species, this is a pure kinetic system
+        if len(secondary) == 0:
+            self.warnings.append("Pure kinetic system detected (no secondary species = no equilibrium reactions)")
+
+        # Check for duplicates
+        all_species = primary + secondary
+        if len(all_species) != len(set(all_species)):
+            duplicates = [s for s in all_species if all_species.count(s) > 1]
+            self.errors.append(f"Duplicate species: {', '.join(set(duplicates))}")
+
+        # Check for common naming issues
+        for sp in all_species:
+            if ' ' in sp:
+                self.warnings.append(f"Species '{sp}' contains space - this may cause parsing issues")
+
+    def _validate_reactions(self):
+        """Validate reaction definitions"""
+        if 'reactions' not in self.config:
+            return
+
+        reactions = self.config['reactions']
+        if not isinstance(reactions, list) or len(reactions) == 0:
+            self.errors.append("reactions must be a non-empty list")
+            return
+
+        num_secondary = len(self.config.get('secondarySpecies', []))
+
+        if num_secondary > 0 and len(reactions) < num_secondary:
+            self.errors.append(
+                f"Need at least {num_secondary} reactions (one per secondary species), "
+                f"got {len(reactions)}"
+            )
+
+        # Validate each reaction
+        for i, rxn in enumerate(reactions):
+            self._validate_reaction(i, rxn, num_secondary)
+
+    def _validate_reaction(self, idx: int, rxn: Dict, num_secondary: int):
+        """Validate a single reaction"""
+        rxn_label = f"Reaction {idx+1}"
+
+        # Check required fields
+        if 'equation' not in rxn:
+            self.errors.append(f"{rxn_label}: Missing 'equation' field")
+            return
+
+        if 'equilibriumConstant' not in rxn:
+            self.errors.append(f"{rxn_label}: Missing 'equilibriumConstant' field")
+
+        # Check equation format
+        equation = rxn['equation']
+        if '=' not in equation:
+            self.errors.append(f"{rxn_label}: Equation must contain '=' : {equation}")
+
+        # Parse and validate species
+        try:
+            left, right = equation.split('=')
+            all_species = self.config.get('primarySpecies', []) + self.config.get('secondarySpecies', [])
+
+            for side in [left, right]:
+                # Split by + surrounded by spaces to avoid splitting charges
+                import re
+                terms = re.split(r'\s+\+\s+', side)
+                for term in terms:
+                    term = term.strip()
+                    if not term or term in ['H2O', 'H₂O']:
+                        continue
+
+                    # Remove leading coefficients (digits before capital letter)
+                    match = re.match(r'^\d+\s*([A-Z].*)$', term)
+                    if match:
+                        term = match.group(1).strip()
+
+                    # Check if species is known or likely a solid
+                    if term not in all_species:
+                        # Likely a solid/mineral - this is OK
+                        if idx >= num_secondary:
+                            # Kinetic reaction - solids expected
+                            pass
+                        else:
+                            self.warnings.append(
+                                f"{rxn_label}: Species '{term}' not in primary/secondary lists "
+                                f"(OK if it's a solid/mineral)"
+                            )
+        except Exception as e:
+            self.errors.append(f"{rxn_label}: Failed to parse equation: {e}")
+
+        # Check rate constants
+        forward = rxn.get('forwardRate', 1.0e10)
+        reverse = rxn.get('reverseRate', 1.0e10)
+
+        if forward <= 0:
+            self.errors.append(f"{rxn_label}: forwardRate must be positive")
+        if reverse <= 0:
+            self.errors.append(f"{rxn_label}: reverseRate must be positive")
+
+        # Warn about equilibrium/kinetic classification
+        is_equilibrium = (idx < num_secondary)
+        if is_equilibrium:
+            # Equilibrium reactions should be fast
+            if forward < 1e6 or reverse < 1e6:
+                self.warnings.append(
+                    f"{rxn_label}: Equilibrium reaction but has slow rates "
+                    f"(forward={forward:.2e}, reverse={reverse:.2e})"
+                )
+        else:
+            # Kinetic reactions should be slower
+            if forward >= 1e9 and reverse >= 1e9:
+                self.warnings.append(
+                    f"{rxn_label}: Kinetic reaction but has very fast rates "
+                    f"(forward={forward:.2e}, reverse={reverse:.2e}). "
+                    f"Consider moving to equilibrium reactions."
+                )
+
+    def _validate_namespace(self):
+        """Validate C++ namespace"""
+        if 'namespace' not in self.config:
+            return
+
+        namespace = self.config['namespace']
+
+        # Must be valid C++ identifier
+        if not namespace.replace('_', '').isalnum():
+            self.errors.append(f"namespace '{namespace}' is not a valid C++ identifier")
+
+        if namespace[0].isdigit():
+            self.errors.append(f"namespace '{namespace}' cannot start with a digit")
+
+        if not namespace.islower():
+            self.warnings.append(f"namespace '{namespace}' should be lowercase")
+
+    def print_results(self):
+        """Print validation results"""
+        if self.errors:
+            print("❌ ERRORS:")
+            for error in self.errors:
+                print(f"  - {error}")
+            print()
+
+        if self.warnings:
+            print("⚠️  WARNINGS:")
+            for warning in self.warnings:
+                print(f"  - {warning}")
+            print()
+
+        if not self.errors and not self.warnings:
+            print("✅ Validation passed! No errors or warnings.")
+        elif not self.errors:
+            print("✅ Validation passed with warnings.")
+        else:
+            print("❌ Validation failed.")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Validate reaction system JSON files'
+    )
+    parser.add_argument('json_file', help='JSON file to validate')
+    parser.add_argument('--strict', action='store_true',
+                       help='Treat warnings as errors')
+
+    args = parser.parse_args()
+
+    try:
+        with open(args.json_file, 'r') as f:
+            config = json.load(f)
+    except FileNotFoundError:
+        print(f"❌ Error: File not found: {args.json_file}")
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        print(f"❌ Error: Invalid JSON: {e}")
+        sys.exit(1)
+
+    validator = ReactionValidator(config)
+    is_valid = validator.validate()
+    validator.print_results()
+
+    if not is_valid:
+        sys.exit(1)
+
+    if args.strict and validator.warnings:
+        sys.exit(1)
+
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 87f92bf..4f02dd7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,3 +1,76 @@
+# ============================================================================
+# Auto-generate reaction system headers from JSON definitions
+# ============================================================================
+set(REACTION_DATA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/reactions/data")
+set(HPCREACT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/..")
+set(REACTION_GEN_SCRIPT "${HPCREACT_ROOT}/scripts/generate_reaction_system.py")
+set(REACTION_VALIDATE_SCRIPT "${HPCREACT_ROOT}/scripts/validate_reaction_json.py")
+set(GEOCHEMISTRY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/reactions/geochemistry")
+
+# Find all JSON reaction definition files
+file(GLOB REACTION_JSON_FILES "${REACTION_DATA_DIR}/*.json")
+
+# Filter out example files (optional - remove this if you want to generate examples too)
+# Commented out to generate all JSON files including examples
+# list(FILTER REACTION_JSON_FILES EXCLUDE REGEX "example_.*\\.json$")
+
+set(GENERATED_REACTION_HEADERS)
+
+# Generate C++ header for each JSON file during CMake configuration
+foreach(JSON_FILE ${REACTION_JSON_FILES})
+    get_filename_component(BASENAME ${JSON_FILE} NAME_WE)
+
+    # Convert filename to CamelCase for class name
+    # e.g., my_system.json -> Mysystem.hpp
+    string(SUBSTRING ${BASENAME} 0 1 FIRST_CHAR)
+    string(TOUPPER ${FIRST_CHAR} FIRST_CHAR_UPPER)
+    string(REGEX REPLACE "^.(.*)" "${FIRST_CHAR_UPPER}\\1" CLASSNAME ${BASENAME})
+
+    set(HPP_FILE "${GEOCHEMISTRY_DIR}/${CLASSNAME}.hpp")
+
+    # Step 1: Validate JSON first
+    execute_process(
+        COMMAND python3 ${REACTION_VALIDATE_SCRIPT} ${JSON_FILE}
+        RESULT_VARIABLE VALIDATE_RESULT
+        OUTPUT_QUIET
+        ERROR_QUIET
+    )
+
+    if(NOT VALIDATE_RESULT EQUAL 0)
+        message(WARNING "${BASENAME}.json validation failed - skipping generation")
+        continue()
+    endif()
+
+    # Step 2: Generate C++ header (only if validation passed)
+    message(STATUS "Generating reaction system: ${CLASSNAME}.hpp from ${BASENAME}.json")
+    execute_process(
+        COMMAND python3 ${REACTION_GEN_SCRIPT} ${JSON_FILE} -o ${HPP_FILE} --add-to-registry
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        RESULT_VARIABLE GEN_RESULT
+        OUTPUT_VARIABLE GEN_OUTPUT
+        ERROR_VARIABLE GEN_ERROR
+    )
+
+    if(NOT GEN_RESULT EQUAL 0)
+        message(WARNING "Failed to generate ${CLASSNAME}.hpp: ${GEN_ERROR}")
+    else()
+        message(STATUS "  -> Generated: ${HPP_FILE}")
+        list(APPEND GENERATED_REACTION_HEADERS ${HPP_FILE})
+    endif()
+endforeach()
+
+# Note: GeochemicalSystems.hpp is now updated incrementally via --add-to-registry flag above
+# If you need to regenerate it from scratch, manually run:
+#   python3 scripts/generate_reaction_system.py --generate-registry src/reactions
+set(REGISTRY_HEADER "${GEOCHEMISTRY_DIR}/GeochemicalSystems.hpp")
+if(EXISTS ${REGISTRY_HEADER})
+    list(APPEND GENERATED_REACTION_HEADERS ${REGISTRY_HEADER})
+endif()
+
+# No need for custom target since files are generated during configuration
+# The generated headers are already included in hpcReact_headers below
+
+# ============================================================================
 
 set( hpcReact_headers
      common/macros.hpp
@@ -19,6 +92,7 @@ set( hpcReact_headers
      reactions/unitTestUtilities/equilibriumReactionsTestUtilities.hpp
      reactions/unitTestUtilities/kineticReactionsTestUtilities.hpp
      reactions/unitTestUtilities/mixedReactionsTestUtilities.hpp
+     ${GENERATED_REACTION_HEADERS}
      )
 
 set( hpcReact_sources)
diff --git a/src/reactions/data/CarbonateMixed.json b/src/reactions/data/CarbonateMixed.json
new file mode 100644
index 0000000..0a84a2f
--- /dev/null
+++ b/src/reactions/data/CarbonateMixed.json
@@ -0,0 +1,56 @@
+{
+  "systemName": "Example Carbonate System",
+  "namespace": "carbonatemixed",
+  "secondarySpecies": [
+    "OH-",
+    "CO2",
+    "CO3-2",
+    "CaHCO3+",
+    "CaCl+"
+  ],
+  "primarySpecies": [
+    "H+",
+    "HCO3-",
+    "Ca+2",
+    "Cl-"
+  ],
+  "reactions": [
+    {
+      "equation": "OH- + H+ = H2O",
+      "equilibriumConstant": 9.89e13,
+      "forwardRate": 1.4e11,
+      "reverseRate": 1.43e-3
+    },
+    {
+      "equation": "CO2 + H2O = H+ + HCO3-",
+      "equilibriumConstant": 4.42e-7,
+      "forwardRate": 0.039,
+      "reverseRate": 8.92e4
+    },
+    {
+      "equation": "CO3-2 + H+ = HCO3-",
+      "equilibriumConstant": 2.21e10,
+      "forwardRate": 1.0e10,
+      "reverseRate": 4.67e-1
+    },
+    {
+      "equation": "CaHCO3+ = Ca+2 + HCO3-",
+      "equilibriumConstant": 6.0e-2,
+      "forwardRate": 1.5e6,
+      "reverseRate": 1.85e7
+    },
+    {
+      "equation": "CaCl+ = Ca+2 + Cl-",
+      "equilibriumConstant": 2.0e-1,
+      "forwardRate": 1.0e8,
+      "reverseRate": 2.14e7
+    },
+    {
+      "equation": "CaCO3 + H+ = Ca+2 + HCO3-",
+      "equilibriumConstant": 5.16e1,
+      "forwardRate": 1.55e-6,
+      "reverseRate": 3.0e-8,
+      "isEquilibrium": false
+    }
+  ]
+}
diff --git a/src/reactions/data/CarbonatePureKinetic.json b/src/reactions/data/CarbonatePureKinetic.json
new file mode 100644
index 0000000..98f966f
--- /dev/null
+++ b/src/reactions/data/CarbonatePureKinetic.json
@@ -0,0 +1,26 @@
+{
+  "systemName": "Pure Kinetic System Example",
+  "namespace": "carbonatepurekinetic",
+  "secondarySpecies": [],
+  "primarySpecies": [
+    "H+",
+    "Ca+2",
+    "HCO3-",
+    "Mg+2",
+    "SiO2(aq)"
+  ],
+  "reactions": [
+    {
+      "equation": "CaCO3 + H+ = Ca+2 + HCO3-",
+      "equilibriumConstant": 5.16e1,
+      "forwardRate": 1.55e-6,
+      "reverseRate": 3.0e-8
+    },
+    {
+      "equation": "Mg2SiO4 + 4H+ = 2Mg+2 + SiO2(aq) + 2H2O",
+      "equilibriumConstant": 1.4e28,
+      "forwardRate": 2.29e-11,
+      "reverseRate": 1.65e-39
+    }
+  ]
+}
\ No newline at end of file
diff --git a/src/reactions/data/README.md b/src/reactions/data/README.md
new file mode 100644
index 0000000..fd9a639
--- /dev/null
+++ b/src/reactions/data/README.md
@@ -0,0 +1,148 @@
+# Automated Reaction System Generation
+
+## Overview
+
+Define geochemical reaction systems in JSON instead of manually writing C++ matrices. Files are automatically generated during CMake configuration.
+
+## Quick Start
+
+**1. Create JSON file** (e.g., `my_system.json`):
+```json
+{
+  "systemName": "My Carbonate System",
+  "namespace": "mycarbonate",
+  "secondarySpecies": ["OH-", "CO2"],
+  "primarySpecies": ["H+", "HCO3-", "Ca+2"],
+  "reactions": [
+    {
+      "equation": "OH- + H+ = H2O",
+      "equilibriumConstant": 9.89e13,
+      "forwardRate": 1.4e11,
+      "reverseRate": 1.43e-3
+    },
+    {
+      "equation": "CO2 + H2O = H+ + HCO3-",
+      "equilibriumConstant": 4.42e-7,
+      "forwardRate": 0.039,
+      "reverseRate": 8.92e4
+    },
+    {
+      "equation": "CaCO3 + H+ = Ca+2 + HCO3-",
+      "equilibriumConstant": 5.16e1,
+      "forwardRate": 1.55e-6,
+      "reverseRate": 3.0e-8
+    }
+  ]
+}
+```
+
+This example has 2 equilibrium reactions (first 2) + 1 kinetic reaction (mineral dissolution).
+
+**2. Run configuration:**
+```bash
+python3 scripts/config-build.py -hc hostconfigs/your-config.cmake -bt Release
+```
+
+JSON is automatically validated, then `My_system.hpp` is generated.
+
+**3. Build:**
+```bash
+cd build-<hostname>-<buildtype>  # e.g., build-macOC_arm-release
+make
+```
+
+Done! Your reaction system is now compiled into the HPCReact library.
+
+## JSON Format
+
+### Required Fields
+```json
+{
+  "systemName": "Human-readable name",
+  "namespace": "lowercasecppname",
+  "secondarySpecies": ["Species", "from", "equilibrium"],
+  "primarySpecies": ["Basis", "species"],
+  "reactions": [...]
+}
+```
+
+### Reaction Format
+```json
+{
+  "equation": "A + B = C + D",
+  "equilibriumConstant": 1.23e10,
+  "forwardRate": 1.0e10,
+  "reverseRate": 1.0e-3
+}
+```
+
+### Key Rules
+
+- **Number of equilibrium reactions = Number of secondary species**
+- First N reactions (N = # secondary) are equilibrium (fast)
+- Remaining reactions are kinetic (time-dependent)
+- Species order in equation: `[secondary..., primary...]`
+- **Pure kinetic systems:** `secondarySpecies: []` → all reactions are kinetic
+
+### Equation Syntax
+
+- Separator: ` + ` (with spaces)
+- Coefficients: `2Cl-` or `2 Cl-`
+- Water: `H2O` (auto-skipped)
+- Solids: Not in species lists (auto-skipped, e.g., `CaCO3` in kinetic reactions)
+- Charges: Part of name (`H+`, `Ca+2`, `SO4-2`)
+
+## Examples
+
+- **[example_simple.json](example_simple.json)** - Minimal (1 equilibrium reaction)
+- **[example_carbonate.json](example_carbonate.json)** - Mixed system (5 equilibrium + 1 kinetic)
+- **[example_pure_kinetic.json](example_pure_kinetic.json)** - Pure kinetic (2 kinetic only)
+
+## Tools
+
+**Validate before configuring:**
+```bash
+python3 ../../../scripts/validate_reaction_json.py my_system.json
+```
+
+**Manual generation (for testing):**
+```bash
+python3 ../../../scripts/generate_reaction_system.py \
+    my_system.json -o ../geochemistry/My_system.hpp
+```
+
+## File Locations
+
+```
+HPCReact/
+├── scripts/
+│   ├── generate_reaction_system.py   # Generator
+│   └── validate_reaction_json.py     # Validator
+└── src/reactions/
+    ├── data/                          # JSON definitions (you edit)
+    │   ├── example_simple.json
+    │   └── example_carbonate.json
+    └── geochemistry/                  # Generated .hpp (auto-created)
+        ├── GeochemicalSystems.hpp     # Registry (auto-updated)
+        ├── Example_carbonate.hpp      # Auto-generated
+        └── Ultramafics.hpp            # Manual (existing)
+```
+
+## Workflow
+
+```
+Edit JSON → config-build.py → .hpp generated → make → done
+```
+
+Every time you:
+- Add new JSON → re-run `config-build.py` → new `.hpp` created
+- Edit existing JSON → re-run `config-build.py` → `.hpp` updated
+- `GeochemicalSystems.hpp` registry always auto-updated
+
+## Troubleshooting
+
+| Issue | Solution |
+|-------|----------|
+| "Unknown species 'X'" | Check spelling/charges: `Ca+2` not `Ca2+` |
+| "Need at least N reactions" | Need ≥ N reactions for N secondary species |
+| Changes not reflected | Re-run `config-build.py` (not just `make`) |
\ No newline at end of file