diff --git a/.gitignore b/.gitignore
index f58f9ed..c96c4aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,9 @@ neural_models/
 examples/c_generated_code/
 examples/*_acados_ocp.json
 
+# Generated demo output (gifs, json dumps)
+examples/out_ipddp_mpcc_rc/
+
 # PDF images
 pdf_images/
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4c90c0..80a0893 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,7 +20,7 @@ set(CMAKE_POLICY_VERSION_MINIMUM "3.15" CACHE STRING "Minimum CMake version for
 
 project(
   cddp
-  VERSION 0.5.1
+  VERSION 0.5.2
   DESCRIPTION "CDDP: A C++ library for Trajectory Optimization and MPC"
   HOMEPAGE_URL  "https://github.com/astomodynamics/cddp-cpp"
 )
@@ -147,7 +147,9 @@ set(cddp_core_srcs
   src/cddp_core/dynamical_system.cpp
   src/cddp_core/objective.cpp
   src/cddp_core/constraint.cpp
+  src/cddp_core/cddp_context_utils.cpp
   src/cddp_core/helper.cpp
+  src/cddp_core/interior_point_utils.cpp
   src/cddp_core/boxqp.cpp
   src/cddp_core/qp_solver.cpp
   src/cddp_core/cddp_core.cpp
diff --git a/README.md b/README.md
index 00c3231..6547421 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,10 @@ $$
 \quad \mathbf{g}(\mathbf{x}_k,\mathbf{u})_k\leq 0
 $$
 
+$$
+\quad \mathbf{h}(\mathbf{x}_N)=0,\;\mathbf{g}_N(\mathbf{x}_N)\leq 0
+$$
+
 $$
 \quad {\mathbf{x}}_{0} = \mathbf{x}{(t_0)} 
 $$
@@ -28,6 +32,13 @@ $$
 \quad k = 0,1,\cdots N-1
 $$
 
+For interior-point solves, `IPDDP` supports path inequalities, terminal
+equalities, terminal inequalities, and mixed path/terminal constrained
+problems with warm-started multiplier and slack state. In the C++ API,
+terminal constraints are attached with `CDDP::addTerminalConstraint(...)`
+and should use the built-in `TerminalEqualityConstraint` or
+`TerminalInequalityConstraint` classes.
+
 ## Examples
 The maintained example surface is now split:
 
@@ -81,6 +92,10 @@ The MPCC portfolio example is a lightweight kinematic contouring-control demo.
 Its vendored track data in `examples/data/` is derived from the
 [`alexliniger/MPCC`](https://github.com/alexliniger/MPCC) project.
 
+The portfolio currently emphasizes animated path-constrained examples. The
+terminal-constraint cases added to IPDDP are covered by the regression suite
+rather than by a dedicated gallery animation.
+
 ## Installation
 ### Dependencies
 * [CMake](https://cmake.org/) (Build System)
diff --git a/docs/assets/python_portfolio/mpcc_racing_line.gif b/docs/assets/python_portfolio/mpcc_racing_line.gif
index cf443cf..9ac7fb7 100644
Binary files a/docs/assets/python_portfolio/mpcc_racing_line.gif and b/docs/assets/python_portfolio/mpcc_racing_line.gif differ
diff --git a/docs/cpp.md b/docs/cpp.md
index 4b5a46b..b4bd09d 100644
--- a/docs/cpp.md
+++ b/docs/cpp.md
@@ -30,3 +30,17 @@ That lets the repository serve both as:
 
 - a source build for C++ consumers
 - the native backend for the `pycddp` wheel
+
+## Constraint coverage
+
+`IPDDP` is the interior-point single-shooting solver in the C++ API. The
+current implementation supports:
+
+- path inequality constraints
+- terminal equality constraints via `TerminalEqualityConstraint`
+- terminal inequality constraints via `TerminalInequalityConstraint`
+- mixed path and terminal constrained problems
+
+Attach terminal constraints on `CDDP` with `addTerminalConstraint(...)`. IPDDP
+rejects unsupported terminal-constraint subclasses instead of silently ignoring
+them.
diff --git a/docs/index.md b/docs/index.md
index 398fd5c..cca4090 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -5,6 +5,7 @@
 - a C++17 core library for trajectory optimization and MPC
 - `pycddp` Python bindings built with `pybind11`
 - a small animation-oriented Python portfolio for demos and regression checks
+- IPDDP support for path constraints plus terminal equality and terminal inequality constraints
 
 Use the navigation to get started with installation, local development, and
 the release workflow for PyPI and GitHub Pages.
diff --git a/docs/python.md b/docs/python.md
index bc9d000..65dc3a0 100644
--- a/docs/python.md
+++ b/docs/python.md
@@ -7,6 +7,22 @@ The Python package is published as `pycddp`.
 - the native extension module `_pycddp_core`
 - the public package namespace `pycddp`
 - version metadata from `pycddp._version`
+- `CDDP` problem bindings with `add_constraint(...)` and `add_terminal_constraint(...)`
+
+## Solver coverage
+
+The Python package follows the same solver capability as the C++ core. For
+`IPDDP`, the regression suite now covers:
+
+- path-only constrained problems
+- terminal-inequality-only problems
+- terminal-equality-only problems
+- mixed path + terminal-equality problems
+- warm-start reuse of path, terminal-inequality, and terminal-equality state
+
+The current Python portfolio focuses on animated demos rather than terminal
+constraint examples, so terminal-constrained IPDDP coverage is documented here
+and exercised in tests.
 
 ## Local validation
 
diff --git a/docs/python_portfolio.md b/docs/python_portfolio.md
index 107f0d7..3590d35 100644
--- a/docs/python_portfolio.md
+++ b/docs/python_portfolio.md
@@ -13,6 +13,11 @@ Matplotlib.
       actually produce: constrained solves, animated trajectories, and compact
       reproducible workflows.
     </p>
+    <p>
+      The gallery is intentionally path-focused. Terminal-equality and
+      terminal-inequality support in <code>IPDDP</code> is covered in the solver
+      regression suite rather than in a separate animation card.
+    </p>
     <p>
       <a class="md-button md-button--primary" href="../python/">Python package guide</a>
       <a class="md-button" href="https://github.com/astomodynamics/cddp-cpp/tree/master/examples">Browse examples</a>
diff --git a/examples/ipddp_mpcc_rc.py b/examples/ipddp_mpcc_rc.py
new file mode 100644
index 0000000..c6cff23
--- /dev/null
+++ b/examples/ipddp_mpcc_rc.py
@@ -0,0 +1,933 @@
+"""Closed-loop IPDDP MPCC on the bundled RC racing track.
+
+This module is a pycddp IPDDP port of AIRCoM's acados kinematic MPCC
+(``control_pkg/control_pkg/mpcc/solver.py``). It drives a 6 cm wheelbase
+RC car around the same ``examples/data/mpcc_racing_track.csv`` that the
+acados demo consumes, using a receding-horizon MPC loop with cold-seeded
+solves.
+
+Model — :class:`IpddpKinematicBicycle7`
+    7-state augmented kinematic bicycle:
+    ``[x, y, psi, theta, v_prev, delta_prev, v_theta_prev]`` with control
+    ``[v_w, delta, v_theta]``. The last three states are control-history
+    latches implemented via the continuous expression
+    ``d(v_prev)/dt = (v_w - v_prev)/dt``; under Euler integration with
+    step size ``dt`` this latches ``v_prev_next = v_w`` exactly, giving
+    AIRCoM's rate-of-change residuals without having to override
+    ``get_discrete_dynamics`` from Python (which the pycddp binding does
+    not allow).
+
+Cost — :class:`IpddpRcMpccObjective`
+    11-residual cost mirroring AIRCoM's ``NONLINEAR_LS`` shape (contour,
+    lag, speed, control regs, pose regs, three rate penalties) plus a
+    smooth one-sided quadratic boundary penalty that stands in for
+    AIRCoM's L1+quadratic slack on the soft track-boundary constraint
+    (pycddp's IPDDP has no slack variables). Weight tuning — notably
+    ``w_contour = 200`` and ``w_dv = 300`` — was chosen by sweep; see
+    the git log for the experiment that drove the numbers.
+
+What diverges from AIRCoM, and why
+    * **No per-stage parameter vector.** AIRCoM sets ``p[0..8]`` per
+      stage to hand the solver a time-varying reference; pycddp has no
+      stage-parameter slot, so the reference is derived from ``theta``
+      (state[3]) inside the cost via ``track.interpolate``.
+    * **No soft-constraint slack.** See the ``w_boundary`` note above.
+    * **Terminal regulariser zeroed.** AIRCoM's
+      ``base_terminal_velocity_weight = 40`` is a *parking-mode* term
+      that pulls the terminal ``v_prev`` toward zero; over 20 stages of
+      the ``w_dv`` rate-penalty chain, that quietly bleeds the cruise
+      speed every MPC tick. We anchor ``v_w`` directly via an
+      ``w_speed_w * (v_w - v_target)^2`` term on the *running* cost
+      instead, which has no backward-propagation artefact.
+    * **Cold-seeded every tick.** A primal-warm-start from the previous
+      IPDDP solution sounds like a good idea but in practice drags the
+      new solve into a stale basin near the old terminal — iter counts
+      climb and the first-stage control drifts off the line. A fresh
+      forward-roll along the reference converges in ~15-40 iterations
+      and produces clean tracking.
+
+Entry points
+    * :func:`solve_ipddp_mpcc_rc` — single-shot open-loop trajectory
+      optimisation against the track.
+    * :func:`run_ipddp_mpc` — closed-loop receding-horizon MPC loop that
+      feeds a ``DemoResult``-compatible history object to the existing
+      ``_animate_mpcc`` portfolio renderer.
+    * CLI: ``python ipddp_mpcc_rc.py --mpc --lap`` runs a full lap and
+      writes ``examples/out_ipddp_mpcc_rc/ipddp_mpcc_rc.gif``.
+"""
+
+from __future__ import annotations
+
+import sys
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import numpy as np
+
+# Make ``python_portfolio_lib`` importable regardless of how this file is
+# loaded (direct CLI invocation or lazy import from ``python_portfolio_lib``
+# itself).
+_EXAMPLES_DIR = Path(__file__).resolve().parent
+if str(_EXAMPLES_DIR) not in sys.path:
+    sys.path.insert(0, str(_EXAMPLES_DIR))
+
+import pycddp  # noqa: E402
+
+if __package__:
+    from .python_portfolio_lib import (  # noqa: E402
+        TrackData,
+        _load_track_csv,
+        _portfolio_track_path,
+        _wrap_angle,
+    )
+else:
+    from python_portfolio_lib import (  # noqa: E402
+        TrackData,
+        _load_track_csv,
+        _portfolio_track_path,
+        _wrap_angle,
+    )
+
+
+# ---------------------------------------------------------------------------
+# 7-state augmented kinematic bicycle (AIRCoM-style)
+# ---------------------------------------------------------------------------
+
+
+class IpddpKinematicBicycle7(pycddp.DynamicalSystem):
+    """7-state augmented kinematic bicycle that mirrors AIRCoM's MPCC model.
+
+    State (7-D):  ``[x, y, psi, theta, v_prev, delta_prev, v_theta_prev]``
+    Control (3-D): ``[v_w, delta, v_theta]``
+
+    The first four states are the standard MPCC kinematic states (planar
+    pose plus the progress variable). The last three are *latches* that
+    hold the previously-applied control. They exist purely so the cost
+    can penalise rate-of-change residuals
+    ``(v_w - v_prev), (delta - delta_prev), (v_theta - v_theta_prev)`` —
+    exactly the smoothing AIRCoM uses to keep the controls calm.
+
+    The latch is implemented at the **continuous-dynamics** level via
+    ``d(v_prev)/dt = (v_w - v_prev) / dt``. Under Euler integration with
+    step size equal to ``dt``, this expression yields ``v_prev_next = v_w``
+    *exactly*, so we get the AIRCoM latching behaviour without having to
+    override ``get_discrete_dynamics`` from Python (which the pycddp
+    binding does not allow). Use ``"euler"`` as the integrator type or
+    the latch will only be approximate.
+    """
+
+    def __init__(self, timestep: float, wheelbase: float = 0.20) -> None:
+        super().__init__(7, 3, timestep, "euler")
+        self.dt = float(timestep)
+        self.wheelbase = float(wheelbase)
+        self._inv_dt = 1.0 / float(timestep)
+
+    def get_continuous_dynamics(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        time: float = 0.0,
+    ) -> np.ndarray:
+        psi = float(state[2])
+        v_prev = float(state[4])
+        delta_prev = float(state[5])
+        v_theta_prev = float(state[6])
+        v_w = float(control[0])
+        delta = float(control[1])
+        v_theta = float(control[2])
+        return np.array(
+            [
+                v_w * np.cos(psi),
+                v_w * np.sin(psi),
+                v_w * np.tan(delta) / self.wheelbase,
+                v_theta,
+                (v_w - v_prev) * self._inv_dt,
+                (delta - delta_prev) * self._inv_dt,
+                (v_theta - v_theta_prev) * self._inv_dt,
+            ],
+            dtype=float,
+        )
+
+    def get_state_jacobian(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        time: float = 0.0,
+    ) -> np.ndarray:
+        psi = float(state[2])
+        v_w = float(control[0])
+        A = np.zeros((7, 7), dtype=float)
+        A[0, 2] = -v_w * np.sin(psi)
+        A[1, 2] = v_w * np.cos(psi)
+        A[4, 4] = -self._inv_dt
+        A[5, 5] = -self._inv_dt
+        A[6, 6] = -self._inv_dt
+        return A
+
+    def get_control_jacobian(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        time: float = 0.0,
+    ) -> np.ndarray:
+        psi = float(state[2])
+        v_w = float(control[0])
+        delta = float(control[1])
+        sec_sq = 1.0 / (np.cos(delta) ** 2)
+        B = np.zeros((7, 3), dtype=float)
+        B[0, 0] = np.cos(psi)
+        B[1, 0] = np.sin(psi)
+        B[2, 0] = np.tan(delta) / self.wheelbase
+        B[2, 1] = v_w * sec_sq / self.wheelbase
+        B[3, 2] = 1.0
+        B[4, 0] = self._inv_dt
+        B[5, 1] = self._inv_dt
+        B[6, 2] = self._inv_dt
+        return B
+
+    def get_state_hessian(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        time: float = 0.0,
+    ) -> list[np.ndarray]:
+        return [np.zeros((7, 7), dtype=float) for _ in range(7)]
+
+    def get_control_hessian(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        time: float = 0.0,
+    ) -> list[np.ndarray]:
+        return [np.zeros((3, 3), dtype=float) for _ in range(7)]
+
+    def get_cross_hessian(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        time: float = 0.0,
+    ) -> list[np.ndarray]:
+        return [np.zeros((3, 7), dtype=float) for _ in range(7)]
+
+
+# State indices for the 7-state augmented model.
+IDX_X = 0
+IDX_Y = 1
+IDX_PSI = 2
+IDX_THETA = 3
+IDX_V_PREV = 4
+IDX_DELTA_PREV = 5
+IDX_V_THETA_PREV = 6
+
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class IpddpRcMpccConfig:
+    """Knobs for the IPDDP MPCC problem (AIRCoM kinematic shape).
+
+    Cost weights mirror the AIRCoM ``MpccConfig`` defaults for the
+    *kinematic* MPCC (the simpler model in
+    ``control_pkg/control_pkg/mpcc/solver.py``), with one departure: the
+    soft track-boundary constraint is replaced by a smooth quadratic
+    penalty in the cost because pycddp's IPDDP has no slack variables.
+    """
+
+    dt: float = 0.05
+    # Horizon 20 × dt = 1 s lookahead, ~1.5 m at the 1.5 m/s cruise we
+    # actually execute. A grid probe (H∈{20,25} × band∈{0.85,0.90}) showed
+    # that longer horizon costs ~20% more wall-clock per step and doesn't
+    # improve the trajectory at all — the rate-penalty smoothing already
+    # sees as far forward as it needs to.
+    horizon: int = 20
+    # AIRCoM RC car: wheelbase = lf + lr = 0.029 + 0.033 = 0.062 m. With
+    # delta_max = 0.6 rad the bicycle's min turning radius is
+    # L/tan(0.6) ≈ 0.091 m, ~2x tighter than the 0.185 m min radius of the
+    # bundled CSV track.
+    wheelbase: float = 0.062
+    # Floor used when the track's curvature-scaled ``v_ref`` drops below
+    # this. AIRCoM keeps this at 1.0 m/s. The *upper* end of the speed
+    # profile comes from the CSV (which caps at 2.2 m/s on straights) via
+    # ``v_target = max(ref.v_ref, reference_speed)`` in the cost.
+    reference_speed: float = 1.0
+
+    # Control bounds (hard, via pycddp.ControlConstraint). AIRCoM's acados
+    # version caps v_max at 1.5 m/s because the Pacejka tire model gets
+    # numerically stiff above that for a 6 cm wheelbase. The kinematic
+    # bicycle has no such issue, so we let v_w follow the full 2.2 m/s
+    # track-speed profile on the straights for a proper racing line.
+    speed_min: float = 0.1
+    speed_max: float = 2.2
+    delta_max: float = 0.60
+    v_theta_min: float = 0.0
+    v_theta_max: float = 2.2
+
+    # 11-residual cost weights (NONLINEAR_LS diagonal W). Names match
+    # AIRCoM's ``control_pkg/.../mpcc/solver.py`` but the magnitudes are
+    # tuned for IPDDP on this track — see the weight-sweep in the commit
+    # message for the experimental justification.
+    #
+    # Notable deviations from AIRCoM's defaults:
+    # * ``w_contour = 200`` (up from 100): tightens tracking AND makes the
+    #   cost Hessian more strictly convex in position, so IPDDP converges
+    #   ~12% faster per step.
+    # * ``w_dv = 300`` (down from 1000): over-strong rate penalty was the
+    #   main per-step cost driver without meaningfully improving
+    #   smoothness. Lowering it speeds solves up ~14%.
+    w_contour: float = 200.0   # (n . d)^2
+    w_lag: float = 100.0       # (t . d)^2
+    w_speed: float = 5.0       # (v_theta - v_ref)^2
+    w_control: float = 0.1     # v_w^2 and delta^2
+    w_x: float = 0.0           # dx_err^2 (zero by default like AIRCoM)
+    w_y: float = 0.0           # dy_err^2
+    w_yaw: float = 0.0         # e_yaw^2
+    # Extra: explicit v_w tracking on every stage. AIRCoM doesn't have this
+    # (their v_w comes for free through lag-error coupling + terminal v_prev
+    # regulariser), but here it anchors v_w directly so the rate-penalty
+    # chain has nothing to bleed against. Cheaper and more stable for IPDDP
+    # than a pure terminal anchor.
+    w_speed_w: float = 10.0    # (v_w - v_target)^2 on every stage
+    # Rate-of-change weights — the key smoothing terms.
+    w_dv: float = 300.0
+    w_ddelta: float = 1000.0
+    w_dv_theta: float = 100.0
+
+    # Smooth track-boundary penalty (replaces AIRCoM's L1+quadratic slack).
+    # The grid probe showed the band doesn't get reached on this track —
+    # w_contour (100) already pulls the car within 0.12 m of the
+    # centerline, well inside 0.85 * 0.18 = 0.153 m. This is a safety
+    # margin, not an active shaping term.
+    w_boundary: float = 200.0
+    boundary_band: float = 0.85
+
+    # Terminal weights — AIRCoM W_e diagonal, but with the velocity/steering
+    # anchors *zeroed out*. AIRCoM uses those for parking mode (pull the car
+    # to a stop at the goal); for racing-line MPC they propagate backward
+    # through 20 stages of the rate-penalty chain and bleed the forward
+    # speed every tick. We replace their role with ``w_speed_w`` on the
+    # running cost, which anchors v_w directly at every stage.
+    w_terminal: float = 50.0
+    base_terminal_velocity_weight: float = 0.0
+    base_terminal_steering_weight: float = 0.0
+    # Linear reward for advancing the progress variable. Small — just a
+    # tiebreaker so the terminal cost prefers reaching farther ``theta``.
+    w_terminal_progress: float = 2.0
+
+    # IPDDP solver options.
+    max_iterations: int = 100
+    tolerance: float = 1e-4
+    acceptable_tolerance: float = 5e-4
+    initial_regularization: float = 1e-4
+    line_search_iters: int = 12
+
+
+# ---------------------------------------------------------------------------
+# Cost
+# ---------------------------------------------------------------------------
+
+
+class IpddpRcMpccObjective(pycddp.NonlinearObjective):
+    """11-residual MPCC cost matching AIRCoM's NONLINEAR_LS shape.
+
+    Operates on the 7-state augmented kinematic bicycle:
+    ``state = [x, y, psi, theta, v_prev, delta_prev, v_theta_prev]`` and
+    ``control = [v_w, delta, v_theta]``.
+
+    The running cost is ``dt * sum_i w_i * r_i^2`` over the same eleven
+    residuals AIRCoM uses (`solver.py:240`):
+
+    1.  contour error ``e_c = n . (p - p_ref)``
+    2.  lag error ``e_l = t . (p - p_ref)``
+    3.  speed-tracking ``v_theta - v_ref``
+    4.  control regulariser on ``v_w``
+    5.  control regulariser on ``delta``
+    6.  position error ``dx``  (zero weight by default — AIRCoM)
+    7.  position error ``dy``  (zero weight by default — AIRCoM)
+    8.  yaw error ``e_yaw``    (zero weight by default — AIRCoM)
+    9.  rate ``v_w - v_prev``
+    10. rate ``delta - delta_prev``
+    11. rate ``v_theta - v_theta_prev``
+
+    Plus a smooth quadratic boundary penalty
+    ``w_boundary * max(0, |e_c| - boundary_band * half_width)^2``
+    that stands in for AIRCoM's L1+quadratic slack on the soft
+    ``-half_width <= e_c <= half_width`` constraint (pycddp IPDDP has no
+    slack variables).
+
+    The reference parameters AIRCoM passes via ``p[stage]`` are looked
+    up from ``theta`` (state[3]) on the track instead — that turns the
+    time-varying reference into a function of state, which is what IPDDP
+    needs.
+    """
+
+    def __init__(self, cfg: IpddpRcMpccConfig, track: TrackData) -> None:
+        super().__init__(cfg.dt)
+        self.cfg = cfg
+        self.track = track
+        self.dt = float(cfg.dt)
+        self._half_width = float(track.width)
+
+    def _tracking(self, state: np.ndarray) -> tuple[float, float, float, float, float, float]:
+        theta = float(state[IDX_THETA])
+        ref = self.track.interpolate(theta)
+        dx = float(state[IDX_X]) - ref.x
+        dy = float(state[IDX_Y]) - ref.y
+        e_c = ref.normal[0] * dx + ref.normal[1] * dy
+        e_l = ref.tangent[0] * dx + ref.tangent[1] * dy
+        e_yaw = _wrap_angle(float(state[IDX_PSI]) - ref.heading)
+        return e_c, e_l, e_yaw, ref.v_ref, dx, dy
+
+    def running_cost(
+        self,
+        state: np.ndarray,
+        control: np.ndarray,
+        index: int,
+    ) -> float:
+        cfg = self.cfg
+        e_c, e_l, e_yaw, v_ref_track, dx, dy = self._tracking(state)
+        v_prev = float(state[IDX_V_PREV])
+        delta_prev = float(state[IDX_DELTA_PREV])
+        v_theta_prev = float(state[IDX_V_THETA_PREV])
+        v_w = float(control[0])
+        delta = float(control[1])
+        v_theta = float(control[2])
+
+        v_target = max(v_ref_track, cfg.reference_speed)
+        boundary = max(0.0, abs(e_c) - cfg.boundary_band * self._half_width)
+
+        return float(
+            self.dt
+            * (
+                cfg.w_contour * e_c * e_c
+                + cfg.w_lag * e_l * e_l
+                + cfg.w_speed * (v_theta - v_target) ** 2
+                + cfg.w_speed_w * (v_w - v_target) ** 2
+                + cfg.w_control * v_w * v_w
+                + cfg.w_control * delta * delta
+                + cfg.w_x * dx * dx
+                + cfg.w_y * dy * dy
+                + cfg.w_yaw * e_yaw * e_yaw
+                + cfg.w_dv * (v_w - v_prev) ** 2
+                + cfg.w_ddelta * (delta - delta_prev) ** 2
+                + cfg.w_dv_theta * (v_theta - v_theta_prev) ** 2
+                + cfg.w_boundary * boundary * boundary
+            )
+        )
+
+    def terminal_cost(self, final_state: np.ndarray) -> float:
+        cfg = self.cfg
+        e_c, e_l, _e_yaw, _v_ref, _dx, _dy = self._tracking(final_state)
+        theta = float(final_state[IDX_THETA])
+        return float(
+            cfg.w_terminal * e_c * e_c
+            + cfg.w_terminal * e_l * e_l
+            - cfg.w_terminal_progress * theta
+        )
+
+
+# ---------------------------------------------------------------------------
+# Result container
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class IpddpRcMpccResult:
+    """One-shot IPDDP MPCC solution."""
+
+    solution: pycddp.CDDPSolution
+    states: np.ndarray
+    controls: np.ndarray
+    time_points: np.ndarray
+    track: TrackData
+    cfg: IpddpRcMpccConfig
+
+    @property
+    def lap_progress(self) -> float:
+        if self.states.size == 0:
+            return 0.0
+        return float(self.states[-1, IDX_THETA] - self.states[0, IDX_THETA])
+
+    @property
+    def contour_errors(self) -> np.ndarray:
+        out = np.empty(self.states.shape[0], dtype=float)
+        for i, state in enumerate(self.states):
+            ref = self.track.interpolate(float(state[IDX_THETA]))
+            d = state[:2] - np.array([ref.x, ref.y], dtype=float)
+            out[i] = float(ref.normal @ d)
+        return out
+
+    @property
+    def lag_errors(self) -> np.ndarray:
+        out = np.empty(self.states.shape[0], dtype=float)
+        for i, state in enumerate(self.states):
+            ref = self.track.interpolate(float(state[IDX_THETA]))
+            d = state[:2] - np.array([ref.x, ref.y], dtype=float)
+            out[i] = float(ref.tangent @ d)
+        return out
+
+
+# ---------------------------------------------------------------------------
+# Solver setup + run
+# ---------------------------------------------------------------------------
+
+
+def _initial_state(track: TrackData, cfg: IpddpRcMpccConfig) -> np.ndarray:
+    """Place the car on the centerline at theta = s[0], aligned heading.
+
+    The control-latch components are seeded at the cruise speed so the very
+    first stage's rate residuals (``v_w - v_prev`` etc.) start near zero —
+    important for the strong AIRCoM rate-penalty weights.
+    """
+    ref = track.interpolate(float(track.s[0]))
+    cruise = float(np.clip(cfg.reference_speed, cfg.speed_min, cfg.speed_max))
+    v_theta = float(np.clip(cruise, cfg.v_theta_min, cfg.v_theta_max))
+    return np.array(
+        [
+            ref.x,
+            ref.y,
+            ref.heading,
+            float(track.s[0]),
+            cruise,    # v_prev
+            0.0,       # delta_prev
+            v_theta,   # v_theta_prev
+        ],
+        dtype=float,
+    )
+
+
+def _seed_controls(
+    track: TrackData,
+    cfg: IpddpRcMpccConfig,
+    initial_progress: float,
+) -> list[np.ndarray]:
+    """Cold-start control seed: roll forward along the reference at v_ref."""
+    seeds: list[np.ndarray] = []
+    progress = float(initial_progress)
+    wheelbase = cfg.wheelbase
+    for _ in range(cfg.horizon):
+        ref = track.interpolate(progress)
+        v_target = float(np.clip(max(ref.v_ref, cfg.reference_speed), cfg.speed_min, cfg.speed_max))
+        steer_guess = float(
+            np.clip(np.arctan(wheelbase * ref.curvature), -cfg.delta_max, cfg.delta_max)
+        )
+        v_theta = float(np.clip(v_target, cfg.v_theta_min, cfg.v_theta_max))
+        seeds.append(np.array([v_target, steer_guess, v_theta], dtype=float))
+        progress += cfg.dt * v_theta
+    return seeds
+
+
+def _rollout(
+    model: IpddpKinematicBicycle7, x0: np.ndarray, controls: list[np.ndarray]
+) -> tuple[list[np.ndarray], list[np.ndarray]]:
+    state = x0.copy()
+    states = [state.copy()]
+    rolled: list[np.ndarray] = []
+    for u in controls:
+        u_arr = np.asarray(u, dtype=float).copy()
+        rolled.append(u_arr)
+        state = np.asarray(model.get_discrete_dynamics(state, u_arr), dtype=float)
+        states.append(state.copy())
+    return states, rolled
+
+
+def build_solver(
+    cfg: IpddpRcMpccConfig,
+    track: TrackData,
+) -> tuple[pycddp.CDDP, IpddpKinematicBicycle7, IpddpRcMpccObjective, np.ndarray]:
+    """Build a fully configured pycddp CDDP solver for the simplified MPCC."""
+    model = IpddpKinematicBicycle7(cfg.dt, wheelbase=cfg.wheelbase)
+    objective = IpddpRcMpccObjective(cfg, track)
+
+    opts = pycddp.CDDPOptions()
+    opts.max_iterations = cfg.max_iterations
+    opts.tolerance = cfg.tolerance
+    opts.acceptable_tolerance = cfg.acceptable_tolerance
+    opts.regularization.initial_value = cfg.initial_regularization
+    opts.line_search.max_iterations = cfg.line_search_iters
+    opts.use_ilqr = True  # avoid relying on dynamics Hessians
+    opts.verbose = False
+    opts.print_solver_header = False
+    opts.return_iteration_info = True
+
+    x0 = _initial_state(track, cfg)
+    # Reference state argument is consumed by the QuadraticObjective path; for
+    # NonlinearObjective the solver only uses it as bookkeeping, so we point it
+    # at the same starting reference.
+    x_ref = x0.copy()
+
+    solver = pycddp.CDDP(x0, x_ref, cfg.horizon, cfg.dt, opts)
+    solver.set_dynamical_system(model)
+    solver.set_objective(objective)
+    solver.add_constraint(
+        "control_limits",
+        pycddp.ControlConstraint(
+            np.array([cfg.speed_min, -cfg.delta_max, cfg.v_theta_min], dtype=float),
+            np.array([cfg.speed_max, cfg.delta_max, cfg.v_theta_max], dtype=float),
+        ),
+    )
+
+    return solver, model, objective, x0
+
+
+def solve_ipddp_mpcc_rc(
+    cfg: IpddpRcMpccConfig | None = None,
+    track: TrackData | None = None,
+) -> IpddpRcMpccResult:
+    """Run a single open-loop IPDDP solve of the simplified MPCC."""
+    if cfg is None:
+        cfg = IpddpRcMpccConfig()
+    if track is None:
+        track = _load_track_csv(_portfolio_track_path())
+
+    solver, model, _objective, x0 = build_solver(cfg, track)
+
+    seed_u = _seed_controls(track, cfg, float(x0[3]))
+    seed_x, seed_u = _rollout(model, x0, seed_u)
+    solver.set_initial_trajectory(seed_x, seed_u)
+
+    solution = solver.solve(pycddp.SolverType.IPDDP)
+    if not solution.state_trajectory or not solution.control_trajectory:
+        raise RuntimeError("IPDDP MPCC RC solve produced an empty trajectory.")
+
+    states = np.vstack([np.asarray(x, dtype=float) for x in solution.state_trajectory])
+    controls = np.vstack([np.asarray(u, dtype=float) for u in solution.control_trajectory])
+    time_points = np.asarray(solution.time_points, dtype=float)
+
+    return IpddpRcMpccResult(
+        solution=solution,
+        states=states,
+        controls=controls,
+        time_points=time_points,
+        track=track,
+        cfg=cfg,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Closed-loop MPC runner
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class IpddpRcMpccHistory:
+    """Closed-loop history captured per MPC step."""
+
+    executed_states: list[np.ndarray] = field(default_factory=list)
+    executed_controls: list[np.ndarray] = field(default_factory=list)
+    contour_errors: list[float] = field(default_factory=list)
+    lag_errors: list[float] = field(default_factory=list)
+    solve_times_ms: list[float] = field(default_factory=list)
+    predicted_paths: list[np.ndarray] = field(default_factory=list)
+    reference_windows: list[np.ndarray] = field(default_factory=list)
+    failures: int = 0
+
+
+def _reference_window_from_predicted(
+    track: TrackData, predicted_states: list[np.ndarray]
+) -> np.ndarray:
+    """Sample the track centerline at the progress of each predicted stage.
+
+    The visualization overlays this alongside the IPDDP prediction, so the
+    two lines need to span the **same** stages — sampling at a fixed
+    ``reference_speed`` stride makes the orange line shorter than the red
+    one whenever the solver is running above that cruise speed (the case
+    on straights). Keying off ``predicted_states[k][IDX_THETA]`` keeps the
+    orange line aligned with what the solver is actually aiming at.
+    """
+    pts = np.empty((len(predicted_states), 2), dtype=float)
+    for stage, x in enumerate(predicted_states):
+        ref = track.interpolate(float(x[IDX_THETA]))
+        pts[stage, 0] = ref.x
+        pts[stage, 1] = ref.y
+    return pts
+
+
+def run_ipddp_mpc(
+    cfg: IpddpRcMpccConfig | None = None,
+    track: TrackData | None = None,
+    *,
+    simulation_steps: int = 60,
+    stop_at_progress: float | None = None,
+    show_progress: bool = False,
+) -> IpddpRcMpccHistory:
+    """Run closed-loop receding-horizon IPDDP MPC.
+
+    At each step we re-solve the IPDDP problem from the current plant state,
+    apply the first control to the kinematic bicycle, and cold-seed the
+    next solve from a forward-roll along the reference. The whole history
+    is captured in a form compatible with ``_animate_mpcc``.
+
+    If ``stop_at_progress`` is given, the simulation breaks as soon as the
+    executed ``theta`` crosses that threshold — use
+    ``stop_at_progress=track.length`` to stop after one full lap.
+    ``simulation_steps`` still acts as an upper bound so a wandering run
+    cannot loop forever.
+    """
+    if cfg is None:
+        cfg = IpddpRcMpccConfig()
+    if track is None:
+        track = _load_track_csv(_portfolio_track_path())
+    if simulation_steps < 1:
+        raise ValueError("simulation_steps must be >= 1")
+
+    solver, model, _objective, x0 = build_solver(cfg, track)
+
+    state = x0.copy()
+    history = IpddpRcMpccHistory(executed_states=[state.copy()])
+
+    last_control = np.zeros(3, dtype=float)
+
+    for step_index in range(simulation_steps):
+        # Track-relative metrics at the current state.
+        ref_now = track.interpolate(float(state[IDX_THETA]))
+        d_pos = state[:2] - np.array([ref_now.x, ref_now.y], dtype=float)
+        history.contour_errors.append(float(ref_now.normal @ d_pos))
+        history.lag_errors.append(float(ref_now.tangent @ d_pos))
+
+        # Cold-seed every step. The shifted warm-start from the previous
+        # IPDDP solution sounds like it should be a good idea but in
+        # practice it tends to drag the new solve into a stale basin near
+        # the old terminal: iter counts climb and the first-stage control
+        # drifts off the line. A fresh forward-roll along the reference
+        # happens to be both faster and far more stable for this problem.
+        seed_u = _seed_controls(track, cfg, float(state[IDX_THETA]))
+        seed_x, seed_u = _rollout(model, state, seed_u)
+        solver.set_initial_state(state)
+        solver.set_initial_trajectory(seed_x, seed_u)
+
+        t0 = time.perf_counter()
+        sol = solver.solve(pycddp.SolverType.IPDDP)
+        history.solve_times_ms.append((time.perf_counter() - t0) * 1000.0)
+
+        if not sol.state_trajectory or not sol.control_trajectory:
+            history.failures += 1
+            control = last_control.copy()
+            predicted_states = [state.copy() for _ in range(cfg.horizon + 1)]
+            predicted_controls = [last_control.copy() for _ in range(cfg.horizon)]
+        else:
+            predicted_states = [
+                np.asarray(x, dtype=float).copy() for x in sol.state_trajectory
+            ]
+            predicted_controls = [
+                np.asarray(u, dtype=float).copy() for u in sol.control_trajectory
+            ]
+            control = predicted_controls[0].copy()
+            if not np.all(np.isfinite(control)):
+                history.failures += 1
+                control = last_control.copy()
+            else:
+                last_control = control.copy()
+
+        history.predicted_paths.append(
+            np.vstack([s[:2] for s in predicted_states])
+        )
+        history.reference_windows.append(
+            _reference_window_from_predicted(track, predicted_states)
+        )
+
+        # Hard-clip the control (defensive — IPDDP should already respect bounds).
+        control[0] = float(np.clip(control[0], cfg.speed_min, cfg.speed_max))
+        control[1] = float(np.clip(control[1], -cfg.delta_max, cfg.delta_max))
+        control[2] = float(np.clip(control[2], cfg.v_theta_min, cfg.v_theta_max))
+
+        # Advance the plant.
+        next_state = np.asarray(model.get_discrete_dynamics(state, control), dtype=float)
+        next_state[IDX_PSI] = _wrap_angle(float(next_state[IDX_PSI]))
+        history.executed_controls.append(control.copy())
+        state = next_state
+        history.executed_states.append(state.copy())
+
+        if show_progress and (
+            step_index == 0
+            or step_index + 1 == simulation_steps
+            or (step_index + 1) % 5 == 0
+        ):
+            print(
+                f"[ipddp_mpc] step {step_index + 1:3d}/{simulation_steps} "
+                f"solve={history.solve_times_ms[-1]:6.1f} ms iter={sol.iterations_completed:3d} "
+                f"contour={history.contour_errors[-1]:+.3f} progress={float(state[IDX_THETA]):.2f} m",
+                flush=True,
+            )
+
+        if stop_at_progress is not None and float(state[IDX_THETA]) >= stop_at_progress:
+            if show_progress:
+                print(
+                    f"[ipddp_mpc] lap finished at step {step_index + 1}, "
+                    f"progress={float(state[IDX_THETA]):.2f} m",
+                    flush=True,
+                )
+            break
+
+    # Final tracking pair so the time series matches len(executed_states).
+    final_ref = track.interpolate(float(state[3]))
+    final_d = state[:2] - np.array([final_ref.x, final_ref.y], dtype=float)
+    history.contour_errors.append(float(final_ref.normal @ final_d))
+    history.lag_errors.append(float(final_ref.tangent @ final_d))
+
+    return history
+
+
+def history_to_demo_result(
+    history: IpddpRcMpccHistory,
+    track: TrackData,
+    cfg: IpddpRcMpccConfig,
+) -> "object":
+    """Wrap the MPC history as a ``DemoResult`` so ``save_animation`` works."""
+    if not history.executed_controls:
+        raise ValueError("history must contain at least one executed control")
+    if __package__:
+        from .python_portfolio_lib import DemoResult  # local import
+    else:
+        from python_portfolio_lib import DemoResult  # local import
+
+    full_states = np.vstack(history.executed_states)
+    # The animator destructures (x, y, heading, progress) from each state row,
+    # so project the 7-state augmented vector down to its first four
+    # components before handing it over.
+    states = full_states[:, :4]
+    controls = np.vstack(history.executed_controls)
+    time_points_arr = np.arange(len(history.executed_states), dtype=float) * cfg.dt
+    final_ref = track.interpolate(float(full_states[-1, IDX_THETA]))
+    target_state = np.array(
+        [final_ref.x, final_ref.y, final_ref.heading, float(full_states[-1, IDX_THETA])],
+        dtype=float,
+    )
+
+    # Synthesize a minimal CDDPSolution-shaped object — _animate_mpcc only
+    # touches result.metadata + result.states/controls/time_points/target_state,
+    # not the solution itself, so a sentinel with the right attributes works.
+    stub = type(
+        "_StubSolution",
+        (),
+        {
+            "solver_name": "IPDDP MPC",
+            "final_objective": 0.0,
+            "final_primal_infeasibility": 0.0,
+            "iterations_completed": 0,
+            "solve_time_ms": float(np.sum(history.solve_times_ms)),
+            "state_trajectory": list(history.executed_states),
+            "control_trajectory": list(history.executed_controls),
+            "time_points": time_points_arr,
+        },
+    )()
+
+    return DemoResult(
+        slug="mpcc_racing_line",
+        title="IPDDP MPCC RC",
+        solver_name="IPDDP MPC",
+        solution=stub,
+        states=states,
+        controls=controls,
+        time_points=time_points_arr,
+        target_state=target_state,
+        metadata={
+            "timestep": cfg.dt,
+            "track": track,
+            "track_width": track.width,
+            "contour_errors": np.asarray(history.contour_errors, dtype=float),
+            "lag_errors": np.asarray(history.lag_errors, dtype=float),
+            "solve_times_ms": np.asarray(history.solve_times_ms, dtype=float),
+            "predicted_paths": history.predicted_paths,
+            "reference_windows": history.reference_windows,
+            "subtitle": "Closed-loop IPDDP receding-horizon MPC on the bundled racing track",
+            "final_error_override": float(
+                np.hypot(history.contour_errors[-1], history.lag_errors[-1])
+            ),
+        },
+    )
+
+
+# ---------------------------------------------------------------------------
+# CLI entry
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    import argparse
+
+    parser = argparse.ArgumentParser(description="IPDDP MPCC RC demo.")
+    parser.add_argument("--mpc", action="store_true", help="Run closed-loop MPC and save a GIF.")
+    parser.add_argument("--steps", type=int, default=60, help="Closed-loop simulation steps (MPC mode). Ignored if --lap.")
+    parser.add_argument("--lap", action="store_true",
+                        help="Run until a full lap is completed (theta >= track.length).")
+    parser.add_argument("--max-steps", type=int, default=600,
+                        help="Safety cap on simulation steps when running a full lap.")
+    parser.add_argument("--horizon", type=int, default=12, help="MPC horizon length.")
+    parser.add_argument("--cap", type=int, default=60, help="Per-solve IPDDP iteration cap (MPC mode).")
+    parser.add_argument("--gif", type=str, default="examples/out_ipddp_mpcc_rc/ipddp_mpcc_rc.gif",
+                        help="Output GIF path.")
+    parser.add_argument("--fps", type=int, default=12)
+    parser.add_argument("--dpi", type=int, default=110)
+    args = parser.parse_args()
+
+    if args.steps < 1:
+        parser.error("--steps must be >= 1")
+    if args.max_steps < 1:
+        parser.error("--max-steps must be >= 1")
+    if args.horizon < 1:
+        parser.error("--horizon must be >= 1")
+    if args.cap < 1:
+        parser.error("--cap must be >= 1")
+    if args.fps < 1:
+        parser.error("--fps must be >= 1")
+    if args.dpi < 1:
+        parser.error("--dpi must be >= 1")
+
+    if args.mpc:
+        cfg = IpddpRcMpccConfig(
+            horizon=args.horizon,
+            max_iterations=args.cap,
+            dt=0.05,
+        )
+        track = _load_track_csv(_portfolio_track_path())
+        if args.lap:
+            steps = args.max_steps
+            stop_at = float(track.length)
+        else:
+            steps = args.steps
+            stop_at = None
+        history = run_ipddp_mpc(
+            cfg,
+            track,
+            simulation_steps=steps,
+            stop_at_progress=stop_at,
+            show_progress=True,
+        )
+        print(
+            f"[ipddp_mpc] total solve = {sum(history.solve_times_ms):.0f} ms, "
+            f"failures = {history.failures}"
+        )
+
+        if __package__:
+            from .python_portfolio_lib import save_animation  # local import
+        else:
+            from python_portfolio_lib import save_animation  # local import
+        result = history_to_demo_result(history, track, cfg)
+        out = save_animation(result, args.gif, fps=args.fps, dpi=args.dpi, frame_step=1)
+        print(f"[ipddp_mpc] saved gif -> {out}")
+        return
+
+    cfg = IpddpRcMpccConfig()
+    result = solve_ipddp_mpcc_rc(cfg)
+    sol = result.solution
+    print(f"[ipddp_mpcc_rc] solver        = {sol.solver_name}")
+    print(f"[ipddp_mpcc_rc] iterations    = {sol.iterations_completed}")
+    print(f"[ipddp_mpcc_rc] solve_time_ms = {sol.solve_time_ms:.1f}")
+    print(f"[ipddp_mpcc_rc] objective     = {sol.final_objective:+.4f}")
+    print(
+        f"[ipddp_mpcc_rc] primal_inf    = {sol.final_primal_infeasibility:+.3e}"
+    )
+    print(f"[ipddp_mpcc_rc] lap_progress  = {result.lap_progress:.3f} m")
+    contour = result.contour_errors
+    print(
+        f"[ipddp_mpcc_rc] |contour|: max={np.max(np.abs(contour)):.3f}, "
+        f"rms={float(np.sqrt(np.mean(contour ** 2))):.3f}"
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/python_portfolio_lib.py b/examples/python_portfolio_lib.py
index 2b5cbc1..781dbea 100644
--- a/examples/python_portfolio_lib.py
+++ b/examples/python_portfolio_lib.py
@@ -243,166 +243,6 @@ def _load_track_csv(
     )
 
 
-class MpccBicycle(pycddp.DynamicalSystem):
-    def __init__(self, timestep: float, wheelbase: float = 0.33) -> None:
-        super().__init__(4, 3, timestep, "euler")
-        self.wheelbase = float(wheelbase)
-
-    def get_continuous_dynamics(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        time: float = 0.0,
-    ) -> np.ndarray:
-        speed, steering_angle, progress_rate = np.asarray(control, dtype=float)
-        heading = float(state[2])
-        beta = float(np.arctan(0.5 * np.tan(steering_angle)))
-        return np.array(
-            [
-                speed * np.cos(heading + beta),
-                speed * np.sin(heading + beta),
-                2.0 * speed * np.sin(beta) / self.wheelbase,
-                progress_rate,
-            ],
-            dtype=float,
-        )
-
-    def get_state_jacobian(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        time: float = 0.0,
-    ) -> np.ndarray:
-        speed = float(control[0])
-        heading = float(state[2])
-        steering_angle = float(control[1])
-        beta = float(np.arctan(0.5 * np.tan(steering_angle)))
-        return np.array(
-            [
-                [0.0, 0.0, -speed * np.sin(heading + beta), 0.0],
-                [0.0, 0.0, speed * np.cos(heading + beta), 0.0],
-                [0.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, 0.0, 0.0],
-            ],
-            dtype=float,
-        )
-
-    def get_control_jacobian(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        time: float = 0.0,
-    ) -> np.ndarray:
-        heading = float(state[2])
-        speed = float(control[0])
-        steering_angle = float(control[1])
-        tan_delta = np.tan(steering_angle)
-        sec_sq = 1.0 / (np.cos(steering_angle) ** 2)
-        beta = float(np.arctan(0.5 * tan_delta))
-        beta_prime = 0.5 * sec_sq / (1.0 + 0.25 * tan_delta * tan_delta)
-        return np.array(
-            [
-                [np.cos(heading + beta), -speed * np.sin(heading + beta) * beta_prime, 0.0],
-                [np.sin(heading + beta), speed * np.cos(heading + beta) * beta_prime, 0.0],
-                [
-                    2.0 * np.sin(beta) / self.wheelbase,
-                    2.0 * speed * np.cos(beta) * beta_prime / self.wheelbase,
-                    0.0,
-                ],
-                [0.0, 0.0, 1.0],
-            ],
-            dtype=float,
-        )
-
-    def get_state_hessian(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        time: float = 0.0,
-    ) -> list[np.ndarray]:
-        return [np.zeros((4, 4), dtype=float) for _ in range(4)]
-
-    def get_control_hessian(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        time: float = 0.0,
-    ) -> list[np.ndarray]:
-        return [np.zeros((3, 3), dtype=float) for _ in range(4)]
-
-    def get_cross_hessian(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        time: float = 0.0,
-    ) -> list[np.ndarray]:
-        return [np.zeros((3, 4), dtype=float) for _ in range(4)]
-
-
-class ContouringObjective(pycddp.NonlinearObjective):
-    def __init__(self, timestep: float, track: TrackData) -> None:
-        super().__init__(timestep)
-        self.dt = float(timestep)
-        self.track = track
-        self.w_contour = 220.0
-        self.w_lag = 28.0
-        self.w_heading = 8.0
-        self.w_progress = 4.5
-        self.w_sync = 10.0
-        self.w_steer = 0.25
-        self.w_boundary = 320.0
-        self.w_terminal = 6.0
-
-    def tracking_terms(
-        self,
-        state: np.ndarray,
-    ) -> tuple[float, float, float, TrackReference]:
-        ref = self.track.interpolate(float(state[3]))
-        delta_position = np.asarray(state[:2], dtype=float) - np.array([ref.x, ref.y])
-        contour_error = float(ref.normal @ delta_position)
-        lag_error = float(ref.tangent @ delta_position)
-        heading_error = _wrap_angle(float(state[2]) - ref.heading)
-        return contour_error, lag_error, heading_error, ref
-
-    def running_cost(
-        self,
-        state: np.ndarray,
-        control: np.ndarray,
-        index: int,
-    ) -> float:
-        contour_error, lag_error, heading_error, ref = self.tracking_terms(state)
-        speed, steering_angle, progress_rate = np.asarray(control, dtype=float)
-        boundary_error = max(0.0, abs(contour_error) - 0.72 * self.track.width)
-        progress_error = progress_rate - ref.v_ref
-        sync_error = speed - progress_rate
-        return float(
-            self.dt
-            * (
-                self.w_contour * contour_error**2
-                + self.w_lag * lag_error**2
-                + self.w_heading * heading_error**2
-                + self.w_progress * progress_error**2
-                + self.w_sync * sync_error**2
-                + self.w_steer * steering_angle**2
-                + self.w_boundary * boundary_error**2
-                - 1.6 * progress_rate
-            )
-        )
-
-    def terminal_cost(self, final_state: np.ndarray) -> float:
-        contour_error, lag_error, heading_error, _ = self.tracking_terms(final_state)
-        boundary_error = max(0.0, abs(contour_error) - 0.75 * self.track.width)
-        return float(
-            self.w_terminal
-            * (
-                220.0 * contour_error**2
-                + 42.0 * lag_error**2
-                + 18.0 * heading_error**2
-                + 180.0 * boundary_error**2
-            )
-        )
-
-
 def _default_options(max_iterations: int = 120) -> pycddp.CDDPOptions:
     opts = pycddp.CDDPOptions()
     opts.max_iterations = max_iterations
@@ -633,169 +473,61 @@ def solve_unicycle_demo() -> DemoResult:
     )
 
 
-def _reference_seed_controls(track: TrackData, start_progress: float, horizon: int, dt: float, wheelbase: float) -> list[np.ndarray]:
-    controls: list[np.ndarray] = []
-    progress = float(start_progress)
-    for _ in range(horizon):
-        ref = track.interpolate(progress)
-        steering = float(np.clip(np.arctan(wheelbase * ref.curvature), -0.42, 0.42))
-        speed = float(np.clip(ref.v_ref, 1.0, 2.2))
-        progress_rate = float(np.clip(speed + 0.12, 1.0, 2.4))
-        controls.append(np.array([speed, steering, progress_rate], dtype=float))
-        progress = progress + dt * progress_rate
-    return controls
-
-
-def _rollout_mpcc_seed(
-    model: MpccBicycle,
-    track: TrackData,
-    initial_state: np.ndarray,
-    controls: list[np.ndarray],
-) -> tuple[list[np.ndarray], list[np.ndarray]]:
-    state = np.asarray(initial_state, dtype=float).copy()
-    states = [state.copy()]
-    rolled_controls: list[np.ndarray] = []
-    for control in controls:
-        control_array = np.asarray(control, dtype=float).copy()
-        rolled_controls.append(control_array)
-        state = np.asarray(model.get_discrete_dynamics(state, control_array), dtype=float)
-        states.append(state.copy())
-    return states, rolled_controls
-
+def solve_mpcc_demo(
+    simulation_steps: int | None = None,
+    horizon: int = 20,
+) -> DemoResult:
+    """Closed-loop IPDDP MPCC on the bundled RC racing track.
+
+    This is a thin wrapper around the IPDDP runner in ``ipddp_mpcc_rc.py``.
+    The kinematic bicycle + 11-residual AIRCoM-style cost lives in that
+    module; this function just drives it to produce a ``DemoResult`` that
+    the existing portfolio animator (``_animate_mpcc``) consumes.
+
+    Parameters
+    ----------
+    simulation_steps:
+        Number of closed-loop MPC ticks. If ``None``, runs until the
+        executed ``theta`` crosses ``track.length`` (one full lap).
+    horizon:
+        MPC prediction horizon in stages.
+    """
+    # Lazy import to avoid a circular module load: ``ipddp_mpcc_rc`` imports
+    # ``TrackData`` / ``_load_track_csv`` / ``_wrap_angle`` from this module,
+    # so it must not be a top-level import here.
+    if __package__:
+        from .ipddp_mpcc_rc import (
+            IpddpRcMpccConfig,
+            history_to_demo_result,
+            run_ipddp_mpc,
+        )
+    else:
+        from ipddp_mpcc_rc import (
+            IpddpRcMpccConfig,
+            history_to_demo_result,
+            run_ipddp_mpc,
+        )
 
-def solve_mpcc_demo(simulation_steps: int | None = None, horizon: int = 16) -> DemoResult:
-    dt = 0.10
-    wheelbase = 0.20
     track = _load_track_csv(_portfolio_track_path())
-    if simulation_steps is None:
-        nominal_progress = max(float(np.mean(track.v_ref)) + 0.15, 1.55)
-        simulation_steps = int(np.ceil(1.08 * track.length / (dt * nominal_progress)))
-    model = MpccBicycle(dt, wheelbase=wheelbase)
-    objective = ContouringObjective(dt, track)
-
-    opts = _default_options(max_iterations=12)
-    opts.use_ilqr = True
-    opts.enable_parallel = False
-    opts.return_iteration_info = False
-    opts.tolerance = 2e-3
-    opts.acceptable_tolerance = 6e-3
-    opts.regularization.initial_value = 1e-5
-    opts.line_search.max_iterations = 9
+    cfg = IpddpRcMpccConfig(horizon=int(horizon), max_iterations=100)
 
-    start_ref = track.interpolate(0.0)
-    initial_state = np.array(
-        [
-            start_ref.x + 0.28 * track.width * start_ref.normal[0],
-            start_ref.y + 0.28 * track.width * start_ref.normal[1],
-            start_ref.heading - 0.08,
-            0.0,
-        ],
-        dtype=float,
-    )
-
-    solver = pycddp.CDDP(
-        initial_state,
-        track.reference_state(0.0),
-        horizon,
-        dt,
-        opts,
-    )
-    solver.set_dynamical_system(model)
-    solver.set_objective(objective)
-    solver.add_constraint(
-        "control_limits",
-        pycddp.ControlConstraint(
-            np.array([0.8, -0.55, 0.8], dtype=float),
-            np.array([2.4, 0.55, 2.6], dtype=float),
-        ),
-    )
-
-    seed_controls = _reference_seed_controls(track, initial_state[3], horizon, dt, wheelbase)
-    seed_states, seed_controls = _rollout_mpcc_seed(model, track, initial_state, seed_controls)
-
-    state = initial_state.copy()
-    executed_states = [state.copy()]
-    contour_errors: list[float] = []
-    lag_errors: list[float] = []
-    heading_errors: list[float] = []
-    solve_times_ms: list[float] = []
-    executed_controls: list[np.ndarray] = []
-    predicted_paths: list[np.ndarray] = []
-    reference_windows: list[np.ndarray] = []
-    last_solution: pycddp.CDDPSolution | None = None
-
-    for _ in range(simulation_steps):
-        contour_error, lag_error, heading_error, _ = objective.tracking_terms(state)
-        contour_errors.append(contour_error)
-        lag_errors.append(lag_error)
-        heading_errors.append(heading_error)
-
-        solver.set_initial_state(state)
-        solver.set_reference_state(track.reference_state(state[3]))
-        solver.set_initial_trajectory(seed_states, seed_controls)
-        solution = solver.solve(pycddp.SolverType.CLDDP)
-        _ensure_solution_has_trajectory(solution, "MPCC portfolio solve")
-        last_solution = solution
-
-        predicted_states, predicted_controls, _ = _solution_arrays(solution)
-        if predicted_controls.size == 0:
-            raise RuntimeError("MPCC demo produced an empty control trajectory.")
-
-        predicted_paths.append(predicted_states[:, :2].copy())
-        reference_windows.append(
-            np.vstack([track.reference_state(progress)[0:2] for progress in predicted_states[:, 3]])
-        )
-        solve_times_ms.append(float(solution.solve_time_ms))
-
-        control = predicted_controls[0].copy()
-        executed_controls.append(control)
-        state = np.asarray(model.get_discrete_dynamics(state, control), dtype=float)
-        state[2] = _wrap_angle(state[2])
-        executed_states.append(state.copy())
-
-        shifted_controls = [u.copy() for u in predicted_controls[1:]]
-        if shifted_controls:
-            shifted_controls.append(shifted_controls[-1].copy())
-        else:
-            shifted_controls = _reference_seed_controls(track, state[3], horizon, dt, wheelbase)
-        if len(shifted_controls) < horizon:
-            fallback = _reference_seed_controls(track, state[3], horizon - len(shifted_controls), dt, wheelbase)
-            shifted_controls.extend(fallback)
-        seed_states, seed_controls = _rollout_mpcc_seed(model, track, state, shifted_controls[:horizon])
-
-    final_contour, final_lag, final_heading, final_ref = objective.tracking_terms(executed_states[-1])
-    contour_errors.append(final_contour)
-    lag_errors.append(final_lag)
-    heading_errors.append(final_heading)
-
-    if last_solution is None:
-        raise RuntimeError("MPCC demo did not produce any MPC solves.")
-
-    closed_loop_states = np.vstack(executed_states)
-    closed_loop_controls = np.vstack(executed_controls)
-    time_points = np.arange(len(executed_states), dtype=float) * dt
-
-    return _make_result(
-        slug="mpcc_racing_line",
-        title="MPCC Racing Line",
-        target_state=np.array([final_ref.x, final_ref.y, final_ref.heading, final_ref.s], dtype=float),
-        solution=last_solution,
-        states=closed_loop_states,
-        controls=closed_loop_controls,
-        time_points=time_points,
-        solver_name=f"{last_solution.solver_name} MPC",
-        timestep=dt,
-        track=track,
-        track_width=track.width,
-        contour_errors=np.asarray(contour_errors, dtype=float),
-        lag_errors=np.asarray(lag_errors, dtype=float),
-        heading_errors=np.asarray(heading_errors, dtype=float),
-        solve_times_ms=np.asarray(solve_times_ms, dtype=float),
-        predicted_paths=predicted_paths,
-        reference_windows=reference_windows,
-        final_error_override=float(np.hypot(final_contour, final_lag)),
-        subtitle="Full-lap receding-horizon contouring control on a closed circuit",
+    if simulation_steps is None:
+        stop_at = float(track.length)
+        max_steps = 500
+    else:
+        if int(simulation_steps) < 1:
+            raise ValueError("simulation_steps must be >= 1")
+        stop_at = None
+        max_steps = int(simulation_steps)
+
+    history = run_ipddp_mpc(
+        cfg,
+        track,
+        simulation_steps=max_steps,
+        stop_at_progress=stop_at,
+        show_progress=False,
     )
+    return history_to_demo_result(history, track, cfg)
 
 
 DEMO_BUILDERS: dict[str, Callable[[], DemoResult]] = {
@@ -1267,12 +999,19 @@ def _animate_mpcc(
     closed_centerline = np.vstack([np.column_stack((track.x, track.y)), [track.x[0], track.y[0]]])
     closed_inner = np.vstack([track.inner_boundary, track.inner_boundary[0]])
     closed_outer = np.vstack([track.outer_boundary, track.outer_boundary[0]])
-    ax_map.plot(closed_outer[:, 0], closed_outer[:, 1], color=_GRID, linewidth=1.3)
-    ax_map.plot(closed_inner[:, 0], closed_inner[:, 1], color=_GRID, linewidth=1.3)
-    ax_map.plot(closed_centerline[:, 0], closed_centerline[:, 1], color=_MUTED, linewidth=1.1, linestyle="--")
+    ax_map.plot(closed_outer[:, 0], closed_outer[:, 1], color=_GRID, linewidth=1.3, zorder=1)
+    ax_map.plot(closed_inner[:, 0], closed_inner[:, 1], color=_GRID, linewidth=1.3, zorder=1)
+    ax_map.plot(
+        closed_centerline[:, 0], closed_centerline[:, 1],
+        color=_MUTED, linewidth=1.1, linestyle="--", zorder=1,
+    )
 
     start_ref = track.interpolate(0.0)
-    ax_map.scatter([start_ref.x], [start_ref.y], color=_SUCCESS, s=54, zorder=5)
+    ax_map.scatter(
+        [start_ref.x], [start_ref.y],
+        color=_SUCCESS, s=64, zorder=5,
+        edgecolors="white", linewidths=1.2,
+    )
     ax_map.annotate(
         "start",
         (start_ref.x, start_ref.y),
@@ -1280,22 +1019,47 @@ def _animate_mpcc(
         textcoords="offset points",
         color=_SUCCESS,
         fontsize=9,
+        fontweight="bold",
     )
 
-    reference_preview, = ax_map.plot([], [], color=_SECONDARY, linewidth=2.0, alpha=0.9)
-    prediction_preview, = ax_map.plot([], [], color=_TERTIARY, linewidth=2.0, alpha=0.9)
-    driven_path, = ax_map.plot([], [], color=_ACCENT, linewidth=2.8)
-    vehicle_marker, = ax_map.plot([], [], marker="o", color=_ACCENT, markersize=8)
+    reference_preview, = ax_map.plot(
+        [], [], color=_SECONDARY, linewidth=2.4, alpha=0.95,
+        zorder=3, label="reference",
+    )
+    prediction_preview, = ax_map.plot(
+        [], [], color=_TERTIARY, linewidth=2.4, alpha=0.95,
+        zorder=3, label="prediction",
+    )
+    driven_path, = ax_map.plot(
+        [], [], color=_ACCENT, linewidth=3.0,
+        solid_capstyle="round", zorder=4, label="driven",
+    )
+    vehicle_marker, = ax_map.plot(
+        [], [], marker="o", color=_ACCENT, markersize=10,
+        markeredgecolor="white", markeredgewidth=1.4, zorder=6,
+    )
     heading_arrow = FancyArrowPatch(
         (0.0, 0.0),
         (0.0, 0.0),
         arrowstyle="-|>",
-        mutation_scale=14,
+        mutation_scale=16,
         color=_ACCENT,
-        linewidth=2.2,
+        linewidth=2.4,
+        zorder=6,
     )
     ax_map.add_patch(heading_arrow)
 
+    map_legend = ax_map.legend(
+        loc="upper right",
+        frameon=True,
+        facecolor="#fff7ea",
+        edgecolor=_GRID,
+        fontsize=8,
+        framealpha=0.92,
+    )
+    for text in map_legend.get_texts():
+        text.set_color(_TEXT)
+
     metric_times = time_points
     metric_end = metric_times[-1] if len(metric_times) > 1 else 1.0
     ax_error.set_xlim(metric_times[0], metric_end)
@@ -1319,8 +1083,11 @@ def _animate_mpcc(
         text.set_color(_TEXT)
 
     control_times = time_points[:-1]
+    control_start = float(time_points[0]) if len(time_points) > 0 else 0.0
     control_end = control_times[-1] if len(control_times) > 0 else 1.0
-    ax_control.set_xlim(time_points[0], control_end)
+    if np.isclose(control_start, control_end):
+        control_end = control_start + 1.0
+    ax_control.set_xlim(control_start, control_end)
     combined_controls = np.concatenate([controls[:, 0], controls[:, 1], solve_times_ms / 40.0])
     control_min, control_max = _metric_bounds(combined_controls, minimum=0.8)
     ax_control.set_ylim(control_min, control_max)
@@ -1352,14 +1119,21 @@ def _animate_mpcc(
 
     status_text = ax_map.text(
         0.03,
-        0.96,
+        0.985,
         "",
         transform=ax_map.transAxes,
         ha="left",
         va="top",
         fontsize=10,
         color=_TEXT,
-        bbox={"boxstyle": "round,pad=0.35", "facecolor": "#fff7ea", "edgecolor": _GRID},
+        zorder=6,
+        bbox={
+            "boxstyle": "round,pad=0.45",
+            "facecolor": "#fff7ea",
+            "edgecolor": _GRID,
+            "linewidth": 1.0,
+            "alpha": 0.92,
+        },
     )
 
     def update(frame_index: int) -> tuple[Any, ...]:
diff --git a/include/cddp-cpp/cddp.hpp b/include/cddp-cpp/cddp.hpp
index e1b2cf5..ac96b92 100644
--- a/include/cddp-cpp/cddp.hpp
+++ b/include/cddp-cpp/cddp.hpp
@@ -25,6 +25,7 @@
 #include "cddp_core/dynamical_system.hpp"
 #include "cddp_core/objective.hpp"
 #include "cddp_core/constraint.hpp"
+#include "cddp_core/terminal_constraint.hpp"
 #include "cddp_core/barrier.hpp"
 #include "cddp_core/options.hpp"
 #include "cddp_core/cddp_core.hpp"
diff --git a/include/cddp-cpp/cddp_core/cddp_core.hpp b/include/cddp-cpp/cddp_core/cddp_core.hpp
index 4ae0210..e162eeb 100644
--- a/include/cddp-cpp/cddp_core/cddp_core.hpp
+++ b/include/cddp-cpp/cddp_core/cddp_core.hpp
@@ -119,6 +119,9 @@ struct ForwardPassResult {
   bool success = false;
 
   double constraint_violation = 0.0;
+  double theta = 0.0;      // Filter violation metric
+  double inf_pr = 0.0;     // Primal infeasibility
+  double inf_comp = 0.0;   // Complementary infeasibility
 
   // Optional: Only relevant for certain solver strategies during their forward
   // pass
diff --git a/include/cddp-cpp/cddp_core/ipddp_solver.hpp b/include/cddp-cpp/cddp_core/ipddp_solver.hpp
index de3341a..497be19 100644
--- a/include/cddp-cpp/cddp_core/ipddp_solver.hpp
+++ b/include/cddp-cpp/cddp_core/ipddp_solver.hpp
@@ -25,12 +25,16 @@
 
 namespace cddp
 {
+    class IPDDPSolverTestAccess;
 
     /**
      * @brief Interior Point Differential Dynamic Programming (IPDDP) solver.
      *
      * Inherits from CDDPSolverBase and overrides virtual hooks
      * for primal-dual interior point method with logarithmic barrier.
+     * Supports path constraints, terminal equality constraints, and
+     * terminal inequality constraints via a single-shooting formulation
+     * with explicit costate variables.
      */
     class IPDDPSolver : public CDDPSolverBase
     {
@@ -44,11 +48,15 @@ namespace cddp
         // === CDDPSolverBase virtual hook overrides ===
         void preIterationSetup(CDDP &context) override;
         bool backwardPass(CDDP &context) override;
+        bool checkEarlyConvergence(CDDP &context, int iter,
+                                   std::string &reason) override;
         ForwardPassResult forwardPass(CDDP &context, double alpha) override;
         void applyForwardPassResult(CDDP &context, const ForwardPassResult &result) override;
         bool checkConvergence(CDDP &context, double dJ, double dL, int iter,
                               std::string &reason) override;
         void postIterationUpdate(CDDP &context, bool forward_pass_success) override;
+        bool handleForwardPassFailure(CDDP &context,
+                                      std::string &termination_reason) override;
         void recordIterationHistory(const CDDP &context) override;
         void populateSolverSpecificSolution(CDDPSolution &solution,
                                             const CDDP &context) override;
@@ -56,6 +64,8 @@ namespace cddp
         void printSolutionSummary(const CDDPSolution &solution) const override;
 
     private:
+        friend class IPDDPSolverTestAccess;
+
         // Constraint derivatives
         std::map<std::string, std::vector<Eigen::MatrixXd>> G_x_; ///< State gradients
         std::map<std::string, std::vector<Eigen::MatrixXd>> G_u_; ///< Control gradients
@@ -67,6 +77,7 @@ namespace cddp
             G_ux_; ///< Constraint mixed hessians
 
         // Interior point variables
+        std::map<std::string, std::vector<Eigen::VectorXd>> G_raw_; ///< Raw constraint values
         std::map<std::string, std::vector<Eigen::VectorXd>> G_; ///< Constraint values
         std::map<std::string, std::vector<Eigen::VectorXd>> Y_; ///< Dual variables
         std::map<std::string, std::vector<Eigen::VectorXd>> S_; ///< Slack variables
@@ -77,42 +88,41 @@ namespace cddp
         std::map<std::string, std::vector<Eigen::VectorXd>> k_s_; ///< Slack feedforward
         std::map<std::string, std::vector<Eigen::MatrixXd>> K_s_; ///< Slack feedback
 
+        // Single-shooting costate variables and gains
+        std::vector<Eigen::VectorXd>
+            Lambda_; ///< Costate variables (Lagrange multipliers for dynamics)
+        std::vector<Eigen::VectorXd>
+            k_lambda_; ///< Feedforward gains for costate variables
+        std::vector<Eigen::MatrixXd>
+            K_lambda_; ///< Feedback gains for costate variables
+
+        // Terminal equality constraint variables
+        Eigen::VectorXd Lambda_T_eq_;  ///< Terminal equality multipliers
+        Eigen::VectorXd dLambda_T_eq_; ///< Terminal equality multiplier direction
+
+        // Terminal inequality constraint variables
+        std::map<std::string, Eigen::VectorXd> G_T_; ///< Terminal inequality values
+        std::map<std::string, Eigen::VectorXd> Y_T_; ///< Terminal inequality duals
+        std::map<std::string, Eigen::VectorXd> S_T_; ///< Terminal inequality slacks
+        std::map<std::string, Eigen::VectorXd> dY_T_; ///< Terminal inequality dual directions
+        std::map<std::string, Eigen::VectorXd> dS_T_; ///< Terminal inequality slack directions
+
+        // Search directions
+        std::vector<Eigen::VectorXd> dX_; ///< State search directions
+        std::vector<Eigen::VectorXd> dU_; ///< Control search directions
+        std::map<std::string, std::vector<Eigen::VectorXd>>
+            dY_; ///< Dual search directions
+        std::map<std::string, std::vector<Eigen::VectorXd>>
+            dS_; ///< Slack search directions
+
         // Barrier method parameters
         double mu_;                       ///< Barrier parameter
         std::vector<FilterPoint> filter_; ///< Filter for line search
-
-        // Pre-allocated workspace for performance optimization
-        struct Workspace {
-            // Backward pass workspace
-            std::vector<Eigen::MatrixXd> A_matrices;
-            std::vector<Eigen::MatrixXd> B_matrices;
-            std::vector<Eigen::MatrixXd> Q_xx_matrices;
-            std::vector<Eigen::MatrixXd> Q_ux_matrices;
-            std::vector<Eigen::MatrixXd> Q_uu_matrices;
-            std::vector<Eigen::VectorXd> Q_x_vectors;
-            std::vector<Eigen::VectorXd> Q_u_vectors;
-
-            // LDLT solver cache
-            std::vector<Eigen::LDLT<Eigen::MatrixXd>> ldlt_solvers;
-            std::vector<bool> ldlt_valid;
-
-            // Constraint workspace
-            Eigen::VectorXd y_combined;
-            Eigen::VectorXd s_combined;
-            Eigen::VectorXd g_combined;
-            Eigen::MatrixXd Q_yu_combined;
-            Eigen::MatrixXd Q_yx_combined;
-            Eigen::MatrixXd YSinv;
-            Eigen::MatrixXd bigRHS;
-
-            // Forward pass workspace
-            std::vector<Eigen::VectorXd> delta_x_vectors;
-
-            bool initialized = false;
-        } workspace_;
+        double phi_ = 0.0;               ///< Current filter merit value
+        double theta_ = 0.0;             ///< Current filter violation metric
+        double filter_theta_ = 0.0;      ///< Current unfloored filter violation metric
 
         // === Private helper methods ===
-        void precomputeDynamicsDerivatives(CDDP &context);
         void precomputeConstraintGradients(CDDP &context);
         void evaluateTrajectory(CDDP &context);
         void evaluateTrajectoryWarmStart(CDDP &context);
@@ -126,6 +136,40 @@ namespace cddp
         double computeMaxConstraintViolation(const CDDP &context) const;
         double computeScaledDualInfeasibility(const CDDP &context) const;
 
+        // Filter and merit function methods
+        bool acceptFilterEntry(double merit_function, double constraint_violation);
+        bool isFilterAcceptable(double merit_function,
+                                double constraint_violation) const;
+
+        double computeTheta(
+            const CDDPOptions &options,
+            const std::map<std::string, std::vector<Eigen::VectorXd>> &constraints,
+            const std::map<std::string, std::vector<Eigen::VectorXd>> &slacks,
+            const std::map<std::string, Eigen::VectorXd> *terminal_constraints = nullptr,
+            const std::map<std::string, Eigen::VectorXd> *terminal_slacks = nullptr,
+            const Eigen::VectorXd *terminal_equality_residual = nullptr) const;
+
+        double computeBarrierMerit(
+            const CDDP &context,
+            const std::map<std::string, std::vector<Eigen::VectorXd>> &slacks,
+            double cost,
+            const std::map<std::string, Eigen::VectorXd> *terminal_slacks = nullptr,
+            const Eigen::VectorXd *terminal_equality_multipliers = nullptr,
+            const Eigen::VectorXd *terminal_equality_residual = nullptr) const;
+
+        std::pair<double, double> computePrimalAndComplementarity(
+            const CDDP &context,
+            const std::map<std::string, std::vector<Eigen::VectorXd>> &constraints,
+            const std::map<std::string, std::vector<Eigen::VectorXd>> &slacks,
+            const std::map<std::string, std::vector<Eigen::VectorXd>> &duals,
+            double mu,
+            const std::map<std::string, Eigen::VectorXd> *terminal_constraints = nullptr,
+            const std::map<std::string, Eigen::VectorXd> *terminal_slacks = nullptr,
+            const std::map<std::string, Eigen::VectorXd> *terminal_duals = nullptr,
+            const Eigen::VectorXd *terminal_equality_residual = nullptr) const;
+
+        std::pair<double, double> computeMaxStepSizes(const CDDP &context) const;
+
         // Legacy print helper (used by printIteration override)
         void printIterationLegacy(int iter, double objective, double inf_pr,
                                   double inf_du, double inf_comp, double mu,
diff --git a/include/cddp-cpp/cddp_core/options.hpp b/include/cddp-cpp/cddp_core/options.hpp
index 850fd6f..a12bfbb 100644
--- a/include/cddp-cpp/cddp_core/options.hpp
+++ b/include/cddp-cpp/cddp_core/options.hpp
@@ -143,9 +143,46 @@ namespace cddp
     };
 
     /**
-     * @brief Options for the IPDDP solver.
+     * @brief Comprehensive options for the IPDDP (Interior-Point DDP) solver.
      */
-    struct IPDDPAlgorithmOptions : InteriorPointOptions {};
+    struct IPDDPAlgorithmOptions
+    {
+        double dual_var_init_scale = 1e-1;  ///< Initial scale for dual variables.
+        double slack_var_init_scale = 1e-2; ///< Initial scale for slack variables.
+
+        double barrier_tol_mult =
+            0.1; ///< Barrier-scaled inner tolerance multiplier.
+        double barrier_update_dual_weight =
+            0.01; ///< Down-weight inf_du for IPOPT-style barrier updates.
+        double mu_kappa_epsilon =
+            10.0; ///< IPOPT-style KKT trigger multiplier for non-adaptive updates.
+        bool check_state_stationarity =
+            false; ///< Include state-stationarity in inf_du when true.
+        std::string theta_norm =
+            "l1"; ///< Constraint violation norm used by the filter ("l1" or "l2").
+        int max_filter_size =
+            5; ///< Maximum number of active non-dominated filter entries.
+        double theta_0_floor =
+            1.0; ///< Minimum theta_0 value used for filter initialization.
+
+        bool warmstart_repair =
+            false; ///< Repair warm-started slack/dual variables into the strict interior.
+        double warmstart_s_min =
+            1e-4; ///< Minimum slack value for warm-start repair.
+        double warmstart_y_min =
+            1e-4; ///< Minimum dual value for warm-start repair.
+        double warmstart_interior_factor =
+            1.1; ///< Multiplicative safety margin when warm-start values are too close to the boundary.
+        double warmstart_reset_x0_threshold =
+            -1.0; ///< Reset warm-start arrays when |x0 - X[0]| exceeds this threshold (<=0 disables).
+
+        double jacobian_regularization_value =
+            1e-8; ///< Base reduced-system regularization floor for terminal equality LQR.
+        double jacobian_regularization_exponent =
+            0.25; ///< Exponent for mu-dependent reduced-system regularization.
+
+        SolverSpecificBarrierOptions barrier; ///< Barrier method parameters for IPDDP.
+    };
 
     /**
      * @brief Options for the MSIPDDP solver (interior-point + multi-shooting).
diff --git a/include/cddp-cpp/cddp_core/terminal_constraint.hpp b/include/cddp-cpp/cddp_core/terminal_constraint.hpp
index c27c814..ee83677 100644
--- a/include/cddp-cpp/cddp_core/terminal_constraint.hpp
+++ b/include/cddp-cpp/cddp_core/terminal_constraint.hpp
@@ -33,7 +33,8 @@ namespace cddp
 
         Eigen::MatrixXd getControlJacobian(
             const Eigen::VectorXd &/*state*/,
-            const Eigen::VectorXd &control // control.size() gives control_dim
+            const Eigen::VectorXd &control, // control.size() gives control_dim
+            int /*index*/ = 0
         ) const override 
         {
             return Eigen::MatrixXd::Zero(getDualDim(), control.size());
@@ -41,7 +42,8 @@ namespace cddp
 
         std::vector<Eigen::MatrixXd> getControlHessian(
             const Eigen::VectorXd &/*state*/,
-            const Eigen::VectorXd &control
+            const Eigen::VectorXd &control,
+            int /*index*/ = 0
         ) const override
         {
             return {};
@@ -49,7 +51,8 @@ namespace cddp
 
         std::vector<Eigen::MatrixXd> getCrossHessian(
             const Eigen::VectorXd &state,
-            const Eigen::VectorXd &control
+            const Eigen::VectorXd &control,
+            int /*index*/ = 0
         ) const override
         {
             return {};
@@ -70,7 +73,8 @@ namespace cddp
         }
 
         Eigen::VectorXd evaluate(const Eigen::VectorXd &final_state,
-                                 const Eigen::VectorXd &/*control_is_ignored*/) const override
+                                 const Eigen::VectorXd &/*control_is_ignored*/,
+                                 int /*index*/ = 0) const override
         {
             if (final_state.size() != target_state_.size()) {
                 throw std::invalid_argument("TerminalEqualityConstraint: final_state dimension mismatch.");
@@ -95,7 +99,8 @@ namespace cddp
         }
 
         Eigen::MatrixXd getStateJacobian(const Eigen::VectorXd &final_state,
-                                         const Eigen::VectorXd &/*control_is_ignored*/) const override
+                                         const Eigen::VectorXd &/*control_is_ignored*/,
+                                         int /*index*/ = 0) const override
         {
             if (final_state.size() != target_state_.size()) {
                 throw std::invalid_argument("TerminalEqualityConstraint: final_state dimension mismatch for Jacobian.");
@@ -109,7 +114,8 @@ namespace cddp
         }
         
         double computeViolation(const Eigen::VectorXd &final_state,
-                                const Eigen::VectorXd &control) const override
+                                const Eigen::VectorXd &control,
+                                int /*index*/ = 0) const override
         {
             Eigen::VectorXd g = evaluate(final_state, control);
             return computeViolationFromValue(g);
@@ -130,7 +136,8 @@ namespace cddp
         // Hessians for g(x_N) = x_N - target are zero
         std::vector<Eigen::MatrixXd> getStateHessian(
             const Eigen::VectorXd &state,
-            const Eigen::VectorXd &/*control_is_ignored*/
+            const Eigen::VectorXd &/*control_is_ignored*/,
+            int /*index*/ = 0
         ) const override
         {
             std::vector<Eigen::MatrixXd> Hxx_list;
@@ -171,7 +178,8 @@ namespace cddp
         }
 
         Eigen::VectorXd evaluate(const Eigen::VectorXd &final_state,
-                                 const Eigen::VectorXd &/*control_is_ignored*/) const override
+                                 const Eigen::VectorXd &/*control_is_ignored*/,
+                                 int /*index*/ = 0) const override
         {
             if (final_state.size() != A_N_.cols()) {
                 throw std::invalid_argument("TerminalInequalityConstraint: final_state dimension and A_N columns mismatch.");
@@ -196,7 +204,8 @@ namespace cddp
         }
 
         Eigen::MatrixXd getStateJacobian(const Eigen::VectorXd &final_state,
-                                         const Eigen::VectorXd &/*control_is_ignored*/) const override
+                                         const Eigen::VectorXd &/*control_is_ignored*/,
+                                         int /*index*/ = 0) const override
         {
              if (final_state.size() != A_N_.cols()) {
                 throw std::invalid_argument("TerminalStateInequalityConstraint: final_state dimension for Jacobian and A_N columns mismatch.");
@@ -210,7 +219,8 @@ namespace cddp
         }
 
         double computeViolation(const Eigen::VectorXd &final_state,
-                                const Eigen::VectorXd &control) const override
+                                const Eigen::VectorXd &control,
+                                int /*index*/ = 0) const override
         {
             Eigen::VectorXd g = evaluate(final_state, control);
             return computeViolationFromValue(g);
@@ -231,7 +241,8 @@ namespace cddp
         // Hessians for g(x_N) = A_N * x_N - b_N are zero
         std::vector<Eigen::MatrixXd> getStateHessian(
             const Eigen::VectorXd &state,
-            const Eigen::VectorXd &/*control_is_ignored*/
+            const Eigen::VectorXd &/*control_is_ignored*/,
+            int /*index*/ = 0
         ) const override
         {
             std::vector<Eigen::MatrixXd> Hxx_list;
@@ -251,4 +262,4 @@ namespace cddp
         Eigen::VectorXd b_N_;
     };
 } // namespace cddp
-#endif // CDDP_TERMINAL_CONSTRAINT_HPP
\ No newline at end of file
+#endif // CDDP_TERMINAL_CONSTRAINT_HPP
diff --git a/pyproject.toml b/pyproject.toml
index 47052dc..95fe971 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "pycddp"
-version = "0.5.1"
+version = "0.5.2"
 description = "Python bindings for CDDP trajectory optimization"
 readme = "README.md"
 authors = [{name = "Tomo Sasaki"}]
diff --git a/python/pycddp/_version.py b/python/pycddp/_version.py
index dd9b22c..7225152 100644
--- a/python/pycddp/_version.py
+++ b/python/pycddp/_version.py
@@ -1 +1 @@
-__version__ = "0.5.1"
+__version__ = "0.5.2"
diff --git a/python/tests/test_portfolio.py b/python/tests/test_portfolio.py
index 3e0d366..9471c9e 100644
--- a/python/tests/test_portfolio.py
+++ b/python/tests/test_portfolio.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import importlib
 import sys
 from pathlib import Path
 
@@ -67,3 +68,23 @@ def test_portfolio_animation_rejects_invalid_settings(tmp_path):
 
     with pytest.raises(ValueError, match="frame_step must be >= 1"):
         portfolio.save_animation(result, tmp_path / "bad.gif", frame_step=0)
+
+
+def test_mpcc_demo_supports_package_style_import():
+    package_portfolio = importlib.import_module("examples.python_portfolio_lib")
+
+    result = package_portfolio.solve_mpcc_demo(simulation_steps=1, horizon=12)
+
+    assert result.slug == "mpcc_racing_line"
+    assert result.solver_name == "IPDDP MPC"
+
+
+def test_mpcc_demo_rejects_nonpositive_steps():
+    with pytest.raises(ValueError, match="simulation_steps must be >= 1"):
+        portfolio.solve_mpcc_demo(simulation_steps=0)
+
+    package_mpcc = importlib.import_module("examples.ipddp_mpcc_rc")
+    track = portfolio._load_track_csv(portfolio._portfolio_track_path())
+
+    with pytest.raises(ValueError, match="simulation_steps must be >= 1"):
+        package_mpcc.run_ipddp_mpc(track=track, simulation_steps=0)
diff --git a/src/cddp_core/cddp_context_utils.cpp b/src/cddp_core/cddp_context_utils.cpp
new file mode 100644
index 0000000..147cba6
--- /dev/null
+++ b/src/cddp_core/cddp_context_utils.cpp
@@ -0,0 +1,109 @@
+/*
+ Copyright 2026 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "cddp_context_utils.hpp"
+
+#include <algorithm>
+#include <stdexcept>
+
+namespace cddp::detail {
+
+namespace {
+
+bool hasCompatibleVectorLayout(const std::vector<Eigen::VectorXd> &trajectory,
+                               int expected_size, int vector_dim) {
+  return static_cast<int>(trajectory.size()) == expected_size &&
+         std::all_of(trajectory.begin(), trajectory.end(),
+                     [vector_dim](const Eigen::VectorXd &value) {
+                       return value.size() == vector_dim;
+                     });
+}
+
+} // namespace
+
+std::vector<double> buildLineSearchAlphas(const LineSearchOptions &options) {
+  std::vector<double> alphas;
+  alphas.reserve(static_cast<size_t>(std::max(1, options.max_iterations)));
+
+  double current_alpha = options.initial_step_size;
+  for (int i = 0; i < options.max_iterations; ++i) {
+    alphas.push_back(current_alpha);
+    current_alpha *= options.step_reduction_factor;
+    if (current_alpha < options.min_step_size &&
+        i < options.max_iterations - 1) {
+      alphas.push_back(options.min_step_size);
+      break;
+    }
+  }
+
+  if (alphas.empty()) {
+    alphas.push_back(options.initial_step_size);
+  }
+
+  return alphas;
+}
+
+bool hasCompatibleWarmStartTrajectories(
+    const CDDPOptions &options, const std::vector<Eigen::VectorXd> &states,
+    const std::vector<Eigen::VectorXd> &controls, int horizon, int state_dim,
+    int control_dim) {
+  return options.warm_start &&
+         hasCompatibleVectorLayout(states, horizon + 1, state_dim) &&
+         hasCompatibleVectorLayout(controls, horizon, control_dim);
+}
+
+void ensureTrajectoryShape(std::vector<Eigen::VectorXd> &trajectory,
+                           int expected_size, int vector_dim) {
+  if (hasCompatibleVectorLayout(trajectory, expected_size, vector_dim)) {
+    return;
+  }
+
+  trajectory.assign(static_cast<size_t>(expected_size),
+                    Eigen::VectorXd::Zero(vector_dim));
+}
+
+void addOrReplaceConstraint(
+    std::map<std::string, std::unique_ptr<Constraint>> &constraint_set,
+    std::string constraint_name, std::unique_ptr<Constraint> constraint,
+    int &total_dual_dim) {
+  if (!constraint) {
+    throw std::runtime_error("Cannot add null constraint.");
+  }
+
+  const int dual_dim = constraint->getDualDim();
+  auto existing_constraint = constraint_set.find(constraint_name);
+  if (existing_constraint != constraint_set.end()) {
+    total_dual_dim -= existing_constraint->second->getDualDim();
+  }
+
+  constraint_set[constraint_name] = std::move(constraint);
+  total_dual_dim += dual_dim;
+}
+
+bool removeConstraint(
+    std::map<std::string, std::unique_ptr<Constraint>> &constraint_set,
+    const std::string &constraint_name, int &total_dual_dim) {
+  auto it = constraint_set.find(constraint_name);
+  if (it == constraint_set.end()) {
+    return false;
+  }
+
+  total_dual_dim -= it->second->getDualDim();
+  constraint_set.erase(it);
+  return true;
+}
+
+} // namespace cddp::detail
diff --git a/src/cddp_core/cddp_context_utils.hpp b/src/cddp_core/cddp_context_utils.hpp
new file mode 100644
index 0000000..d4313ed
--- /dev/null
+++ b/src/cddp_core/cddp_context_utils.hpp
@@ -0,0 +1,53 @@
+/*
+ Copyright 2026 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef CDDP_CONTEXT_UTILS_HPP
+#define CDDP_CONTEXT_UTILS_HPP
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <Eigen/Dense>
+
+#include "cddp_core/constraint.hpp"
+#include "cddp_core/options.hpp"
+
+namespace cddp::detail {
+
+std::vector<double> buildLineSearchAlphas(const LineSearchOptions &options);
+
+bool hasCompatibleWarmStartTrajectories(
+    const CDDPOptions &options, const std::vector<Eigen::VectorXd> &states,
+    const std::vector<Eigen::VectorXd> &controls, int horizon, int state_dim,
+    int control_dim);
+
+void ensureTrajectoryShape(std::vector<Eigen::VectorXd> &trajectory,
+                           int expected_size, int vector_dim);
+
+void addOrReplaceConstraint(
+    std::map<std::string, std::unique_ptr<Constraint>> &constraint_set,
+    std::string constraint_name, std::unique_ptr<Constraint> constraint,
+    int &total_dual_dim);
+
+bool removeConstraint(
+    std::map<std::string, std::unique_ptr<Constraint>> &constraint_set,
+    const std::string &constraint_name, int &total_dual_dim);
+
+} // namespace cddp::detail
+
+#endif // CDDP_CONTEXT_UTILS_HPP
diff --git a/src/cddp_core/cddp_core.cpp b/src/cddp_core/cddp_core.cpp
index 05899b1..62ff83a 100644
--- a/src/cddp_core/cddp_core.cpp
+++ b/src/cddp_core/cddp_core.cpp
@@ -14,6 +14,7 @@
  limitations under the License.
 */
 
+#include "cddp_context_utils.hpp"
 #include "cddp_core/cddp_core.hpp"      // For CDDP class declaration
 #include "cddp_core/clddp_solver.hpp"   // For CLDDPSolver
 #include "cddp_core/ipddp_solver.hpp"   // For IPDDPSolver
@@ -24,8 +25,8 @@
 #include <functional>
 #include <iomanip> // For std::setw
 #include <iostream>
-#include <map>
 #include <array>
+#include <map>
 
 namespace cddp {
 
@@ -56,23 +57,7 @@ CDDP::CDDP(const Eigen::VectorXd &initial_state,
           0) { // Check if reference_state is valid before setting
     objective_->setReferenceState(reference_state_);
   }
-  // Basic alpha sequence for line search
-  alphas_.clear();
-  double current_alpha = options_.line_search.initial_step_size;
-  for (int i = 0; i < options_.line_search.max_iterations; ++i) {
-    alphas_.push_back(current_alpha);
-    current_alpha *= options_.line_search.step_reduction_factor;
-    if (current_alpha < options_.line_search.min_step_size &&
-        i < options_.line_search.max_iterations - 1) {
-      alphas_.push_back(
-          options_.line_search.min_step_size); // Ensure min_step_size is tried
-      break;
-    }
-  }
-  if (alphas_
-          .empty()) { // Ensure at least one alpha if max_iterations is 0 or 1
-    alphas_.push_back(options_.line_search.initial_step_size);
-  }
+  alphas_ = detail::buildLineSearchAlphas(options_.line_search);
 }
 // --- Setters ---
 void CDDP::setDynamicalSystem(std::unique_ptr<DynamicalSystem> system) {
@@ -123,21 +108,7 @@ void CDDP::setTimestep(double timestep) { timestep_ = timestep; }
 
 void CDDP::setOptions(const CDDPOptions &options) {
   options_ = options;
-  // Re-initialize alpha sequence if line search options changed
-  alphas_.clear();
-  double current_alpha = options_.line_search.initial_step_size;
-  for (int i = 0; i < options_.line_search.max_iterations; ++i) {
-    alphas_.push_back(current_alpha);
-    current_alpha *= options_.line_search.step_reduction_factor;
-    if (current_alpha < options_.line_search.min_step_size &&
-        i < options_.line_search.max_iterations - 1) {
-      alphas_.push_back(options_.line_search.min_step_size);
-      break;
-    }
-  }
-  if (alphas_.empty()) {
-    alphas_.push_back(options_.line_search.initial_step_size);
-  }
+  alphas_ = detail::buildLineSearchAlphas(options_.line_search);
   alpha_pr_ = options_.line_search.initial_step_size;
 }
 
@@ -184,80 +155,37 @@ int CDDP::getTotalDualDim() const { return total_dual_dim_; }
 
 void CDDP::addPathConstraint(std::string constraint_name,
                              std::unique_ptr<Constraint> constraint) {
-  if (!constraint) {
-    throw std::runtime_error("Cannot add null constraint.");
-  }
-
-  // Get dual dimension BEFORE moving the constraint
-  int dual_dim = constraint->getDualDim();
-  auto existing_constraint = path_constraint_set_.find(constraint_name);
-  if (existing_constraint != path_constraint_set_.end()) {
-    total_dual_dim_ -= existing_constraint->second->getDualDim();
-  }
-
-  path_constraint_set_[constraint_name] = std::move(constraint);
-
-  // Increment total dual dimension
-  total_dual_dim_ += dual_dim;
-
+  detail::addOrReplaceConstraint(path_constraint_set_, std::move(constraint_name),
+                                 std::move(constraint), total_dual_dim_);
   initialized_ = false; // Constraint set changed, need to reinitialize
 }
 
 bool CDDP::removePathConstraint(const std::string &constraint_name) {
-  auto it = path_constraint_set_.find(constraint_name);
-  if (it != path_constraint_set_.end()) {
-    // Decrement total dual dimension
-    total_dual_dim_ -= it->second->getDualDim();
-
-    // Remove the constraint from the set
-    path_constraint_set_.erase(it);
-
-    // Mark as needing reinitialization since constraint set changed
+  const bool removed = detail::removeConstraint(path_constraint_set_,
+                                                constraint_name,
+                                                total_dual_dim_);
+  if (removed) {
     initialized_ = false;
-
-    return true; // Successfully removed
   }
-
-  return false; // Constraint not found
+  return removed;
 }
 
 void CDDP::addTerminalConstraint(std::string constraint_name,
                                  std::unique_ptr<Constraint> constraint) {
-  if (!constraint) {
-    throw std::runtime_error("Cannot add null constraint.");
-  }
-
-  // Get dual dimension BEFORE moving the constraint
-  int dual_dim = constraint->getDualDim();
-  auto existing_constraint = terminal_constraint_set_.find(constraint_name);
-  if (existing_constraint != terminal_constraint_set_.end()) {
-    total_dual_dim_ -= existing_constraint->second->getDualDim();
-  }
-
-  terminal_constraint_set_[constraint_name] = std::move(constraint);
-
-  // Increment total dual dimension
-  total_dual_dim_ += dual_dim;
-
+  detail::addOrReplaceConstraint(terminal_constraint_set_,
+                                 std::move(constraint_name),
+                                 std::move(constraint), total_dual_dim_);
   initialized_ = false; // Constraint set changed, need to reinitialize
 }
 
 bool CDDP::removeTerminalConstraint(const std::string &constraint_name) {
-  auto it = terminal_constraint_set_.find(constraint_name);
-  if (it != terminal_constraint_set_.end()) {
-    // Decrement total dual dimension
-    total_dual_dim_ -= it->second->getDualDim();
-
-    // Remove the constraint from the set
-    terminal_constraint_set_.erase(it);
-
-    // Mark as needing reinitialization since constraint set changed
+  const bool removed = detail::removeConstraint(terminal_constraint_set_,
+                                                constraint_name,
+                                                total_dual_dim_);
+  if (removed) {
     initialized_ = false;
-
-    return true; // Successfully removed
   }
-
-  return false; // Constraint not found
+  return removed;
 }
 
 namespace {
@@ -356,59 +284,15 @@ void CDDP::initializeProblemIfNecessary() {
   int state_dim = system_->getStateDim();
   int control_dim = system_->getControlDim();
 
-  // For warm start: preserve existing trajectories if they have compatible
-  // dimensions
-  bool preserve_trajectories =
-      options_.warm_start && !X_.empty() && !U_.empty() &&
-      static_cast<int>(X_.size()) == horizon_ + 1 &&
-      static_cast<int>(U_.size()) == horizon_ && X_[0].size() == state_dim &&
-      U_[0].size() == control_dim;
-
-  // Initialize state trajectory
-  if (X_.empty() || static_cast<int>(X_.size()) != horizon_ + 1) {
-    if (preserve_trajectories) {
-      // Warm start: resize existing trajectory carefully
-      if (options_.verbose) {
-        std::cout << "CDDP: Warm start detected - preserving existing state "
-                     "trajectory"
-                  << std::endl;
-      }
-      // Keep existing X_ but ensure correct size
-      X_.resize(horizon_ + 1);
-    } else {
-      // Cold start: initialize with zeros
-      X_.clear();
-      X_.resize(horizon_ + 1);
-      for (int k = 0; k <= horizon_; ++k) {
-        X_[k] = Eigen::VectorXd::Zero(state_dim);
-      }
-    }
+  const bool preserve_trajectories = detail::hasCompatibleWarmStartTrajectories(
+      options_, X_, U_, horizon_, state_dim, control_dim);
+  if (!preserve_trajectories) {
+    detail::ensureTrajectoryShape(X_, horizon_ + 1, state_dim);
+    detail::ensureTrajectoryShape(U_, horizon_, control_dim);
   }
 
-  // Ensure initial state is set correctly (always required)
   X_[0] = initial_state_;
 
-  // Initialize control trajectory
-  if (U_.empty() || static_cast<int>(U_.size()) != horizon_) {
-    if (preserve_trajectories) {
-      // Warm start: resize existing trajectory carefully
-      if (options_.verbose) {
-        std::cout << "CDDP: Warm start detected - preserving existing control "
-                     "trajectory"
-                  << std::endl;
-      }
-      // Keep existing U_ but ensure correct size
-      U_.resize(horizon_);
-    } else {
-      // Cold start: initialize with zeros
-      U_.clear();
-      U_.resize(horizon_);
-      for (int k = 0; k < horizon_; ++k) {
-        U_[k] = Eigen::VectorXd::Zero(control_dim);
-      }
-    }
-  }
-
   // Initialize cost and merit function
   cost_ = std::numeric_limits<double>::infinity();
   merit_function_ = std::numeric_limits<double>::infinity();
diff --git a/src/cddp_core/interior_point_utils.cpp b/src/cddp_core/interior_point_utils.cpp
new file mode 100644
index 0000000..4ffa5f9
--- /dev/null
+++ b/src/cddp_core/interior_point_utils.cpp
@@ -0,0 +1,157 @@
+/*
+ Copyright 2026 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "interior_point_utils.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+
+namespace cddp::detail {
+
+void printInteriorPointIteration(int iter, double objective, double inf_pr,
+                                 double inf_du, double inf_comp, double mu,
+                                 double step_norm, double regularization,
+                                 double alpha_du, double alpha_pr) {
+  if (iter == 0) {
+    std::cout << std::setw(4) << "iter"
+              << " " << std::setw(12) << "objective"
+              << " " << std::setw(9) << "inf_pr"
+              << " " << std::setw(9) << "inf_du"
+              << " " << std::setw(9) << "inf_comp"
+              << " " << std::setw(7) << "lg(mu)"
+              << " " << std::setw(9) << "||d||"
+              << " " << std::setw(7) << "lg(rg)"
+              << " " << std::setw(9) << "alpha_du"
+              << " " << std::setw(9) << "alpha_pr" << std::endl;
+  }
+
+  std::cout << std::setw(4) << iter << " ";
+  std::cout << std::setw(12) << std::scientific << std::setprecision(6)
+            << objective << " ";
+  std::cout << std::setw(9) << std::scientific << std::setprecision(2) << inf_pr
+            << " ";
+  std::cout << std::setw(9) << std::scientific << std::setprecision(2) << inf_du
+            << " ";
+  std::cout << std::setw(9) << std::scientific << std::setprecision(2)
+            << inf_comp << " ";
+
+  if (mu > 0.0) {
+    std::cout << std::setw(7) << std::fixed << std::setprecision(1)
+              << std::log10(mu) << " ";
+  } else {
+    std::cout << std::setw(7) << "-inf"
+              << " ";
+  }
+
+  std::cout << std::setw(9) << std::scientific << std::setprecision(2)
+            << step_norm << " ";
+
+  if (regularization > 0.0) {
+    std::cout << std::setw(7) << std::fixed << std::setprecision(1)
+              << std::log10(regularization) << " ";
+  } else {
+    std::cout << std::setw(7) << "-"
+              << " ";
+  }
+
+  std::cout << std::setw(9) << std::fixed << std::setprecision(6) << alpha_du
+            << " ";
+  std::cout << std::setw(9) << std::fixed << std::setprecision(6) << alpha_pr;
+  std::cout << std::endl;
+}
+
+bool acceptFilterEntry(std::vector<FilterPoint> &filter, double merit_function,
+                       double constraint_violation) {
+  FilterPoint candidate(merit_function, constraint_violation);
+  for (const auto &filter_point : filter) {
+    if (filter_point.dominates(candidate)) {
+      return false;
+    }
+  }
+
+  filter.erase(std::remove_if(filter.begin(), filter.end(),
+                              [&candidate](const FilterPoint &point) {
+                                return candidate.dominates(point);
+                              }),
+               filter.end());
+  filter.push_back(candidate);
+  return true;
+}
+
+bool isFilterCandidateDominated(const std::vector<FilterPoint> &filter,
+                                double merit_function,
+                                double constraint_violation) {
+  FilterPoint candidate(merit_function, constraint_violation);
+  return std::any_of(filter.begin(), filter.end(),
+                     [&candidate](const FilterPoint &point) {
+                       return point.dominates(candidate);
+                     });
+}
+
+bool filterContainsInvalidValues(const std::vector<FilterPoint> &filter) {
+  return std::any_of(filter.begin(), filter.end(), [](const FilterPoint &point) {
+    return !std::isfinite(point.merit_function) ||
+           !std::isfinite(point.constraint_violation);
+  });
+}
+
+void pruneFilterToBestPoints(std::vector<FilterPoint> &filter) {
+  if (filter.empty()) {
+    return;
+  }
+
+  const auto best_violation =
+      *std::min_element(filter.begin(), filter.end(),
+                        [](const FilterPoint &a, const FilterPoint &b) {
+                          return a.constraint_violation <
+                                 b.constraint_violation;
+                        });
+  const auto best_merit =
+      *std::min_element(filter.begin(), filter.end(),
+                        [](const FilterPoint &a, const FilterPoint &b) {
+                          return a.merit_function < b.merit_function;
+                        });
+
+  filter.clear();
+  filter.push_back(best_violation);
+  if (std::abs(best_merit.constraint_violation -
+                   best_violation.constraint_violation) > 1e-12 ||
+      std::abs(best_merit.merit_function - best_violation.merit_function) >
+          1e-12) {
+    filter.push_back(best_merit);
+  }
+}
+
+double computeMaxConstraintViolation(
+    const std::map<std::string, std::vector<Eigen::VectorXd>> &constraints) {
+  double max_violation = 0.0;
+
+  for (const auto &constraint_pair : constraints) {
+    for (const auto &constraint_value : constraint_pair.second) {
+      if (constraint_value.size() == 0) {
+        continue;
+      }
+      max_violation = std::max(max_violation, constraint_value.maxCoeff());
+    }
+  }
+
+  return max_violation;
+}
+
+} // namespace cddp::detail
diff --git a/src/cddp_core/interior_point_utils.hpp b/src/cddp_core/interior_point_utils.hpp
new file mode 100644
index 0000000..160ff41
--- /dev/null
+++ b/src/cddp_core/interior_point_utils.hpp
@@ -0,0 +1,50 @@
+/*
+ Copyright 2026 Tomo Sasaki
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef CDDP_INTERIOR_POINT_UTILS_HPP
+#define CDDP_INTERIOR_POINT_UTILS_HPP
+
+#include <map>
+#include <vector>
+
+#include <Eigen/Dense>
+
+#include "cddp_core/cddp_core.hpp"
+
+namespace cddp::detail {
+
+void printInteriorPointIteration(int iter, double objective, double inf_pr,
+                                 double inf_du, double inf_comp, double mu,
+                                 double step_norm, double regularization,
+                                 double alpha_du, double alpha_pr);
+
+bool acceptFilterEntry(std::vector<FilterPoint> &filter, double merit_function,
+                       double constraint_violation);
+
+bool isFilterCandidateDominated(const std::vector<FilterPoint> &filter,
+                                double merit_function,
+                                double constraint_violation);
+
+bool filterContainsInvalidValues(const std::vector<FilterPoint> &filter);
+
+void pruneFilterToBestPoints(std::vector<FilterPoint> &filter);
+
+double computeMaxConstraintViolation(
+    const std::map<std::string, std::vector<Eigen::VectorXd>> &constraints);
+
+} // namespace cddp::detail
+
+#endif // CDDP_INTERIOR_POINT_UTILS_HPP
diff --git a/src/cddp_core/ipddp_solver.cpp b/src/cddp_core/ipddp_solver.cpp
index 3f88209..e1b39ca 100644
--- a/src/cddp_core/ipddp_solver.cpp
+++ b/src/cddp_core/ipddp_solver.cpp
@@ -14,8 +14,12 @@
  limitations under the License.
 */
 
+#include "interior_point_utils.hpp"
 #include "cddp_core/ipddp_solver.hpp"
 #include "cddp_core/cddp_core.hpp"
+#include "cddp_core/terminal_constraint.hpp"
+#include <algorithm>
+#include <array>
 #include <chrono>
 #include <cmath>
 #include <execution>
@@ -26,72 +30,626 @@
 
 namespace cddp
 {
+  namespace
+  {
+    constexpr double kSlackInteriorOffset = 1e-4;
+    constexpr double EPS_SLACK = 1e-10;
+    constexpr double EPS_DUAL = 1e-10;
+    constexpr double MAX_BARRIER_RATIO = 1e6;
 
-  IPDDPSolver::IPDDPSolver() : mu_(1e-1) {}
+    struct TerminalEqualityLayoutEntry
+    {
+      std::string name;
+      int dim = 0;
+    };
 
-  void IPDDPSolver::initialize(CDDP &context)
-  {
-    const CDDPOptions &options = context.getOptions();
-    const auto &constraint_set = context.getConstraintSet();
+    struct TerminalInequalityLayoutEntry
+    {
+      std::string name;
+      int dim = 0;
+    };
 
-    int horizon = context.getHorizon();
-    int control_dim = context.getControlDim();
-    int state_dim = context.getStateDim();
+    std::vector<TerminalEqualityLayoutEntry> getTerminalEqualityLayout(
+        const CDDP &context)
+    {
+      std::vector<TerminalEqualityLayoutEntry> layout;
+      for (const auto &constraint_pair : context.getTerminalConstraintSet())
+      {
+        if (dynamic_cast<const TerminalEqualityConstraint *>(
+                constraint_pair.second.get()) == nullptr &&
+            dynamic_cast<const TerminalInequalityConstraint *>(
+                constraint_pair.second.get()) == nullptr)
+        {
+          throw std::runtime_error(
+              "IPDDP: terminal constraint '" + constraint_pair.first +
+              "' has unsupported type. Supported terminal constraints are "
+              "TerminalEqualityConstraint and TerminalInequalityConstraint.");
+        }
+        if (dynamic_cast<const TerminalEqualityConstraint *>(
+                constraint_pair.second.get()) == nullptr)
+        {
+          continue;
+        }
+        layout.push_back(
+            TerminalEqualityLayoutEntry{constraint_pair.first,
+                                        constraint_pair.second->getDualDim()});
+      }
+      return layout;
+    }
+
+    std::vector<TerminalInequalityLayoutEntry> getTerminalInequalityLayout(
+        const CDDP &context)
+    {
+      std::vector<TerminalInequalityLayoutEntry> layout;
+      for (const auto &constraint_pair : context.getTerminalConstraintSet())
+      {
+        if (dynamic_cast<const TerminalEqualityConstraint *>(
+                constraint_pair.second.get()) == nullptr &&
+            dynamic_cast<const TerminalInequalityConstraint *>(
+                constraint_pair.second.get()) == nullptr)
+        {
+          throw std::runtime_error(
+              "IPDDP: terminal constraint '" + constraint_pair.first +
+              "' has unsupported type. Supported terminal constraints are "
+              "TerminalEqualityConstraint and TerminalInequalityConstraint.");
+        }
+        if (dynamic_cast<const TerminalInequalityConstraint *>(
+                constraint_pair.second.get()) == nullptr)
+        {
+          continue;
+        }
+        layout.push_back(TerminalInequalityLayoutEntry{
+            constraint_pair.first, constraint_pair.second->getDualDim()});
+      }
+      return layout;
+    }
+
+    int getTerminalEqualityDim(const CDDP &context)
+    {
+      int dim = 0;
+      for (const auto &entry : getTerminalEqualityLayout(context))
+      {
+        dim += entry.dim;
+      }
+      return dim;
+    }
+
+    std::map<std::string, Eigen::VectorXd> evaluateTerminalInequalityResidualMap(
+        const CDDP &context, const Eigen::VectorXd &x_terminal)
+    {
+      std::map<std::string, Eigen::VectorXd> residuals;
+      for (const auto &entry : getTerminalInequalityLayout(context))
+      {
+        const auto *constraint =
+            dynamic_cast<const TerminalInequalityConstraint *>(
+                context.getTerminalConstraintSet().at(entry.name).get());
+        if (!constraint) {
+          throw std::runtime_error(
+              "IPDDP: terminal constraint '" + entry.name +
+              "' is not a TerminalInequalityConstraint");
+        }
+        residuals[entry.name] = constraint->evaluate(x_terminal);
+      }
+      return residuals;
+    }
+
+    std::map<std::string, Eigen::MatrixXd> evaluateTerminalInequalityJacobianMap(
+        const CDDP &context, const Eigen::VectorXd &x_terminal)
+    {
+      std::map<std::string, Eigen::MatrixXd> jacobians;
+      for (const auto &entry : getTerminalInequalityLayout(context))
+      {
+        const auto *constraint =
+            dynamic_cast<const TerminalInequalityConstraint *>(
+                context.getTerminalConstraintSet().at(entry.name).get());
+        if (!constraint) {
+          throw std::runtime_error(
+              "IPDDP: terminal constraint '" + entry.name +
+              "' is not a TerminalInequalityConstraint");
+        }
+        jacobians[entry.name] = constraint->getStateJacobian(x_terminal);
+      }
+      return jacobians;
+    }
+
+    Eigen::VectorXd evaluateTerminalEqualityResidual(
+        const CDDP &context, const Eigen::VectorXd &x_terminal)
+    {
+      const auto layout = getTerminalEqualityLayout(context);
+      const int total_dim = getTerminalEqualityDim(context);
+      Eigen::VectorXd residual = Eigen::VectorXd::Zero(total_dim);
+      int offset = 0;
+      for (const auto &entry : layout)
+      {
+        const auto *constraint =
+            dynamic_cast<const TerminalEqualityConstraint *>(
+                context.getTerminalConstraintSet().at(entry.name).get());
+        if (!constraint) {
+          throw std::runtime_error(
+              "IPDDP: terminal constraint '" + entry.name +
+              "' is not a TerminalEqualityConstraint");
+        }
+        residual.segment(offset, entry.dim) = constraint->evaluate(x_terminal);
+        offset += entry.dim;
+      }
+      return residual;
+    }
+
+    Eigen::MatrixXd evaluateTerminalEqualityJacobian(
+        const CDDP &context, const Eigen::VectorXd &x_terminal)
+    {
+      const auto layout = getTerminalEqualityLayout(context);
+      const int total_dim = getTerminalEqualityDim(context);
+      Eigen::MatrixXd jacobian =
+          Eigen::MatrixXd::Zero(total_dim, x_terminal.size());
+      int offset = 0;
+      for (const auto &entry : layout)
+      {
+        const auto *constraint =
+            dynamic_cast<const TerminalEqualityConstraint *>(
+                context.getTerminalConstraintSet().at(entry.name).get());
+        if (!constraint) {
+          throw std::runtime_error(
+              "IPDDP: terminal constraint '" + entry.name +
+              "' is not a TerminalEqualityConstraint");
+        }
+        jacobian.block(offset, 0, entry.dim, x_terminal.size()) =
+            constraint->getStateJacobian(x_terminal);
+        offset += entry.dim;
+      }
+      return jacobian;
+    }
+
+    std::map<std::string, Eigen::VectorXd> splitTerminalEqualityVector(
+        const CDDP &context, const Eigen::VectorXd &stacked)
+    {
+      std::map<std::string, Eigen::VectorXd> split;
+      const auto layout = getTerminalEqualityLayout(context);
+      int offset = 0;
+      for (const auto &entry : layout)
+      {
+        split[entry.name] = stacked.segment(offset, entry.dim);
+        offset += entry.dim;
+      }
+      return split;
+    }
+
+    Eigen::MatrixXd symmetrizeMatrix(const Eigen::MatrixXd &matrix)
+    {
+      return 0.5 * (matrix + matrix.transpose());
+    }
+
+    double clipPositiveBarrierRatio(double numerator, double denominator)
+    {
+      return std::clamp(numerator / denominator, 0.0, MAX_BARRIER_RATIO);
+    }
+
+    double clipSignedBarrierRatio(double numerator, double denominator)
+    {
+      return std::clamp(numerator / denominator, -MAX_BARRIER_RATIO,
+                        MAX_BARRIER_RATIO);
+    }
+
+    void repairWarmstartInterior(Eigen::VectorXd &s, Eigen::VectorXd &y,
+                                 const CDDPOptions &options)
+    {
+      if (!options.ipddp.warmstart_repair)
+      {
+        return;
+      }
+      if (s.size() > 0)
+      {
+        s = s.cwiseMax(options.ipddp.warmstart_s_min);
+        const double min_s = s.minCoeff();
+        if (min_s <
+            options.ipddp.warmstart_s_min *
+                options.ipddp.warmstart_interior_factor)
+        {
+          s *= options.ipddp.warmstart_interior_factor;
+        }
+      }
+      if (y.size() > 0)
+      {
+        y = y.cwiseMax(options.ipddp.warmstart_y_min);
+        const double min_y = y.minCoeff();
+        if (min_y <
+            options.ipddp.warmstart_y_min *
+                options.ipddp.warmstart_interior_factor)
+        {
+          y *= options.ipddp.warmstart_interior_factor;
+        }
+      }
+    }
+
+    bool warmstartNeedsReinit(const Eigen::VectorXd &y_current,
+                              const Eigen::VectorXd &s_current,
+                              const Eigen::VectorXd &g_val,
+                              const CDDPOptions &options)
+    {
+      if (y_current.size() != g_val.size() || s_current.size() != g_val.size() ||
+          !y_current.allFinite() || !s_current.allFinite())
+      {
+        return true;
+      }
+
+      for (int i = 0; i < g_val.size(); ++i)
+      {
+        if (y_current(i) <= EPS_DUAL || s_current(i) <= EPS_SLACK)
+        {
+          return true;
+        }
+
+        const double required_slack =
+            std::max(options.ipddp.slack_var_init_scale,
+                     -g_val(i) + kSlackInteriorOffset);
+        if (s_current(i) < 0.1 * required_slack)
+        {
+          return true;
+        }
+      }
+
+      return false;
+    }
+
+    void initializeTerminalWarmstartDualSlack(
+        const CDDP &context,
+        double mu,
+        std::map<std::string, Eigen::VectorXd> &G_T,
+        std::map<std::string, Eigen::VectorXd> &Y_T,
+        std::map<std::string, Eigen::VectorXd> &S_T,
+        std::map<std::string, Eigen::VectorXd> &dY_T,
+        std::map<std::string, Eigen::VectorXd> &dS_T)
+    {
+      const CDDPOptions &options = context.getOptions();
+      const auto layout = getTerminalInequalityLayout(context);
+
+      bool has_existing_dual_slack = true;
+      for (const auto &entry : layout)
+      {
+        auto y_it = Y_T.find(entry.name);
+        auto s_it = S_T.find(entry.name);
+        if (y_it == Y_T.end() || s_it == S_T.end() ||
+            y_it->second.size() != entry.dim || s_it->second.size() != entry.dim)
+        {
+          has_existing_dual_slack = false;
+          break;
+        }
+      }
+
+      for (const auto &entry : layout)
+      {
+        const Eigen::VectorXd &g_val = G_T.at(entry.name);
+        Eigen::VectorXd s_current = Eigen::VectorXd::Zero(entry.dim);
+        Eigen::VectorXd y_current = Eigen::VectorXd::Zero(entry.dim);
+        bool need_reinit = !has_existing_dual_slack;
+
+        if (!need_reinit)
+        {
+          s_current = S_T.at(entry.name);
+          y_current = Y_T.at(entry.name);
+          need_reinit =
+              warmstartNeedsReinit(y_current, s_current, g_val, options);
+        }
 
-    // Initialize workspace if not already done
-    if (!workspace_.initialized) {
-      // Allocate backward pass workspace
-      workspace_.A_matrices.resize(horizon);
-      workspace_.B_matrices.resize(horizon);
-      workspace_.Q_xx_matrices.resize(horizon);
-      workspace_.Q_ux_matrices.resize(horizon);
-      workspace_.Q_uu_matrices.resize(horizon);
-      workspace_.Q_x_vectors.resize(horizon);
-      workspace_.Q_u_vectors.resize(horizon);
+        if (need_reinit)
+        {
+          s_current = g_val.unaryExpr([&](double g) {
+            return std::max(options.ipddp.slack_var_init_scale,
+                            -g + kSlackInteriorOffset);
+          });
+          for (int i = 0; i < entry.dim; ++i)
+          {
+            y_current(i) =
+                (mu * options.ipddp.dual_var_init_scale) /
+                std::max(s_current(i), EPS_SLACK);
+          }
+        }
 
-      // Allocate LDLT solver cache
-      workspace_.ldlt_solvers.resize(horizon);
-      workspace_.ldlt_valid.resize(horizon, false);
+        repairWarmstartInterior(s_current, y_current, options);
+        S_T[entry.name] = s_current;
+        Y_T[entry.name] = y_current;
+        dS_T[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+        dY_T[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+      }
+    }
 
-      // Allocate forward pass workspace
-      workspace_.delta_x_vectors.resize(horizon + 1);
+    Eigen::VectorXd initializeTerminalEqualityWarmstartMultipliers(
+        const Eigen::VectorXd &lambda_current,
+        int terminal_equality_dim)
+    {
+      if (lambda_current.size() == terminal_equality_dim &&
+          lambda_current.allFinite())
+      {
+        return lambda_current;
+      }
+      return Eigen::VectorXd::Zero(terminal_equality_dim);
+    }
 
-      for (int t = 0; t < horizon; ++t) {
-        workspace_.A_matrices[t] = Eigen::MatrixXd::Zero(state_dim, state_dim);
-        workspace_.B_matrices[t] = Eigen::MatrixXd::Zero(state_dim, control_dim);
-        workspace_.Q_xx_matrices[t] = Eigen::MatrixXd::Zero(state_dim, state_dim);
-        workspace_.Q_ux_matrices[t] = Eigen::MatrixXd::Zero(control_dim, state_dim);
-        workspace_.Q_uu_matrices[t] = Eigen::MatrixXd::Zero(control_dim, control_dim);
-        workspace_.Q_x_vectors[t] = Eigen::VectorXd::Zero(state_dim);
-        workspace_.Q_u_vectors[t] = Eigen::VectorXd::Zero(control_dim);
+    void rolloutLinearPolicy(const std::vector<Eigen::MatrixXd> &A,
+                             const std::vector<Eigen::MatrixXd> &B,
+                             const std::vector<Eigen::VectorXd> &d,
+                             const std::vector<Eigen::MatrixXd> &K,
+                             const std::vector<Eigen::VectorXd> &k,
+                             const Eigen::VectorXd &dx0,
+                             std::vector<Eigen::VectorXd> &dX,
+                             std::vector<Eigen::VectorXd> &dU)
+    {
+      const int T = static_cast<int>(K.size());
+      if (T == 0 || A.empty() || B.empty())
+      {
+        dX.assign(1, dx0);
+        dU.clear();
+        return;
+      }
+      dX.assign(T + 1, Eigen::VectorXd::Zero(A.front().rows()));
+      dU.assign(T, Eigen::VectorXd::Zero(B.front().cols()));
+      dX[0] = dx0;
+      for (int t = 0; t < T; ++t)
+      {
+        dU[t] = k[t] + K[t] * dX[t];
+        dX[t + 1] = A[t] * dX[t] + B[t] * dU[t] + d[t];
       }
+    }
 
-      for (int t = 0; t <= horizon; ++t) {
-        workspace_.delta_x_vectors[t] = Eigen::VectorXd::Zero(state_dim);
+    void rolloutLinearPolicy(const std::vector<Eigen::MatrixXd> &A,
+                             const std::vector<Eigen::MatrixXd> &B,
+                             const std::vector<Eigen::VectorXd> &d,
+                             const std::vector<Eigen::MatrixXd> &K,
+                             const std::vector<Eigen::VectorXd> &k,
+                             std::vector<Eigen::VectorXd> &dX,
+                             std::vector<Eigen::VectorXd> &dU)
+    {
+      if (A.empty() || B.empty())
+      {
+        const int state_dim = !K.empty() ? K.front().cols() : 0;
+        dX.assign(1, Eigen::VectorXd::Zero(state_dim));
+        dU.clear();
+        return;
       }
+      rolloutLinearPolicy(A, B, d, K, k,
+                          Eigen::VectorXd::Zero(A.front().rows()), dX, dU);
+    }
 
-      // Allocate constraint workspace if needed
-      if (!constraint_set.empty()) {
-        int total_dual_dim = getTotalDualDim(context);
-        workspace_.y_combined = Eigen::VectorXd::Zero(total_dual_dim);
-        workspace_.s_combined = Eigen::VectorXd::Zero(total_dual_dim);
-        workspace_.g_combined = Eigen::VectorXd::Zero(total_dual_dim);
-        workspace_.Q_yu_combined = Eigen::MatrixXd::Zero(total_dual_dim, control_dim);
-        workspace_.Q_yx_combined = Eigen::MatrixXd::Zero(total_dual_dim, state_dim);
-        workspace_.YSinv = Eigen::MatrixXd::Zero(total_dual_dim, total_dual_dim);
-        workspace_.bigRHS = Eigen::MatrixXd::Zero(control_dim, 1 + state_dim);
+    bool solveSequentialLQR(
+        const std::vector<Eigen::MatrixXd> &Q,
+        const std::vector<Eigen::VectorXd> &q,
+        const std::vector<Eigen::MatrixXd> &R,
+        const std::vector<Eigen::VectorXd> &r,
+        const std::vector<Eigen::MatrixXd> &M,
+        const std::vector<Eigen::MatrixXd> &A,
+        const std::vector<Eigen::MatrixXd> &B,
+        const std::vector<Eigen::VectorXd> &d,
+        std::vector<Eigen::MatrixXd> &K,
+        std::vector<Eigen::VectorXd> &k,
+        std::vector<Eigen::MatrixXd> &P,
+        std::vector<Eigen::VectorXd> &p)
+    {
+      const int T = static_cast<int>(R.size());
+      if (T == 0)
+      {
+        return true;
       }
 
-      workspace_.initialized = true;
+      const int n = Q.front().rows();
+      const int m = R.front().rows();
+      K.assign(T, Eigen::MatrixXd::Zero(m, n));
+      k.assign(T, Eigen::VectorXd::Zero(m));
+      P.assign(T + 1, Eigen::MatrixXd::Zero(n, n));
+      p.assign(T + 1, Eigen::VectorXd::Zero(n));
+      P[T] = 0.5 * (Q[T] + Q[T].transpose());
+      p[T] = q[T];
+
+      for (int t = T - 1; t >= 0; --t)
+      {
+        const Eigen::MatrixXd &P_next = P[t + 1];
+        const Eigen::VectorXd &p_next = p[t + 1];
+        const Eigen::MatrixXd BtP = B[t].transpose() * P_next;
+        const Eigen::MatrixXd Q_uu =
+            0.5 * (R[t] + BtP * B[t] + R[t].transpose() +
+                   B[t].transpose() * P_next.transpose() * B[t]);
+        const Eigen::MatrixXd Q_ux = BtP * A[t] + M[t].transpose();
+        const Eigen::MatrixXd Q_xu = Q_ux.transpose();
+        const Eigen::VectorXd drift = p_next + P_next * d[t];
+        const Eigen::VectorXd Q_x = q[t] + A[t].transpose() * drift;
+        const Eigen::VectorXd Q_u = r[t] + B[t].transpose() * drift;
+
+        Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_uu);
+        if (ldlt.info() != Eigen::Success)
+        {
+          return false;
+        }
+
+        K[t] = -ldlt.solve(Q_ux);
+        k[t] = -ldlt.solve(Q_u);
+        P[t] = Q[t] + A[t].transpose() * P_next * A[t] + Q_xu * K[t] +
+               K[t].transpose() * Q_ux + K[t].transpose() * Q_uu * K[t];
+        P[t] = 0.5 * (P[t] + P[t].transpose());
+        p[t] = Q_x + Q_xu * k[t] + K[t].transpose() * Q_u +
+               K[t].transpose() * Q_uu * k[t];
+        if (!P[t].allFinite() || !p[t].allFinite() || !K[t].allFinite() ||
+            !k[t].allFinite())
+        {
+          return false;
+        }
+      }
+      return true;
     }
 
-    // Validate reference state consistency
-    if ((context.getReferenceState() - context.getObjective().getReferenceState()).norm() > 1e-6)
+    bool solveTerminalEqualityLQR(
+        const std::vector<Eigen::MatrixXd> &Q,
+        const std::vector<Eigen::VectorXd> &q,
+        const std::vector<Eigen::MatrixXd> &R,
+        const std::vector<Eigen::VectorXd> &r,
+        const std::vector<Eigen::MatrixXd> &M,
+        const std::vector<Eigen::MatrixXd> &A,
+        const std::vector<Eigen::MatrixXd> &B,
+        const std::vector<Eigen::VectorXd> &d,
+        const Eigen::VectorXd &dx0,
+        const Eigen::MatrixXd &H_T,
+        const Eigen::VectorXd &b_T,
+        double mu,
+        double reg_scale,
+        double reg_exponent,
+        const Eigen::VectorXd *lambda_prev,
+        std::vector<Eigen::MatrixXd> &K_out,
+        std::vector<Eigen::VectorXd> &k_out,
+        std::vector<Eigen::MatrixXd> &P_out,
+        std::vector<Eigen::VectorXd> &p_out,
+        Eigen::VectorXd &lambda_total,
+        Eigen::VectorXd &lambda_delta)
     {
-      throw std::runtime_error("IPDDP: Reference state mismatch between context and objective");
+      const int p_dim = H_T.rows();
+      if (p_dim == 0)
+      {
+        lambda_total = Eigen::VectorXd::Zero(0);
+        lambda_delta = Eigen::VectorXd::Zero(0);
+        return solveSequentialLQR(Q, q, R, r, M, A, B, d, K_out, k_out, P_out,
+                                  p_out);
+      }
+
+      std::vector<Eigen::VectorXd> q_base = q;
+      Eigen::VectorXd lambda_prev_vec = Eigen::VectorXd::Zero(p_dim);
+      if (lambda_prev != nullptr && lambda_prev->size() == p_dim)
+      {
+        lambda_prev_vec = *lambda_prev;
+        q_base.back() += H_T.transpose() * lambda_prev_vec;
+      }
+
+      const int T = static_cast<int>(R.size());
+      const int n = Q.front().rows();
+      std::vector<std::vector<Eigen::MatrixXd>> K_variants(
+          p_dim + 1, std::vector<Eigen::MatrixXd>(T));
+      std::vector<std::vector<Eigen::VectorXd>> k_variants(
+          p_dim + 1, std::vector<Eigen::VectorXd>(T));
+      std::vector<std::vector<Eigen::MatrixXd>> P_variants(
+          p_dim + 1, std::vector<Eigen::MatrixXd>(T + 1));
+      std::vector<std::vector<Eigen::VectorXd>> p_variants(
+          p_dim + 1, std::vector<Eigen::VectorXd>(T + 1));
+      std::vector<Eigen::VectorXd> xT_variants(
+          p_dim + 1, Eigen::VectorXd::Zero(n));
+
+      for (int i = 0; i < p_dim + 1; ++i)
+      {
+        std::vector<Eigen::VectorXd> q_variant = q_base;
+        if (i > 0)
+        {
+          q_variant.back() += H_T.row(i - 1).transpose();
+        }
+        if (!solveSequentialLQR(Q, q_variant, R, r, M, A, B, d, K_variants[i],
+                                k_variants[i], P_variants[i], p_variants[i]))
+        {
+          return false;
+        }
+        std::vector<Eigen::VectorXd> dX_variant;
+        std::vector<Eigen::VectorXd> dU_variant;
+        rolloutLinearPolicy(A, B, d, K_variants[i], k_variants[i], dx0,
+                            dX_variant, dU_variant);
+        xT_variants[i] = dX_variant.back();
+      }
+
+      Eigen::MatrixXd S_mat(n, p_dim);
+      for (int i = 0; i < p_dim; ++i)
+      {
+        S_mat.col(i) = xT_variants[i + 1] - xT_variants[0];
+      }
+
+      const Eigen::MatrixXd A_small = H_T * S_mat;
+      const Eigen::VectorXd rhs = b_T - H_T * xT_variants[0];
+      const Eigen::MatrixXd AtA = A_small.transpose() * A_small;
+      const Eigen::VectorXd Atb = A_small.transpose() * rhs;
+
+      const double trace_term =
+          (AtA.trace() > 1.0 ? AtA.trace() / std::max(p_dim, 1) : 1.0);
+      const double base_floor =
+          std::max(1e-10, reg_scale * std::pow(std::max(mu, 0.0), reg_exponent));
+      const double reg = std::max(base_floor, 1e-6 * trace_term);
+      Eigen::JacobiSVD<Eigen::MatrixXd> svd(A_small);
+      const auto &singular = svd.singularValues();
+      const double sigma_max = singular.size() > 0 ? singular.maxCoeff() : 0.0;
+      const double sigma_min = singular.size() > 0 ? singular.minCoeff() : 0.0;
+      const double svd_reg = std::max(1e-8 * sigma_max - sigma_min, 0.0);
+      const double reg_base = std::max(reg, svd_reg);
+      const double lambda_norm_cap = 100.0 * (1.0 + rhs.norm());
+
+      const std::array<double, 5> reg_scales{{1.0, 10.0, 100.0, 1e3, 1e4}};
+      Eigen::VectorXd best_lambda = Eigen::VectorXd::Zero(p_dim);
+      double best_residual = std::numeric_limits<double>::infinity();
+      bool found_finite = false;
+      for (double scale : reg_scales)
+      {
+        const double reg_i = std::max(reg_base * scale, 1e-12);
+        Eigen::MatrixXd shifted =
+            AtA + reg_i * Eigen::MatrixXd::Identity(p_dim, p_dim);
+        Eigen::LDLT<Eigen::MatrixXd> ldlt(shifted);
+        if (ldlt.info() != Eigen::Success)
+        {
+          continue;
+        }
+        Eigen::VectorXd lambda_i = ldlt.solve(Atb);
+        if (!lambda_i.allFinite())
+        {
+          continue;
+        }
+        const double lambda_norm = lambda_i.norm();
+        if (lambda_norm > lambda_norm_cap)
+        {
+          lambda_i *= lambda_norm_cap / std::max(lambda_norm, 1e-12);
+        }
+        const double residual = (A_small * lambda_i - rhs).norm();
+        if (!std::isfinite(residual))
+        {
+          continue;
+        }
+        if (!found_finite || residual < best_residual)
+        {
+          best_lambda = lambda_i;
+          best_residual = residual;
+          found_finite = true;
+        }
+      }
+
+      if (!found_finite)
+      {
+        std::cerr << "IPDDP: solveTerminalEqualityLQR failed for all "
+                  << reg_scales.size() << " regularization scales; "
+                  << "using zero terminal multipliers" << std::endl;
+        best_lambda = Eigen::VectorXd::Zero(p_dim);
+      }
+
+      K_out = K_variants[0];
+      k_out = k_variants[0];
+      P_out = P_variants[0];
+      p_out = p_variants[0];
+      for (int i = 0; i < p_dim; ++i)
+      {
+        const double coeff = best_lambda(i);
+        for (int t = 0; t < T; ++t)
+        {
+          k_out[t] += coeff * (k_variants[i + 1][t] - k_variants[0][t]);
+        }
+        for (int t = 0; t <= T; ++t)
+        {
+          p_out[t] += coeff * (p_variants[i + 1][t] - p_variants[0][t]);
+        }
+      }
+
+      lambda_delta = best_lambda;
+      lambda_total = lambda_prev_vec + best_lambda;
+      return true;
     }
+  } // namespace
+
+  IPDDPSolver::IPDDPSolver() : mu_(1e-1) {}
+
+  void IPDDPSolver::initialize(CDDP &context)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const auto &constraint_set = context.getConstraintSet();
+    int horizon = context.getHorizon();
+    int control_dim = context.getControlDim();
+    int state_dim = context.getStateDim();
 
-    // For warm starts, verify that existing state is valid
+    // Check for valid warm start with existing solver state
     if (options.warm_start)
     {
       bool valid_warm_start = (k_u_.size() == static_cast<size_t>(horizon) &&
@@ -123,8 +681,52 @@ namespace cddp
         }
         mu_ = options.ipddp.barrier.mu_initial * 0.1;
         context.step_norm_ = 0.0;
+
+        // Re-rollout trajectory with existing controls
+        context.X_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        context.X_[0] = context.getInitialState();
+        for (int t = 0; t < horizon; ++t)
+        {
+          context.X_[t + 1] = context.getSystem().getDiscreteDynamics(
+              context.X_[t], context.U_[t], t * context.getTimestep());
+        }
+
+        // Resize costate arrays if needed
+        if (Lambda_.size() != static_cast<size_t>(horizon + 1))
+        {
+          Lambda_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        }
+        if (k_lambda_.size() != static_cast<size_t>(horizon + 1))
+        {
+          k_lambda_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+          K_lambda_.assign(horizon + 1, Eigen::MatrixXd::Zero(state_dim, state_dim));
+        }
+        if (dX_.size() != static_cast<size_t>(horizon + 1))
+        {
+          dX_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+          dU_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+        }
+        dV_ = Eigen::Vector2d::Zero();
+        const auto warmstart_terminal_eq_dual = Lambda_T_eq_;
+        const int terminal_eq_dim = getTerminalEqualityDim(context);
+        Lambda_T_eq_ = initializeTerminalEqualityWarmstartMultipliers(
+            warmstart_terminal_eq_dual, terminal_eq_dim);
+        dLambda_T_eq_ = Eigen::VectorXd::Zero(terminal_eq_dim);
+
+        const auto warmstart_dual = Y_;
+        const auto warmstart_slack = S_;
+        const auto warmstart_terminal_dual = Y_T_;
+        const auto warmstart_terminal_slack = S_T_;
+        initializeConstraintStorage(context);
+        Y_ = warmstart_dual;
+        S_ = warmstart_slack;
+        Y_T_ = warmstart_terminal_dual;
+        S_T_ = warmstart_terminal_slack;
         evaluateTrajectoryWarmStart(context);
         initializeDualSlackVariablesWarmStart(context);
+        initializeTerminalWarmstartDualSlack(context, mu_, G_T_, Y_T_, S_T_,
+                                            dY_T_, dS_T_);
+        G_ = G_raw_;
         resetFilter(context);
         return;
       }
@@ -137,13 +739,48 @@ namespace cddp
         }
 
         // Initialize gains and constraints
-        initializeGains(horizon, control_dim, state_dim);
+        k_u_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+        K_u_.assign(horizon, Eigen::MatrixXd::Zero(control_dim, state_dim));
+        dX_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        dU_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+        k_lambda_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        K_lambda_.assign(horizon + 1, Eigen::MatrixXd::Zero(state_dim, state_dim));
+        Lambda_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        dV_ = Eigen::Vector2d::Zero();
+        const auto warmstart_terminal_eq_dual = Lambda_T_eq_;
+        const int terminal_eq_dim = getTerminalEqualityDim(context);
+        Lambda_T_eq_ = initializeTerminalEqualityWarmstartMultipliers(
+            warmstart_terminal_eq_dual, terminal_eq_dim);
+        dLambda_T_eq_ = Eigen::VectorXd::Zero(terminal_eq_dim);
+
+        const auto warmstart_dual = Y_;
+        const auto warmstart_slack = S_;
+        const auto warmstart_terminal_dual = Y_T_;
+        const auto warmstart_terminal_slack = S_T_;
         initializeConstraintStorage(context);
+        Y_ = warmstart_dual;
+        S_ = warmstart_slack;
+        Y_T_ = warmstart_terminal_dual;
+        S_T_ = warmstart_terminal_slack;
+
+        // Re-rollout trajectory with provided controls
+        if (static_cast<int>(context.U_.size()) != horizon)
+        {
+          context.U_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+        }
+        context.X_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        context.X_[0] = context.getInitialState();
+        for (int t = 0; t < horizon; ++t)
+        {
+          context.X_[t + 1] = context.getSystem().getDiscreteDynamics(
+              context.X_[t], context.U_[t], t * context.getTimestep());
+        }
 
         // Set barrier parameter based on constraint evaluation
-        if (constraint_set.empty())
+        if (constraint_set.empty() && context.getTerminalConstraintSet().empty())
         {
-          mu_ = 1e-8;
+          mu_ = std::max(options.tolerance / 10.0,
+                         options.ipddp.barrier.mu_min_value);
         }
         else
         {
@@ -151,11 +788,13 @@ namespace cddp
           double max_constraint_violation = computeMaxConstraintViolation(context);
           if (max_constraint_violation <= options.tolerance)
           {
-            mu_ = options.tolerance * 0.01;
+            mu_ = std::max(options.tolerance,
+                           options.ipddp.barrier.mu_min_value);
           }
           else if (max_constraint_violation <= 0.1)
           {
-            mu_ = options.tolerance;
+            mu_ = std::max(options.tolerance * 10.0,
+                           options.ipddp.barrier.mu_initial * 0.01);
           }
           else
           {
@@ -165,1382 +804,1859 @@ namespace cddp
 
         context.regularization_ = options.regularization.initial_value;
         context.step_norm_ = 0.0;
+        context.alpha_pr_ = 1.0;
+        context.alpha_du_ = 1.0;
         initializeDualSlackVariablesWarmStart(context);
+        initializeTerminalWarmstartDualSlack(context, mu_, G_T_, Y_T_, S_T_,
+                                            dY_T_, dS_T_);
+        G_ = G_raw_;
         resetFilter(context);
+        context.inf_du_ = 0.0;
         return;
       }
     }
 
-    // Cold start: check if trajectory is provided
-    bool trajectory_provided = (context.X_.size() == static_cast<size_t>(horizon + 1) &&
-                                context.U_.size() == static_cast<size_t>(horizon) &&
-                                context.X_[0].size() == state_dim &&
-                                context.U_[0].size() == control_dim);
-
-    if (!trajectory_provided)
-    {
-      context.X_.resize(horizon + 1);
-      context.U_.resize(horizon);
-
-      for (int t = 0; t <= horizon; ++t)
-      {
-        context.X_[t] = context.getInitialState() +
-                        t * (context.getReferenceState() - context.getInitialState()) / horizon;
-      }
+    initializeConstraintStorage(context);
 
-      for (int t = 0; t < horizon; ++t)
-      {
-        context.U_[t] = Eigen::VectorXd::Zero(control_dim);
-      }
+    k_u_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+    K_u_.assign(horizon, Eigen::MatrixXd::Zero(control_dim, state_dim));
+    dX_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    dU_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+    k_lambda_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    K_lambda_.assign(horizon + 1, Eigen::MatrixXd::Zero(state_dim, state_dim));
+    Lambda_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    dV_ = Eigen::Vector2d::Zero();
+    Lambda_T_eq_ = Eigen::VectorXd::Zero(getTerminalEqualityDim(context));
+    dLambda_T_eq_ = Eigen::VectorXd::Zero(getTerminalEqualityDim(context));
 
-      if (options.verbose)
+    if (static_cast<int>(context.U_.size()) != horizon)
+    {
+      context.U_.assign(horizon, Eigen::VectorXd::Zero(control_dim));
+    }
+    for (auto &u : context.U_)
+    {
+      if (u.size() != control_dim)
       {
-        std::cout << "IPDDP: Using interpolated initial trajectory" << std::endl;
+        u = Eigen::VectorXd::Zero(control_dim);
       }
     }
-    else if (options.verbose)
+
+    bool reset_warmstart = false;
+    if (options.warm_start && options.ipddp.warmstart_reset_x0_threshold > 0.0 &&
+        !context.X_.empty())
     {
-      std::cout << "IPDDP: Using provided initial trajectory" << std::endl;
+      const double dx0 =
+          (context.getInitialState() - context.X_.front()).norm();
+      reset_warmstart = dx0 > options.ipddp.warmstart_reset_x0_threshold;
     }
-
-    // Initialize gains, constraints, and parameters
-    initializeGains(horizon, control_dim, state_dim);
-    initializeConstraintStorage(context);
-
-    // Set barrier parameter
-    if (constraint_set.empty())
+    if (reset_warmstart)
     {
-      mu_ = 1e-8;
+      for (auto &u : context.U_)
+      {
+        u.setZero();
+      }
+      Y_.clear();
+      S_.clear();
+      G_.clear();
+      Y_T_.clear();
+      S_T_.clear();
+      G_T_.clear();
+      Lambda_T_eq_.setZero();
+      dLambda_T_eq_.setZero();
     }
-    else
+
+    context.X_.assign(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    context.X_[0] = context.getInitialState();
+    for (int t = 0; t < horizon; ++t)
     {
-      mu_ = options.ipddp.barrier.mu_initial;
+      context.X_[t + 1] = context.getSystem().getDiscreteDynamics(
+          context.X_[t], context.U_[t], t * context.getTimestep());
     }
 
-    initializeDualSlackVariables(context);
+    mu_ = constraint_set.empty() && context.getTerminalConstraintSet().empty()
+              ? std::max(options.tolerance / 10.0,
+                         options.ipddp.barrier.mu_min_value)
+              : options.ipddp.barrier.mu_initial;
+
     context.regularization_ = options.regularization.initial_value;
     context.step_norm_ = 0.0;
-    evaluateTrajectory(context);
-    resetFilter(context);
-  }
+    context.alpha_pr_ = 1.0;
+    context.alpha_du_ = 1.0;
 
-  std::string IPDDPSolver::getSolverName() const { return "IPDDP"; }
+    evaluateTrajectory(context);
+    initializeDualSlackVariables(context);
 
-  int IPDDPSolver::getTotalDualDim(const CDDP &context) const
-  {
-    int total_dual_dim = 0;
-    const auto &constraint_set = context.getConstraintSet();
-    for (const auto &constraint_pair : constraint_set)
+    for (const auto &entry : getTerminalInequalityLayout(context))
     {
-      total_dual_dim += constraint_pair.second->getDualDim();
+      const Eigen::VectorXd &g_T = G_T_[entry.name];
+      Eigen::VectorXd s_init =
+          g_T.unaryExpr([&](double g) {
+            return std::max(options.ipddp.slack_var_init_scale,
+                            -g + kSlackInteriorOffset);
+          });
+      Eigen::VectorXd y_init(entry.dim);
+      for (int i = 0; i < entry.dim; ++i)
+      {
+        y_init(i) =
+            (mu_ * options.ipddp.dual_var_init_scale) / std::max(s_init(i), EPS_SLACK);
+      }
+      repairWarmstartInterior(s_init, y_init, options);
+      S_T_[entry.name] = s_init;
+      Y_T_[entry.name] = y_init;
+      dS_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+      dY_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
     }
-    return total_dual_dim;
-  }
 
-  // === CDDPSolverBase hook implementations ===
+    G_ = G_raw_;
 
-  void IPDDPSolver::preIterationSetup(CDDP &context)
-  {
-    // evaluateTrajectory and resetFilter are already called in initialize().
-    // No additional pre-iteration setup needed for IPDDP.
+    resetFilter(context);
+    context.inf_du_ = 0.0;
   }
 
-  void IPDDPSolver::applyForwardPassResult(CDDP &context,
-                                            const ForwardPassResult &result)
-  {
-    // Call base to update X_, U_, cost_, merit_function_, alpha_pr_, alpha_du_
-    CDDPSolverBase::applyForwardPassResult(context, result);
+  std::string IPDDPSolver::getSolverName() const { return "IPDDP"; }
 
-    // Update IP-specific variables
-    if (result.dual_trajectory)
-      Y_ = *result.dual_trajectory;
-    if (result.slack_trajectory)
-      S_ = *result.slack_trajectory;
-    if (result.constraint_eval_trajectory)
-      G_ = *result.constraint_eval_trajectory;
+  // === CDDPSolverBase hook overrides ===
+
+  void IPDDPSolver::preIterationSetup(CDDP &context)
+  {
+    // Initialization already done in initialize()
   }
 
-  bool IPDDPSolver::checkConvergence(CDDP &context, double dJ, double dL,
-                                      int iter, std::string &termination_reason)
+  bool IPDDPSolver::checkEarlyConvergence(CDDP &context, int iter,
+                                           std::string &reason)
   {
+    // Check convergence after backward pass (before forward pass)
     const CDDPOptions &options = context.getOptions();
+    const bool no_barrier_needed =
+        context.getConstraintSet().empty() &&
+        getTerminalInequalityLayout(context).empty();
+    const double scaled_inf_du = computeScaledDualInfeasibility(context);
+    const double scaled_inf_comp = context.inf_comp_;
 
-    // Compute IPOPT-style scaling factors
-    double scaled_inf_du = computeScaledDualInfeasibility(context);
-    double termination_metric = std::max({scaled_inf_du, context.inf_pr_, context.inf_comp_});
-
-    if (termination_metric <= options.tolerance)
+    if (no_barrier_needed)
     {
-      termination_reason = "OptimalSolutionFound";
-      if (options.verbose)
+      if (context.inf_pr_ < options.tolerance &&
+          scaled_inf_du < options.tolerance)
       {
-        std::cout << "IPDDP: Converged due to scaled optimality gap and constraint "
-                     "violation (metric: "
-                  << std::scientific << std::setprecision(2)
-                  << termination_metric << ", scaled inf_du: " << scaled_inf_du << ")" << std::endl;
+        reason = "OptimalSolutionFound";
+        return true;
       }
+      return false;
+    }
+
+    const double tol = std::max(options.tolerance,
+                                options.ipddp.barrier_tol_mult * mu_);
+    const double accepted_step_norm =
+        std::abs(context.alpha_pr_) * context.step_norm_;
+    if (context.inf_pr_ < tol && scaled_inf_du < tol && scaled_inf_comp < tol &&
+        accepted_step_norm < options.tolerance * 10.0)
+    {
+      reason = "OptimalSolutionFound";
       return true;
     }
+    return false;
+  }
 
-    if (std::abs(dJ) < options.acceptable_tolerance && iter > 10)
+  bool IPDDPSolver::backwardPass(CDDP &context)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const int state_dim = context.getStateDim();
+    const int control_dim = context.getControlDim();
+    const int horizon = context.getHorizon();
+    const auto &constraint_set = context.getConstraintSet();
+    const int total_dual_dim = getTotalDualDim(context);
+    const bool has_terminal_ineq =
+        !getTerminalInequalityLayout(context).empty();
+    const bool has_terminal_eq = getTerminalEqualityDim(context) > 0;
+    const bool has_path_constraints = !constraint_set.empty();
+
+    struct CondensedPathConstraintModel
     {
-      bool acceptable_infeasibility = (context.inf_pr_ < std::sqrt(options.acceptable_tolerance) &&
-                                       context.inf_comp_ < std::sqrt(options.acceptable_tolerance));
+      Eigen::VectorXd y;
+      Eigen::VectorXd s;
+      Eigen::MatrixXd Q_yx;
+      Eigen::MatrixXd Q_yu;
+      Eigen::MatrixXd YSinv;
+      Eigen::VectorXd primal_residual;
+      Eigen::VectorXd rhat;
+    };
+
+    CDDPSolverBase::precomputeDynamicsDerivatives(context);
+    precomputeConstraintGradients(context);
+    G_ = G_raw_;
+
+    Eigen::VectorXd V_x =
+        context.getObjective().getFinalCostGradient(context.X_.back());
+    Eigen::MatrixXd V_xx =
+        context.getObjective().getFinalCostHessian(context.X_.back());
+    V_xx = symmetrizeMatrix(V_xx);
+
+    dV_ = Eigen::Vector2d::Zero();
+    double inf_du = 0.0;
+    double inf_pr = 0.0;
+    double inf_comp = 0.0;
+    double step_norm = 0.0;
 
-      if (acceptable_infeasibility)
+    if (has_terminal_ineq)
+    {
+      G_T_ = evaluateTerminalInequalityResidualMap(context, context.X_.back());
+      const auto terminal_jacobians =
+          evaluateTerminalInequalityJacobianMap(context, context.X_.back());
+      for (const auto &entry : getTerminalInequalityLayout(context))
       {
-        termination_reason = "AcceptableSolutionFound";
-        if (options.verbose)
+        const Eigen::VectorXd &g_T = G_T_.at(entry.name);
+        const Eigen::MatrixXd &G_T_x = terminal_jacobians.at(entry.name);
+        const Eigen::VectorXd &S_T = S_T_.at(entry.name);
+        const Eigen::VectorXd &Y_T = Y_T_.at(entry.name);
+        Eigen::VectorXd sigma_T(entry.dim);
+        Eigen::VectorXd barrier_grad_T(entry.dim);
+        for (int i = 0; i < entry.dim; ++i)
         {
-          std::cout << "IPDDP: Converged due to small change in cost (dJ: "
-                    << std::scientific << std::setprecision(2) << std::abs(dJ)
-                    << ") with acceptable infeasibility" << std::endl;
+          const double s_safe =
+              std::max(S_T(i), std::max(mu_ * 1e-3, EPS_SLACK));
+          const double y_safe = std::max(Y_T(i), EPS_DUAL);
+          sigma_T(i) = clipPositiveBarrierRatio(y_safe, s_safe);
+          const double barrier_grad_correction =
+              clipSignedBarrierRatio(y_safe * g_T(i) + mu_, s_safe);
+          barrier_grad_T(i) = y_safe + barrier_grad_correction;
         }
-        return true;
+        V_x.noalias() += G_T_x.transpose() * barrier_grad_T;
+        V_xx.noalias() += G_T_x.transpose() * sigma_T.asDiagonal() * G_T_x;
+        V_xx = symmetrizeMatrix(V_xx);
+        inf_pr = std::max(inf_pr, (g_T + S_T).lpNorm<Eigen::Infinity>());
+        inf_comp = std::max(
+            inf_comp,
+            (Y_T.cwiseProduct(S_T).array() - mu_).matrix().lpNorm<Eigen::Infinity>());
       }
     }
 
-    if (iter >= 1 &&
-        context.step_norm_ < options.tolerance * 10.0 &&
-        context.inf_pr_ < 1e-4)
+    Eigen::VectorXd h_T = Eigen::VectorXd::Zero(getTerminalEqualityDim(context));
+    Eigen::MatrixXd H_T =
+        Eigen::MatrixXd::Zero(getTerminalEqualityDim(context), state_dim);
+    if (has_terminal_eq)
     {
-      termination_reason = "AcceptableSolutionFound";
-      if (options.verbose)
-      {
-        std::cout << "IPDDP: Converged based on small step norm and feasibility"
-                  << std::endl;
-      }
-      return true;
+      h_T = evaluateTerminalEqualityResidual(context, context.X_.back());
+      H_T = evaluateTerminalEqualityJacobian(context, context.X_.back());
+      inf_pr = std::max(inf_pr, h_T.lpNorm<Eigen::Infinity>());
+      dLambda_T_eq_ = -h_T;
+    }
+    else
+    {
+      dLambda_T_eq_ = Eigen::VectorXd::Zero(0);
     }
 
-    return false;
-  }
+    if (!has_path_constraints && !has_terminal_ineq && !has_terminal_eq)
+    {
+      k_lambda_.back() = V_x;
+      K_lambda_.back() = V_xx;
+      for (int t = horizon - 1; t >= 0; --t)
+      {
+        const Eigen::VectorXd &x = context.X_[t];
+        const Eigen::VectorXd &u = context.U_[t];
 
-  void IPDDPSolver::postIterationUpdate(CDDP &context, bool forward_pass_success)
-  {
-    updateBarrierParameters(context, forward_pass_success);
-  }
+        const Eigen::MatrixXd &A = F_x_[t];
+        const Eigen::MatrixXd &B = F_u_[t];
 
-  void IPDDPSolver::recordIterationHistory(const CDDP &context)
-  {
-    CDDPSolverBase::recordIterationHistory(context);
-    history_.barrier_mu.push_back(mu_);
-  }
+        auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
+        auto [l_xx, l_uu, l_ux] =
+            context.getObjective().getRunningCostHessians(x, u, t);
 
-  void IPDDPSolver::populateSolverSpecificSolution(CDDPSolution &solution,
-                                                    const CDDP &context)
-  {
-    solution.final_barrier_mu = mu_;
-    solution.final_primal_infeasibility = context.inf_pr_;
-    solution.final_dual_infeasibility = context.inf_du_;
-    solution.final_complementary_infeasibility = context.inf_comp_;
-  }
+        Eigen::VectorXd Q_x = l_x + A.transpose() * V_x;
+        Eigen::VectorXd Q_u = l_u + B.transpose() * V_x;
+        Eigen::MatrixXd Q_xx = l_xx + A.transpose() * V_xx * A;
+        Eigen::MatrixXd Q_ux = l_ux + B.transpose() * V_xx * A;
+        Eigen::MatrixXd Q_uu = l_uu + B.transpose() * V_xx * B;
 
-  void IPDDPSolver::printIteration(int iter, const CDDP &context) const
-  {
-    printIterationLegacy(iter, context.cost_, context.inf_pr_, context.inf_du_,
-                         context.inf_comp_, mu_, context.step_norm_,
-                         context.regularization_, context.alpha_du_,
-                         context.alpha_pr_);
-  }
+        if (!options.use_ilqr)
+        {
+          const auto &Fxx = F_xx_[t];
+          const auto &Fuu = F_uu_[t];
+          const auto &Fux = F_ux_[t];
 
-  // === Private methods (unchanged from original) ===
+          for (int i = 0; i < state_dim; ++i)
+          {
+            Q_xx += V_x(i) * Fxx[i];
+            Q_ux += V_x(i) * Fux[i];
+            Q_uu += V_x(i) * Fuu[i];
+          }
+        }
 
-  void IPDDPSolver::evaluateTrajectory(CDDP &context)
-  {
-    const int horizon = context.getHorizon();
-    double cost = 0.0;
+        Q_uu = symmetrizeMatrix(Q_uu);
+        Q_uu.diagonal().array() += context.regularization_;
 
-    context.X_[0] = context.getInitialState();
+        Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_uu);
+        if (ldlt.info() != Eigen::Success)
+        {
+          return false;
+        }
 
-    for (int t = 0; t < horizon; ++t)
-    {
-      const Eigen::VectorXd &x = context.X_[t];
-      const Eigen::VectorXd &u = context.U_[t];
+        Eigen::VectorXd k_u = -ldlt.solve(Q_u);
+        Eigen::MatrixXd K_u = -ldlt.solve(Q_ux);
+        k_u_[t] = k_u;
+        K_u_[t] = K_u;
 
-      cost += context.getObjective().running_cost(x, u, t);
+        V_x = Q_x + K_u.transpose() * Q_u + Q_ux.transpose() * k_u +
+              K_u.transpose() * Q_uu * k_u;
+        V_xx = Q_xx + K_u.transpose() * Q_ux + Q_ux.transpose() * K_u +
+               K_u.transpose() * Q_uu * K_u;
+        V_xx = symmetrizeMatrix(V_xx);
+        k_lambda_[t] = V_x;
+        K_lambda_[t] = V_xx;
 
-      const auto &constraint_set = context.getConstraintSet();
-      for (const auto &constraint_pair : constraint_set)
-      {
-        const std::string &constraint_name = constraint_pair.first;
-        Eigen::VectorXd g_val = constraint_pair.second->evaluate(x, u) -
-                                constraint_pair.second->getUpperBound();
-        G_[constraint_name][t] = g_val;
+        dV_[0] += k_u.dot(Q_u);
+        dV_[1] += 0.5 * k_u.dot(Q_uu * k_u);
+
+        inf_du = std::max(inf_du, Q_u.lpNorm<Eigen::Infinity>());
+        step_norm = std::max(step_norm, k_u.lpNorm<Eigen::Infinity>());
       }
 
-      context.X_[t + 1] = context.getSystem().getDiscreteDynamics(
-          x, u, t * context.getTimestep());
+      context.inf_du_ = inf_du;
+      context.step_norm_ = step_norm;
+      context.inf_pr_ = 0.0;
+      context.inf_comp_ = 0.0;
+      return true;
     }
 
-    cost += context.getObjective().terminal_cost(context.X_.back());
-    context.cost_ = cost;
-  }
-
-  void IPDDPSolver::evaluateTrajectoryWarmStart(CDDP &context)
-  {
-    const int horizon = context.getHorizon();
-    double cost = 0.0;
-
-    const auto &constraint_set = context.getConstraintSet();
-    for (const auto &constraint_pair : constraint_set)
+    if (has_terminal_eq)
     {
-      const std::string &constraint_name = constraint_pair.first;
-      G_[constraint_name].resize(horizon);
-    }
+      std::vector<Eigen::MatrixXd> Q(horizon + 1,
+                                     Eigen::MatrixXd::Zero(state_dim, state_dim));
+      std::vector<Eigen::VectorXd> q(horizon + 1,
+                                     Eigen::VectorXd::Zero(state_dim));
+      std::vector<Eigen::MatrixXd> R(horizon,
+                                     Eigen::MatrixXd::Zero(control_dim, control_dim));
+      std::vector<Eigen::VectorXd> r(horizon,
+                                     Eigen::VectorXd::Zero(control_dim));
+      std::vector<Eigen::MatrixXd> M(horizon,
+                                     Eigen::MatrixXd::Zero(state_dim, control_dim));
+      std::vector<Eigen::MatrixXd> A_vec(horizon,
+                                         Eigen::MatrixXd::Zero(state_dim, state_dim));
+      std::vector<Eigen::MatrixXd> B_vec(horizon,
+                                         Eigen::MatrixXd::Zero(state_dim, control_dim));
+      std::vector<Eigen::VectorXd> d_vec(horizon,
+                                         Eigen::VectorXd::Zero(state_dim));
+      std::vector<CondensedPathConstraintModel> path_models(horizon);
+
+      Q.back() = V_xx;
+      q.back() = V_x;
 
-    for (int t = 0; t < horizon; ++t)
-    {
-      const Eigen::VectorXd &x = context.X_[t];
-      const Eigen::VectorXd &u = context.U_[t];
+      for (int t = 0; t < horizon; ++t)
+      {
+        const Eigen::VectorXd &x = context.X_[t];
+        const Eigen::VectorXd &u = context.U_[t];
+        auto [l_x, l_u] =
+            context.getObjective().getRunningCostGradients(x, u, t);
+        auto [l_xx, l_uu, l_ux] =
+            context.getObjective().getRunningCostHessians(x, u, t);
 
-      cost += context.getObjective().running_cost(x, u, t);
+        Q[t] = symmetrizeMatrix(l_xx);
+        q[t] = l_x;
+        R[t] = symmetrizeMatrix(l_uu);
+        r[t] = l_u;
+        M[t] = l_ux.transpose();
+        A_vec[t] = F_x_[t];
+        B_vec[t] = F_u_[t];
 
-      for (const auto &constraint_pair : constraint_set)
-      {
-        const std::string &constraint_name = constraint_pair.first;
-        Eigen::VectorXd g_val = constraint_pair.second->evaluate(x, u) -
-                                constraint_pair.second->getUpperBound();
-        G_[constraint_name][t] = g_val;
-      }
-    }
+        if (!options.use_ilqr)
+        {
+          const Eigen::VectorXd lambda_next =
+              (Lambda_.size() == static_cast<size_t>(horizon + 1) &&
+               Lambda_[t + 1].size() == state_dim && Lambda_[t + 1].allFinite())
+                  ? Lambda_[t + 1]
+                  : Eigen::VectorXd::Zero(state_dim);
+
+          // Use the current costate iterate as the local value-gradient proxy
+          // when condensing a terminal-equality solve into a single reduced LQR.
+          for (int i = 0; i < state_dim; ++i)
+          {
+            Q[t] += lambda_next(i) * F_xx_[t][i];
+            M[t] += lambda_next(i) * F_ux_[t][i].transpose();
+            R[t] += lambda_next(i) * F_uu_[t][i];
+          }
+          Q[t] = symmetrizeMatrix(Q[t]);
+          R[t] = symmetrizeMatrix(R[t]);
+        }
 
-    cost += context.getObjective().terminal_cost(context.X_.back());
-    context.cost_ = cost;
-  }
+        if (has_path_constraints)
+        {
+          Eigen::VectorXd y = Eigen::VectorXd::Zero(total_dual_dim);
+          Eigen::VectorXd s = Eigen::VectorXd::Zero(total_dual_dim);
+          Eigen::VectorXd g = Eigen::VectorXd::Zero(total_dual_dim);
+          Eigen::MatrixXd Q_yx = Eigen::MatrixXd::Zero(total_dual_dim, state_dim);
+          Eigen::MatrixXd Q_yu = Eigen::MatrixXd::Zero(total_dual_dim, control_dim);
+
+          int offset = 0;
+          for (const auto &constraint_pair : constraint_set)
+          {
+            const std::string &constraint_name = constraint_pair.first;
+            const int dual_dim = constraint_pair.second->getDualDim();
+            y.segment(offset, dual_dim) = Y_[constraint_name][t];
+            s.segment(offset, dual_dim) = S_[constraint_name][t];
+            g.segment(offset, dual_dim) = G_[constraint_name][t];
+            Q_yx.block(offset, 0, dual_dim, state_dim) = G_x_[constraint_name][t];
+            Q_yu.block(offset, 0, dual_dim, control_dim) = G_u_[constraint_name][t];
+            offset += dual_dim;
+          }
 
-  void IPDDPSolver::initializeDualSlackVariablesWarmStart(CDDP &context)
-  {
-    const CDDPOptions &options = context.getOptions();
-    const int horizon = context.getHorizon();
-    const auto &constraint_set = context.getConstraintSet();
+          Eigen::MatrixXd YSinv =
+              Eigen::MatrixXd::Zero(total_dual_dim, total_dual_dim);
+          for (int i = 0; i < total_dual_dim; ++i)
+          {
+            const double s_safe =
+                std::max(s(i), std::max(mu_ * 1e-3, EPS_SLACK));
+            YSinv(i, i) = clipPositiveBarrierRatio(y(i), s_safe);
+          }
 
-    bool has_existing_dual_slack = true;
-    for (const auto &constraint_pair : constraint_set)
-    {
-      const std::string &constraint_name = constraint_pair.first;
-      if (Y_.find(constraint_name) == Y_.end() ||
-          S_.find(constraint_name) == S_.end() ||
-          Y_[constraint_name].size() != static_cast<size_t>(horizon) ||
-          S_[constraint_name].size() != static_cast<size_t>(horizon))
-      {
-        has_existing_dual_slack = false;
-        break;
-      }
-    }
+          const Eigen::VectorXd primal_residual = g + s;
+          const Eigen::VectorXd complementary_residual =
+              y.cwiseProduct(s).array() - mu_;
+          const Eigen::VectorXd rhat =
+              y.cwiseProduct(primal_residual) - complementary_residual;
+          Eigen::VectorXd S_inv_rhat(total_dual_dim);
+          for (int i = 0; i < total_dual_dim; ++i)
+          {
+            const double s_safe =
+                std::max(s(i), std::max(mu_ * 1e-3, EPS_SLACK));
+            S_inv_rhat(i) = clipSignedBarrierRatio(rhat(i), s_safe);
+          }
 
-    k_y_.clear();
-    K_y_.clear();
-    k_s_.clear();
-    K_s_.clear();
+          q[t].noalias() += Q_yx.transpose() * (y + S_inv_rhat);
+          r[t].noalias() += Q_yu.transpose() * (y + S_inv_rhat);
+          Q[t].noalias() += Q_yx.transpose() * YSinv * Q_yx;
+          M[t].noalias() += (Q_yu.transpose() * YSinv * Q_yx).transpose();
+          R[t].noalias() += Q_yu.transpose() * YSinv * Q_yu;
+          Q[t] = symmetrizeMatrix(Q[t]);
+          R[t] = symmetrizeMatrix(R[t]);
+
+          path_models[t].y = y;
+          path_models[t].s = s;
+          path_models[t].Q_yx = Q_yx;
+          path_models[t].Q_yu = Q_yu;
+          path_models[t].YSinv = YSinv;
+          path_models[t].primal_residual = primal_residual;
+          path_models[t].rhat = rhat;
 
-    for (const auto &constraint_pair : constraint_set)
-    {
-      const std::string &constraint_name = constraint_pair.first;
-      int dual_dim = constraint_pair.second->getDualDim();
+          inf_pr = std::max(inf_pr, primal_residual.lpNorm<Eigen::Infinity>());
+          inf_comp = std::max(
+              inf_comp, complementary_residual.lpNorm<Eigen::Infinity>());
+        }
 
-      if (!has_existing_dual_slack)
-      {
-        Y_[constraint_name].resize(horizon);
-        S_[constraint_name].resize(horizon);
+        R[t].diagonal().array() += context.regularization_;
       }
 
-      k_y_[constraint_name].resize(horizon);
-      K_y_[constraint_name].resize(horizon);
-      k_s_[constraint_name].resize(horizon);
-      K_s_[constraint_name].resize(horizon);
+      Eigen::VectorXd lambda_total;
+      Eigen::VectorXd lambda_delta;
+      if (!solveTerminalEqualityLQR(
+              Q, q, R, r, M, A_vec, B_vec, d_vec,
+              Eigen::VectorXd::Zero(state_dim), H_T, -h_T, mu_,
+              options.ipddp.jacobian_regularization_value,
+              options.ipddp.jacobian_regularization_exponent, &Lambda_T_eq_,
+              K_u_, k_u_, K_lambda_, k_lambda_, lambda_total, lambda_delta))
+      {
+        return false;
+      }
 
+      dLambda_T_eq_ = lambda_delta;
       for (int t = 0; t < horizon; ++t)
       {
-        const Eigen::VectorXd &g_val = G_[constraint_name][t];
+        const Eigen::VectorXd Q_u =
+            r[t] + B_vec[t].transpose() * k_lambda_[t + 1];
+        inf_du = std::max(inf_du, Q_u.lpNorm<Eigen::Infinity>());
+        step_norm = std::max(step_norm, k_u_[t].lpNorm<Eigen::Infinity>());
+      }
 
-        bool need_reinit = false;
-        Eigen::VectorXd y_current, s_current;
+      rolloutLinearPolicy(A_vec, B_vec, d_vec, K_u_, k_u_, dX_, dU_);
 
-        if (has_existing_dual_slack)
+      if (has_path_constraints)
+      {
+        for (int t = 0; t < horizon; ++t)
         {
-          y_current = Y_[constraint_name][t];
-          s_current = S_[constraint_name][t];
-
-          if (y_current.size() != dual_dim || s_current.size() != dual_dim)
+          const auto &model = path_models[t];
+          const Eigen::VectorXd temp = model.Q_yu * k_u_[t];
+          Eigen::VectorXd k_y(total_dual_dim);
+          for (int i = 0; i < total_dual_dim; ++i)
           {
-            need_reinit = true;
+            const double s_safe =
+                std::max(model.s(i), std::max(mu_ * 1e-3, EPS_SLACK));
+            k_y(i) =
+                clipSignedBarrierRatio(model.rhat(i) + model.y(i) * temp(i),
+                                       s_safe);
           }
-          else
+          Eigen::MatrixXd K_y =
+              (model.YSinv * (model.Q_yx + model.Q_yu * K_u_[t]))
+                  .cwiseMax(-MAX_BARRIER_RATIO)
+                  .cwiseMin(MAX_BARRIER_RATIO);
+          const Eigen::VectorXd k_s = -model.primal_residual - temp;
+          const Eigen::MatrixXd K_s = -model.Q_yx - model.Q_yu * K_u_[t];
+
+          int offset = 0;
+          for (const auto &constraint_pair : constraint_set)
           {
-            for (int i = 0; i < dual_dim; ++i)
-            {
-              if (y_current(i) <= 1e-12 || s_current(i) <= 1e-12)
-              {
-                need_reinit = true;
-                break;
-              }
-
-              double required_slack =
-                  std::max(options.ipddp.slack_var_init_scale, -g_val(i));
-              if (s_current(i) < 0.1 * required_slack)
-              {
-                need_reinit = true;
-                break;
-              }
-            }
+            const std::string &constraint_name = constraint_pair.first;
+            const int dual_dim = constraint_pair.second->getDualDim();
+            k_y_[constraint_name][t] = k_y.segment(offset, dual_dim);
+            K_y_[constraint_name][t] =
+                K_y.block(offset, 0, dual_dim, state_dim);
+            k_s_[constraint_name][t] = k_s.segment(offset, dual_dim);
+            K_s_[constraint_name][t] =
+                K_s.block(offset, 0, dual_dim, state_dim);
+            dS_[constraint_name][t] =
+                k_s_[constraint_name][t] + K_s_[constraint_name][t] * dX_[t];
+            dY_[constraint_name][t] =
+                (k_y_[constraint_name][t] +
+                 K_y_[constraint_name][t] * dX_[t])
+                    .cwiseMax(-MAX_BARRIER_RATIO)
+                    .cwiseMin(MAX_BARRIER_RATIO);
+            offset += dual_dim;
           }
         }
-        else
-        {
-          need_reinit = true;
-        }
+      }
 
-        if (need_reinit)
+      if (has_terminal_ineq)
+      {
+        auto G_T_x =
+            evaluateTerminalInequalityJacobianMap(context, context.X_.back());
+        auto G_T_eval =
+            evaluateTerminalInequalityResidualMap(context, context.X_.back());
+        for (const auto &entry : getTerminalInequalityLayout(context))
         {
-          Eigen::VectorXd s_init = Eigen::VectorXd::Zero(dual_dim);
-          Eigen::VectorXd y_init = Eigen::VectorXd::Zero(dual_dim);
-
-          for (int i = 0; i < dual_dim; ++i)
+          const Eigen::VectorXd &g_T = G_T_eval.at(entry.name);
+          const Eigen::MatrixXd &Gtx = G_T_x.at(entry.name);
+          const Eigen::VectorXd &S_T = S_T_.at(entry.name);
+          const Eigen::VectorXd &Y_T = Y_T_.at(entry.name);
+          const Eigen::VectorXd r_p_T = g_T + S_T;
+          const Eigen::VectorXd r_d_T = S_T.cwiseProduct(Y_T).array() - mu_;
+          dS_T_[entry.name] = -r_p_T - Gtx * dX_.back();
+          Eigen::VectorXd dY_T = Eigen::VectorXd::Zero(entry.dim);
+          for (int i = 0; i < entry.dim; ++i)
           {
-            s_init(i) = std::max(options.ipddp.slack_var_init_scale, -g_val(i));
-
-            if (s_init(i) < 1e-12)
-            {
-              y_init(i) = mu_ / 1e-12;
-            }
-            else
-            {
-              y_init(i) = mu_ / s_init(i);
-            }
-            y_init(i) = std::max(
-                options.ipddp.dual_var_init_scale * 0.01,
-                std::min(y_init(i), options.ipddp.dual_var_init_scale * 100.0));
+            const double s_safe =
+                std::max(S_T(i), std::max(mu_ * 1e-3, EPS_SLACK));
+            const double dual_ratio =
+                std::clamp(Y_T(i) / s_safe, 0.0, MAX_BARRIER_RATIO);
+            const double affine =
+                std::clamp(-r_d_T(i) / s_safe, -MAX_BARRIER_RATIO,
+                           MAX_BARRIER_RATIO);
+            dY_T(i) = std::clamp(
+                affine - dual_ratio * dS_T_[entry.name](i),
+                -MAX_BARRIER_RATIO, MAX_BARRIER_RATIO);
           }
-          Y_[constraint_name][t] = y_init;
-          S_[constraint_name][t] = s_init;
+          dY_T_[entry.name] = dY_T;
         }
+      }
 
-        k_y_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
-        K_y_[constraint_name][t] =
-            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
-        k_s_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
-        K_s_[constraint_name][t] =
-            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
-      }
+      context.inf_pr_ = inf_pr;
+      context.inf_du_ = inf_du;
+      context.inf_comp_ = inf_comp;
+      context.step_norm_ = step_norm;
+      return true;
     }
 
-    if (options.verbose)
+    k_lambda_.back() = V_x;
+    K_lambda_.back() = V_xx;
+    for (int t = horizon - 1; t >= 0; --t)
     {
-      std::cout << "IPDDP: " << (has_existing_dual_slack ? "Preserved" : "Initialized")
-                << " dual/slack variables, μ = " << std::scientific << std::setprecision(2)
-                << mu_ << ", max violation = " << computeMaxConstraintViolation(context) << std::endl;
-    }
-  }
-
-  void IPDDPSolver::initializeDualSlackVariables(CDDP &context)
-  {
-    const CDDPOptions &options = context.getOptions();
-    const int horizon = context.getHorizon();
-    const auto &constraint_set = context.getConstraintSet();
+      const Eigen::VectorXd &x = context.X_[t];
+      const Eigen::VectorXd &u = context.U_[t];
 
-    for (const auto &constraint_pair : constraint_set)
-    {
-      const std::string &constraint_name = constraint_pair.first;
-      int dual_dim = constraint_pair.second->getDualDim();
+      const Eigen::MatrixXd &A = F_x_[t];
+      const Eigen::MatrixXd &B = F_u_[t];
 
-      G_[constraint_name].resize(horizon);
-      Y_[constraint_name].resize(horizon);
-      S_[constraint_name].resize(horizon);
-      k_y_[constraint_name].resize(horizon);
-      K_y_[constraint_name].resize(horizon);
-      k_s_[constraint_name].resize(horizon);
-      K_s_[constraint_name].resize(horizon);
+      Eigen::VectorXd y = Eigen::VectorXd::Zero(total_dual_dim);
+      Eigen::VectorXd s = Eigen::VectorXd::Zero(total_dual_dim);
+      Eigen::VectorXd g = Eigen::VectorXd::Zero(total_dual_dim);
+      Eigen::MatrixXd Q_yx = Eigen::MatrixXd::Zero(total_dual_dim, state_dim);
+      Eigen::MatrixXd Q_yu = Eigen::MatrixXd::Zero(total_dual_dim, control_dim);
 
-      for (int t = 0; t < horizon; ++t)
+      int offset = 0;
+      for (const auto &constraint_pair : constraint_set)
       {
-        Eigen::VectorXd g_val =
-            constraint_pair.second->evaluate(context.X_[t], context.U_[t]) -
-            constraint_pair.second->getUpperBound();
-        G_[constraint_name][t] = g_val;
+        const std::string &constraint_name = constraint_pair.first;
+        int dual_dim = constraint_pair.second->getDualDim();
 
-        Eigen::VectorXd s_init = Eigen::VectorXd::Zero(dual_dim);
-        Eigen::VectorXd y_init = Eigen::VectorXd::Zero(dual_dim);
+        y.segment(offset, dual_dim) = Y_[constraint_name][t];
+        s.segment(offset, dual_dim) = S_[constraint_name][t];
+        g.segment(offset, dual_dim) = G_[constraint_name][t];
+        Q_yx.block(offset, 0, dual_dim, state_dim) = G_x_[constraint_name][t];
+        Q_yu.block(offset, 0, dual_dim, control_dim) = G_u_[constraint_name][t];
 
-        for (int i = 0; i < dual_dim; ++i)
-        {
-          s_init(i) = std::max(options.ipddp.slack_var_init_scale, -g_val(i));
+        offset += dual_dim;
+      }
 
-          if (s_init(i) < 1e-12)
-          {
-            y_init(i) = mu_ / 1e-12;
-          }
-          else
-          {
-            y_init(i) = mu_ / s_init(i);
-          }
-          y_init(i) = std::max(
-              options.ipddp.dual_var_init_scale * 0.01,
-              std::min(y_init(i), options.ipddp.dual_var_init_scale * 100.0));
-        }
-        Y_[constraint_name][t] = y_init;
-        S_[constraint_name][t] = s_init;
+      auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
+      auto [l_xx, l_uu, l_ux] =
+          context.getObjective().getRunningCostHessians(x, u, t);
 
-        k_y_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
-        K_y_[constraint_name][t] =
-            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
-        k_s_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
-        K_s_[constraint_name][t] =
-            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
+      Eigen::VectorXd Q_x = l_x + Q_yx.transpose() * y + A.transpose() * V_x;
+      Eigen::VectorXd Q_u = l_u + Q_yu.transpose() * y + B.transpose() * V_x;
+      Eigen::MatrixXd Q_xx = l_xx + A.transpose() * V_xx * A;
+      Eigen::MatrixXd Q_ux = l_ux + B.transpose() * V_xx * A;
+      Eigen::MatrixXd Q_uu = l_uu + B.transpose() * V_xx * B;
+
+      if (!options.use_ilqr)
+      {
+        const auto &Fxx = F_xx_[t];
+        const auto &Fuu = F_uu_[t];
+        const auto &Fux = F_ux_[t];
+
+        for (int i = 0; i < state_dim; ++i)
+        {
+          Q_xx += V_x(i) * Fxx[i];
+          Q_ux += V_x(i) * Fux[i];
+          Q_uu += V_x(i) * Fuu[i];
+        }
       }
-    }
 
-    context.cost_ = context.getObjective().evaluate(context.X_, context.U_);
-  }
+      Eigen::MatrixXd YSinv = Eigen::MatrixXd::Zero(total_dual_dim, total_dual_dim);
+      for (int i = 0; i < total_dual_dim; ++i)
+      {
+        const double s_safe =
+            std::max(s(i), std::max(mu_ * 1e-3, EPS_SLACK));
+        YSinv(i, i) = clipPositiveBarrierRatio(y(i), s_safe);
+      }
 
-  void IPDDPSolver::resetBarrierFilter(CDDP &context)
-  {
-    double merit_function = context.cost_;
-    double inf_pr = 0.0;
-    double filter_constraint_violation = 0.0;
-    double inf_du = 0.0;
-    double inf_comp = 0.0;
+      const Eigen::VectorXd primal_residual = g + s;
+      const Eigen::VectorXd complementary_residual =
+          y.cwiseProduct(s).array() - mu_;
+      const Eigen::VectorXd rhat =
+          y.cwiseProduct(primal_residual) - complementary_residual;
 
-    const auto &constraint_set = context.getConstraintSet();
+      Eigen::MatrixXd Q_uu_reg = symmetrizeMatrix(Q_uu);
+      Q_uu_reg.noalias() += Q_yu.transpose() * YSinv * Q_yu;
+      Q_uu_reg.diagonal().array() += context.regularization_;
 
-    if (!constraint_set.empty())
-    {
-      for (int t = 0; t < context.getHorizon(); ++t)
+      Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_uu_reg);
+      if (ldlt.info() != Eigen::Success)
       {
-        for (const auto &constraint_pair : constraint_set)
-        {
-          const std::string &constraint_name = constraint_pair.first;
-          const Eigen::VectorXd &s_vec = S_[constraint_name][t];
-          const Eigen::VectorXd &g_vec = G_[constraint_name][t];
-          const Eigen::VectorXd &y_vec = Y_[constraint_name][t];
+        return false;
+      }
 
-          merit_function -= mu_ * s_vec.array().log().sum();
+      Eigen::VectorXd S_inv_rhat(total_dual_dim);
+      for (int i = 0; i < total_dual_dim; ++i)
+      {
+        const double s_safe =
+            std::max(s(i), std::max(mu_ * 1e-3, EPS_SLACK));
+        S_inv_rhat(i) = clipSignedBarrierRatio(rhat(i), s_safe);
+      }
+      Eigen::MatrixXd bigRHS(control_dim, 1 + state_dim);
+      bigRHS.col(0).noalias() = Q_u + Q_yu.transpose() * S_inv_rhat;
+      bigRHS.rightCols(state_dim).noalias() =
+          Q_ux + Q_yu.transpose() * YSinv * Q_yx;
 
-          Eigen::VectorXd primal_residual = g_vec + s_vec;
-          inf_pr = std::max(inf_pr, primal_residual.lpNorm<Eigen::Infinity>());
-          filter_constraint_violation += primal_residual.lpNorm<1>();
+      Eigen::MatrixXd kK = -ldlt.solve(bigRHS);
 
-          Eigen::VectorXd complementary_residual = y_vec.cwiseProduct(s_vec).array() - mu_;
-          inf_comp = std::max(inf_comp, complementary_residual.lpNorm<Eigen::Infinity>());
-        }
+      Eigen::VectorXd k_u = kK.col(0);
+      Eigen::MatrixXd K_u(control_dim, state_dim);
+      for (int col = 0; col < state_dim; ++col)
+      {
+        K_u.col(col) = kK.col(col + 1);
       }
-    }
-    else
-    {
-      inf_pr = 0.0;
-      filter_constraint_violation = 0.0;
-      inf_du = 0.0;
-      inf_comp = 0.0;
-    }
-
-    context.merit_function_ = merit_function;
-    context.inf_pr_ = inf_pr;
-    context.inf_comp_ = inf_comp;
 
-    filter_.clear();
-    filter_.push_back(FilterPoint(merit_function, filter_constraint_violation));
-  }
+      k_u_[t] = k_u;
+      K_u_[t] = K_u;
 
-  void IPDDPSolver::resetFilter(CDDP &context) { resetBarrierFilter(context); }
+      Eigen::VectorXd k_y(total_dual_dim);
+      const Eigen::VectorXd temp = Q_yu * k_u;
+      for (int i = 0; i < total_dual_dim; ++i)
+      {
+        const double s_safe =
+            std::max(s(i), std::max(mu_ * 1e-3, EPS_SLACK));
+        k_y(i) =
+            clipSignedBarrierRatio(rhat(i) + y(i) * temp(i), s_safe);
+      }
+      Eigen::MatrixXd K_y =
+          (YSinv * (Q_yx + Q_yu * K_u))
+              .cwiseMax(-MAX_BARRIER_RATIO)
+              .cwiseMin(MAX_BARRIER_RATIO);
+      const Eigen::VectorXd k_s = -primal_residual - temp;
+      const Eigen::MatrixXd K_s = -Q_yx - Q_yu * K_u;
+
+      offset = 0;
+      for (const auto &constraint_pair : constraint_set)
+      {
+        const std::string &constraint_name = constraint_pair.first;
+        int dual_dim = constraint_pair.second->getDualDim();
 
-  void IPDDPSolver::precomputeDynamicsDerivatives(CDDP &context)
-  {
-    const CDDPOptions &options = context.getOptions();
-    const int horizon = context.getHorizon();
-    const int state_dim = context.getStateDim();
-    const double timestep = context.getTimestep();
+        k_y_[constraint_name][t] = k_y.segment(offset, dual_dim);
+        K_y_[constraint_name][t] = K_y.block(offset, 0, dual_dim, state_dim);
+        k_s_[constraint_name][t] = k_s.segment(offset, dual_dim);
+        K_s_[constraint_name][t] = K_s.block(offset, 0, dual_dim, state_dim);
 
-    F_x_.resize(horizon);
-    F_u_.resize(horizon);
-    F_xx_.resize(horizon);
-    F_uu_.resize(horizon);
-    F_ux_.resize(horizon);
+        offset += dual_dim;
+      }
 
-    const int MIN_HORIZON_FOR_PARALLEL = 50;
-    const bool use_parallel = options.enable_parallel && horizon >= MIN_HORIZON_FOR_PARALLEL;
+      Q_u.noalias() += Q_yu.transpose() * S_inv_rhat;
+      Q_x.noalias() += Q_yx.transpose() * S_inv_rhat;
+      Q_xx.noalias() += Q_yx.transpose() * YSinv * Q_yx;
+      Q_ux.noalias() += Q_yu.transpose() * YSinv * Q_yx;
+      Q_uu.noalias() += Q_yu.transpose() * YSinv * Q_yu;
+
+      dV_[0] += k_u.dot(Q_u);
+      dV_[1] += 0.5 * k_u.dot(Q_uu * k_u);
+
+      V_x = Q_x + K_u.transpose() * Q_u + Q_ux.transpose() * k_u +
+            K_u.transpose() * Q_uu * k_u;
+      V_xx = Q_xx + K_u.transpose() * Q_ux + Q_ux.transpose() * K_u +
+             K_u.transpose() * Q_uu * K_u;
+      V_xx = symmetrizeMatrix(V_xx);
+      k_lambda_[t] = V_x;
+      K_lambda_[t] = V_xx;
+
+      inf_du = std::max(inf_du, Q_u.lpNorm<Eigen::Infinity>());
+      inf_pr = std::max(inf_pr, primal_residual.lpNorm<Eigen::Infinity>());
+      inf_comp = std::max(inf_comp, complementary_residual.lpNorm<Eigen::Infinity>());
+      step_norm = std::max(step_norm, k_u.lpNorm<Eigen::Infinity>());
+    }
 
-    if (!use_parallel)
     {
+      std::vector<Eigen::MatrixXd> A_vec(horizon);
+      std::vector<Eigen::MatrixXd> B_vec(horizon);
+      std::vector<Eigen::VectorXd> d_vec(horizon, Eigen::VectorXd::Zero(state_dim));
       for (int t = 0; t < horizon; ++t)
       {
-        const Eigen::VectorXd &x = context.X_[t];
-        const Eigen::VectorXd &u = context.U_[t];
-
-        const auto [Fx, Fu] =
-            context.getSystem().getJacobians(x, u, t * timestep);
-        F_x_[t] = Fx;
-        F_u_[t] = Fu;
+        A_vec[t] = F_x_[t];
+        B_vec[t] = F_u_[t];
+      }
+      rolloutLinearPolicy(A_vec, B_vec, d_vec, K_u_, k_u_, dX_, dU_);
 
-        if (!options.use_ilqr)
+      for (const auto &constraint_pair : constraint_set)
+      {
+        const std::string &name = constraint_pair.first;
+        for (int t = 0; t < horizon; ++t)
         {
-          const auto hessians =
-              context.getSystem().getHessians(x, u, t * timestep);
-          F_xx_[t] = std::get<0>(hessians);
-          F_uu_[t] = std::get<1>(hessians);
-          F_ux_[t] = std::get<2>(hessians);
+          dS_[name][t] = k_s_[name][t] + K_s_[name][t] * dX_[t];
+          dY_[name][t] = (k_y_[name][t] + K_y_[name][t] * dX_[t])
+                             .cwiseMax(-MAX_BARRIER_RATIO)
+                             .cwiseMin(MAX_BARRIER_RATIO);
         }
-        else
+      }
+
+      if (has_terminal_ineq)
+      {
+        auto G_T_x = evaluateTerminalInequalityJacobianMap(context, context.X_.back());
+        auto G_T_eval = evaluateTerminalInequalityResidualMap(context, context.X_.back());
+        for (const auto &entry : getTerminalInequalityLayout(context))
         {
-          F_xx_[t] = std::vector<Eigen::MatrixXd>();
-          F_uu_[t] = std::vector<Eigen::MatrixXd>();
-          F_ux_[t] = std::vector<Eigen::MatrixXd>();
+          const Eigen::VectorXd &g_T = G_T_eval.at(entry.name);
+          const Eigen::MatrixXd &Gtx = G_T_x.at(entry.name);
+          const Eigen::VectorXd &S_T = S_T_.at(entry.name);
+          const Eigen::VectorXd &Y_T = Y_T_.at(entry.name);
+          const Eigen::VectorXd r_p_T = g_T + S_T;
+          const Eigen::VectorXd r_d_T = S_T.cwiseProduct(Y_T).array() - mu_;
+          dS_T_[entry.name] = -r_p_T - Gtx * dX_.back();
+          Eigen::VectorXd dY_T = Eigen::VectorXd::Zero(entry.dim);
+          for (int i = 0; i < entry.dim; ++i)
+          {
+            const double s_safe = std::max(S_T(i), std::max(mu_ * 1e-3, EPS_SLACK));
+            const double dual_ratio =
+                std::clamp(Y_T(i) / s_safe, 0.0, MAX_BARRIER_RATIO);
+            const double affine =
+                std::clamp(-r_d_T(i) / s_safe, -MAX_BARRIER_RATIO, MAX_BARRIER_RATIO);
+            dY_T(i) = std::clamp(
+                affine - dual_ratio * dS_T_[entry.name](i),
+                -MAX_BARRIER_RATIO, MAX_BARRIER_RATIO);
+          }
+          dY_T_[entry.name] = dY_T;
         }
       }
     }
-    else
-    {
-      const int num_threads = std::min(options.num_threads,
-                                       static_cast<int>(std::thread::hardware_concurrency()));
-      const int chunk_size = std::max(1, horizon / num_threads);
 
-      std::vector<std::future<void>> futures;
-      futures.reserve(num_threads);
+    context.inf_pr_ = inf_pr;
+    context.inf_du_ = inf_du;
+    context.inf_comp_ = inf_comp;
+    context.step_norm_ = step_norm;
+    return true;
+  }
 
-      for (int thread_id = 0; thread_id < num_threads; ++thread_id)
-      {
-        int start_t = thread_id * chunk_size;
-        int end_t = (thread_id == num_threads - 1) ? horizon : (thread_id + 1) * chunk_size;
+  ForwardPassResult IPDDPSolver::forwardPass(CDDP &context, double alpha)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const auto &constraint_set = context.getConstraintSet();
+    const bool has_terminal_ineq = !getTerminalInequalityLayout(context).empty();
+    const bool has_terminal_eq = getTerminalEqualityDim(context) > 0;
+    const auto [alpha_pr_max, alpha_du_max] = computeMaxStepSizes(context);
 
-        if (start_t >= horizon)
-          break;
+    ForwardPassResult result;
+    result.success = false;
+    result.cost = context.cost_;
+    result.merit_function = phi_;
+    result.theta = theta_;
+    const int horizon = context.getHorizon();
+    const double tau = (constraint_set.empty() && !has_terminal_ineq)
+                           ? 1.0
+                           : std::max(options.ipddp.barrier.min_fraction_to_boundary,
+                                      1.0 - mu_);
+    const double alpha_pr = std::min(alpha, alpha_pr_max);
+    const double alpha_du = std::min(alpha, alpha_du_max);
+    result.alpha_pr = alpha_pr;
+    result.alpha_du = alpha_du;
+
+    result.state_trajectory.assign(horizon + 1, Eigen::VectorXd::Zero(context.getStateDim()));
+    result.control_trajectory.assign(horizon, Eigen::VectorXd::Zero(context.getControlDim()));
+    result.state_trajectory[0] = context.getInitialState();
 
-        futures.push_back(std::async(std::launch::async,
-                                     [this, &context, &options, start_t, end_t, timestep]()
-                                     {
-            for (int t = start_t; t < end_t; ++t) {
-              const Eigen::VectorXd &x = context.X_[t];
-              const Eigen::VectorXd &u = context.U_[t];
+    std::vector<Eigen::VectorXd> dx_real(horizon + 1,
+                                         Eigen::VectorXd::Zero(context.getStateDim()));
+    std::vector<Eigen::VectorXd> Lambda_new = Lambda_;
+    std::map<std::string, std::vector<Eigen::VectorXd>> S_new = S_;
+    std::map<std::string, std::vector<Eigen::VectorXd>> Y_new = Y_;
+    std::map<std::string, std::vector<Eigen::VectorXd>> G_new;
+    std::map<std::string, Eigen::VectorXd> S_T_new = S_T_;
+    std::map<std::string, Eigen::VectorXd> Y_T_new = Y_T_;
+    std::map<std::string, Eigen::VectorXd> G_T_new = G_T_;
+    Eigen::VectorXd Lambda_T_eq_new = Lambda_T_eq_;
+    bool step_feasible = true;
 
-              const auto [Fx, Fu] =
-                  context.getSystem().getJacobians(x, u, t * timestep);
-              F_x_[t] = Fx;
-              F_u_[t] = Fu;
-
-              if (!options.use_ilqr) {
-                const auto hessians =
-                    context.getSystem().getHessians(x, u, t * timestep);
-                F_xx_[t] = std::get<0>(hessians);
-                F_uu_[t] = std::get<1>(hessians);
-                F_ux_[t] = std::get<2>(hessians);
-              } else {
-                F_xx_[t] = std::vector<Eigen::MatrixXd>();
-                F_uu_[t] = std::vector<Eigen::MatrixXd>();
-                F_ux_[t] = std::vector<Eigen::MatrixXd>();
-              }
-            } }));
+    for (int t = 0; t < horizon; ++t)
+    {
+      dx_real[t] = result.state_trajectory[t] - context.X_[t];
+      Lambda_new[t] =
+          Lambda_[t] + alpha_pr * k_lambda_[t] + K_lambda_[t] * dx_real[t];
+      if (!Lambda_new[t].allFinite())
+      {
+        return result;
       }
 
-      for (auto &future : futures)
+      for (const auto &constraint_pair : constraint_set)
       {
-        try
+        const std::string &name = constraint_pair.first;
+        Eigen::VectorXd s_new =
+            S_[name][t] + alpha_pr * k_s_[name][t] + K_s_[name][t] * dx_real[t];
+        Eigen::VectorXd s_min = (1.0 - tau) * S_[name][t];
+        Eigen::VectorXd y_new =
+            Y_[name][t] + alpha_du * k_y_[name][t] + K_y_[name][t] * dx_real[t];
+        Eigen::VectorXd y_min = (1.0 - tau) * Y_[name][t];
+        for (int i = 0; i < constraint_pair.second->getDualDim(); ++i)
         {
-          if (future.valid())
+          if (s_new(i) < s_min(i) || y_new(i) < y_min(i))
           {
-            future.get();
+            step_feasible = false;
+            break;
           }
         }
-        catch (const std::exception &e)
+        if (!step_feasible) break;
+        if (!s_new.allFinite() || !y_new.allFinite())
         {
-          if (options.verbose)
-          {
-            std::cerr << "IPDDP: Dynamics derivatives computation thread failed: "
-                      << e.what() << std::endl;
-          }
-          throw;
+          return result;
         }
+        S_new[name][t] = s_new;
+        Y_new[name][t] = y_new;
       }
-    }
-  }
+      if (!step_feasible) return result;
 
-  void IPDDPSolver::precomputeConstraintGradients(CDDP &context)
-  {
-    const CDDPOptions &options = context.getOptions();
-    const int horizon = context.getHorizon();
-    const auto &constraint_set = context.getConstraintSet();
+      result.control_trajectory[t] =
+          context.U_[t] + alpha_pr * k_u_[t] + K_u_[t] * dx_real[t];
+      result.state_trajectory[t + 1] = context.getSystem().getDiscreteDynamics(
+          result.state_trajectory[t], result.control_trajectory[t],
+          t * context.getTimestep());
+      if (!result.state_trajectory[t + 1].allFinite() ||
+          !result.control_trajectory[t].allFinite())
+      {
+        return result;
+      }
+    }
 
-    if (constraint_set.empty())
+    dx_real.back() = result.state_trajectory.back() - context.X_.back();
+    Lambda_new.back() =
+        Lambda_.back() + alpha_pr * k_lambda_.back() + K_lambda_.back() * dx_real.back();
+    if (!Lambda_new.back().allFinite())
     {
-      return;
+      return result;
     }
 
-    for (const auto &constraint_pair : constraint_set)
+    if (has_terminal_ineq)
     {
-      const std::string &constraint_name = constraint_pair.first;
-      if (G_x_.find(constraint_name) == G_x_.end() || G_x_[constraint_name].size() != horizon) {
-        G_x_[constraint_name].resize(horizon);
-        G_u_[constraint_name].resize(horizon);
-        int state_dim = context.getStateDim();
-        int control_dim = context.getControlDim();
-        int constraint_dim = constraint_pair.second->getDualDim();
-        for (int t = 0; t < horizon; ++t) {
-          G_x_[constraint_name][t] = Eigen::MatrixXd::Zero(constraint_dim, state_dim);
-          G_u_[constraint_name][t] = Eigen::MatrixXd::Zero(constraint_dim, control_dim);
+      auto terminal_residuals =
+          evaluateTerminalInequalityResidualMap(context, context.X_.back());
+      auto terminal_jacobians =
+          evaluateTerminalInequalityJacobianMap(context, context.X_.back());
+      for (const auto &entry : getTerminalInequalityLayout(context))
+      {
+        const Eigen::VectorXd &g_T0 = terminal_residuals.at(entry.name);
+        const Eigen::MatrixXd &G_T_x0 = terminal_jacobians.at(entry.name);
+        const Eigen::VectorXd k_s_T = -(g_T0 + S_T_.at(entry.name));
+        const Eigen::MatrixXd K_s_T = -G_T_x0;
+        S_T_new[entry.name] =
+            S_T_.at(entry.name) + alpha_pr * k_s_T + K_s_T * dx_real.back();
+
+        Eigen::VectorXd Y_trial = Y_T_.at(entry.name);
+        for (int i = 0; i < entry.dim; ++i)
+        {
+          const double s_safe =
+              std::max(S_T_.at(entry.name)(i), std::max(mu_ * 1e-3, EPS_SLACK));
+          const double r_d =
+              Y_T_.at(entry.name)(i) * S_T_.at(entry.name)(i) - mu_;
+          const double dual_ratio =
+              clipPositiveBarrierRatio(Y_T_.at(entry.name)(i), s_safe);
+          Eigen::RowVectorXd K_y_row =
+              -(dual_ratio * K_s_T.row(i));
+          const double k_y =
+              clipSignedBarrierRatio(-r_d - Y_T_.at(entry.name)(i) * k_s_T(i),
+                                     s_safe);
+          const double y_new =
+              Y_T_.at(entry.name)(i) + alpha_du * k_y + K_y_row.dot(dx_real.back());
+          Y_trial(i) = y_new;
+        }
+        Y_T_new[entry.name] = Y_trial;
+
+        const Eigen::ArrayXd s_floor =
+            ((1.0 - tau) * S_T_.at(entry.name).array())
+                .max(Eigen::ArrayXd::Constant(
+                    entry.dim, std::max(mu_ * 1e-3, EPS_SLACK)));
+        if ((S_T_new[entry.name].array() < s_floor).any() ||
+            (Y_T_new[entry.name].array() <
+             (1.0 - tau) * Y_T_.at(entry.name).array()).any() ||
+            !S_T_new[entry.name].allFinite() || !Y_T_new[entry.name].allFinite())
+        {
+          return result;
         }
       }
     }
 
-    const int MIN_HORIZON_FOR_PARALLEL = 50;
-    const bool use_parallel =
-        options.enable_parallel && horizon >= MIN_HORIZON_FOR_PARALLEL;
-
-    if (!use_parallel)
+    if (has_terminal_eq)
     {
-      for (int t = 0; t < horizon; ++t)
+      Lambda_T_eq_new = Lambda_T_eq_ + alpha_pr * dLambda_T_eq_;
+      if (!Lambda_T_eq_new.allFinite())
       {
-        const Eigen::VectorXd &x = context.X_[t];
-        const Eigen::VectorXd &u = context.U_[t];
-
-        for (const auto &constraint_pair : constraint_set)
-        {
-          const std::string &constraint_name = constraint_pair.first;
-          G_x_[constraint_name][t] =
-              constraint_pair.second->getStateJacobian(x, u);
-          G_u_[constraint_name][t] =
-              constraint_pair.second->getControlJacobian(x, u);
-        }
+        return result;
       }
     }
-    else
-    {
-      const int num_threads =
-          std::min(options.num_threads,
-                   static_cast<int>(std::thread::hardware_concurrency()));
-      const int chunk_size = std::max(1, horizon / num_threads);
 
-      std::vector<std::future<void>> futures;
-      futures.reserve(num_threads);
+    double cost_new = 0.0;
+    std::map<std::string, std::vector<Eigen::VectorXd>> G_raw_new;
+    for (const auto &constraint_pair : constraint_set)
+    {
+      G_raw_new[constraint_pair.first].resize(horizon);
+      G_new[constraint_pair.first].resize(horizon);
+    }
 
-      for (int thread_id = 0; thread_id < num_threads; ++thread_id)
+    for (int t = 0; t < horizon; ++t)
+    {
+      const Eigen::VectorXd &x_trial = result.state_trajectory[t];
+      const Eigen::VectorXd &u_trial = result.control_trajectory[t];
+      cost_new += context.getObjective().running_cost(x_trial, u_trial, t);
+      for (const auto &constraint_pair : constraint_set)
       {
-        int start_t = thread_id * chunk_size;
-        int end_t = (thread_id == num_threads - 1) ? horizon
-                                                   : (thread_id + 1) * chunk_size;
-
-        if (start_t >= horizon)
-          break;
+        const std::string &name = constraint_pair.first;
+        G_raw_new[name][t] =
+            constraint_pair.second->evaluate(x_trial, u_trial) -
+            constraint_pair.second->getUpperBound();
+      }
+    }
+    cost_new += context.getObjective().terminal_cost(result.state_trajectory.back());
 
-        futures.push_back(
-            std::async(std::launch::async, [this, &context, &constraint_set,
-                                            start_t, end_t]()
-                       {
-            for (int t = start_t; t < end_t; ++t) {
-              const Eigen::VectorXd &x = context.X_[t];
-              const Eigen::VectorXd &u = context.U_[t];
+    G_new = G_raw_new;
 
-              for (const auto &constraint_pair : constraint_set) {
-                const std::string &constraint_name = constraint_pair.first;
-                G_x_[constraint_name][t] =
-                    constraint_pair.second->getStateJacobian(x, u);
-                G_u_[constraint_name][t] =
-                    constraint_pair.second->getControlJacobian(x, u);
-              }
-            } }));
-      }
+    Eigen::VectorXd h_T_new = Eigen::VectorXd::Zero(getTerminalEqualityDim(context));
+    if (has_terminal_ineq)
+    {
+      G_T_new = evaluateTerminalInequalityResidualMap(
+          context, result.state_trajectory.back());
+    }
+    if (has_terminal_eq)
+    {
+      h_T_new = evaluateTerminalEqualityResidual(
+          context, result.state_trajectory.back());
+    }
 
-      for (auto &future : futures)
+    const double phi_new = computeBarrierMerit(
+        context, S_new, cost_new, has_terminal_ineq ? &S_T_new : nullptr,
+        has_terminal_eq ? &Lambda_T_eq_new : nullptr,
+        has_terminal_eq ? &h_T_new : nullptr);
+    const double theta_new =
+        computeTheta(options, G_new, S_new, has_terminal_ineq ? &G_T_new : nullptr,
+                     has_terminal_ineq ? &S_T_new : nullptr,
+                     has_terminal_eq ? &h_T_new : nullptr);
+    const auto [inf_pr_new, inf_comp_new] =
+        computePrimalAndComplementarity(
+            context, G_new, S_new, Y_new, mu_,
+            has_terminal_ineq ? &G_T_new : nullptr,
+            has_terminal_ineq ? &S_T_new : nullptr,
+            has_terminal_ineq ? &Y_T_new : nullptr,
+            has_terminal_eq ? &h_T_new : nullptr);
+
+    if (!std::isfinite(phi_new) || !std::isfinite(theta_new) ||
+        !std::isfinite(inf_pr_new) || !std::isfinite(inf_comp_new))
+    {
+      return result;
+    }
+
+    bool filter_acceptance = false;
+    if (constraint_set.empty() && !has_terminal_ineq && !has_terminal_eq)
+    {
+      double dJ = context.cost_ - cost_new;
+      double expected = -alpha_pr * (dV_(0) + 0.5 * alpha_pr * dV_(1));
+      double reduction_ratio =
+          expected > 0.0 ? dJ / expected : std::copysign(1.0, dJ);
+      filter_acceptance = reduction_ratio > 1e-6;
+    }
+    else
+    {
+      double expected_improvement = alpha_pr * dV_(0);
+      double constraint_violation_old =
+          filter_.empty() ? 0.0 : filter_.back().constraint_violation;
+      const double high_violation_reference =
+          filter_.empty() ? filter_theta_ : constraint_violation_old;
+      double merit_function_old = context.merit_function_;
+
+      if (theta_new > options.filter.max_violation_threshold)
       {
-        try
+        if (theta_new <
+            (1 - options.filter.violation_acceptance_threshold) *
+                high_violation_reference)
         {
-          if (future.valid())
-          {
-            future.get();
-          }
+          filter_acceptance = true;
         }
-        catch (const std::exception &e)
+      }
+      else if (std::max(theta_new, constraint_violation_old) <
+                   options.filter.min_violation_for_armijo_check &&
+               expected_improvement < 0)
+      {
+        if (phi_new <
+            merit_function_old +
+                options.filter.armijo_constant * expected_improvement)
         {
-          if (options.verbose)
-          {
-            std::cerr << "IPDDP: Constraint gradients computation thread failed: "
-                      << e.what() << std::endl;
-          }
-          throw;
+          filter_acceptance = true;
+        }
+      }
+      else
+      {
+        if (phi_new <
+                merit_function_old - options.filter.merit_acceptance_threshold *
+                                         theta_new ||
+            theta_new <
+                (1 - options.filter.violation_acceptance_threshold) *
+                    constraint_violation_old)
+        {
+          filter_acceptance = true;
         }
       }
     }
-  }
-
-  bool IPDDPSolver::backwardPass(CDDP &context)
-  {
-    const CDDPOptions &options = context.getOptions();
-    const int state_dim = context.getStateDim();
-    const int control_dim = context.getControlDim();
-    const int horizon = context.getHorizon();
-    const double timestep = context.getTimestep();
-    const auto &constraint_set = context.getConstraintSet();
-    const int total_dual_dim = getTotalDualDim(context);
-
-    precomputeDynamicsDerivatives(context);
-    precomputeConstraintGradients(context);
 
-    Eigen::VectorXd V_x =
-        context.getObjective().getFinalCostGradient(context.X_.back());
-    Eigen::MatrixXd V_xx =
-        context.getObjective().getFinalCostHessian(context.X_.back());
-    V_xx = 0.5 * (V_xx + V_xx.transpose());
-
-    dV_ = Eigen::Vector2d::Zero();
-    double inf_du = 0.0;
-    double inf_pr = 0.0;
-    double inf_comp = 0.0;
-    double step_norm = 0.0;
+    if (!filter_acceptance)
+    {
+      return result;
+    }
 
-    if (constraint_set.empty())
+    result.success = true;
+    result.cost = cost_new;
+    result.merit_function = phi_new;
+    result.theta = theta_new;
+    result.constraint_violation = theta_new;
+    result.inf_pr = inf_pr_new;
+    result.inf_comp = inf_comp_new;
+    result.dual_trajectory = Y_new;
+    result.slack_trajectory = S_new;
+    result.constraint_eval_trajectory = G_new;
+    result.costate_trajectory = Lambda_new;
+    if (has_terminal_ineq)
     {
-      for (int t = horizon - 1; t >= 0; --t)
+      result.terminal_slack = S_T_new;
+      result.terminal_constraint_dual = Y_T_new;
+      result.terminal_constraint_value = G_T_new;
+    }
+    if (has_terminal_eq)
+    {
+      if (!result.terminal_constraint_dual.has_value())
       {
-        const Eigen::VectorXd &x = context.X_[t];
-        const Eigen::VectorXd &u = context.U_[t];
-
-        const Eigen::MatrixXd &Fx = F_x_[t];
-        const Eigen::MatrixXd &Fu = F_u_[t];
-
-        Eigen::MatrixXd &A = workspace_.A_matrices[t];
-        Eigen::MatrixXd &B = workspace_.B_matrices[t];
-        A.noalias() = Eigen::MatrixXd::Identity(state_dim, state_dim) + timestep * Fx;
-        B.noalias() = timestep * Fu;
-
-        auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
-        auto [l_xx, l_uu, l_ux] =
-            context.getObjective().getRunningCostHessians(x, u, t);
-
-        Eigen::VectorXd &Q_x = workspace_.Q_x_vectors[t];
-        Eigen::VectorXd &Q_u = workspace_.Q_u_vectors[t];
-        Eigen::MatrixXd &Q_xx = workspace_.Q_xx_matrices[t];
-        Eigen::MatrixXd &Q_ux = workspace_.Q_ux_matrices[t];
-        Eigen::MatrixXd &Q_uu = workspace_.Q_uu_matrices[t];
+        result.terminal_constraint_dual =
+            std::map<std::string, Eigen::VectorXd>{};
+      }
+      auto equality_duals = splitTerminalEqualityVector(context, Lambda_T_eq_new);
+      result.terminal_constraint_dual->insert(equality_duals.begin(), equality_duals.end());
+      if (!result.terminal_constraint_value.has_value())
+      {
+        result.terminal_constraint_value =
+            std::map<std::string, Eigen::VectorXd>{};
+      }
+      auto equality_values = splitTerminalEqualityVector(context, h_T_new);
+      result.terminal_constraint_value->insert(equality_values.begin(), equality_values.end());
+    }
+    return result;
+  }
 
-        Q_x.noalias() = l_x + A.transpose() * V_x;
-        Q_u.noalias() = l_u + B.transpose() * V_x;
-        Q_xx.noalias() = l_xx + A.transpose() * V_xx * A;
-        Q_ux.noalias() = l_ux + B.transpose() * V_xx * A;
-        Q_uu.noalias() = l_uu + B.transpose() * V_xx * B;
+  void IPDDPSolver::applyForwardPassResult(CDDP &context, const ForwardPassResult &result)
+  {
+    // Call base class to update X_, U_, cost_, merit_function_, alpha_pr_, alpha_du_
+    CDDPSolverBase::applyForwardPassResult(context, result);
 
-        if (!options.use_ilqr)
+    if (result.dual_trajectory)
+    {
+      Y_ = *result.dual_trajectory;
+    }
+    if (result.slack_trajectory)
+    {
+      S_ = *result.slack_trajectory;
+    }
+    if (result.constraint_eval_trajectory)
+    {
+      G_ = *result.constraint_eval_trajectory;
+      G_raw_ = *result.constraint_eval_trajectory;
+    }
+    if (result.costate_trajectory)
+    {
+      Lambda_ = *result.costate_trajectory;
+    }
+    if (result.terminal_slack)
+    {
+      S_T_ = *result.terminal_slack;
+    }
+    if (result.terminal_constraint_dual)
+    {
+      for (const auto &entry : getTerminalInequalityLayout(context))
+      {
+        auto it = result.terminal_constraint_dual->find(entry.name);
+        if (it != result.terminal_constraint_dual->end())
         {
-          const auto &Fxx = F_xx_[t];
-          const auto &Fuu = F_uu_[t];
-          const auto &Fux = F_ux_[t];
-
-          for (int i = 0; i < state_dim; ++i)
-          {
-            Q_xx += timestep * V_x(i) * Fxx[i];
-            Q_ux += timestep * V_x(i) * Fux[i];
-            Q_uu += timestep * V_x(i) * Fuu[i];
-          }
-        }
-
-        Q_uu = 0.5 * (Q_uu + Q_uu.transpose());
-        Q_uu.diagonal().array() += context.regularization_;
-
-        bool need_recompute = !workspace_.ldlt_valid[t] ||
-                             (workspace_.ldlt_valid[t] && workspace_.ldlt_solvers[t].matrixLDLT().rows() != control_dim);
-
-        if (need_recompute) {
-          workspace_.ldlt_solvers[t].compute(Q_uu);
-          workspace_.ldlt_valid[t] = true;
+          Y_T_[entry.name] = it->second;
         }
-
-        if (workspace_.ldlt_solvers[t].info() != Eigen::Success)
+      }
+      const auto eq_layout = getTerminalEqualityLayout(context);
+      if (!eq_layout.empty())
+      {
+        int total_dim = getTerminalEqualityDim(context);
+        Lambda_T_eq_ = Eigen::VectorXd::Zero(total_dim);
+        int offset = 0;
+        for (const auto &entry : eq_layout)
         {
-          if (options.debug)
+          auto it = result.terminal_constraint_dual->find(entry.name);
+          if (it != result.terminal_constraint_dual->end())
           {
-            std::cerr << "IPDDP: Backward pass failed at time " << t << " (Q_uu not positive definite)" << std::endl;
+            Lambda_T_eq_.segment(offset, entry.dim) = it->second;
           }
-          workspace_.ldlt_valid[t] = false;
-          return false;
+          offset += entry.dim;
         }
+      }
+    }
+    if (result.terminal_constraint_value)
+    {
+      for (const auto &entry : getTerminalInequalityLayout(context))
+      {
+        auto it = result.terminal_constraint_value->find(entry.name);
+        if (it != result.terminal_constraint_value->end())
+        {
+          G_T_[entry.name] = it->second;
+        }
+      }
+    }
 
-        Eigen::VectorXd k_u = -workspace_.ldlt_solvers[t].solve(Q_u);
-        Eigen::MatrixXd K_u = -workspace_.ldlt_solvers[t].solve(Q_ux);
-        k_u_[t] = k_u;
-        K_u_[t] = K_u;
+    context.inf_pr_ = result.inf_pr;
+    context.inf_comp_ = result.inf_comp;
+    phi_ = result.merit_function;
+    filter_theta_ = result.theta;
+    theta_ = result.theta;
 
-        V_x = Q_x + K_u.transpose() * Q_u + Q_ux.transpose() * k_u +
-              K_u.transpose() * Q_uu * k_u;
-        V_xx = Q_xx + K_u.transpose() * Q_ux + Q_ux.transpose() * K_u +
-               K_u.transpose() * Q_uu * K_u;
-        V_xx = 0.5 * (V_xx + V_xx.transpose());
+    // Update barrier parameter before convergence check (matching old solve loop timing)
+    updateBarrierParameters(context, true);
+  }
 
-        dV_[0] += k_u.dot(Q_u);
-        dV_[1] += 0.5 * k_u.dot(Q_uu * k_u);
+  bool IPDDPSolver::checkConvergence(CDDP &context, double dJ, double dL,
+                                      int iter, std::string &reason)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const bool no_barrier_needed =
+        context.getConstraintSet().empty() &&
+        getTerminalInequalityLayout(context).empty();
+    const double scaled_inf_du = computeScaledDualInfeasibility(context);
+    const double scaled_inf_comp = context.inf_comp_;
 
-        inf_du = std::max(inf_du, Q_u.lpNorm<Eigen::Infinity>());
-        step_norm = std::max(step_norm, k_u.lpNorm<Eigen::Infinity>());
+    if (no_barrier_needed)
+    {
+      if (context.inf_pr_ < options.tolerance &&
+          scaled_inf_du < options.tolerance)
+      {
+        reason = "OptimalSolutionFound";
+        return true;
       }
-
-      context.inf_du_ = inf_du;
-      context.step_norm_ = step_norm;
-      context.inf_pr_ = 0.0;
-      context.inf_comp_ = 0.0;
-
-      if (options.debug)
+      if (options.acceptable_tolerance > 0.0)
       {
-        std::cout << "[IPDDP Backward] inf_du: " << std::scientific << std::setprecision(2)
-                  << inf_du << " ||d||: " << context.step_norm_ << " dV: " << dV_.transpose() << std::endl;
+        const double sqrt_atol = std::sqrt(options.acceptable_tolerance);
+        bool acceptable = (context.inf_pr_ < sqrt_atol &&
+                           scaled_inf_du < sqrt_atol && iter > 50);
+        if (dJ > 0.0)
+        {
+          acceptable = acceptable ||
+                       (dJ < options.acceptable_tolerance && iter > 50 &&
+                        context.inf_pr_ < sqrt_atol &&
+                        scaled_inf_du < sqrt_atol);
+        }
+        if (acceptable)
+        {
+          reason = "AcceptableSolutionFound";
+          return true;
+        }
       }
+      return false;
+    }
+
+    const double tol = std::max(options.tolerance,
+                                options.ipddp.barrier_tol_mult * mu_);
+    if (context.inf_pr_ < tol && scaled_inf_du < tol && scaled_inf_comp < tol &&
+        context.step_norm_ < options.tolerance * 10.0)
+    {
+      reason = "OptimalSolutionFound";
       return true;
     }
-    else
+
+    if (options.acceptable_tolerance > 0.0)
     {
-      for (int t = horizon - 1; t >= 0; --t)
+      const double accept_tol = std::sqrt(options.acceptable_tolerance);
+      const double barrier_accept_tol =
+          std::max(options.ipddp.barrier.mu_min_value * 100.0,
+                   options.tolerance / 10.0);
+      const bool acceptable_kkt =
+          context.inf_pr_ < accept_tol && scaled_inf_du < accept_tol &&
+          scaled_inf_comp < accept_tol;
+      const bool barrier_phase_complete = mu_ <= barrier_accept_tol;
+      bool acceptable =
+          acceptable_kkt && barrier_phase_complete && iter > 10 &&
+          std::abs(dJ) < options.acceptable_tolerance;
+      acceptable = acceptable ||
+                   (acceptable_kkt && barrier_phase_complete && iter >= 1 &&
+                    context.step_norm_ < options.tolerance * 10.0 &&
+                    context.inf_pr_ < 1e-4);
+      if (acceptable)
       {
-        const Eigen::VectorXd &x = context.X_[t];
-        const Eigen::VectorXd &u = context.U_[t];
-
-        const Eigen::MatrixXd &Fx = F_x_[t];
-        const Eigen::MatrixXd &Fu = F_u_[t];
-        Eigen::MatrixXd A =
-            Eigen::MatrixXd::Identity(state_dim, state_dim) + timestep * Fx;
-        Eigen::MatrixXd B = timestep * Fu;
-
-        Eigen::VectorXd &y = workspace_.y_combined;
-        Eigen::VectorXd &s = workspace_.s_combined;
-        Eigen::VectorXd &g = workspace_.g_combined;
-        Eigen::MatrixXd &Q_yu = workspace_.Q_yu_combined;
-        Eigen::MatrixXd &Q_yx = workspace_.Q_yx_combined;
-
-        int offset = 0;
-        for (const auto &constraint_pair : constraint_set)
-        {
-          const std::string &constraint_name = constraint_pair.first;
-          int dual_dim = constraint_pair.second->getDualDim();
+        reason = "AcceptableSolutionFound";
+        return true;
+      }
+    }
+    return false;
+  }
 
-          const Eigen::VectorXd &y_vec = Y_[constraint_name][t];
-          const Eigen::VectorXd &s_vec = S_[constraint_name][t];
-          const Eigen::VectorXd &g_vec = G_[constraint_name][t];
-          const Eigen::MatrixXd &g_x = G_x_[constraint_name][t];
-          const Eigen::MatrixXd &g_u = G_u_[constraint_name][t];
+  void IPDDPSolver::postIterationUpdate(CDDP &context, bool forward_pass_success)
+  {
+    // Barrier update on success is done in applyForwardPassResult (before convergence check).
+    // Only update on failure here.
+    if (!forward_pass_success)
+    {
+      updateBarrierParameters(context, false);
+    }
+  }
 
-          y.segment(offset, dual_dim) = y_vec;
-          s.segment(offset, dual_dim) = s_vec;
-          g.segment(offset, dual_dim) = g_vec;
-          Q_yx.block(offset, 0, dual_dim, state_dim) = g_x;
-          Q_yu.block(offset, 0, dual_dim, control_dim) = g_u;
+  bool IPDDPSolver::handleForwardPassFailure(CDDP &context,
+                                               std::string &termination_reason)
+  {
+    const CDDPOptions &options = context.getOptions();
+    context.increaseRegularization();
 
-          offset += dual_dim;
-        }
+    // Extra regularization bump for terminal equality problems
+    const bool no_barrier_needed =
+        context.getConstraintSet().empty() &&
+        getTerminalInequalityLayout(context).empty();
+    if (!no_barrier_needed && getTerminalEqualityDim(context) > 0)
+    {
+      context.increaseRegularization();
+    }
 
-        auto [l_x, l_u] = context.getObjective().getRunningCostGradients(x, u, t);
-        auto [l_xx, l_uu, l_ux] =
-            context.getObjective().getRunningCostHessians(x, u, t);
+    if (context.isRegularizationLimitReached())
+    {
+      const double scaled_inf_du = computeScaledDualInfeasibility(context);
+      const double scaled_inf_comp = context.inf_comp_;
+      const double accept_tol =
+          no_barrier_needed
+              ? std::sqrt(std::max(options.acceptable_tolerance,
+                                   options.tolerance))
+              : std::max(std::sqrt(std::max(options.acceptable_tolerance,
+                                            options.tolerance)),
+                         options.ipddp.barrier_tol_mult * mu_);
+      const bool acceptable =
+          options.acceptable_tolerance > 0.0 &&
+          context.inf_pr_ < accept_tol &&
+          scaled_inf_du < accept_tol &&
+          (no_barrier_needed || scaled_inf_comp < accept_tol);
+      if (acceptable)
+      {
+        termination_reason = "AcceptableSolutionFound";
+        return true;
+      }
+      termination_reason = "RegularizationLimitReached_NotConverged";
+      if (options.verbose)
+      {
+        std::cerr << getSolverName()
+                  << ": Regularization limit reached. Not converged." << std::endl;
+      }
+      return true; // break
+    }
+    return false; // continue
+  }
 
-        Eigen::VectorXd Q_x = l_x + Q_yx.transpose() * y + A.transpose() * V_x;
-        Eigen::VectorXd Q_u = l_u + Q_yu.transpose() * y + B.transpose() * V_x;
-        Eigen::MatrixXd Q_xx = l_xx + A.transpose() * V_xx * A;
-        Eigen::MatrixXd Q_ux = l_ux + B.transpose() * V_xx * A;
-        Eigen::MatrixXd Q_uu = l_uu + B.transpose() * V_xx * B;
+  void IPDDPSolver::recordIterationHistory(const CDDP &context)
+  {
+    CDDPSolverBase::recordIterationHistory(context);
+    history_.barrier_mu.push_back(mu_);
+  }
 
-        if (!options.use_ilqr)
-        {
-          const auto &Fxx = F_xx_[t];
-          const auto &Fuu = F_uu_[t];
-          const auto &Fux = F_ux_[t];
+  void IPDDPSolver::populateSolverSpecificSolution(CDDPSolution &solution,
+                                                    const CDDP &context)
+  {
+    solution.final_barrier_mu = mu_;
+    solution.final_primal_infeasibility = context.inf_pr_;
+    solution.final_dual_infeasibility = context.inf_du_;
+    solution.final_complementary_infeasibility = context.inf_comp_;
+  }
 
-          for (int i = 0; i < state_dim; ++i)
-          {
-            Q_xx += timestep * V_x(i) * Fxx[i];
-            Q_ux += timestep * V_x(i) * Fux[i];
-            Q_uu += timestep * V_x(i) * Fuu[i];
-          }
-        }
+  void IPDDPSolver::printIteration(int iter, const CDDP &context) const
+  {
+    printIterationLegacy(iter, context.cost_, context.inf_pr_, context.inf_du_,
+                         context.inf_comp_, mu_, context.step_norm_,
+                         context.regularization_, context.alpha_du_,
+                         context.alpha_pr_);
+  }
 
-        Eigen::MatrixXd &YSinv = workspace_.YSinv;
-        YSinv.setZero();
-        for (int i = 0; i < total_dual_dim; ++i) {
-          YSinv(i, i) = y(i) / s(i);
-        }
+  void IPDDPSolver::printIterationLegacy(int iter, double objective, double inf_pr,
+                                          double inf_du, double inf_comp, double mu,
+                                          double step_norm, double regularization,
+                                          double alpha_du, double alpha_pr) const
+  {
+    detail::printInteriorPointIteration(iter, objective, inf_pr, inf_du,
+                                        inf_comp, mu, step_norm,
+                                        regularization, alpha_du, alpha_pr);
+  }
 
-        Eigen::VectorXd primal_residual = g + s;
-        Eigen::VectorXd complementary_residual = y.cwiseProduct(s).array() - mu_;
-        Eigen::VectorXd rhat = y.cwiseProduct(primal_residual) - complementary_residual;
+  void IPDDPSolver::printSolutionSummary(const CDDPSolution &solution) const
+  {
+    std::cout << "\n========================================\n";
+    std::cout << "           IPDDP Solution Summary\n";
+    std::cout << "========================================\n";
 
-        Eigen::MatrixXd Q_uu_reg = Q_uu;
-        Q_uu_reg = 0.5 * (Q_uu_reg + Q_uu_reg.transpose());
+    std::cout << "Status: " << solution.status_message << "\n";
+    std::cout << "Iterations: " << solution.iterations_completed << "\n";
+    std::cout << "Solve Time: " << std::setprecision(2) << solution.solve_time_ms << " ms\n";
+    std::cout << "Final Cost: " << std::setprecision(6) << solution.final_objective << "\n";
+    std::cout << "Final Barrier mu: " << std::setprecision(2) << std::scientific
+              << solution.final_barrier_mu << "\n";
+    std::cout << "========================================\n\n";
+  }
 
-        Q_uu_reg.noalias() += Q_yu.transpose() * YSinv * Q_yu;
-        Q_uu_reg.diagonal().array() += context.regularization_;
+  // === Private helper methods ===
 
-        Eigen::LDLT<Eigen::MatrixXd> ldlt(Q_uu_reg);
-        if (ldlt.info() != Eigen::Success)
-        {
-          if (options.debug)
-          {
-            std::cerr << "IPDDP: Backward pass failed at time " << t << " (Q_uu not positive definite)" << std::endl;
-          }
-          return false;
-        }
+  int IPDDPSolver::getTotalDualDim(const CDDP &context) const
+  {
+    int total_dual_dim = 0;
+    const auto &constraint_set = context.getConstraintSet();
+    for (const auto &constraint_pair : constraint_set)
+    {
+      total_dual_dim += constraint_pair.second->getDualDim();
+    }
+    return total_dual_dim;
+  }
 
-        Eigen::MatrixXd &bigRHS = workspace_.bigRHS;
-        Eigen::VectorXd S_inv_rhat(total_dual_dim);
-        for (int i = 0; i < total_dual_dim; ++i) {
-          S_inv_rhat(i) = rhat(i) / s(i);
-        }
-        bigRHS.col(0).noalias() = Q_u + Q_yu.transpose() * S_inv_rhat;
-        bigRHS.rightCols(state_dim).noalias() = Q_ux + Q_yu.transpose() * YSinv * Q_yx;
+  void IPDDPSolver::precomputeConstraintGradients(CDDP &context)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const int horizon = context.getHorizon();
+    const auto &constraint_set = context.getConstraintSet();
 
-        Eigen::MatrixXd kK = -ldlt.solve(bigRHS);
+    if (constraint_set.empty())
+    {
+      return;
+    }
 
-        Eigen::VectorXd k_u = kK.col(0);
-        Eigen::MatrixXd K_u(control_dim, state_dim);
-        for (int col = 0; col < state_dim; col++)
-        {
-          K_u.col(col) = kK.col(col + 1);
+    for (const auto &constraint_pair : constraint_set)
+    {
+      const std::string &constraint_name = constraint_pair.first;
+      if (G_x_.find(constraint_name) == G_x_.end() || static_cast<int>(G_x_[constraint_name].size()) != horizon) {
+        G_x_[constraint_name].resize(horizon);
+        G_u_[constraint_name].resize(horizon);
+        int state_dim = context.getStateDim();
+        int control_dim = context.getControlDim();
+        int constraint_dim = constraint_pair.second->getDualDim();
+        for (int t = 0; t < horizon; ++t) {
+          G_x_[constraint_name][t] = Eigen::MatrixXd::Zero(constraint_dim, state_dim);
+          G_u_[constraint_name][t] = Eigen::MatrixXd::Zero(constraint_dim, control_dim);
         }
+      }
+    }
 
-        k_u_[t] = k_u;
-        K_u_[t] = K_u;
+    const int MIN_HORIZON_FOR_PARALLEL = 50;
+    const bool use_parallel =
+        options.enable_parallel && horizon >= MIN_HORIZON_FOR_PARALLEL;
 
-        Eigen::VectorXd k_y(total_dual_dim);
-        Eigen::VectorXd temp = Q_yu * k_u;
-        for (int i = 0; i < total_dual_dim; ++i) {
-          k_y(i) = (rhat(i) + y(i) * temp(i)) / s(i);
-        }
-        Eigen::MatrixXd K_y = YSinv * (Q_yx + Q_yu * K_u);
-        Eigen::VectorXd k_s = -primal_residual - temp;
-        Eigen::MatrixXd K_s = -Q_yx - Q_yu * K_u;
+    if (!use_parallel)
+    {
+      for (int t = 0; t < horizon; ++t)
+      {
+        const Eigen::VectorXd &x = context.X_[t];
+        const Eigen::VectorXd &u = context.U_[t];
 
-        offset = 0;
         for (const auto &constraint_pair : constraint_set)
         {
           const std::string &constraint_name = constraint_pair.first;
-          int dual_dim = constraint_pair.second->getDualDim();
-
-          k_y_[constraint_name][t] = k_y.segment(offset, dual_dim);
-          K_y_[constraint_name][t] = K_y.block(offset, 0, dual_dim, state_dim);
-          k_s_[constraint_name][t] = k_s.segment(offset, dual_dim);
-          K_s_[constraint_name][t] = K_s.block(offset, 0, dual_dim, state_dim);
-
-          offset += dual_dim;
+          G_x_[constraint_name][t] =
+              constraint_pair.second->getStateJacobian(x, u);
+          G_u_[constraint_name][t] =
+              constraint_pair.second->getControlJacobian(x, u);
         }
+      }
+    }
+    else
+    {
+      const int num_threads =
+          std::min(options.num_threads,
+                   static_cast<int>(std::thread::hardware_concurrency()));
+      const int chunk_size = std::max(1, horizon / num_threads);
 
-        Q_u.noalias() += Q_yu.transpose() * S_inv_rhat;
-        Q_x.noalias() += Q_yx.transpose() * S_inv_rhat;
-        Q_xx.noalias() += Q_yx.transpose() * YSinv * Q_yx;
-        Q_ux.noalias() += Q_yx.transpose() * YSinv * Q_yu;
-        Q_uu.noalias() += Q_yu.transpose() * YSinv * Q_yu;
+      std::vector<std::future<void>> futures;
+      futures.reserve(num_threads);
 
-        dV_[0] += k_u.dot(Q_u);
-        dV_[1] += 0.5 * k_u.dot(Q_uu * k_u);
+      for (int thread_id = 0; thread_id < num_threads; ++thread_id)
+      {
+        int start_t = thread_id * chunk_size;
+        int end_t = (thread_id == num_threads - 1) ? horizon
+                                                   : (thread_id + 1) * chunk_size;
 
-        V_x = Q_x + K_u.transpose() * Q_u + Q_ux.transpose() * k_u +
-              K_u.transpose() * Q_uu * k_u;
-        V_xx = Q_xx + K_u.transpose() * Q_ux + Q_ux.transpose() * K_u +
-               K_u.transpose() * Q_uu * K_u;
-        V_xx = 0.5 * (V_xx + V_xx.transpose());
+        if (start_t >= horizon)
+          break;
 
-        inf_du = std::max(inf_du, Q_u.lpNorm<Eigen::Infinity>());
-        inf_pr = std::max(inf_pr, primal_residual.lpNorm<Eigen::Infinity>());
-        inf_comp = std::max(inf_comp, complementary_residual.lpNorm<Eigen::Infinity>());
-        step_norm = std::max(step_norm, k_u.lpNorm<Eigen::Infinity>());
-      }
+        futures.push_back(
+            std::async(std::launch::async, [this, &context, &constraint_set,
+                                            start_t, end_t]()
+                       {
+            for (int t = start_t; t < end_t; ++t) {
+              const Eigen::VectorXd &x = context.X_[t];
+              const Eigen::VectorXd &u = context.U_[t];
 
-      context.inf_pr_ = inf_pr;
-      context.inf_du_ = inf_du;
-      context.inf_comp_ = inf_comp;
-      context.step_norm_ = step_norm;
+              for (const auto &constraint_pair : constraint_set) {
+                const std::string &constraint_name = constraint_pair.first;
+                G_x_[constraint_name][t] =
+                    constraint_pair.second->getStateJacobian(x, u);
+                G_u_[constraint_name][t] =
+                    constraint_pair.second->getControlJacobian(x, u);
+              }
+            } }));
+      }
 
-      if (options.debug)
+      for (auto &future : futures)
       {
-        std::cout << "[IPDDP Backward] inf_du: " << std::scientific << std::setprecision(2)
-                  << inf_du << " inf_pr: " << inf_pr << " inf_comp: " << inf_comp
-                  << " ||d||: " << context.step_norm_ << " dV: " << dV_.transpose() << std::endl;
+        try
+        {
+          if (future.valid())
+          {
+            future.get();
+          }
+        }
+        catch (const std::exception &e)
+        {
+          if (options.verbose)
+          {
+            std::cerr << "IPDDP: Constraint gradients computation thread failed: "
+                      << e.what() << std::endl;
+          }
+          throw;
+        }
       }
-      return true;
     }
   }
 
-  ForwardPassResult IPDDPSolver::forwardPass(CDDP &context, double alpha)
+  void IPDDPSolver::evaluateTrajectory(CDDP &context)
   {
-    const CDDPOptions &options = context.getOptions();
-    const auto &constraint_set = context.getConstraintSet();
-
-    ForwardPassResult result;
-    result.success = false;
-    result.cost = std::numeric_limits<double>::infinity();
-    result.merit_function = std::numeric_limits<double>::infinity();
-    result.alpha_pr = alpha;
-
     const int horizon = context.getHorizon();
-    const double tau =
-        std::max(options.ipddp.barrier.min_fraction_to_boundary, 1.0 - mu_);
+    double cost = 0.0;
 
-    result.state_trajectory = context.X_;
-    result.control_trajectory = context.U_;
-    result.state_trajectory[0] = context.getInitialState();
+    context.X_[0] = context.getInitialState();
 
-    std::map<std::string, std::vector<Eigen::VectorXd>> Y_new = Y_;
-    std::map<std::string, std::vector<Eigen::VectorXd>> S_new = S_;
-    std::map<std::string, std::vector<Eigen::VectorXd>> G_new = G_;
+    for (int t = 0; t < horizon; ++t)
+    {
+      const Eigen::VectorXd &x = context.X_[t];
+      const Eigen::VectorXd &u = context.U_[t];
 
-    double cost_new = 0.0;
-    double merit_function_new = 0.0;
-    double constraint_violation_new = 0.0;
+      cost += context.getObjective().running_cost(x, u, t);
 
-    if (constraint_set.empty())
-    {
-      for (int t = 0; t < horizon; ++t)
+      const auto &constraint_set = context.getConstraintSet();
+      for (const auto &constraint_pair : constraint_set)
       {
-        const Eigen::VectorXd delta_x =
-            result.state_trajectory[t] - context.X_[t];
-        result.control_trajectory[t] =
-            context.U_[t] + alpha * k_u_[t] + K_u_[t] * delta_x;
+        const std::string &constraint_name = constraint_pair.first;
+        Eigen::VectorXd g_val = constraint_pair.second->evaluate(x, u) -
+                                constraint_pair.second->getUpperBound();
+        G_raw_[constraint_name][t] = g_val;
+        G_[constraint_name][t] = g_val;
+      }
+
+      context.X_[t + 1] = context.getSystem().getDiscreteDynamics(
+          x, u, t * context.getTimestep());
+    }
 
-        result.state_trajectory[t + 1] = context.getSystem().getDiscreteDynamics(
-            result.state_trajectory[t], result.control_trajectory[t],
-            t * context.getTimestep());
+    cost += context.getObjective().terminal_cost(context.X_.back());
 
-        cost_new += context.getObjective().running_cost(
-            result.state_trajectory[t], result.control_trajectory[t], t);
+    for (const auto &entry : getTerminalInequalityLayout(context))
+    {
+      const auto *constraint =
+          dynamic_cast<const TerminalInequalityConstraint *>(
+              context.getTerminalConstraintSet().at(entry.name).get());
+      if (!constraint) {
+        throw std::runtime_error(
+            "IPDDP: terminal constraint '" + entry.name +
+            "' is not a TerminalInequalityConstraint");
       }
-      cost_new +=
-          context.getObjective().terminal_cost(result.state_trajectory.back());
+      G_T_[entry.name] = constraint->evaluate(context.X_.back());
+    }
 
-      double dJ = context.cost_ - cost_new;
-      double expected = -alpha * (dV_(0) + 0.5 * alpha * dV_(1));
-      double reduction_ratio =
-          expected > 0.0 ? dJ / expected : std::copysign(1.0, dJ);
+    context.cost_ = cost;
+  }
 
-      result.success = reduction_ratio > 1e-6;
-      result.cost = cost_new;
-      result.merit_function = cost_new;
-      result.constraint_violation = 0.0;
-      result.alpha_du = 1.0;
-      return result;
-    }
+  void IPDDPSolver::evaluateTrajectoryWarmStart(CDDP &context)
+  {
+    const int horizon = context.getHorizon();
+    double cost = 0.0;
 
-    double alpha_s = alpha;
+    const auto &constraint_set = context.getConstraintSet();
+    for (const auto &constraint_pair : constraint_set)
+    {
+      const std::string &constraint_name = constraint_pair.first;
+      G_[constraint_name].resize(horizon);
+    }
 
-    bool s_trajectory_feasible = true;
     for (int t = 0; t < horizon; ++t)
     {
-      const Eigen::VectorXd delta_x = result.state_trajectory[t] - context.X_[t];
+      const Eigen::VectorXd &x = context.X_[t];
+      const Eigen::VectorXd &u = context.U_[t];
+
+      cost += context.getObjective().running_cost(x, u, t);
 
       for (const auto &constraint_pair : constraint_set)
       {
         const std::string &constraint_name = constraint_pair.first;
-        int dual_dim = constraint_pair.second->getDualDim();
-        const Eigen::VectorXd &s_old = S_[constraint_name][t];
-
-        Eigen::VectorXd s_new = s_old + alpha_s * k_s_[constraint_name][t] +
-                                K_s_[constraint_name][t] * delta_x;
-        Eigen::VectorXd s_min = (1.0 - tau) * s_old;
+        Eigen::VectorXd g_val = constraint_pair.second->evaluate(x, u) -
+                                constraint_pair.second->getUpperBound();
+        G_raw_[constraint_name][t] = g_val;
+        G_[constraint_name][t] = g_val;
+      }
+    }
 
-        for (int i = 0; i < dual_dim; ++i)
-        {
-          if (s_new[i] < s_min[i])
-          {
-            s_trajectory_feasible = false;
-            break;
-          }
-        }
-        if (!s_trajectory_feasible)
-          break;
+    cost += context.getObjective().terminal_cost(context.X_.back());
 
-        S_new[constraint_name][t] = s_new;
+    for (const auto &entry : getTerminalInequalityLayout(context))
+    {
+      const auto *constraint =
+          dynamic_cast<const TerminalInequalityConstraint *>(
+              context.getTerminalConstraintSet().at(entry.name).get());
+      if (!constraint) {
+        throw std::runtime_error(
+            "IPDDP: terminal constraint '" + entry.name +
+            "' is not a TerminalInequalityConstraint");
       }
-      if (!s_trajectory_feasible)
-        break;
+      G_T_[entry.name] = constraint->evaluate(context.X_.back());
+    }
 
-      result.control_trajectory[t] =
-          context.U_[t] + alpha_s * k_u_[t] + K_u_[t] * delta_x;
+    context.cost_ = cost;
+  }
 
-      result.state_trajectory[t + 1] = context.getSystem().getDiscreteDynamics(
-          result.state_trajectory[t], result.control_trajectory[t],
-          t * context.getTimestep());
-    }
+  void IPDDPSolver::initializeDualSlackVariablesWarmStart(CDDP &context)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const int horizon = context.getHorizon();
+    const auto &constraint_set = context.getConstraintSet();
 
-    if (!s_trajectory_feasible)
+    bool has_existing_dual_slack = true;
+    for (const auto &constraint_pair : constraint_set)
     {
-      return result;
+      const std::string &constraint_name = constraint_pair.first;
+      auto y_it = Y_.find(constraint_name);
+      auto s_it = S_.find(constraint_name);
+      if (y_it == Y_.end() || s_it == S_.end() ||
+          y_it->second.size() != static_cast<size_t>(horizon) ||
+          s_it->second.size() != static_cast<size_t>(horizon))
+      {
+        has_existing_dual_slack = false;
+        break;
+      }
     }
 
-    bool suitable_alpha_y_found = false;
-    std::map<std::string, std::vector<Eigen::VectorXd>> Y_trial;
-
-    for (double alpha_y_candidate : context.alphas_)
+    for (const auto &constraint_pair : constraint_set)
     {
-      bool current_alpha_y_globally_feasible = true;
-      Y_trial = Y_;
+      const std::string &constraint_name = constraint_pair.first;
+      const int dual_dim = constraint_pair.second->getDualDim();
+
+      if (!has_existing_dual_slack)
+      {
+        Y_[constraint_name].resize(horizon);
+        S_[constraint_name].resize(horizon);
+      }
+
+      dY_[constraint_name].resize(horizon);
+      dS_[constraint_name].resize(horizon);
+      k_y_[constraint_name].resize(horizon);
+      K_y_[constraint_name].resize(horizon);
+      k_s_[constraint_name].resize(horizon);
+      K_s_[constraint_name].resize(horizon);
 
       for (int t = 0; t < horizon; ++t)
       {
-        const Eigen::VectorXd delta_x =
-            result.state_trajectory[t] - context.X_[t];
+        const Eigen::VectorXd &g_val = G_raw_[constraint_name][t];
+        Eigen::VectorXd s_current = Eigen::VectorXd::Zero(dual_dim);
+        Eigen::VectorXd y_current = Eigen::VectorXd::Zero(dual_dim);
+        bool need_reinit = !has_existing_dual_slack;
 
-        for (const auto &constraint_pair : constraint_set)
+        if (!need_reinit)
         {
-          const std::string &constraint_name = constraint_pair.first;
-          int dual_dim = constraint_pair.second->getDualDim();
-          const Eigen::VectorXd &y_old = Y_[constraint_name][t];
-
-          Eigen::VectorXd y_new = y_old +
-                                  alpha_y_candidate * k_y_[constraint_name][t] +
-                                  K_y_[constraint_name][t] * delta_x;
-          Eigen::VectorXd y_min = (1.0 - tau) * y_old;
+          s_current = S_[constraint_name][t];
+          y_current = Y_[constraint_name][t];
+          need_reinit =
+              warmstartNeedsReinit(y_current, s_current, g_val, options);
+        }
 
+        if (need_reinit)
+        {
+          s_current = g_val.unaryExpr([&](double g) {
+            return std::max(options.ipddp.slack_var_init_scale,
+                            -g + kSlackInteriorOffset);
+          });
           for (int i = 0; i < dual_dim; ++i)
           {
-            if (y_new[i] < y_min[i])
-            {
-              current_alpha_y_globally_feasible = false;
-              break;
-            }
+            y_current(i) =
+                (mu_ * options.ipddp.dual_var_init_scale) /
+                std::max(s_current(i), EPS_SLACK);
           }
-          if (!current_alpha_y_globally_feasible)
-            break;
-
-          Y_trial[constraint_name][t] = y_new;
         }
-        if (!current_alpha_y_globally_feasible)
-          break;
-      }
 
-      if (current_alpha_y_globally_feasible)
-      {
-        suitable_alpha_y_found = true;
-        Y_new = Y_trial;
-        result.alpha_du = alpha_y_candidate;
-        break;
+        repairWarmstartInterior(s_current, y_current, options);
+        Y_[constraint_name][t] = y_current;
+        S_[constraint_name][t] = s_current;
+        dY_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        dS_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        k_y_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        K_y_[constraint_name][t] =
+            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
+        k_s_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        K_s_[constraint_name][t] =
+            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
       }
     }
+  }
+
+  void IPDDPSolver::initializeDualSlackVariables(CDDP &context)
+  {
+    const CDDPOptions &options = context.getOptions();
+    const int horizon = context.getHorizon();
+    const auto &constraint_set = context.getConstraintSet();
 
-    if (!suitable_alpha_y_found)
+    for (const auto &constraint_pair : constraint_set)
     {
-      return result;
-    }
+      const std::string &constraint_name = constraint_pair.first;
+      int dual_dim = constraint_pair.second->getDualDim();
 
-    for (int t = 0; t < horizon; ++t)
-    {
-      cost_new += context.getObjective().running_cost(
-          result.state_trajectory[t], result.control_trajectory[t], t);
+      G_raw_[constraint_name].resize(horizon);
+      G_[constraint_name].resize(horizon);
+      Y_[constraint_name].resize(horizon);
+      S_[constraint_name].resize(horizon);
+      k_y_[constraint_name].resize(horizon);
+      K_y_[constraint_name].resize(horizon);
+      k_s_[constraint_name].resize(horizon);
+      K_s_[constraint_name].resize(horizon);
 
-      for (const auto &constraint_pair : constraint_set)
+      for (int t = 0; t < horizon; ++t)
       {
-        const std::string &constraint_name = constraint_pair.first;
-        G_new[constraint_name][t] =
-            constraint_pair.second->evaluate(result.state_trajectory[t],
-                                             result.control_trajectory[t]) -
+        Eigen::VectorXd g_val =
+            constraint_pair.second->evaluate(context.X_[t], context.U_[t]) -
             constraint_pair.second->getUpperBound();
+        G_raw_[constraint_name][t] = g_val;
+        G_[constraint_name][t] = g_val;
 
-        const Eigen::VectorXd &s_vec = S_new[constraint_name][t];
-        merit_function_new -= mu_ * s_vec.array().log().sum();
-
-        Eigen::VectorXd primal_residual = G_new[constraint_name][t] + s_vec;
-        constraint_violation_new += primal_residual.lpNorm<1>();
-      }
-    }
-
-    cost_new +=
-        context.getObjective().terminal_cost(result.state_trajectory.back());
-    merit_function_new += cost_new;
+        Eigen::VectorXd s_init = Eigen::VectorXd::Zero(dual_dim);
+        Eigen::VectorXd y_init = Eigen::VectorXd::Zero(dual_dim);
 
-    bool filter_acceptance = false;
-    double expected_improvement = alpha * dV_(0);
-    double constraint_violation_old = filter_.empty() ? 0.0 : filter_.back().constraint_violation;
-    double merit_function_old = context.merit_function_;
+        for (int i = 0; i < dual_dim; ++i)
+        {
+          s_init(i) = std::max(options.ipddp.slack_var_init_scale,
+                               -g_val(i) + kSlackInteriorOffset);
+          y_init(i) = (mu_ * options.ipddp.dual_var_init_scale) /
+                      std::max(s_init(i), EPS_SLACK);
+        }
+        repairWarmstartInterior(s_init, y_init, options);
+        Y_[constraint_name][t] = y_init;
+        S_[constraint_name][t] = s_init;
+        dY_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        dS_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
 
-    if (constraint_violation_new > options.filter.max_violation_threshold)
-    {
-      if (constraint_violation_new < (1 - options.filter.violation_acceptance_threshold) * constraint_violation_old)
-      {
-        filter_acceptance = true;
-      }
-    }
-    else if (std::max(constraint_violation_new, constraint_violation_old) <
-                 options.filter.min_violation_for_armijo_check &&
-             expected_improvement < 0)
-    {
-      if (merit_function_new <
-          merit_function_old +
-              options.filter.armijo_constant * expected_improvement)
-      {
-        filter_acceptance = true;
-      }
-    }
-    else
-    {
-      if (merit_function_new <
-              merit_function_old - options.filter.merit_acceptance_threshold *
-                                       constraint_violation_new ||
-          constraint_violation_new <
-              (1 - options.filter.violation_acceptance_threshold) *
-                  constraint_violation_old)
-      {
-        filter_acceptance = true;
+        k_y_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        K_y_[constraint_name][t] =
+            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
+        k_s_[constraint_name][t] = Eigen::VectorXd::Zero(dual_dim);
+        K_s_[constraint_name][t] =
+            Eigen::MatrixXd::Zero(dual_dim, context.getStateDim());
       }
     }
 
-    if (filter_acceptance)
-    {
-      result.success = true;
-      result.cost = cost_new;
-      result.merit_function = merit_function_new;
-      result.constraint_violation = constraint_violation_new;
-      result.dual_trajectory = Y_new;
-      result.slack_trajectory = S_new;
-      result.constraint_eval_trajectory = G_new;
-    }
-
-    return result;
+    context.cost_ = context.getObjective().evaluate(context.X_, context.U_);
   }
 
-  void IPDDPSolver::printIterationLegacy(int iter, double objective, double inf_pr,
-                                   double inf_du, double inf_comp, double mu,
-                                   double step_norm, double regularization,
-                                   double alpha_du, double alpha_pr) const
+  void IPDDPSolver::resetBarrierFilter(CDDP &context)
   {
-    if (iter == 0)
+    const bool has_terminal_ineq = !getTerminalInequalityLayout(context).empty();
+    const bool has_terminal_eq = getTerminalEqualityDim(context) > 0;
+    Eigen::VectorXd h_T =
+        has_terminal_eq
+            ? evaluateTerminalEqualityResidual(context, context.X_.back())
+            : Eigen::VectorXd::Zero(0);
+    const auto [inf_pr, inf_comp] = computePrimalAndComplementarity(
+        context, G_, S_, Y_, mu_, has_terminal_ineq ? &G_T_ : nullptr,
+        has_terminal_ineq ? &S_T_ : nullptr, has_terminal_ineq ? &Y_T_ : nullptr,
+        has_terminal_eq ? &h_T : nullptr);
+
+    context.merit_function_ = computeBarrierMerit(
+        context, S_, context.cost_, has_terminal_ineq ? &S_T_ : nullptr,
+        has_terminal_eq ? &Lambda_T_eq_ : nullptr,
+        has_terminal_eq ? &h_T : nullptr);
+    context.inf_pr_ = inf_pr;
+    context.inf_comp_ = inf_comp;
+    phi_ = context.merit_function_;
+    filter_theta_ =
+        std::max(computeTheta(context.getOptions(), G_, S_,
+                              has_terminal_ineq ? &G_T_ : nullptr,
+                              has_terminal_ineq ? &S_T_ : nullptr,
+                              has_terminal_eq ? &h_T : nullptr),
+                 1e-8);
+    theta_ = std::max(filter_theta_,
+                      std::max(context.getOptions().ipddp.theta_0_floor, 1e-8));
+    filter_.clear();
+    if (has_terminal_ineq || has_terminal_eq)
     {
-      std::cout << std::setw(4) << "iter" << " " << std::setw(12) << "objective"
-                << " " << std::setw(9) << "inf_pr" << " " << std::setw(9)
-                << "inf_du" << " " << std::setw(9) << "inf_comp" << " "
-                << std::setw(7) << "lg(mu)" << " " << std::setw(9) << "||d||"
-                << " " << std::setw(7) << "lg(rg)"
-                << " " << std::setw(9) << "alpha_du" << " " << std::setw(9)
-                << "alpha_pr" << std::endl;
+      acceptFilterEntry(phi_, filter_theta_);
     }
+  }
 
-    std::cout << std::setw(4) << iter << " ";
-
-    std::cout << std::setw(12) << std::scientific << std::setprecision(6)
-              << objective << " ";
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2) << inf_pr
-              << " ";
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2) << inf_du
-              << " ";
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2)
-              << inf_comp << " ";
-
-    if (mu > 0.0)
-    {
-      std::cout << std::setw(7) << std::fixed << std::setprecision(1)
-                << std::log10(mu) << " ";
-    }
-    else
-    {
-      std::cout << std::setw(7) << "-inf" << " ";
-    }
+  void IPDDPSolver::resetFilter(CDDP &context) { resetBarrierFilter(context); }
 
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2)
-              << step_norm << " ";
+  bool IPDDPSolver::acceptFilterEntry(double merit_function,
+                                      double constraint_violation)
+  {
+    return detail::acceptFilterEntry(filter_, merit_function,
+                                     constraint_violation);
+  }
 
-    if (regularization > 0.0)
+  bool IPDDPSolver::isFilterAcceptable(double merit_function,
+                                       double constraint_violation) const
+  {
+    if (!std::isfinite(merit_function) || !std::isfinite(constraint_violation))
     {
-      std::cout << std::setw(7) << std::fixed << std::setprecision(1)
-                << std::log10(regularization) << " ";
+      return false;
     }
-    else
+    constexpr double eps = 1e-12;
+    if (filter_.empty())
     {
-      std::cout << std::setw(7) << "-" << " ";
+      const bool ties_current =
+          std::abs(merit_function - phi_) <= eps &&
+          std::abs(constraint_violation - theta_) <= eps;
+      return merit_function < phi_ || constraint_violation < theta_ ||
+             ties_current;
     }
-
-    std::cout << std::setw(9) << std::fixed << std::setprecision(6) << alpha_du
-              << " ";
-
-    std::cout << std::setw(9) << std::fixed << std::setprecision(6) << alpha_pr;
-
-    std::cout << std::endl;
-  }
-
-
-  void IPDDPSolver::printSolutionSummary(const CDDPSolution &solution) const
-  {
-    std::cout << "\n========================================\n";
-    std::cout << "           IPDDP Solution Summary\n";
-    std::cout << "========================================\n";
-
-    std::cout << "Status: " << solution.status_message << "\n";
-    std::cout << "Iterations: " << solution.iterations_completed << "\n";
-    std::cout << "Solve Time: " << std::setprecision(2) << solution.solve_time_ms << " ms\n";
-    std::cout << "Final Cost: " << std::setprecision(6) << solution.final_objective << "\n";
-    std::cout << "Final Barrier μ: " << std::setprecision(2) << std::scientific
-              << solution.final_barrier_mu << "\n";
-    std::cout << "========================================\n\n";
+    return !detail::isFilterCandidateDominated(filter_, merit_function,
+                                               constraint_violation);
   }
 
   void IPDDPSolver::updateBarrierParameters(CDDP &context, bool forward_pass_success)
   {
     const CDDPOptions &options = context.getOptions();
     const auto &barrier_opts = options.ipddp.barrier;
-    const auto &constraint_set = context.getConstraintSet();
+    const bool no_barrier_needed =
+        context.getConstraintSet().empty() &&
+        getTerminalInequalityLayout(context).empty();
 
-    if (constraint_set.empty())
+    if (!forward_pass_success)
     {
       return;
     }
 
-    switch (barrier_opts.strategy)
+    const double scaled_inf_du = computeScaledDualInfeasibility(context);
+    const double scaled_inf_comp = context.inf_comp_;
+    const double mu_old = mu_;
+
+    if (no_barrier_needed)
+    {
+      mu_ = mu_old;
+    }
+    else if (barrier_opts.strategy == BarrierStrategy::ADAPTIVE)
+    {
+      const double kkt_error =
+          std::max({context.inf_pr_, scaled_inf_du, scaled_inf_comp});
+      const double threshold =
+          std::max(barrier_opts.mu_update_factor * mu_, 2.0 * mu_);
+      if (kkt_error <= threshold)
+      {
+        double factor = barrier_opts.mu_update_factor;
+        if (mu_ > 1e-20)
+        {
+          const double ratio = kkt_error / std::max(mu_, 1e-20);
+          if (ratio < 0.01)
+          {
+            factor = 0.1 * barrier_opts.mu_update_factor;
+          }
+          else if (ratio < 0.1)
+          {
+            factor = 0.3 * barrier_opts.mu_update_factor;
+          }
+          else if (ratio < 0.5)
+          {
+            factor = 0.6 * barrier_opts.mu_update_factor;
+          }
+        }
+        const double linear = factor * mu_;
+        const double superlinear = std::pow(mu_, barrier_opts.mu_update_power);
+        mu_ = std::max(
+            std::min(linear, superlinear),
+            std::max(barrier_opts.mu_min_value, options.tolerance / 100.0));
+      }
+    }
+    else
     {
-      case BarrierStrategy::MONOTONIC:
+      const double weighted_inf_du =
+          scaled_inf_du * options.ipddp.barrier_update_dual_weight;
+      const double kkt_error =
+          std::max({context.inf_pr_, weighted_inf_du, scaled_inf_comp});
+      if (kkt_error <= options.ipddp.mu_kappa_epsilon * mu_)
       {
+        const double linear = barrier_opts.mu_update_factor * mu_;
+        const double superlinear = std::pow(mu_, barrier_opts.mu_update_power);
         mu_ = std::max(barrier_opts.mu_min_value,
-                       barrier_opts.mu_update_factor * mu_);
-        resetFilter(context);
-        break;
+                       std::min(linear, superlinear));
       }
+    }
 
-      case BarrierStrategy::IPOPT:
+    const bool has_terminal_ineq = !getTerminalInequalityLayout(context).empty();
+    const bool has_terminal_eq = getTerminalEqualityDim(context) > 0;
+    Eigen::VectorXd h_T =
+        has_terminal_eq
+            ? evaluateTerminalEqualityResidual(context, context.X_.back())
+            : Eigen::VectorXd::Zero(0);
+    const double filter_theta =
+        std::max(computeTheta(options, G_, S_,
+                              has_terminal_ineq ? &G_T_ : nullptr,
+                              has_terminal_ineq ? &S_T_ : nullptr,
+                              has_terminal_eq ? &h_T : nullptr),
+                 1e-8);
+
+    const bool reset_filter = (mu_ < mu_old) && (mu_ > 0.0);
+    if (reset_filter)
+    {
+      filter_.clear();
+      if (has_terminal_eq || has_terminal_ineq)
       {
-        double scaled_inf_du = computeScaledDualInfeasibility(context);
-        double error_k = std::max({scaled_inf_du, context.inf_pr_, context.inf_comp_});
-
-        double kappa_epsilon = 10.0;
-
-        if (error_k <= kappa_epsilon * mu_)
-        {
-          double new_mu_linear = barrier_opts.mu_update_factor * mu_;
-          double new_mu_superlinear = std::pow(mu_, barrier_opts.mu_update_power);
-          mu_ = std::max(options.tolerance / 10.0,
-                         std::min(new_mu_linear, new_mu_superlinear));
-          resetFilter(context);
-        }
-        break;
+        acceptFilterEntry(phi_, filter_theta);
       }
-
-      case BarrierStrategy::ADAPTIVE:
-      default:
+    }
+    else
+    {
+      acceptFilterEntry(phi_, filter_theta);
+      if (static_cast<int>(filter_.size()) > options.ipddp.max_filter_size)
       {
-        double scaled_inf_du = computeScaledDualInfeasibility(context);
-        double termination_metric = std::max({scaled_inf_du, context.inf_pr_, context.inf_comp_});
-
-        double barrier_update_threshold = std::max(barrier_opts.mu_update_factor * mu_, mu_ * 2.0);
-
-        if (termination_metric <= barrier_update_threshold)
-        {
-          double reduction_factor = barrier_opts.mu_update_factor;
-
-          if (mu_ > 1e-12)
-          {
-            double kkt_progress_ratio = termination_metric / mu_;
-
-            if (kkt_progress_ratio < 0.01)
-            {
-              reduction_factor = barrier_opts.mu_update_factor * 0.1;
-            }
-            else if (kkt_progress_ratio < 0.1)
-            {
-              reduction_factor = barrier_opts.mu_update_factor * 0.3;
-            }
-            else if (kkt_progress_ratio < 0.5)
-            {
-              reduction_factor = barrier_opts.mu_update_factor * 0.6;
-            }
-          }
-
-          double new_mu_linear = reduction_factor * mu_;
-          double new_mu_superlinear = std::pow(mu_, barrier_opts.mu_update_power);
-
-          mu_ = std::max(options.tolerance / 100.0,
-                         std::min(new_mu_linear, new_mu_superlinear));
-
-          resetFilter(context);
-        }
-        break;
+        detail::pruneFilterToBestPoints(filter_);
       }
     }
+    const auto [inf_pr, inf_comp] = computePrimalAndComplementarity(
+        context, G_, S_, Y_, mu_, has_terminal_ineq ? &G_T_ : nullptr,
+        has_terminal_ineq ? &S_T_ : nullptr, has_terminal_ineq ? &Y_T_ : nullptr,
+        has_terminal_eq ? &h_T : nullptr);
+    context.inf_pr_ = inf_pr;
+    context.inf_comp_ = inf_comp;
+    context.merit_function_ = computeBarrierMerit(
+        context, S_, context.cost_, has_terminal_ineq ? &S_T_ : nullptr,
+        has_terminal_eq ? &Lambda_T_eq_ : nullptr,
+        has_terminal_eq ? &h_T : nullptr);
+    phi_ = context.merit_function_;
+    filter_theta_ = filter_theta;
+    theta_ = std::max(filter_theta,
+                      std::max(options.ipddp.theta_0_floor, 1e-8));
   }
 
   void IPDDPSolver::initializeConstraintStorage(CDDP &context)
@@ -1548,96 +2664,327 @@ namespace cddp
     const auto &constraint_set = context.getConstraintSet();
     const int horizon = context.getHorizon();
 
+    G_raw_.clear();
     G_.clear();
     G_x_.clear();
     G_u_.clear();
     Y_.clear();
     S_.clear();
+    dY_.clear();
+    dS_.clear();
     k_y_.clear();
     K_y_.clear();
     k_s_.clear();
     K_s_.clear();
+    G_T_.clear();
+    Y_T_.clear();
+    S_T_.clear();
+    dY_T_.clear();
+    dS_T_.clear();
 
     for (const auto &constraint_pair : constraint_set)
     {
       const std::string &constraint_name = constraint_pair.first;
+      G_raw_[constraint_name].resize(horizon);
       G_[constraint_name].resize(horizon);
       Y_[constraint_name].resize(horizon);
       S_[constraint_name].resize(horizon);
+      dY_[constraint_name].resize(horizon);
+      dS_[constraint_name].resize(horizon);
       k_y_[constraint_name].resize(horizon);
       K_y_[constraint_name].resize(horizon);
       k_s_[constraint_name].resize(horizon);
       K_s_[constraint_name].resize(horizon);
     }
+
+    for (const auto &entry : getTerminalInequalityLayout(context))
+    {
+      G_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+      Y_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+      S_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+      dY_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+      dS_T_[entry.name] = Eigen::VectorXd::Zero(entry.dim);
+    }
   }
 
   double IPDDPSolver::computeMaxConstraintViolation(const CDDP &context) const
   {
-    const auto &constraint_set = context.getConstraintSet();
-    const int horizon = context.getHorizon();
-    double max_violation = 0.0;
-
-    for (const auto &constraint_pair : constraint_set)
+    (void)context;
+    double max_violation = detail::computeMaxConstraintViolation(G_);
+    for (const auto &entry : G_T_)
     {
-      const std::string &constraint_name = constraint_pair.first;
-      auto g_it = G_.find(constraint_name);
-      if (g_it != G_.end())
+      if (entry.second.size() == 0)
       {
-        for (int t = 0; t < horizon; ++t)
-        {
-          const Eigen::VectorXd &g_vec = g_it->second[t];
-          max_violation = std::max(max_violation, g_vec.maxCoeff());
-        }
+        continue;
       }
+      max_violation = std::max(max_violation, entry.second.maxCoeff());
     }
     return max_violation;
   }
 
   double IPDDPSolver::computeScaledDualInfeasibility(const CDDP &context) const
   {
-    const auto &constraint_set = context.getConstraintSet();
-    const int horizon = context.getHorizon();
-    const int control_dim = context.getControlDim();
+    double scaled_dual_infeasibility = context.inf_du_;
 
-    if (constraint_set.empty())
+    if (!context.getOptions().ipddp.check_state_stationarity)
     {
-      return context.inf_du_;
+      return scaled_dual_infeasibility;
     }
 
-    const double smax = 100.0;
-
-    double y_norm_l1 = 0.0;
-    double s_norm_l1 = 0.0;
-    int total_dual_dim = 0;
-
-    for (const auto &constraint_pair : constraint_set)
+    double state_stationarity = 0.0;
+    for (const auto &constraint_pair : G_x_)
     {
-      const std::string &constraint_name = constraint_pair.first;
-      auto y_it = Y_.find(constraint_name);
-      auto s_it = S_.find(constraint_name);
+      const auto y_it = Y_.find(constraint_pair.first);
+      if (y_it == Y_.end())
+      {
+        continue;
+      }
+
+      const auto &G_x_stages = constraint_pair.second;
+      const auto &Y_stages = y_it->second;
+      const std::size_t stage_count =
+          std::min(G_x_stages.size(), Y_stages.size());
 
-      if (y_it != Y_.end() && s_it != S_.end())
+      for (std::size_t stage = 0; stage < stage_count; ++stage)
       {
-        for (int t = 0; t < horizon; ++t)
+        const auto &G_x_stage = G_x_stages[stage];
+        const auto &Y_stage = Y_stages[stage];
+        if (G_x_stage.rows() == 0 || G_x_stage.cols() == 0 ||
+            Y_stage.size() == 0)
+        {
+          continue;
+        }
+        if (G_x_stage.rows() != Y_stage.size())
+        {
+          continue;
+        }
+
+        const Eigen::VectorXd stage_stationarity =
+            G_x_stage.transpose() * Y_stage;
+        if (stage_stationarity.size() == 0)
         {
-          const Eigen::VectorXd &y_vec = y_it->second[t];
-          const Eigen::VectorXd &s_vec = s_it->second[t];
+          continue;
+        }
+
+        state_stationarity = std::max(
+            state_stationarity,
+            stage_stationarity.lpNorm<Eigen::Infinity>());
+      }
+    }
 
-          y_norm_l1 += y_vec.lpNorm<1>();
-          s_norm_l1 += s_vec.lpNorm<1>();
-          total_dual_dim += y_vec.size();
+    return std::max(scaled_dual_infeasibility, state_stationarity);
+  }
+
+  double IPDDPSolver::computeTheta(
+      const CDDPOptions &options,
+      const std::map<std::string, std::vector<Eigen::VectorXd>> &constraints,
+      const std::map<std::string, std::vector<Eigen::VectorXd>> &slacks,
+      const std::map<std::string, Eigen::VectorXd> *terminal_constraints,
+      const std::map<std::string, Eigen::VectorXd> *terminal_slacks,
+      const Eigen::VectorXd *terminal_equality_residual) const
+  {
+    const bool use_l2 = options.ipddp.theta_norm == "l2";
+    double total = 0.0;
+    double max_entry = 0.0;
+    for (const auto &constraint_pair : constraints)
+    {
+      const auto slack_it = slacks.find(constraint_pair.first);
+      if (slack_it == slacks.end())
+      {
+        continue;
+      }
+      const auto &g_traj = constraint_pair.second;
+      const auto &s_traj = slack_it->second;
+      for (size_t t = 0; t < g_traj.size(); ++t)
+      {
+        const Eigen::VectorXd residual = g_traj[t] + s_traj[t];
+        if (use_l2)
+        {
+          total += residual.squaredNorm();
+        }
+        else
+        {
+          total += residual.lpNorm<1>();
+        }
+        max_entry = std::max(max_entry, residual.lpNorm<Eigen::Infinity>());
+      }
+    }
+    if (terminal_constraints != nullptr && terminal_slacks != nullptr)
+    {
+      for (const auto &constraint_pair : *terminal_constraints)
+      {
+        const auto slack_it = terminal_slacks->find(constraint_pair.first);
+        if (slack_it == terminal_slacks->end())
+        {
+          continue;
+        }
+        const Eigen::VectorXd residual = constraint_pair.second + slack_it->second;
+        if (use_l2)
+        {
+          total += residual.squaredNorm();
+        }
+        else
+        {
+          total += residual.lpNorm<1>();
         }
+        max_entry = std::max(max_entry, residual.lpNorm<Eigen::Infinity>());
+      }
+    }
+    if (terminal_equality_residual != nullptr && terminal_equality_residual->size() > 0)
+    {
+      if (use_l2)
+      {
+        total += terminal_equality_residual->squaredNorm();
+      }
+      else
+      {
+        total += terminal_equality_residual->lpNorm<1>();
       }
+      max_entry =
+          std::max(max_entry, terminal_equality_residual->lpNorm<Eigen::Infinity>());
     }
+    const double theta = use_l2 ? std::sqrt(total) : total;
+    return std::max(theta, max_entry);
+  }
 
-    int m = total_dual_dim;
-    int n = control_dim * horizon;
-    int m_plus_n = m + n;
+  double IPDDPSolver::computeBarrierMerit(
+      const CDDP &context,
+      const std::map<std::string, std::vector<Eigen::VectorXd>> &slacks,
+      double cost,
+      const std::map<std::string, Eigen::VectorXd> *terminal_slacks,
+      const Eigen::VectorXd *terminal_equality_multipliers,
+      const Eigen::VectorXd *terminal_equality_residual) const
+  {
+    double merit = cost;
+    for (const auto &entry : slacks)
+    {
+      for (const auto &s_vec : entry.second)
+      {
+        merit -= mu_ * s_vec.array().max(EPS_SLACK).log().sum();
+      }
+    }
+    if (terminal_slacks != nullptr)
+    {
+      for (const auto &entry : *terminal_slacks)
+      {
+        merit -= mu_ * entry.second.array().max(EPS_SLACK).log().sum();
+      }
+    }
+    if (terminal_equality_multipliers != nullptr &&
+        terminal_equality_residual != nullptr &&
+        terminal_equality_multipliers->size() == terminal_equality_residual->size())
+    {
+      merit += terminal_equality_multipliers->dot(*terminal_equality_residual);
+    }
+    return merit;
+  }
 
-    double scaling_numerator = (m_plus_n > 0) ? (y_norm_l1 + s_norm_l1) / static_cast<double>(m_plus_n) : 0.0;
-    double sd = std::max(smax, scaling_numerator) / smax;
+  std::pair<double, double> IPDDPSolver::computePrimalAndComplementarity(
+      const CDDP &context,
+      const std::map<std::string, std::vector<Eigen::VectorXd>> &constraints,
+      const std::map<std::string, std::vector<Eigen::VectorXd>> &slacks,
+      const std::map<std::string, std::vector<Eigen::VectorXd>> &duals,
+      double mu,
+      const std::map<std::string, Eigen::VectorXd> *terminal_constraints,
+      const std::map<std::string, Eigen::VectorXd> *terminal_slacks,
+      const std::map<std::string, Eigen::VectorXd> *terminal_duals,
+      const Eigen::VectorXd *terminal_equality_residual) const
+  {
+    double inf_pr = 0.0;
+    double inf_comp = 0.0;
+    for (const auto &constraint_pair : constraints)
+    {
+      const auto slack_it = slacks.find(constraint_pair.first);
+      const auto dual_it = duals.find(constraint_pair.first);
+      if (slack_it == slacks.end() || dual_it == duals.end())
+      {
+        continue;
+      }
+      for (size_t t = 0; t < constraint_pair.second.size(); ++t)
+      {
+        const Eigen::VectorXd r_p = constraint_pair.second[t] + slack_it->second[t];
+        const Eigen::VectorXd r_d =
+            dual_it->second[t].cwiseProduct(slack_it->second[t]).array() - mu;
+        inf_pr = std::max(inf_pr, r_p.lpNorm<Eigen::Infinity>());
+        inf_comp = std::max(inf_comp, r_d.lpNorm<Eigen::Infinity>());
+      }
+    }
+    if (terminal_constraints != nullptr && terminal_slacks != nullptr &&
+        terminal_duals != nullptr)
+    {
+      for (const auto &entry : *terminal_constraints)
+      {
+        const auto slack_it = terminal_slacks->find(entry.first);
+        const auto dual_it = terminal_duals->find(entry.first);
+        if (slack_it == terminal_slacks->end() ||
+            dual_it == terminal_duals->end())
+        {
+          continue;
+        }
+        const Eigen::VectorXd r_p = entry.second + slack_it->second;
+        const Eigen::VectorXd r_d =
+            dual_it->second.cwiseProduct(slack_it->second).array() - mu;
+        inf_pr = std::max(inf_pr, r_p.lpNorm<Eigen::Infinity>());
+        inf_comp = std::max(inf_comp, r_d.lpNorm<Eigen::Infinity>());
+      }
+    }
+    if (terminal_equality_residual != nullptr && terminal_equality_residual->size() > 0)
+    {
+      inf_pr = std::max(
+          inf_pr, terminal_equality_residual->lpNorm<Eigen::Infinity>());
+    }
+    return {inf_pr, inf_comp};
+  }
 
-    return context.inf_du_ / sd;
+  std::pair<double, double> IPDDPSolver::computeMaxStepSizes(
+      const CDDP &context) const
+  {
+    const double tau =
+        std::max(context.getOptions().ipddp.barrier.min_fraction_to_boundary,
+                 1.0 - mu_);
+    double alpha_pr = 1.0;
+    double alpha_du = 1.0;
+    for (const auto &constraint_pair : context.getConstraintSet())
+    {
+      const auto &name = constraint_pair.first;
+      for (size_t t = 0; t < dS_.at(name).size(); ++t)
+      {
+        const Eigen::VectorXd &s = S_.at(name)[t];
+        const Eigen::VectorXd &y = Y_.at(name)[t];
+        const Eigen::VectorXd &ds = dS_.at(name)[t];
+        const Eigen::VectorXd &dy = dY_.at(name)[t];
+        for (int i = 0; i < s.size(); ++i)
+        {
+          if (ds(i) < 0.0)
+          {
+            alpha_pr = std::min(alpha_pr, -tau * s(i) / ds(i));
+          }
+          if (dy(i) < 0.0)
+          {
+            alpha_du = std::min(alpha_du, -tau * y(i) / dy(i));
+          }
+        }
+      }
+    }
+    for (const auto &entry : getTerminalInequalityLayout(context))
+    {
+      const Eigen::VectorXd &s = S_T_.at(entry.name);
+      const Eigen::VectorXd &y = Y_T_.at(entry.name);
+      const Eigen::VectorXd &ds = dS_T_.at(entry.name);
+      const Eigen::VectorXd &dy = dY_T_.at(entry.name);
+      for (int i = 0; i < entry.dim; ++i)
+      {
+        if (ds(i) < 0.0)
+        {
+          alpha_pr = std::min(alpha_pr, -tau * s(i) / ds(i));
+        }
+        if (dy(i) < 0.0)
+        {
+          alpha_du = std::min(alpha_du, -tau * y(i) / dy(i));
+        }
+      }
+    }
+    return {std::clamp(alpha_pr, 0.0, 1.0), std::clamp(alpha_du, 0.0, 1.0)};
   }
 
 } // namespace cddp
diff --git a/src/cddp_core/msipddp_solver.cpp b/src/cddp_core/msipddp_solver.cpp
index 3067d5c..42fe040 100644
--- a/src/cddp_core/msipddp_solver.cpp
+++ b/src/cddp_core/msipddp_solver.cpp
@@ -14,6 +14,7 @@
  limitations under the License.
 */
 
+#include "interior_point_utils.hpp"
 #include "cddp_core/msipddp_solver.hpp"
 #include "cddp_core/cddp_core.hpp"
 #include <chrono>
@@ -712,7 +713,6 @@ namespace cddp
     double merit_function = context.cost_;
     double inf_pr = 0.0;
     double filter_constraint_violation = 0.0;
-    double inf_du = 0.0;
     double inf_comp = 0.0;
     double inf_defect = 0.0;
 
@@ -751,7 +751,6 @@ namespace cddp
     {
       inf_pr = 0.0;
       filter_constraint_violation = 0.0;
-      inf_du = 0.0;
       inf_comp = 0.0;
     }
 
@@ -767,26 +766,8 @@ namespace cddp
 
   bool MSIPDDPSolver::acceptFilterEntry(double merit_function, double constraint_violation)
   {
-    FilterPoint candidate(merit_function, constraint_violation);
-
-    for (const auto &filter_point : filter_)
-    {
-      if (filter_point.dominates(candidate))
-      {
-        return false;
-      }
-    }
-
-    filter_.erase(
-        std::remove_if(filter_.begin(), filter_.end(),
-                      [&candidate](const FilterPoint &point) {
-                        return candidate.dominates(point);
-                      }),
-        filter_.end());
-
-    filter_.push_back(candidate);
-
-    return true;
+    return detail::acceptFilterEntry(filter_, merit_function,
+                                     constraint_violation);
   }
 
   bool MSIPDDPSolver::isFilterAcceptable(double merit_function, double constraint_violation,
@@ -798,13 +779,9 @@ namespace cddp
       return true;
     }
 
-    FilterPoint candidate(merit_function, constraint_violation);
-    for (const auto &filter_point : filter_)
-    {
-      if (filter_point.dominates(candidate))
-      {
-        return false;
-      }
+    if (detail::isFilterCandidateDominated(filter_, merit_function,
+                                           constraint_violation)) {
+      return false;
     }
 
     double best_violation = std::numeric_limits<double>::infinity();
@@ -843,15 +820,7 @@ namespace cddp
 
     if (!needs_restoration)
     {
-      for (const auto &point : filter_)
-      {
-        if (std::isnan(point.merit_function) || std::isnan(point.constraint_violation) ||
-            std::isinf(point.merit_function) || std::isinf(point.constraint_violation))
-        {
-          needs_restoration = true;
-          break;
-        }
-      }
+      needs_restoration = detail::filterContainsInvalidValues(filter_);
     }
 
     if (needs_restoration && !filter_.empty())
@@ -861,23 +830,7 @@ namespace cddp
         std::cout << "MSIPDDP: Filter restoration: " << filter_.size() << " -> ";
       }
 
-      auto best_violation = *std::min_element(filter_.begin(), filter_.end(),
-        [](const FilterPoint &a, const FilterPoint &b) {
-          return a.constraint_violation < b.constraint_violation;
-        });
-
-      auto best_merit = *std::min_element(filter_.begin(), filter_.end(),
-        [](const FilterPoint &a, const FilterPoint &b) {
-          return a.merit_function < b.merit_function;
-        });
-
-      filter_.clear();
-      filter_.push_back(best_violation);
-      if (std::abs(best_merit.constraint_violation - best_violation.constraint_violation) > 1e-12 ||
-          std::abs(best_merit.merit_function - best_violation.merit_function) > 1e-12)
-      {
-        filter_.push_back(best_merit);
-      }
+      detail::pruneFilterToBestPoints(filter_);
 
       if (options.debug)
       {
@@ -1771,64 +1724,13 @@ namespace cddp
   }
 
   void MSIPDDPSolver::printIterationLegacy(int iter, double objective, double inf_pr,
-                                     double inf_du, double inf_comp, double mu,
-                                     double step_norm, double regularization,
-                                     double alpha_du, double alpha_pr) const
+                                      double inf_du, double inf_comp, double mu,
+                                      double step_norm, double regularization,
+                                      double alpha_du, double alpha_pr) const
   {
-    if (iter == 0)
-    {
-      std::cout << std::setw(4) << "iter" << " " << std::setw(12) << "objective"
-                << " " << std::setw(9) << "inf_pr" << " " << std::setw(9)
-                << "inf_du" << " " << std::setw(9) << "inf_comp" << " "
-                << std::setw(7) << "lg(mu)" << " " << std::setw(9) << "||d||"
-                << " " << std::setw(7) << "lg(rg)"
-                << " " << std::setw(9) << "alpha_du" << " " << std::setw(9)
-                << "alpha_pr" << std::endl;
-    }
-
-    std::cout << std::setw(4) << iter << " ";
-
-    std::cout << std::setw(12) << std::scientific << std::setprecision(6)
-              << objective << " ";
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2) << inf_pr
-              << " ";
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2) << inf_du
-              << " ";
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2)
-              << inf_comp << " ";
-
-    if (mu > 0.0)
-    {
-      std::cout << std::setw(7) << std::fixed << std::setprecision(1)
-                << std::log10(mu) << " ";
-    }
-    else
-    {
-      std::cout << std::setw(7) << "-inf" << " ";
-    }
-
-    std::cout << std::setw(9) << std::scientific << std::setprecision(2)
-              << step_norm << " ";
-
-    if (regularization > 0.0)
-    {
-      std::cout << std::setw(7) << std::fixed << std::setprecision(1)
-                << std::log10(regularization) << " ";
-    }
-    else
-    {
-      std::cout << std::setw(7) << "-" << " ";
-    }
-
-    std::cout << std::setw(9) << std::fixed << std::setprecision(6) << alpha_du
-              << " ";
-
-    std::cout << std::setw(9) << std::fixed << std::setprecision(6) << alpha_pr;
-
-    std::cout << std::endl;
+    detail::printInteriorPointIteration(iter, objective, inf_pr, inf_du,
+                                        inf_comp, mu, step_norm,
+                                        regularization, alpha_du, alpha_pr);
   }
 
   void MSIPDDPSolver::printSolutionSummary(const CDDPSolution &solution) const
@@ -1977,24 +1879,8 @@ namespace cddp
 
   double MSIPDDPSolver::computeMaxConstraintViolation(const CDDP &context) const
   {
-    const auto &constraint_set = context.getConstraintSet();
-    const int horizon = context.getHorizon();
-    double max_violation = 0.0;
-
-    for (const auto &constraint_pair : constraint_set)
-    {
-      const std::string &constraint_name = constraint_pair.first;
-      auto g_it = G_.find(constraint_name);
-      if (g_it != G_.end())
-      {
-        for (int t = 0; t < horizon; ++t)
-        {
-          const Eigen::VectorXd &g_vec = g_it->second[t];
-          max_violation = std::max(max_violation, g_vec.maxCoeff());
-        }
-      }
-    }
-    return max_violation;
+    (void)context;
+    return detail::computeMaxConstraintViolation(G_);
   }
 
   double MSIPDDPSolver::computeScaledDualInfeasibility(const CDDP &context) const
diff --git a/tests/cddp_core/test_cddp_core.cpp b/tests/cddp_core/test_cddp_core.cpp
index 857632c..d0d37c4 100644
--- a/tests/cddp_core/test_cddp_core.cpp
+++ b/tests/cddp_core/test_cddp_core.cpp
@@ -544,6 +544,38 @@ TEST_F(CDDPCoreTest, IntegrationWithTrajectoryAndOptions) {
     EXPECT_EQ(solution.control_trajectory.size(), horizon);
 }
 
+TEST_F(CDDPCoreTest, SolveReinitializesStaleTrajectoryDimensions) {
+    cddp::CDDP::registerSolver("MockExternalSolver", cddp::createMockExternalSolver);
+
+    cddp::CDDP cddp_solver(initial_state, goal_state, horizon, timestep,
+                          std::make_unique<cddp::Unicycle>(timestep, "euler"),
+                          std::make_unique<cddp::QuadraticObjective>(
+                              Eigen::MatrixXd::Identity(state_dim, state_dim),
+                              Eigen::MatrixXd::Identity(control_dim, control_dim),
+                              10.0 * Eigen::MatrixXd::Identity(state_dim, state_dim),
+                              goal_state, std::vector<Eigen::VectorXd>(), timestep),
+                          options);
+
+    cddp_solver.X_.assign(static_cast<size_t>(horizon + 1),
+                          Eigen::VectorXd::Zero(state_dim + 1));
+    cddp_solver.U_.assign(static_cast<size_t>(horizon),
+                          Eigen::VectorXd::Zero(control_dim + 1));
+
+    auto solution = cddp_solver.solve("MockExternalSolver");
+
+    EXPECT_EQ(solution.solver_name, "MockExternalSolver");
+    ASSERT_EQ(cddp_solver.X_.size(), static_cast<size_t>(horizon + 1));
+    ASSERT_EQ(cddp_solver.U_.size(), static_cast<size_t>(horizon));
+
+    for (const auto &state : cddp_solver.X_) {
+        EXPECT_EQ(state.size(), state_dim);
+    }
+    for (const auto &control : cddp_solver.U_) {
+        EXPECT_EQ(control.size(), control_dim);
+    }
+    EXPECT_TRUE(cddp_solver.X_.front().isApprox(initial_state));
+}
+
 TEST_F(CDDPCoreTest, SetReferenceStatesUpdatesObjectiveTerminalReference) {
     cddp::CDDP cddp_solver(initial_state, goal_state, horizon, timestep,
                           std::make_unique<cddp::Unicycle>(timestep, "euler"),
diff --git a/tests/cddp_core/test_ipddp_solver.cpp b/tests/cddp_core/test_ipddp_solver.cpp
index 4e1d501..24d3ad8 100644
--- a/tests/cddp_core/test_ipddp_solver.cpp
+++ b/tests/cddp_core/test_ipddp_solver.cpp
@@ -25,6 +25,327 @@
 
 #include "cddp.hpp"
 
+namespace cddp
+{
+class IPDDPSolverTestAccess
+{
+public:
+    static bool backwardPass(IPDDPSolver &solver, CDDP &context)
+    {
+        return solver.backwardPass(context);
+    }
+
+    static double scaledDualInfeasibility(const IPDDPSolver &solver,
+                                          const CDDP &context)
+    {
+        return solver.computeScaledDualInfeasibility(context);
+    }
+
+    static void updateBarrierParameters(IPDDPSolver &solver,
+                                        CDDP &context,
+                                        bool forward_pass_success)
+    {
+        solver.updateBarrierParameters(context, forward_pass_success);
+    }
+
+    static const std::vector<FilterPoint> &filter(const IPDDPSolver &solver)
+    {
+        return solver.filter_;
+    }
+
+    static double filterTheta(const IPDDPSolver &solver)
+    {
+        return solver.filter_theta_;
+    }
+
+    static const std::vector<Eigen::VectorXd> &
+    pathSlack(const IPDDPSolver &solver, const std::string &name)
+    {
+        return solver.S_.at(name);
+    }
+
+    static const std::vector<Eigen::VectorXd> &
+    pathDual(const IPDDPSolver &solver, const std::string &name)
+    {
+        return solver.Y_.at(name);
+    }
+
+    static const Eigen::VectorXd &
+    terminalSlack(const IPDDPSolver &solver, const std::string &name)
+    {
+        return solver.S_T_.at(name);
+    }
+
+    static const Eigen::VectorXd &
+    terminalDual(const IPDDPSolver &solver, const std::string &name)
+    {
+        return solver.Y_T_.at(name);
+    }
+
+    static const Eigen::VectorXd &
+    terminalEqualityMultiplier(const IPDDPSolver &solver)
+    {
+        return solver.Lambda_T_eq_;
+    }
+
+    static const std::vector<Eigen::VectorXd> &
+    controlFeedforward(const IPDDPSolver &solver)
+    {
+        return solver.k_u_;
+    }
+
+    static void setCostateTrajectory(IPDDPSolver &solver,
+                                     const std::vector<Eigen::VectorXd> &lambda)
+    {
+        solver.Lambda_ = lambda;
+    }
+
+    static void setPathInterior(IPDDPSolver &solver,
+                                const std::string &name,
+                                double slack_value,
+                                double dual_value)
+    {
+        for (auto &s : solver.S_.at(name))
+        {
+            s = Eigen::VectorXd::Constant(s.size(), slack_value);
+        }
+        for (auto &y : solver.Y_.at(name))
+        {
+            y = Eigen::VectorXd::Constant(y.size(), dual_value);
+        }
+    }
+
+    static void setTerminalInterior(IPDDPSolver &solver,
+                                    const std::string &name,
+                                    double slack_value,
+                                    double dual_value)
+    {
+        solver.S_T_.at(name) =
+            Eigen::VectorXd::Constant(solver.S_T_.at(name).size(), slack_value);
+        solver.Y_T_.at(name) =
+            Eigen::VectorXd::Constant(solver.Y_T_.at(name).size(), dual_value);
+    }
+
+    static void setTerminalEqualityMultiplier(IPDDPSolver &solver,
+                                              const Eigen::VectorXd &lambda)
+    {
+        solver.Lambda_T_eq_ = lambda;
+    }
+};
+} // namespace cddp
+
+namespace
+{
+cddp::CDDPOptions makeIpddpRegressionOptions()
+{
+    cddp::CDDPOptions options;
+    options.max_iterations = 20;
+    options.tolerance = 1e-6;
+    options.acceptable_tolerance = 1e-6;
+    options.enable_parallel = false;
+    options.num_threads = 1;
+    options.verbose = false;
+    options.debug = false;
+    options.regularization.initial_value = 1e-6;
+    options.ipddp.barrier.mu_initial = 1e-1;
+    options.ipddp.slack_var_init_scale = 1e-2;
+    options.ipddp.dual_var_init_scale = 1e-1;
+    return options;
+}
+
+cddp::CDDP makeScalarIntegratorProblem(const cddp::CDDPOptions &options,
+                                       bool add_path_constraint,
+                                       bool add_terminal_inequality)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 4;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 1.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0;
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = 1e-2 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+    cddp::CDDP problem(initial_state, goal_state, horizon, timestep);
+    problem.setDynamicalSystem(std::make_unique<cddp::LTISystem>(A, B, timestep));
+    problem.setObjective(std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+    problem.setOptions(options);
+
+    if (add_path_constraint)
+    {
+        Eigen::MatrixXd path_A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+        Eigen::VectorXd path_b(state_dim);
+        path_b << 0.25;
+        problem.addPathConstraint(
+            "PathUpperBound",
+            std::make_unique<cddp::LinearConstraint>(path_A, path_b));
+    }
+
+    if (add_terminal_inequality)
+    {
+        Eigen::MatrixXd terminal_A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+        Eigen::VectorXd terminal_b(state_dim);
+        terminal_b << 0.25;
+        problem.addTerminalConstraint(
+            "TerminalUpperBound",
+            std::make_unique<cddp::TerminalInequalityConstraint>(terminal_A, terminal_b));
+    }
+
+    std::vector<Eigen::VectorXd> X(horizon + 1, initial_state);
+    std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+    problem.setInitialTrajectory(X, U);
+    return problem;
+}
+
+cddp::CDDP makeScalarIntegratorTerminalEqualityProblem(
+    const cddp::CDDPOptions &options)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 4;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 1.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0;
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = 1e-2 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+    cddp::CDDP problem(initial_state, goal_state, horizon, timestep);
+    problem.setDynamicalSystem(std::make_unique<cddp::LTISystem>(A, B, timestep));
+    problem.setObjective(std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+    problem.setOptions(options);
+    problem.addTerminalConstraint(
+        "TerminalTarget",
+        std::make_unique<cddp::TerminalEqualityConstraint>(goal_state));
+
+    std::vector<Eigen::VectorXd> X(horizon + 1, initial_state);
+    std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+    problem.setInitialTrajectory(X, U);
+    return problem;
+}
+
+class UnsupportedTerminalConstraint final : public cddp::TerminalConstraint
+{
+public:
+    UnsupportedTerminalConstraint()
+        : cddp::TerminalConstraint("UnsupportedTerminalConstraint") {}
+
+    int getDualDim() const override
+    {
+        return 1;
+    }
+
+    Eigen::VectorXd evaluate(const Eigen::VectorXd &state,
+                             const Eigen::VectorXd & /*control*/,
+                             int /*index*/ = 0) const override
+    {
+        return state.head(1);
+    }
+
+    Eigen::VectorXd getLowerBound() const override
+    {
+        return Eigen::VectorXd::Zero(1);
+    }
+
+    Eigen::VectorXd getUpperBound() const override
+    {
+        return Eigen::VectorXd::Zero(1);
+    }
+
+    Eigen::MatrixXd getStateJacobian(const Eigen::VectorXd &state,
+                                     const Eigen::VectorXd & /*control*/,
+                                     int /*index*/ = 0) const override
+    {
+        return Eigen::MatrixXd::Identity(1, state.size());
+    }
+
+    double computeViolation(const Eigen::VectorXd &state,
+                            const Eigen::VectorXd &control,
+                            int index = 0) const override
+    {
+        return computeViolationFromValue(evaluate(state, control, index));
+    }
+
+    double computeViolationFromValue(const Eigen::VectorXd &g) const override
+    {
+        return g.lpNorm<Eigen::Infinity>();
+    }
+};
+
+class QuadraticScalarSystem final : public cddp::DynamicalSystem
+{
+public:
+    QuadraticScalarSystem()
+        : cddp::DynamicalSystem(1, 1, 1.0, "euler") {}
+
+    Eigen::VectorXd getDiscreteDynamics(const Eigen::VectorXd &state,
+                                        const Eigen::VectorXd &control,
+                                        double /*time*/) const override
+    {
+        Eigen::VectorXd next_state(1);
+        next_state << state(0) + control(0) + 0.5 * state(0) * state(0);
+        return next_state;
+    }
+
+    Eigen::MatrixXd getStateJacobian(const Eigen::VectorXd &state,
+                                     const Eigen::VectorXd & /*control*/,
+                                     double /*time*/) const override
+    {
+        Eigen::MatrixXd jacobian(1, 1);
+        jacobian << 1.0 + state(0);
+        return jacobian;
+    }
+
+    Eigen::MatrixXd getControlJacobian(const Eigen::VectorXd & /*state*/,
+                                       const Eigen::VectorXd & /*control*/,
+                                       double /*time*/) const override
+    {
+        return Eigen::MatrixXd::Identity(1, 1);
+    }
+
+    std::vector<Eigen::MatrixXd> getStateHessian(
+        const Eigen::VectorXd & /*state*/,
+        const Eigen::VectorXd & /*control*/,
+        double /*time*/) const override
+    {
+        return {Eigen::MatrixXd::Identity(1, 1)};
+    }
+
+    std::vector<Eigen::MatrixXd> getControlHessian(
+        const Eigen::VectorXd & /*state*/,
+        const Eigen::VectorXd & /*control*/,
+        double /*time*/) const override
+    {
+        return {Eigen::MatrixXd::Zero(1, 1)};
+    }
+
+    std::vector<Eigen::MatrixXd> getCrossHessian(
+        const Eigen::VectorXd & /*state*/,
+        const Eigen::VectorXd & /*control*/,
+        double /*time*/) const override
+    {
+        return {Eigen::MatrixXd::Zero(1, 1)};
+    }
+};
+} // namespace
+
 TEST(IPDDPTest, SolvePendulum)
 {
     int state_dim = 2;
@@ -479,6 +800,7 @@ TEST(IPDDPTest, SolveCar)
         << "Algorithm should converge";
     EXPECT_GT(iterations_completed, 0) << "Should take at least one iteration";
     EXPECT_LT(final_objective, J) << "Final cost should be better than initial cost";
+    EXPECT_LT(final_objective, 1.91) << "Cold-start IPDDP should reach the low-cost parking solution";
 
     // =========================================================================
     // Test warm start capability
@@ -821,3 +1143,495 @@ TEST(IPDDPTest, SolveQuadrotor)
         << "Warm start should also converge";
     EXPECT_LE(warm_iterations, iterations_completed + 20) << "Warm start should not take significantly more iterations";
 }
+
+TEST(IPDDPTest, SolveWithTerminalInequalityOnly)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 8;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 0.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 1.0;
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = 1e-2 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = 100.0 * Eigen::MatrixXd::Identity(state_dim, state_dim);
+
+    cddp::CDDPOptions options;
+    options.max_iterations = 60;
+    options.tolerance = 1e-6;
+    options.acceptable_tolerance = 1e-6;
+    options.enable_parallel = false;
+    options.num_threads = 1;
+    options.verbose = false;
+    options.debug = false;
+    options.regularization.initial_value = 1e-6;
+    options.ipddp.barrier.mu_initial = 1e-1;
+
+    cddp::CDDP cddp_solver(initial_state, goal_state, horizon, timestep);
+    cddp_solver.setDynamicalSystem(
+        std::make_unique<cddp::LTISystem>(A, B, timestep));
+    cddp_solver.setObjective(std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+    cddp_solver.setOptions(options);
+
+    Eigen::MatrixXd terminal_A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::VectorXd terminal_b = Eigen::VectorXd::Zero(state_dim);
+    cddp_solver.addTerminalConstraint(
+        "TerminalUpperBound",
+        std::make_unique<cddp::TerminalInequalityConstraint>(terminal_A, terminal_b));
+
+    std::vector<Eigen::VectorXd> X(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+    X[0] = initial_state;
+    for (int t = 0; t < horizon; ++t)
+    {
+        X[t + 1] = A * X[t] + B * U[t];
+    }
+    cddp_solver.setInitialTrajectory(X, U);
+
+    const cddp::CDDPSolution solution = cddp_solver.solve("IPDDP");
+
+    EXPECT_TRUE(solution.status_message == "OptimalSolutionFound" ||
+                solution.status_message == "AcceptableSolutionFound");
+    ASSERT_FALSE(solution.state_trajectory.empty());
+    const double terminal_state = solution.state_trajectory.back()(0);
+    EXPECT_LE(terminal_state, 1e-4);
+    EXPECT_NEAR(terminal_state, 0.0, 1e-3);
+}
+
+TEST(IPDDPTest, TracksInitialViolationForPathOnlyProblems)
+{
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    cddp::CDDP problem = makeScalarIntegratorProblem(
+        options, /*add_path_constraint=*/true, /*add_terminal_inequality=*/false);
+
+    cddp::IPDDPSolver solver;
+    solver.initialize(problem);
+
+    const auto &filter = cddp::IPDDPSolverTestAccess::filter(solver);
+    EXPECT_TRUE(filter.empty());
+    EXPECT_GT(cddp::IPDDPSolverTestAccess::filterTheta(solver), 0.0);
+}
+
+TEST(IPDDPTest, BarrierResetTracksThetaForPathOnlyProblems)
+{
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    cddp::CDDP problem = makeScalarIntegratorProblem(
+        options, /*add_path_constraint=*/true, /*add_terminal_inequality=*/false);
+
+    cddp::IPDDPSolver solver;
+    solver.initialize(problem);
+
+    ASSERT_TRUE(cddp::IPDDPSolverTestAccess::filter(solver).empty());
+    cddp::IPDDPSolverTestAccess::updateBarrierParameters(
+        solver, problem, /*forward_pass_success=*/true);
+
+    const auto &filter = cddp::IPDDPSolverTestAccess::filter(solver);
+    const double violation_reference =
+        filter.empty() ? cddp::IPDDPSolverTestAccess::filterTheta(solver)
+                       : filter.back().constraint_violation;
+    EXPECT_GT(violation_reference, 0.0);
+}
+
+TEST(IPDDPTest, ScaledDualInfeasibilityIncludesStateStationarityWhenEnabled)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 1;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 1.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0;
+
+    auto make_problem = [&](bool check_state_stationarity) {
+        cddp::CDDPOptions options = makeIpddpRegressionOptions();
+        options.ipddp.check_state_stationarity = check_state_stationarity;
+
+        cddp::CDDP problem(initial_state, goal_state, horizon, timestep);
+        problem.setDynamicalSystem(
+            std::make_unique<cddp::LTISystem>(A, B, timestep));
+
+        Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+        Eigen::MatrixXd R = Eigen::MatrixXd::Zero(control_dim, control_dim);
+        Eigen::MatrixXd Qf = Eigen::MatrixXd::Zero(state_dim, state_dim);
+        problem.setObjective(std::make_unique<cddp::QuadraticObjective>(
+            Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+        problem.setOptions(options);
+
+        Eigen::MatrixXd path_A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+        Eigen::VectorXd path_b(state_dim);
+        path_b << 0.25;
+        problem.addPathConstraint(
+            "PathUpperBound",
+            std::make_unique<cddp::LinearConstraint>(path_A, path_b));
+
+        std::vector<Eigen::VectorXd> X(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+        X[0] = initial_state;
+        X[1] = A * X[0] + B * U[0];
+        problem.setInitialTrajectory(X, U);
+        return problem;
+    };
+
+    cddp::CDDP problem_without = make_problem(false);
+    cddp::IPDDPSolver solver_without;
+    solver_without.initialize(problem_without);
+    ASSERT_TRUE(cddp::IPDDPSolverTestAccess::backwardPass(solver_without, problem_without));
+    const double dual_without =
+        cddp::IPDDPSolverTestAccess::scaledDualInfeasibility(solver_without, problem_without);
+
+    cddp::CDDP problem_with = make_problem(true);
+    cddp::IPDDPSolver solver_with;
+    solver_with.initialize(problem_with);
+    ASSERT_TRUE(cddp::IPDDPSolverTestAccess::backwardPass(solver_with, problem_with));
+    const double dual_with =
+        cddp::IPDDPSolverTestAccess::scaledDualInfeasibility(solver_with, problem_with);
+
+    EXPECT_NEAR(dual_without, problem_without.getCurrentDualInfeasibility(), 1e-12);
+    EXPECT_GT(dual_with, dual_without);
+}
+
+TEST(IPDDPTest, WarmStartPreservesPathDualSlackState)
+{
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    cddp::CDDP problem = makeScalarIntegratorProblem(
+        options, /*add_path_constraint=*/true, /*add_terminal_inequality=*/false);
+
+    cddp::IPDDPSolver solver;
+    solver.initialize(problem);
+    cddp::IPDDPSolverTestAccess::setPathInterior(
+        solver, "PathUpperBound", /*slack_value=*/0.42, /*dual_value=*/0.73);
+
+    options.warm_start = true;
+    problem.setOptions(options);
+    solver.initialize(problem);
+
+    const auto &slacks =
+        cddp::IPDDPSolverTestAccess::pathSlack(solver, "PathUpperBound");
+    const auto &duals =
+        cddp::IPDDPSolverTestAccess::pathDual(solver, "PathUpperBound");
+    for (const auto &slack : slacks)
+    {
+        ASSERT_EQ(slack.size(), 1);
+        EXPECT_NEAR(slack(0), 0.42, 1e-12);
+    }
+    for (const auto &dual : duals)
+    {
+        ASSERT_EQ(dual.size(), 1);
+        EXPECT_NEAR(dual(0), 0.73, 1e-12);
+    }
+}
+
+TEST(IPDDPTest, WarmStartPreservesTerminalInequalityInteriorState)
+{
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    cddp::CDDP problem = makeScalarIntegratorProblem(
+        options, /*add_path_constraint=*/false, /*add_terminal_inequality=*/true);
+
+    cddp::IPDDPSolver solver;
+    solver.initialize(problem);
+    cddp::IPDDPSolverTestAccess::setTerminalInterior(
+        solver, "TerminalUpperBound", /*slack_value=*/0.37, /*dual_value=*/0.61);
+
+    options.warm_start = true;
+    problem.setOptions(options);
+    solver.initialize(problem);
+
+    const Eigen::VectorXd &terminal_slack =
+        cddp::IPDDPSolverTestAccess::terminalSlack(solver, "TerminalUpperBound");
+    const Eigen::VectorXd &terminal_dual =
+        cddp::IPDDPSolverTestAccess::terminalDual(solver, "TerminalUpperBound");
+    ASSERT_EQ(terminal_slack.size(), 1);
+    ASSERT_EQ(terminal_dual.size(), 1);
+    EXPECT_NEAR(terminal_slack(0), 0.37, 1e-12);
+    EXPECT_NEAR(terminal_dual(0), 0.61, 1e-12);
+}
+
+TEST(IPDDPTest, WarmStartPreservesTerminalEqualityMultiplierState)
+{
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    cddp::CDDP problem = makeScalarIntegratorTerminalEqualityProblem(options);
+
+    cddp::IPDDPSolver solver;
+    solver.initialize(problem);
+    cddp::IPDDPSolverTestAccess::setTerminalEqualityMultiplier(
+        solver, Eigen::VectorXd::Constant(1, 0.53));
+
+    options.warm_start = true;
+    problem.setOptions(options);
+    solver.initialize(problem);
+
+    const Eigen::VectorXd &terminal_multiplier =
+        cddp::IPDDPSolverTestAccess::terminalEqualityMultiplier(solver);
+    ASSERT_EQ(terminal_multiplier.size(), 1);
+    EXPECT_NEAR(terminal_multiplier(0), 0.53, 1e-12);
+}
+
+TEST(IPDDPTest, SolveWithPathConstraintAndTerminalEquality)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 8;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 1.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0;
+
+    cddp::CDDP cddp_solver(initial_state, goal_state, horizon, timestep);
+    cddp_solver.setDynamicalSystem(
+        std::make_unique<cddp::LTISystem>(A, B, timestep));
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = 1e-2 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    cddp_solver.setObjective(std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    options.max_iterations = 100;
+    cddp_solver.setOptions(options);
+
+    Eigen::MatrixXd path_A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::VectorXd path_b(state_dim);
+    path_b << 10.0;
+    cddp_solver.addPathConstraint(
+        "LoosePathUpperBound",
+        std::make_unique<cddp::LinearConstraint>(path_A, path_b));
+    cddp_solver.addTerminalConstraint(
+        "TerminalTarget",
+        std::make_unique<cddp::TerminalEqualityConstraint>(goal_state));
+
+    std::vector<Eigen::VectorXd> X(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+    X[0] = initial_state;
+    for (int t = 0; t < horizon; ++t)
+    {
+        X[t + 1] = A * X[t] + B * U[t];
+    }
+    cddp_solver.setInitialTrajectory(X, U);
+
+    const cddp::CDDPSolution solution = cddp_solver.solve("IPDDP");
+
+    EXPECT_TRUE(solution.status_message == "OptimalSolutionFound" ||
+                solution.status_message == "AcceptableSolutionFound");
+    ASSERT_FALSE(solution.state_trajectory.empty());
+    const double terminal_residual =
+        (solution.state_trajectory.back() - goal_state).lpNorm<Eigen::Infinity>();
+    EXPECT_LE(terminal_residual, 1e-4);
+}
+
+TEST(IPDDPTest, RejectsUnsupportedTerminalConstraintType)
+{
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    cddp::CDDP problem = makeScalarIntegratorProblem(
+        options, /*add_path_constraint=*/false, /*add_terminal_inequality=*/false);
+    problem.addTerminalConstraint(
+        "UnsupportedTerminalConstraint",
+        std::make_unique<UnsupportedTerminalConstraint>());
+
+    cddp::IPDDPSolver solver;
+    EXPECT_THROW(
+        {
+            try
+            {
+                solver.initialize(problem);
+            }
+            catch (const std::runtime_error &error)
+            {
+                EXPECT_THAT(std::string(error.what()),
+                            ::testing::HasSubstr("unsupported type"));
+                throw;
+            }
+        },
+        std::runtime_error);
+}
+
+TEST(IPDDPTest, TerminalEqualityBackwardPassTracksStationarityResidual)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 1;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 1.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0;
+
+    cddp::CDDPOptions options = makeIpddpRegressionOptions();
+    options.regularization.initial_value = 1e-12;
+
+    cddp::CDDP problem(initial_state, goal_state, horizon, timestep);
+    problem.setDynamicalSystem(
+        std::make_unique<cddp::LTISystem>(A, B, timestep));
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = 1e8 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    problem.setObjective(std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+    problem.setOptions(options);
+    problem.addTerminalConstraint(
+        "TerminalTarget",
+        std::make_unique<cddp::TerminalEqualityConstraint>(goal_state));
+
+    std::vector<Eigen::VectorXd> X(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+    X[0] = initial_state;
+    X[1] = A * X[0] + B * U[0];
+    problem.setInitialTrajectory(X, U);
+
+    cddp::IPDDPSolver solver;
+    solver.initialize(problem);
+    ASSERT_TRUE(cddp::IPDDPSolverTestAccess::backwardPass(solver, problem));
+
+    EXPECT_GT(problem.getCurrentDualInfeasibility(), 1e-4);
+    EXPECT_LT(problem.getCurrentStepNorm(), 1e-6);
+}
+
+TEST(IPDDPTest, TerminalEqualityBackwardPassUsesSecondOrderDynamicsWhenEnabled)
+{
+    auto make_problem = [](bool use_ilqr) {
+        const int state_dim = 1;
+        const int control_dim = 1;
+        const int horizon = 2;
+        const double timestep = 1.0;
+
+        Eigen::VectorXd initial_state(state_dim);
+        initial_state << 1.0;
+        Eigen::VectorXd goal_state(state_dim);
+        goal_state << 0.0;
+
+        cddp::CDDPOptions options = makeIpddpRegressionOptions();
+        options.use_ilqr = use_ilqr;
+
+        cddp::CDDP problem(initial_state, goal_state, horizon, timestep);
+        problem.setDynamicalSystem(std::make_unique<QuadraticScalarSystem>());
+
+        Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+        Eigen::MatrixXd R = 1e-2 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+        Eigen::MatrixXd Qf = Eigen::MatrixXd::Zero(state_dim, state_dim);
+        problem.setObjective(std::make_unique<cddp::QuadraticObjective>(
+            Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+        problem.setOptions(options);
+        problem.addTerminalConstraint(
+            "TerminalTarget",
+            std::make_unique<cddp::TerminalEqualityConstraint>(goal_state));
+
+        std::vector<Eigen::VectorXd> X(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+        std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+        X[0] = initial_state;
+        QuadraticScalarSystem system;
+        for (int t = 0; t < horizon; ++t)
+        {
+            X[t + 1] = system.getDiscreteDynamics(X[t], U[t], t * timestep);
+        }
+        problem.setInitialTrajectory(X, U);
+        return problem;
+    };
+
+    cddp::CDDP problem_ilqr = make_problem(true);
+    cddp::IPDDPSolver solver_ilqr;
+    solver_ilqr.initialize(problem_ilqr);
+    cddp::IPDDPSolverTestAccess::setCostateTrajectory(
+        solver_ilqr,
+        std::vector<Eigen::VectorXd>{Eigen::VectorXd::Zero(1),
+                                     Eigen::VectorXd::Constant(1, 0.8),
+                                     Eigen::VectorXd::Constant(1, 0.4)});
+    ASSERT_TRUE(cddp::IPDDPSolverTestAccess::backwardPass(solver_ilqr, problem_ilqr));
+
+    cddp::CDDP problem_ddp = make_problem(false);
+    cddp::IPDDPSolver solver_ddp;
+    solver_ddp.initialize(problem_ddp);
+    cddp::IPDDPSolverTestAccess::setCostateTrajectory(
+        solver_ddp,
+        std::vector<Eigen::VectorXd>{Eigen::VectorXd::Zero(1),
+                                     Eigen::VectorXd::Constant(1, 0.8),
+                                     Eigen::VectorXd::Constant(1, 0.4)});
+    ASSERT_TRUE(cddp::IPDDPSolverTestAccess::backwardPass(solver_ddp, problem_ddp));
+
+    const auto &k_ilqr = cddp::IPDDPSolverTestAccess::controlFeedforward(solver_ilqr);
+    const auto &k_ddp = cddp::IPDDPSolverTestAccess::controlFeedforward(solver_ddp);
+    ASSERT_GE(k_ilqr.size(), 1u);
+    ASSERT_GE(k_ddp.size(), 1u);
+    EXPECT_GT(std::abs(k_ddp[0](0) - k_ilqr[0](0)), 1e-8);
+}
+
+TEST(IPDDPTest, SolveWithTerminalEqualityOnly)
+{
+    const int state_dim = 1;
+    const int control_dim = 1;
+    const int horizon = 8;
+    const double timestep = 1.0;
+
+    Eigen::MatrixXd A = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    Eigen::MatrixXd B = Eigen::MatrixXd::Identity(state_dim, control_dim);
+
+    Eigen::VectorXd initial_state(state_dim);
+    initial_state << 1.0;
+    Eigen::VectorXd goal_state(state_dim);
+    goal_state << 0.0;
+
+    cddp::CDDP cddp_solver(initial_state, goal_state, horizon, timestep);
+    cddp_solver.setDynamicalSystem(
+        std::make_unique<cddp::LTISystem>(A, B, timestep));
+
+    Eigen::MatrixXd Q = Eigen::MatrixXd::Zero(state_dim, state_dim);
+    Eigen::MatrixXd R = 1e-2 * Eigen::MatrixXd::Identity(control_dim, control_dim);
+    Eigen::MatrixXd Qf = Eigen::MatrixXd::Identity(state_dim, state_dim);
+    cddp_solver.setObjective(std::make_unique<cddp::QuadraticObjective>(
+        Q, R, Qf, goal_state, std::vector<Eigen::VectorXd>(), timestep));
+
+    cddp::CDDPOptions options;
+    options.max_iterations = 100;
+    options.tolerance = 1e-6;
+    options.acceptable_tolerance = 1e-6;
+    options.enable_parallel = false;
+    options.num_threads = 1;
+    options.verbose = false;
+    options.debug = false;
+    options.regularization.initial_value = 1e-6;
+    cddp_solver.setOptions(options);
+
+    cddp_solver.addTerminalConstraint(
+        "TerminalTarget",
+        std::make_unique<cddp::TerminalEqualityConstraint>(goal_state));
+
+    std::vector<Eigen::VectorXd> X(horizon + 1, Eigen::VectorXd::Zero(state_dim));
+    std::vector<Eigen::VectorXd> U(horizon, Eigen::VectorXd::Zero(control_dim));
+    X[0] = initial_state;
+    for (int t = 0; t < horizon; ++t)
+    {
+        X[t + 1] = A * X[t] + B * U[t];
+    }
+    cddp_solver.setInitialTrajectory(X, U);
+
+    const cddp::CDDPSolution solution = cddp_solver.solve("IPDDP");
+
+    EXPECT_TRUE(solution.status_message == "OptimalSolutionFound" ||
+                solution.status_message == "AcceptableSolutionFound");
+    ASSERT_FALSE(solution.state_trajectory.empty());
+    const double terminal_residual =
+        (solution.state_trajectory.back() - goal_state).lpNorm<Eigen::Infinity>();
+    EXPECT_LE(terminal_residual, 1e-4);
+}