From 6e78f71d439665a4cfcb839c79fa490450181a8b Mon Sep 17 00:00:00 2001
From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
Date: Fri, 3 Jul 2026 20:00:19 +0800
Subject: [PATCH] chore: clean up srt-slurm recipes

---
 .../1k1k/disagg/mtp/1k1k_mtp_lowlat_0.yaml    | 122 ------------
 .../1k1k/disagg/mtp/1k1k_mtp_lowlat_1.yaml    | 122 ------------
 .../1k1k/disagg/mtp/1k1k_mtp_maxtpt_0.yaml    | 127 ------------
 .../1k1k/disagg/mtp/1k1k_mtp_maxtpt_1.yaml    | 127 ------------
 .../1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml    |  98 ----------
 .../1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml    |  98 ----------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml    | 101 ----------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml    | 101 ----------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml    | 101 ----------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_3.yaml    | 101 ----------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml    |  98 ----------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml    |  98 ----------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml    |  98 ----------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml    |  98 ----------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_4.yaml    |  98 ----------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_5.yaml    |  98 ----------
 .../8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml    | 101 ----------
 .../8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml    | 101 ----------
 .../8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml    | 101 ----------
 .../1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml    | 173 ----------------
 .../1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml    | 173 ----------------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml    | 185 ------------------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml    | 185 ------------------
 .../1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml    | 185 ------------------
 .../8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml    | 185 ------------------
 .../8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml    | 185 ------------------
 .../8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml    | 185 ------------------
 .../1k1k/disagg/stp/1k1k_stp_hightpt_0.yaml   | 148 --------------
 .../1k1k/disagg/stp/1k1k_stp_hightpt_1.yaml   | 148 --------------
 .../1k1k/disagg/stp/1k1k_stp_hightpt_2.yaml   | 148 --------------
 .../1k1k/disagg/stp/1k1k_stp_hightpt_3.yaml   | 148 --------------
 .../1k1k/disagg/stp/1k1k_stp_hightpt_4.yaml   | 148 --------------
 .../1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml    | 140 -------------
 .../1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml    | 140 -------------
 .../8k1k/disagg/stp/8k1k_stp_hightpt_0.yaml   | 148 --------------
 .../8k1k/disagg/stp/8k1k_stp_hightpt_1.yaml   | 148 --------------
 .../8k1k/disagg/stp/8k1k_stp_hightpt_2.yaml   | 148 --------------
 .../8k1k/disagg/stp/8k1k_stp_hightpt_3.yaml   | 148 --------------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml    | 140 -------------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml    | 140 -------------
 .../8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml    | 140 -------------
 .../8k1k/disagg-b200-high-tpt-megamoe.yaml    | 138 -------------
 .../8k1k/disagg-b200-low-latency.yaml         | 137 -------------
 .../8k1k/disagg-b200-low-middle-curve.yaml    | 138 -------------
 .../8k1k/disagg-b200-max-tpt-megamoe.yaml     | 138 -------------
 .../8k1k/disagg-b200-mid-curve-megamoe.yaml   | 138 -------------
 .../8k1k/disagg-b300-low-middle-curve.yaml    | 125 ------------
 .../8k1k/disagg-b300-max-tpt-megamoe.yaml     | 130 ------------
 .../minimax-m2.5-b200-fp4/1k1k/dep2-1p2d.yaml |  72 -------
 .../1k1k/dep2-2p3d-c6144.yaml                 |  72 -------
 .../minimax-m2.5-b200-fp4/1k1k/dep2-2p3d.yaml |  72 -------
 .../minimax-m2.5-b200-fp4/1k1k/dep4-3p2d.yaml |  74 -------
 .../minimax-m2.5-b200-fp4/1k1k/dep8-2p1d.yaml |  70 -------
 .../minimax-m2.5-b200-fp4/1k1k/tp4-1p1d.yaml  |  72 -------
 .../minimax-m2.5-b200-fp4/1k1k/tp4-1p2d.yaml  |  68 -------
 .../1k1k/tp4ep-1p1d-hi-conc.yaml              |  68 -------
 .../1k1k/tp4ep-1p1d.yaml                      |  70 -------
 .../1k1k/tp4ep-1p2d.yaml                      |  68 -------
 .../1k1k/tp4ep-1p3d-hi-conc.yaml              |  68 -------
 .../1k1k/tp4ep-1p3d.yaml                      |  68 -------
 .../1k1k/tp4ep-2p3d.yaml                      |  72 -------
 .../minimax-m2.5-b200-fp4/8k1k/dep4-2p1d.yaml |  75 -------
 .../minimax-m2.5-b200-fp4/8k1k/tp4-1p1d.yaml  |  68 -------
 .../8k1k/tp4ep-1p1d-hi-conc.yaml              |  68 -------
 .../8k1k/tp4ep-1p1d.yaml                      |  68 -------
 .../vllm/minimax-m2.5-b200-fp8/1k1k/dep8.yaml |  75 -------
 .../1k1k/disagg-b200-1p1d-tp4ep.yaml          |  69 -------
 .../1k1k/disagg-b200-1p3d-tp4ep.yaml          |  72 -------
 .../1k1k/disagg-b200-1p4d-dep2-hi-conc.yaml   |  74 -------
 .../1k1k/disagg-b200-2p1d-dep8.yaml           |  86 --------
 .../1k1k/disagg-b200-2p3d-dep4-hi-conc.yaml   |  74 -------
 .../minimax-m2.5-b200-fp8/1k1k/tp4ep.yaml     |  73 -------
 .../8k1k/disagg-b200-1p1d-tp4ep-hi-conc.yaml  |  69 -------
 .../8k1k/disagg-b200-1p1d-tp4ep.yaml          |  69 -------
 .../8k1k/disagg-b200-3p2d-dep4.yaml           |  76 -------
 .../1k1k/disagg-b300-1p1d-tp4.yaml            |  65 ------
 .../1k1k/disagg-b300-1p2d-tp4.yaml            |  65 ------
 .../1k1k/disagg-b300-1p2d-tp4ep.yaml          |  67 -------
 .../1k1k/disagg-b300-2p1d-dep8.yaml           |  69 -------
 .../1k1k/disagg-b300-2p2d-dep4-hi-conc.yaml   |  69 -------
 .../1k1k/disagg-b300-2p2d-dep4.yaml           |  69 -------
 .../1k1k/disagg-b300-2p3d-dep2.yaml           |  69 -------
 .../8k1k/disagg-b300-1p1d-tp4ep-hi-conc.yaml  |  67 -------
 .../8k1k/disagg-b300-1p1d-tp4ep.yaml          |  67 -------
 .../8k1k/disagg-b300-2p1d-tp2.yaml            |  68 -------
 .../8k1k/disagg-b300-2p1d-tp4ep.yaml          |  67 -------
 .../8k1k/disagg-b300-3p1d-dep4-hi-conc.yaml   |  72 -------
 .../8k1k/disagg-b300-3p1d-dep4.yaml           |  72 -------
 .../8k1k/disagg-b300-3p1d-tp4.yaml            |  68 -------
 .../8k1k/disagg-b300-5p2d-dep4.yaml           |  69 -------
 .../minimax-m2.5-b300/1k1k/dep2-1p2d.yaml     |  72 -------
 .../1k1k/dep2-2p3d-c6144.yaml                 |  72 -------
 .../minimax-m2.5-b300/1k1k/dep2-2p3d.yaml     |  72 -------
 .../minimax-m2.5-b300/1k1k/dep8-2p1d.yaml     |  71 -------
 .../vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml |  73 -------
 .../vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml |  69 -------
 .../minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml    |  71 -------
 .../minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml    |  69 -------
 .../vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml |  78 --------
 .../minimax-m2.5-b300/8k1k/dep4-4p1d.yaml     |  71 -------
 .../minimax-m2.5-b300/8k1k/dep8-4p1d.yaml     |  71 -------
 .../vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml |  69 -------
 .../minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml    |  69 -------
 .../minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml    |  69 -------
 .../vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml |  73 -------
 .../1k1k/disagg-gb300-1p1d-tp4.yaml           |  64 ------
 .../1k1k/disagg-gb300-1p2d-tp4.yaml           |  69 -------
 .../1k1k/disagg-gb300-1p2d-tp4ep.yaml         |  66 -------
 .../1k1k/disagg-gb300-2p1d-dep8.yaml          |  83 --------
 .../1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml  |  82 --------
 .../1k1k/disagg-gb300-2p2d-dep4.yaml          |  82 --------
 .../1k1k/disagg-gb300-2p3d-dep2.yaml          |  68 -------
 .../8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml |  66 -------
 .../8k1k/disagg-gb300-1p1d-tp4ep.yaml         |  66 -------
 .../8k1k/disagg-gb300-2p1d-tp2.yaml           |  68 -------
 .../8k1k/disagg-gb300-2p1d-tp4ep.yaml         |  66 -------
 .../8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml  |  82 --------
 .../8k1k/disagg-gb300-3p1d-dep4.yaml          |  82 --------
 .../8k1k/disagg-gb300-3p1d-tp4.yaml           |  68 -------
 .../8k1k/disagg-gb300-5p2d-dep4.yaml          |  68 -------
 .../1k1k/disagg-gb200-1p1d-tp4.yaml           |  67 -------
 .../1k1k/disagg-gb200-1p2d-tp4.yaml           |  67 -------
 .../1k1k/disagg-gb200-1p3d-tp4ep.yaml         |  72 -------
 .../1k1k/disagg-gb200-1p4d-dep2.yaml          |  74 -------
 .../1k1k/disagg-gb200-2p1d-dep8.yaml          |  86 --------
 .../1k1k/disagg-gb200-2p3d-dep4.yaml          |  74 -------
 .../8k1k/disagg-gb200-1p1d-tp4.yaml           |  68 -------
 .../8k1k/disagg-gb200-1p1d-tp4ep.yaml         |  70 -------
 .../8k1k/disagg-gb200-3p2d-dep4.yaml          |  76 -------
 .../minimax-m2.5-gb200/1k1k/dep2-1p2d.yaml    |  73 -------
 .../1k1k/dep2-2p3d-c6144.yaml                 |  73 -------
 .../minimax-m2.5-gb200/1k1k/dep2-2p3d.yaml    |  73 -------
 .../minimax-m2.5-gb200/1k1k/dep4-3p2d.yaml    |  74 -------
 .../minimax-m2.5-gb200/1k1k/dep8-2p1d.yaml    |  70 -------
 .../minimax-m2.5-gb200/1k1k/tp4-1p1d.yaml     |  72 -------
 .../minimax-m2.5-gb200/1k1k/tp4-1p2d.yaml     |  68 -------
 .../1k1k/tp4ep-1p1d-hi-conc.yaml              |  68 -------
 .../minimax-m2.5-gb200/1k1k/tp4ep-1p1d.yaml   |  70 -------
 .../minimax-m2.5-gb200/1k1k/tp4ep-1p2d.yaml   |  68 -------
 .../1k1k/tp4ep-1p3d-hi-conc.yaml              |  68 -------
 .../minimax-m2.5-gb200/1k1k/tp4ep-1p3d.yaml   |  68 -------
 .../minimax-m2.5-gb200/1k1k/tp4ep-2p3d.yaml   |  72 -------
 .../minimax-m2.5-gb200/8k1k/dep4-2p1d.yaml    |  75 -------
 .../minimax-m2.5-gb200/8k1k/tp4-1p1d.yaml     |  68 -------
 .../8k1k/tp4ep-1p1d-hi-conc.yaml              |  68 -------
 .../minimax-m2.5-gb200/8k1k/tp4ep-1p1d.yaml   |  68 -------
 .../minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml    |  73 -------
 .../vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml     |  73 -------
 .../vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml     |  70 -------
 .../vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml      |  72 -------
 .../vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml      |  68 -------
 .../vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml    |  70 -------
 .../vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml    |  68 -------
 .../vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml     |  70 -------
 .../vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml     |  70 -------
 .../vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml      |  68 -------
 .../vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml    |  68 -------
 .../vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml    |  68 -------
 .../b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml    |  81 --------
 .../b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml    |  81 --------
 .../b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml    |  79 --------
 .../b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml    |  81 --------
 .../b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml    |  79 --------
 configs/nvidia-master.yaml                    |  82 ++++----
 runners/launch_b200-dgxc.sh                   |  17 +-
 runners/launch_b300-nv.sh                     |  12 --
 runners/launch_gb200-nv.sh                    |  14 --
 runners/launch_gb300-nv.sh                    |  24 +--
 168 files changed, 50 insertions(+), 14711 deletions(-)
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_3.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_5.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_3.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_3.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-high-tpt-megamoe.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-latency.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-middle-curve.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-max-tpt-megamoe.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-mid-curve-megamoe.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-low-middle-curve.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-max-tpt-megamoe.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d-c6144.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep4-3p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep8-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-2p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/dep4-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/dep8.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p3d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p4d-dep2-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p1d-dep8.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p3d-dep4-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-3p2d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p1d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p1d-dep8.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p3d-dep2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-5p2d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p1d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p2d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p3d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p4d-dep2.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p1d-dep8.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p3d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4ep.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-3p2d-dep4.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d-c6144.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep4-3p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep8-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-2p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/dep4-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d-hi-conc.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml
 delete mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml

diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_0.yaml
deleted file mode 100644
index b5fe566457..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_0.yaml
+++ /dev/null
@@ -1,122 +0,0 @@
-# Derived from the srt-slurm b200-fp4 1k1k recipe (recipes/b200-fp4/1k1k.yaml
-# base + zip_override_mtp_lowlat[0]): 1p5d low-latency (dep4 prefill / tep8 decode, 5 decode nodes).
-# One flat YAML per concrete topology, matching the 8k1k local recipe layout
-# (sglang/<model>/<hw>-<precision>/<seq>/disagg/<variant>/...).
-
-name: b200-fp4-mtp-low-latency-dep4-1p-tep8-5d
-model:
-  path: dsr1
-  container: dynamo-sglang
-  precision: fp4
-
-dynamo:
-  hash: "5b4bc1dd70965017a737c71b19db5a0aeaa88727"
-  install: true
-
-resources:
-  gpu_type: b200
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 5
-  decode_workers: 5
-  gpus_per_node: 8
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_ENABLE_SPEC_V2: '1'
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_DECODE_BOOTSTRAP_TIMEOUT: '1000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_ENABLE_SPEC_V2: '1'
-  sglang_config:
-    prefill:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 512
-      disable-cuda-graph: true
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 4
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      moe-dense-tp-size: 1
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-    decode:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 512
-      cuda-graph-max-bs: 512
-      tensor-parallel-size: 8
-      data-parallel-size: 1
-      expert-parallel-size: 8
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-      speculative-algorithm: EAGLE
-      speculative-num-steps: 2
-      speculative-eagle-topk: 1
-      speculative-num-draft-tokens: 3
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  isl: 1024
-  osl: 1024
-  req_rate: inf
-  concurrencies: 16x512
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_1.yaml
deleted file mode 100644
index 77905ed598..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_1.yaml
+++ /dev/null
@@ -1,122 +0,0 @@
-# Derived from the srt-slurm b200-fp4 1k1k recipe (recipes/b200-fp4/1k1k.yaml
-# base + zip_override_mtp_lowlat[1]): 1p6d low-latency (dep4 prefill / tep8 decode, 6 decode nodes).
-# One flat YAML per concrete topology, matching the 8k1k local recipe layout
-# (sglang/<model>/<hw>-<precision>/<seq>/disagg/<variant>/...).
-
-name: b200-fp4-mtp-low-latency-dep4-1p-tep8-6d
-model:
-  path: dsr1
-  container: dynamo-sglang
-  precision: fp4
-
-dynamo:
-  hash: "5b4bc1dd70965017a737c71b19db5a0aeaa88727"
-  install: true
-
-resources:
-  gpu_type: b200
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 6
-  decode_workers: 6
-  gpus_per_node: 8
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_ENABLE_SPEC_V2: '1'
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_DECODE_BOOTSTRAP_TIMEOUT: '1000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_ENABLE_SPEC_V2: '1'
-  sglang_config:
-    prefill:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 512
-      disable-cuda-graph: true
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 4
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      moe-dense-tp-size: 1
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-    decode:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 512
-      cuda-graph-max-bs: 512
-      tensor-parallel-size: 8
-      data-parallel-size: 1
-      expert-parallel-size: 8
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-      speculative-algorithm: EAGLE
-      speculative-num-steps: 2
-      speculative-eagle-topk: 1
-      speculative-num-draft-tokens: 3
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  isl: 1024
-  osl: 1024
-  req_rate: inf
-  concurrencies: 32x64x256x512
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_0.yaml
deleted file mode 100644
index 7cc3a5848d..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_0.yaml
+++ /dev/null
@@ -1,127 +0,0 @@
-# Derived from the srt-slurm b200-fp4 1k1k recipe (recipes/b200-fp4/1k1k.yaml
-# base + zip_override_mtp_maxtpt[0]): 1p1d max-throughput (dep4 prefill / dep8 decode, mem-fraction 0.75).
-# One flat YAML per concrete topology, matching the 8k1k local recipe layout
-# (sglang/<model>/<hw>-<precision>/<seq>/disagg/<variant>/...).
-
-name: b200-fp4-mtp-max-tpt-dep4-1p-dep8-1d
-model:
-  path: dsr1
-  container: dynamo-sglang
-  precision: fp4
-
-dynamo:
-  hash: "5b4bc1dd70965017a737c71b19db5a0aeaa88727"
-  install: true
-
-resources:
-  gpu_type: b200
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 1
-  decode_workers: 1
-  gpus_per_node: 8
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_ENABLE_SPEC_V2: '1'
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_DECODE_BOOTSTRAP_TIMEOUT: '1000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_MOE_NVFP4_DISPATCH: '1'
-    SGLANG_FLASHINFER_FP4_GEMM_BACKEND: cutlass
-    SGLANG_ENABLE_SPEC_V2: '1'
-  sglang_config:
-    prefill:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 1024
-      disable-cuda-graph: true
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 4
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      moe-dense-tp-size: 1
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-    decode:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.75
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 1024
-      cuda-graph-max-bs: 1024
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 8
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      moe-dense-tp-size: 1
-      speculative-algorithm: EAGLE
-      speculative-num-steps: 2
-      speculative-eagle-topk: 1
-      speculative-num-draft-tokens: 3
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  isl: 1024
-  osl: 1024
-  req_rate: inf
-  concurrencies: 512x1024
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_1.yaml
deleted file mode 100644
index 17c334d5b8..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_1.yaml
+++ /dev/null
@@ -1,127 +0,0 @@
-# Derived from the srt-slurm b200-fp4 1k1k recipe (recipes/b200-fp4/1k1k.yaml
-# base + zip_override_mtp_maxtpt[1]): 1p2d max-throughput (dep4 prefill / dep8 decode, mem-fraction 0.85).
-# One flat YAML per concrete topology, matching the 8k1k local recipe layout
-# (sglang/<model>/<hw>-<precision>/<seq>/disagg/<variant>/...).
-
-name: b200-fp4-mtp-max-tpt-dep4-1p-dep8-2d
-model:
-  path: dsr1
-  container: dynamo-sglang
-  precision: fp4
-
-dynamo:
-  hash: "5b4bc1dd70965017a737c71b19db5a0aeaa88727"
-  install: true
-
-resources:
-  gpu_type: b200
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 2
-  decode_workers: 2
-  gpus_per_node: 8
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_ENABLE_SPEC_V2: '1'
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_ENABLE_JIT_DEEPGEMM: 'false'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_DECODE_BOOTSTRAP_TIMEOUT: '1000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_PER_TOKEN_GROUP_QUANT_8BIT_V2: '1'
-    DYN_REQUEST_PLANE: nats
-    SGLANG_MOE_NVFP4_DISPATCH: '1'
-    SGLANG_FLASHINFER_FP4_GEMM_BACKEND: cutlass
-    SGLANG_ENABLE_SPEC_V2: '1'
-  sglang_config:
-    prefill:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 512
-      disable-cuda-graph: true
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 4
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      moe-dense-tp-size: 1
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-    decode:
-      served-model-name: deepseek-ai/DeepSeek-R1
-      trust-remote-code: true
-      quantization: modelopt_fp4
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.85
-      max-prefill-tokens: 32768
-      chunked-prefill-size: 32768
-      context-length: 2200
-      max-running-requests: 512
-      cuda-graph-max-bs: 512
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 8
-      attention-backend: trtllm_mla
-      kv-cache-dtype: fp8_e4m3
-      moe-runner-backend: flashinfer_trtllm
-      stream-interval: 30
-      watchdog-timeout: 1000000
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      fp4-gemm-backend: flashinfer_trtllm
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      moe-dense-tp-size: 1
-      speculative-algorithm: EAGLE
-      speculative-num-steps: 2
-      speculative-eagle-topk: 1
-      speculative-num-draft-tokens: 3
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  isl: 1024
-  osl: 1024
-  req_rate: inf
-  concurrencies: '512'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
deleted file mode 100644
index 8ad78c93eb..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_1k1k_lowlat_0
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 8
-  decode_workers: 8
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 64
-      cuda-graph-max-bs: 64
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: 512x256x128x64x32
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
deleted file mode 100644
index fced286164..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_1k1k_lowlat_1
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 8
-  decode_workers: 8
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 1
-      cuda-graph-max-bs: 1
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '16'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml
deleted file mode 100644
index 3627b0fce3..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_1k1k_hightpt_0
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 1
-  decode_workers: 1
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 2560
-      cuda-graph-max-bs: 2560
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '2576'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml
deleted file mode 100644
index dd18582708..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_1k1k_hightpt_1
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 2
-  decode_workers: 2
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 1232
-      cuda-graph-max-bs: 1232
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '1248'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml
deleted file mode 100644
index c93f2b294a..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_1k1k_hightpt_2
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 3
-  decode_workers: 3
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 784
-      cuda-graph-max-bs: 784
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '800'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_3.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_3.yaml
deleted file mode 100644
index e6ad090411..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_3.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_1k1k_hightpt_3
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 4
-  decode_workers: 4
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 560
-      cuda-graph-max-bs: 560
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '576'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml
deleted file mode 100644
index a635e99419..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_8k1k_lowlat_0
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 2
-  decode_workers: 2
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 80
-      cuda-graph-max-bs: 80
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '256'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml
deleted file mode 100644
index 4fa3e72857..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_8k1k_lowlat_1
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 3
-  decode_workers: 3
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 48
-      cuda-graph-max-bs: 48
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '256'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml
deleted file mode 100644
index f1404ae279..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_8k1k_lowlat_2
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 4
-  decode_workers: 4
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 34
-      cuda-graph-max-bs: 34
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '200'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml
deleted file mode 100644
index 1b0bff9b51..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_8k1k_lowlat_3
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 5
-  decode_workers: 5
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 22
-      cuda-graph-max-bs: 22
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '128'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_4.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_4.yaml
deleted file mode 100644
index 1fa1e8f6c0..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_4.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_8k1k_lowlat_4
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 7
-  decode_workers: 7
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 8
-      cuda-graph-max-bs: 8
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '64'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_5.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_5.yaml
deleted file mode 100644
index 6115cbf7a1..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_5.yaml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: b200-fp8-glm5_8k1k_lowlat_5
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 8
-  decode_workers: 8
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 1
-      max-running-requests: 1
-      cuda-graph-max-bs: 1
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '12'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml
deleted file mode 100644
index ae824b4a7a..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_8k1k_hightpt_0
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 2
-  prefill_workers: 2
-  decode_nodes: 1
-  decode_workers: 1
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 544
-      cuda-graph-max-bs: 544
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '560'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml
deleted file mode 100644
index 12844af4a3..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_8k1k_hightpt_1
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 1
-  decode_workers: 1
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 224
-      cuda-graph-max-bs: 224
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '240'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml
deleted file mode 100644
index 1e8d7599ae..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: b200-fp8-glm5_8k1k_hightpt_2
-model:
-  path: glm5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-resources:
-  gpu_type: b200
-  gpus_per_node: 8
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 2
-  decode_workers: 2
-frontend:
-  type: dynamo
-dynamo:
-  version: "1.1.0"
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    NCCL_CUMEM_ENABLE: '1'
-    DYN_REQUEST_PLANE: nats
-  sglang_config:
-    prefill:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 65536
-      max-prefill-tokens: 8192
-      tensor-parallel-size: 8
-      data-parallel-size: 8
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-    decode:
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-      mem-fraction-static: 0.8
-      context-length: 9600
-      tensor-parallel-size: 8
-      expert-parallel-size: 1
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-      enable-flashinfer-allreduce-fusion: true
-      weight-loader-prefetch-checkpoints: true
-      disable-radix-cache: true
-      stream-interval: 30
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      data-parallel-size: 8
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      load-balance-method: total_tokens
-      max-running-requests: 208
-      cuda-graph-max-bs: 208
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '224'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
deleted file mode 100644
index 32cfbd4b72..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
+++ /dev/null
@@ -1,173 +0,0 @@
-name: "gb300-fp4-glm5_1k1k_lowlat_0"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 17
-  decode_workers: 17
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_lowlat)
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size:   1
-      enable-flashinfer-allreduce-fusion: true
-
-      moe-runner-backend: "flashinfer_trtllm"
-      max-running-requests: 32
-      cuda-graph-max-bs:    32
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "512x256x128x64"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
deleted file mode 100644
index cf7ab32ee5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
+++ /dev/null
@@ -1,173 +0,0 @@
-name: "gb300-fp4-glm5_1k1k_lowlat_1"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 17
-  decode_workers: 17
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_lowlat)
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size:   1
-      enable-flashinfer-allreduce-fusion: true
-
-      moe-runner-backend: "flashinfer_trtllm"
-      max-running-requests: 1
-      cuda-graph-max-bs:    1
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml
deleted file mode 100644
index 9cadc4c6f3..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-name: "gb300-fp4-glm5_1k1k_maxtpt_0"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 3
-  prefill_workers: 3
-  gpus_per_prefill: 4
-  decode_nodes: 8
-  decode_workers: 1
-  gpus_per_decode: 32
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_hightpt)
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size:   32
-
-      # dp
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-
-      # ep
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: "static"
-
-      moe-a2a-backend: "deepep"
-      deepep-mode: "low_latency"
-      deepep-config: "/configs/deepep_config.json"
-      max-running-requests: 16384
-      cuda-graph-max-bs:    512
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "16500"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml
deleted file mode 100644
index 73d8a2e307..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-name: "gb300-fp4-glm5_1k1k_maxtpt_1"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  prefill_workers: 2
-  gpus_per_prefill: 4
-  decode_nodes: 8
-  decode_workers: 1
-  gpus_per_decode: 32
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_hightpt)
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size:   32
-
-      # dp
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-
-      # ep
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: "static"
-
-      moe-a2a-backend: "deepep"
-      deepep-mode: "low_latency"
-      deepep-config: "/configs/deepep_config.json"
-      max-running-requests: 8192
-      cuda-graph-max-bs:    256
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "8300"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml
deleted file mode 100644
index b7086cfc0f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-name: "gb300-fp4-glm5_1k1k_maxtpt_2"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  gpus_per_prefill: 4
-  decode_nodes: 8
-  decode_workers: 1
-  gpus_per_decode: 32
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_hightpt)
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size:   32
-
-      # dp
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-
-      # ep
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: "static"
-
-      moe-a2a-backend: "deepep"
-      deepep-mode: "low_latency"
-      deepep-config: "/configs/deepep_config.json"
-      max-running-requests: 2304
-      cuda-graph-max-bs:    72
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2500x1024x512x256"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml
deleted file mode 100644
index 56669629df..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-name: "gb300-fp4-glm5_8k1k_maxtpt_0"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 5
-  prefill_workers: 5
-  gpus_per_prefill: 4
-  decode_nodes: 8
-  decode_workers: 1
-  gpus_per_decode: 32
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_hightpt)
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size:   32
-
-      # dp
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-
-      # ep
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: "static"
-
-      moe-a2a-backend: "deepep"
-      deepep-mode: "low_latency"
-      deepep-config: "/configs/deepep_config.json"
-      max-running-requests: 4096
-      cuda-graph-max-bs:    4096
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "2048"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml
deleted file mode 100644
index 7fa40fa423..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-name: "gb300-fp4-glm5_8k1k_maxtpt_1"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 7
-  prefill_workers: 7
-  gpus_per_prefill: 4
-  decode_nodes: 8
-  decode_workers: 1
-  gpus_per_decode: 32
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_hightpt)
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size:   32
-
-      # dp
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-
-      # ep
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: "static"
-
-      moe-a2a-backend: "deepep"
-      deepep-mode: "low_latency"
-      deepep-config: "/configs/deepep_config.json"
-      max-running-requests: 4096
-      cuda-graph-max-bs:    4096
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "3072"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml
deleted file mode 100644
index 2b6ef93511..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml
+++ /dev/null
@@ -1,185 +0,0 @@
-name: "gb300-fp4-glm5_8k1k_maxtpt_2"
-
-# Ported from upstream srt-slurm recipes/gb300-fp4/glm5.yaml (PR #152).
-# Upstream uses a single combined file with `zip_override_*` arrays
-# expanded by srtctl across zip indices. We split into one flat yaml
-# per concrete topology to match the InferenceX dsv4 sglang convention
-# (see ../deepseek-v4/8k1k/*.yaml). All shared base envs and the
-# prefill sglang_config are inlined here verbatim from the upstream
-# `base:` block; the decode block is the upstream base plus the
-# topology-specific override from this zip index.
-
-model:
-  path: "glm-5-fp4"
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: "fp4"
-
-# Released dynamo wheel; unlike hash-based sources, this recipe does not
-# require a persistent /configs/dynamo-wheels build cache.
-dynamo:
-  version: "1.1.0"
-
-slurm:
-  time_limit: "03:00:00"
-
-# Mirror dsv4 sglang recipes: cpus-per-task=144 avoids the 1-CPU
-# default that turns dynamo install + sglang weight load into a serial
-# crawl; mem=0 grants whole-node memory.
-sbatch_directives:
-  cpus-per-task: "144"
-  mem: "0"
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 10
-  prefill_workers: 10
-  gpus_per_prefill: 4
-  decode_nodes: 8
-  decode_workers: 1
-  gpus_per_decode: 32
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-
-backend:
-  type: sglang
-
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: "1800"
-    PYTHONUNBUFFERED: "1"
-    DYN_SKIP_SGLANG_LOG_FORMATTING: "1"
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: "100000"
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: "100000"
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: "100000"
-    MC_TE_METRIC: "true"
-    MC_FORCE_MNNVL: "1"
-    NCCL_MNNVL_ENABLE: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True"
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: "0"
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: "1"
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "512"
-    SGLANG_MOE_NVFP4_DISPATCH: "1"
-
-  sglang_config:
-    prefill:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "prefill"
-      disaggregation-transfer-backend: "nixl"
-
-      # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-      # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: "total_tokens"
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_trtllm"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-      # Model configuration
-      served-model-name: "GLM-5-FP4"
-      trust-remote-code: true
-
-      quantization: "modelopt_fp4"
-      kv-cache-dtype: "fp8_e4m3"
-
-      # Disaggregation mode
-      disaggregation-mode: "decode"
-      disaggregation-transfer-backend: "nixl"
-
-      # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-      # Backend
-      nsa-decode-backend: "trtllm"
-      nsa-prefill-backend: "trtllm"
-      moe-runner-backend: "flashinfer_cutedsl"
-      fp4-gemm-backend: "flashinfer_cutlass"
-
-      # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-      # Other flags
-      # disable-shared-experts-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      # Parallelism (override from upstream zip_override_*_hightpt)
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size:   32
-
-      # dp
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-
-      # ep
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: "static"
-
-      moe-a2a-backend: "deepep"
-      deepep-mode: "low_latency"
-      deepep-config: "/configs/deepep_config.json"
-      max-running-requests: 4096
-      cuda-graph-max-bs:    4096
-
-
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4096"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_0.yaml
deleted file mode 100644
index 33da57e947..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_0.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_1k1k_hightpt_0
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 12
-  prefill_workers: 12
-  decode_nodes: 6
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 24
-      expert-parallel-size: 24
-      data-parallel-size: 24
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 8192
-      cuda-graph-max-bs: 512
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '8192'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_1.yaml
deleted file mode 100644
index 03cb7e671f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_1.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_1k1k_hightpt_1
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 10
-  prefill_workers: 10
-  decode_nodes: 8
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size: 32
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 8192
-      cuda-graph-max-bs: 256
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '7500'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_2.yaml
deleted file mode 100644
index c06206c81c..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_2.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_1k1k_hightpt_2
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 8
-  prefill_workers: 8
-  decode_nodes: 10
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 40
-      expert-parallel-size: 40
-      data-parallel-size: 40
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 24
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 7200
-      cuda-graph-max-bs: 180
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '7300'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_3.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_3.yaml
deleted file mode 100644
index 9517724799..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_3.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_1k1k_hightpt_3
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 6
-  prefill_workers: 6
-  decode_nodes: 12
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 48
-      expert-parallel-size: 48
-      data-parallel-size: 48
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 6144
-      cuda-graph-max-bs: 128
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '6500'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_4.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_4.yaml
deleted file mode 100644
index 9a1f320a59..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_4.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_1k1k_hightpt_4
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 4
-  prefill_workers: 4
-  decode_nodes: 14
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 56
-      expert-parallel-size: 56
-      data-parallel-size: 56
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 24
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 5600
-      cuda-graph-max-bs: 100
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '5700'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
deleted file mode 100644
index 3ace5647c5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml
+++ /dev/null
@@ -1,140 +0,0 @@
-name: gb300-fp8-glm5_1k1k_lowlat_0
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 17
-  decode_workers: 17
-frontend:
-  type: dynamo
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size: 1
-      enable-flashinfer-allreduce-fusion: true
-      moe-runner-backend: flashinfer_trtllm
-      max-running-requests: 32
-      cuda-graph-max-bs: 32
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: 512x256x128x64
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
deleted file mode 100644
index 965b6f1485..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml
+++ /dev/null
@@ -1,140 +0,0 @@
-name: gb300-fp8-glm5_1k1k_lowlat_1
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 17
-  decode_workers: 17
-frontend:
-  type: dynamo
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size: 1
-      enable-flashinfer-allreduce-fusion: true
-      moe-runner-backend: flashinfer_trtllm
-      max-running-requests: 1
-      cuda-graph-max-bs: 1
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 1024
-  osl: 1024
-  concurrencies: '32'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_0.yaml
deleted file mode 100644
index 150e62233f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_0.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_8k1k_hightpt_0
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 14
-  prefill_workers: 14
-  decode_nodes: 4
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 16
-      expert-parallel-size: 16
-      data-parallel-size: 16
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 2800
-      cuda-graph-max-bs: 175
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '2800'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_1.yaml
deleted file mode 100644
index 6393069c87..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_1.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_8k1k_hightpt_1
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 12
-  prefill_workers: 12
-  decode_nodes: 6
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 24
-      expert-parallel-size: 24
-      data-parallel-size: 24
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 1680
-      cuda-graph-max-bs: 70
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '1700'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_2.yaml
deleted file mode 100644
index 56b11ed8a9..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_2.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_8k1k_hightpt_2
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 10
-  prefill_workers: 10
-  decode_nodes: 8
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 32
-      expert-parallel-size: 32
-      data-parallel-size: 32
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 32
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 1280
-      cuda-graph-max-bs: 40
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '1300'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_3.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_3.yaml
deleted file mode 100644
index 13fb0f3267..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_3.yaml
+++ /dev/null
@@ -1,148 +0,0 @@
-name: gb300-fp8-glm5_8k1k_hightpt_3
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 8
-  prefill_workers: 8
-  decode_nodes: 10
-  decode_workers: 1
-frontend:
-  type: dynamo
-  enable_multiple_frontends: true
-  num_additional_frontends: 9
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 40
-      expert-parallel-size: 40
-      data-parallel-size: 40
-      enable-dp-lm-head: true
-      enable-dp-attention: true
-      moe-dense-tp-size: 1
-      ep-num-redundant-experts: 24
-      ep-dispatch-algorithm: static
-      moe-a2a-backend: deepep
-      deepep-mode: low_latency
-      deepep-config: /configs/deepep_config.json
-      max-running-requests: 880
-      cuda-graph-max-bs: 22
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '900'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml
deleted file mode 100644
index 8065160bd7..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml
+++ /dev/null
@@ -1,140 +0,0 @@
-name: gb300-fp8-glm5_8k1k_lowlat_0
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 9
-  decode_workers: 9
-frontend:
-  type: dynamo
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size: 1
-      enable-flashinfer-allreduce-fusion: true
-      moe-runner-backend: flashinfer_trtllm
-      max-running-requests: 15
-      cuda-graph-max-bs: 15
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '150'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml
deleted file mode 100644
index 33f0324b69..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml
+++ /dev/null
@@ -1,140 +0,0 @@
-name: gb300-fp8-glm5_8k1k_lowlat_1
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 17
-  decode_workers: 17
-frontend:
-  type: dynamo
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size: 1
-      enable-flashinfer-allreduce-fusion: true
-      moe-runner-backend: flashinfer_trtllm
-      max-running-requests: 8
-      cuda-graph-max-bs: 8
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: 128x64x32
diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml
deleted file mode 100644
index 64d4c701a4..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml
+++ /dev/null
@@ -1,140 +0,0 @@
-name: gb300-fp8-glm5_8k1k_lowlat_2
-
-model:
-  path: glm-5-fp8
-  container: "lmsysorg/sglang:v0.5.11-cu130"
-  precision: fp8
-
-resources:
-  gpu_type: gb300
-  gpus_per_node: 4
-  prefill_nodes: 1
-  prefill_workers: 1
-  decode_nodes: 17
-  decode_workers: 17
-frontend:
-  type: dynamo
-dynamo:
-  version: 1.1.0
-
-backend:
-  prefill_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-
-  decode_environment:
-    TORCH_DISTRIBUTED_DEFAULT_TIMEOUT: '1800'
-    PYTHONUNBUFFERED: '1'
-    DYN_SKIP_SGLANG_LOG_FORMATTING: '1'
-    SGLANG_DISAGGREGATION_HEARTBEAT_MAX_FAILURE: '100000'
-    SGLANG_DISAGGREGATION_BOOTSTRAP_TIMEOUT: '100000'
-    SGLANG_DISAGGREGATION_WAITING_TIMEOUT: '100000'
-    MC_TE_METRIC: 'true'
-    MC_FORCE_MNNVL: '1'
-    NCCL_MNNVL_ENABLE: '1'
-    NCCL_CUMEM_ENABLE: '1'
-    SGLANG_MOONCAKE_CUSTOM_MEM_POOL: 'True'
-    SGLANG_USE_MESSAGE_QUEUE_BROADCASTER: '0'
-    SGLANG_DISABLE_TP_MEMORY_INBALANCE_CHECK: '1'
-    DYN_REQUEST_PLANE: nats
-      # DeepEP per-rank dispatch buffer; must be >= ceil(cuda_graph_max_bs / dp_size).
-      # Default 128 overflows with large DP + batch (e.g. 4096/24 ~= 171 > 128). Limit 1024.
-    SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: '512'
-
-  sglang_config:
-    prefill:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: prefill
-      disaggregation-transfer-backend: nixl
-
-        # Size limits
-      max-running-requests: 256
-      cuda-graph-max-bs: 256
-      mem-fraction-static: 0.7
-      context-length: 9600
-      chunked-prefill-size: 32768
-      max-prefill-tokens: 8192
-
-        # Parallelism
-      tensor-parallel-size: 4
-      data-parallel-size: 4
-      expert-parallel-size: 1
-      enable-dp-attention: true
-      enable-dp-lm-head: true
-      load-balance-method: total_tokens
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-      moe-runner-backend: flashinfer_trtllm
-
-        # Other flags
-      enable-flashinfer-allreduce-fusion: true
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-
-    decode:
-        # Model configuration
-      served-model-name: GLM-5-FP8
-      trust-remote-code: true
-
-      quantization: fp8
-      kv-cache-dtype: fp8_e4m3
-
-        # Disaggregation mode
-      disaggregation-mode: decode
-      disaggregation-transfer-backend: nixl
-
-        # Memory and token limits
-      mem-fraction-static: 0.8
-      context-length: 9600
-
-        # Backend
-      nsa-decode-backend: trtllm
-      nsa-prefill-backend: trtllm
-        # moe-runner-backend: "cutedsl"
-
-        # Detokenizer
-      skip-tokenizer-init: true
-      stream-interval: 30
-
-        # Other flags
-      disable-radix-cache: true
-      weight-loader-prefetch-checkpoints: true
-      model-loader-extra-config: '{"enable_multithread_load": true}'
-      tensor-parallel-size: 4
-      expert-parallel-size: 1
-      data-parallel-size: 1
-      enable-flashinfer-allreduce-fusion: true
-      moe-runner-backend: flashinfer_trtllm
-      max-running-requests: 1
-      cuda-graph-max-bs: 1
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: sa-bench
-  req_rate: inf
-  isl: 8192
-  osl: 1024
-  concurrencies: '24'
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-high-tpt-megamoe.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-high-tpt-megamoe.yaml
deleted file mode 100644
index cdbe0668b9..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-high-tpt-megamoe.yaml
+++ /dev/null
@@ -1,138 +0,0 @@
-name: "svf-vllm-disagg-b200-high-tpt-megamoe"
-
-# Mirrored from NVIDIA/srt-slurm aflowers/vllm-gb200-v0.20.0 branch:
-#   recipes/vllm/deepseek-v4-pro/GB200/8k1k/disagg-gb200-high-tpt-megamoe.yaml
-#
-# B200 adaptation of the GB200 recipe below. Each prefill/decode worker uses
-# one full 8-GPU B200 node, plus a dedicated NATS/etcd infra node.
-#
-# Local deltas vs upstream:
-#   * model.path alias renamed deepseekv4-fp4 -> deepseek-v4-pro to match
-#     SRT_SLURM_MODEL_PREFIX in the launch script.
-#   * model.container set to vllm/vllm-openai:v0.20.1 to
-#     match nvidia-master.yaml image (which the launch script registers as
-#     the alias key in srtslurm.yaml). Upstream variants ship either the
-#     non-dynamo floating tag or a sha256 pin.
-#   * slurm.time_limit + health_check set to 8h / 1440 attempts to
-#     absorb cold-cache model loads.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 2
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      enforce-eager: true
-      max-model-len: 9280
-      max-num-seqs: 16
-      max-num-batched-tokens: 16384
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      gpu-memory-utilization: 0.9
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      tokenizer-mode: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      max-model-len: 9280
-      max-num-seqs: 512
-      max-cudagraph-capture-size: 512
-      max-num-batched-tokens: 512
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}'
-      gpu-memory-utilization: 0.9
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512x1024"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  model:
-    repo: "deepseek-ai/DeepSeek-V4-Pro"
-    revision: "0366e4e064385807ea86b088a5c6c878ff23343b"
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-latency.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-latency.yaml
deleted file mode 100644
index 7549794136..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-latency.yaml
+++ /dev/null
@@ -1,137 +0,0 @@
-name: "svf-vllm-disagg-b200-low-latency"
-
-# Mirrored from NVIDIA/srt-slurm aflowers/vllm-gb200-v0.20.0 branch:
-#   recipes/vllm/deepseek-v4-pro/GB200/8k1k/disagg-gb200-low-latency.yaml
-#
-# B200 adaptation of the GB200 recipe below. Each prefill/decode worker uses
-# one full 8-GPU B200 node, plus a dedicated NATS/etcd infra node.
-#
-# Local deltas vs upstream:
-#   * model.path alias renamed deepseekv4-fp4 -> deepseek-v4-pro to match
-#     SRT_SLURM_MODEL_PREFIX in the launch script.
-#   * model.container set to vllm/vllm-openai:v0.20.1 to
-#     match nvidia-master.yaml image (which the launch script registers as
-#     the alias key in srtslurm.yaml). Upstream variants ship either the
-#     non-dynamo floating tag or a sha256 pin.
-#   * slurm.time_limit + health_check set to 8h / 1440 attempts to
-#     absorb cold-cache model loads.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enforce-eager: true
-      max-model-len: 16384
-      max-num-seqs: 16
-      max-num-batched-tokens: 32768
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      gpu-memory-utilization: 0.8
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      offload-group-size: 3
-      offload-num-in-group: 1
-      offload-prefetch-step: 2
-      # offload-params: "w13_weight w2_weight w13_weight_scale w2_weight_scale wq_b wo_a wo_b shared_experts"
-      tokenizer-mode: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 8
-      pipeline-parallel-size: 1
-#      data-parallel-size: 8
-#      data-parallel-rpc-port: 13345
-#      enable-expert-parallel: true
-      max-model-len: 16384
-      max-num-seqs: 256
-      max-cudagraph-capture-size: 256
-      max-num-batched-tokens: 256
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}'
-      gpu-memory-utilization: 0.9
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1x16x32x64x128"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-middle-curve.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-middle-curve.yaml
deleted file mode 100644
index 533ad0bf88..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-low-middle-curve.yaml
+++ /dev/null
@@ -1,138 +0,0 @@
-name: "svf-vllm-disagg-b200-low-middle-curve"
-
-# Mirrored from NVIDIA/srt-slurm aflowers/vllm-gb200-v0.20.0 branch:
-#   recipes/vllm/deepseek-v4-pro/GB200/8k1k/disagg-gb200-low-middle-curve.yaml
-#
-# B200 adaptation of the GB200 recipe below. Each prefill/decode worker uses
-# one full 8-GPU B200 node, plus a dedicated NATS/etcd infra node.
-#
-# Local deltas vs upstream:
-#   * model.path alias renamed deepseekv4-fp4 -> deepseek-v4-pro to match
-#     SRT_SLURM_MODEL_PREFIX in the launch script.
-#   * model.container set to vllm/vllm-openai:v0.20.1 to
-#     match nvidia-master.yaml image (which the launch script registers as
-#     the alias key in srtslurm.yaml). Upstream variants ship either the
-#     non-dynamo floating tag or a sha256 pin.
-#   * slurm.time_limit + health_check set to 8h / 1440 attempts to
-#     absorb cold-cache model loads.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 4
-  prefill_workers: 1
-  decode_workers: 4
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      enforce-eager: true
-      max-model-len: 16384
-      max-num-seqs: 16
-      max-num-batched-tokens: 32768
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      gpu-memory-utilization: 0.8
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      offload-group-size: 3
-      offload-num-in-group: 1
-      offload-prefetch-step: 2
-      # offload-params: "w13_weight w2_weight w13_weight_scale w2_weight_scale wq_b wo_a wo_b shared_experts"
-      tokenizer-mode: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 8
-      pipeline-parallel-size: 1
-#      data-parallel-size: 8
-#      data-parallel-rpc-port: 13345
-#      enable-expert-parallel: true
-      max-model-len: 16384
-      max-num-seqs: 256
-      max-cudagraph-capture-size: 256
-      max-num-batched-tokens: 256
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}'
-      gpu-memory-utilization: 0.9
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-max-tpt-megamoe.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-max-tpt-megamoe.yaml
deleted file mode 100644
index eb4c5308b5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-max-tpt-megamoe.yaml
+++ /dev/null
@@ -1,138 +0,0 @@
-name: "svf-vllm-disagg-b200-max-tpt-megamoe"
-
-# Mirrored from NVIDIA/srt-slurm aflowers/vllm-gb200-v0.20.0 branch:
-#   recipes/vllm/deepseek-v4-pro/GB200/8k1k/disagg-gb200-max-tpt-megamoe.yaml
-#
-# B200 adaptation of the GB200 recipe below. Each prefill/decode worker uses
-# one full 8-GPU B200 node, plus a dedicated NATS/etcd infra node.
-#
-# Local deltas vs upstream:
-#   * model.path alias renamed deepseekv4-fp4 -> deepseek-v4-pro to match
-#     SRT_SLURM_MODEL_PREFIX in the launch script.
-#   * model.container set to vllm/vllm-openai:v0.20.1 to
-#     match nvidia-master.yaml image (which the launch script registers as
-#     the alias key in srtslurm.yaml). Upstream variants ship either the
-#     non-dynamo floating tag or a sha256 pin.
-#   * slurm.time_limit + health_check set to 8h / 1440 attempts to
-#     absorb cold-cache model loads.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 3
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      enforce-eager: true
-      max-model-len: 9280
-      max-num-seqs: 16
-      max-num-batched-tokens: 16384
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      gpu-memory-utilization: 0.9
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      tokenizer-mode: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      max-model-len: 9280
-      max-num-seqs: 512
-      max-cudagraph-capture-size: 512
-      max-num-batched-tokens: 512
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}'
-      gpu-memory-utilization: 0.9
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "8192x12345"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  model:
-    repo: "deepseek-ai/DeepSeek-V4-Pro"
-    revision: "0366e4e064385807ea86b088a5c6c878ff23343b"
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-mid-curve-megamoe.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-mid-curve-megamoe.yaml
deleted file mode 100644
index c17605b1c8..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b200-mid-curve-megamoe.yaml
+++ /dev/null
@@ -1,138 +0,0 @@
-name: "svf-vllm-disagg-b200-mid-curve-megamoe"
-
-# Mirrored from NVIDIA/srt-slurm aflowers/vllm-gb200-v0.20.0 branch:
-#   recipes/vllm/deepseek-v4-pro/GB200/8k1k/disagg-gb200-mid-curve-megamoe.yaml
-#
-# B200 adaptation of the GB200 recipe below. Each prefill/decode worker uses
-# one full 8-GPU B200 node, plus a dedicated NATS/etcd infra node.
-#
-# Local deltas vs upstream:
-#   * model.path alias renamed deepseekv4-fp4 -> deepseek-v4-pro to match
-#     SRT_SLURM_MODEL_PREFIX in the launch script.
-#   * model.container set to vllm/vllm-openai:v0.20.1 to
-#     match nvidia-master.yaml image (which the launch script registers as
-#     the alias key in srtslurm.yaml). Upstream variants ship either the
-#     non-dynamo floating tag or a sha256 pin.
-#   * slurm.time_limit + health_check set to 8h / 1440 attempts to
-#     absorb cold-cache model loads.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    # VLLM_RANDOMIZE_DP_DUMMY_INPUTS: "1"
-    # VLLM_MOE_ROUTING_SIMULATION_STRATEGY: "uniform_random"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      enforce-eager: true
-      max-model-len: 9280
-      max-num-seqs: 16
-      max-num-batched-tokens: 16384
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      gpu-memory-utilization: 0.9
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      tokenizer-mode: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      max-model-len: 9280
-      max-num-seqs: 512
-      max-cudagraph-capture-size: 512
-      max-num-batched-tokens: 512
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_DECODE_ONLY","mode":0}'
-      gpu-memory-utilization: 0.9
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512x1024"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  model:
-    repo: "deepseek-ai/DeepSeek-V4-Pro"
-    revision: "0366e4e064385807ea86b088a5c6c878ff23343b"
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-low-middle-curve.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-low-middle-curve.yaml
deleted file mode 100644
index 1ba829a513..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-low-middle-curve.yaml
+++ /dev/null
@@ -1,125 +0,0 @@
-name: "svf-vllm-disagg-b300-low-middle-curve"
-
-# B300 adaptation of the DSV4 GB200/B200 vLLM disagg recipe. Each worker uses
-# one full 8-GPU B300 node, plus a dedicated NATS/etcd infra node.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 4
-  prefill_workers: 1
-  decode_workers: 4
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      attention-config: '{"use_fp4_indexer_cache": true}'
-      enforce-eager: true
-      max-model-len: 16384
-      max-num-seqs: 16
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 32768
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_AND_PIECEWISE","custom_ops":["all"]}'
-      gpu-memory-utilization: 0.8
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      offload-group-size: 3
-      offload-num-in-group: 1
-      offload-prefetch-step: 2
-      tokenizer-mode: deepseek_v4
-      reasoning-parser: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 8
-      pipeline-parallel-size: 1
-      max-model-len: 16384
-      max-num-seqs: 256
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 256
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_AND_PIECEWISE","custom_ops":["all"]}'
-      gpu-memory-utilization: 0.9
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-      reasoning-parser: deepseek_v4
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-max-tpt-megamoe.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-max-tpt-megamoe.yaml
deleted file mode 100644
index cb20003ecd..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4/8k1k/disagg-b300-max-tpt-megamoe.yaml
+++ /dev/null
@@ -1,130 +0,0 @@
-name: "svf-vllm-disagg-b300-max-tpt-megamoe"
-
-# B300 adaptation of the DSV4 GB200/B200 vLLM disagg recipe. Each worker uses
-# one full 8-GPU B300 node, plus a dedicated NATS/etcd infra node.
-model:
-  path: "deepseek-v4-pro"
-  container: "vllm/vllm-openai:v0.23.0"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260426"
-
-setup_script: vllm-container-deps.sh
-
-slurm:
-  time_limit: "8:00:00"
-
-health_check:
-  max_attempts: 1440
-  interval_seconds: 10
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 3
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 8
-  gpus_per_decode: 8
-
-infra:
-  etcd_nats_dedicated_node: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  prefill_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    VLLM_SPARSE_INDEXER_MAX_LOGITS_MB: "1024"
-    VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: "2048"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  decode_environment:
-    TILELANG_CLEANUP_TEMP_FILES: "1"
-    NCCL_CUMEM_ENABLE: "1"
-    VLLM_SERVER_DEV_MODE: "1"
-    UCX_MEMTYPE_CACHE: "n"
-    UCX_MEMTYPE_REG_WHOLE: "n"
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      attention-config: '{"use_fp4_indexer_cache": true}'
-      enforce-eager: true
-      max-model-len: 9280
-      max-num-seqs: 16
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 32768
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      no-async-scheduling: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_AND_PIECEWISE","custom_ops":["all"]}'
-      gpu-memory-utilization: 0.85
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      numa-bind: true
-      tokenizer-mode: deepseek_v4
-      reasoning-parser: deepseek_v4
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      served-model-name: "deepseek-ai/DeepSeek-V4-Pro"
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      enable-ep-weight-filter: true
-      attention-config: '{"use_fp4_indexer_cache": true}'
-      max-model-len: 9280
-      max-num-seqs: 512
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 512
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      no-enable-flashinfer-autotune: true
-      block-size: 256
-      compilation-config: '{"cudagraph_mode":"FULL_AND_PIECEWISE","custom_ops":["all"]}'
-      gpu-memory-utilization: 0.85
-      stream-interval: 50
-      no-disable-hybrid-kv-cache-manager: true
-      enable-sleep-mode: true
-      tokenizer-mode: deepseek_v4
-      reasoning-parser: deepseek_v4
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4096"
-  req_rate: "inf"
-  use_chat_template: true
-  custom_tokenizer: "sa_bench_tokenizers.vllm_deepseek_v4.VLLMDeepseekV4Tokenizer"
-
-identity:
-  model:
-    repo: "deepseek-ai/DeepSeek-V4-Pro"
-    revision: "0366e4e064385807ea86b088a5c6c878ff23343b"
-  container:
-    image: "vllm/vllm-openai:v0.23.0"
-  frameworks:
-    dynamo: "1.2.0.dev20260426"
-    vllm: "0.23.0"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-1p2d.yaml
deleted file mode 100644
index badf45403e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-1p2d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-2xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2048x4096x8192"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d-c6144.yaml
deleted file mode 100644
index c3c994bca2..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d-c6144.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-2p3xdep2-c6144"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "6144"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d.yaml
deleted file mode 100644
index 5b352e35f8..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-2p3xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep4-3p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep4-3p2d.yaml
deleted file mode 100644
index b7809a9e24..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep4-3p2d.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1k1k-3p2xdep4"
-
-# Rate-matched dep4 at 1k/1k.
-# Measured X_dep4/P = 56.8k / 38k = 1.49; 3P:2D ratio = 1.5 ✓
-
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep8-2p1d.yaml
deleted file mode 100644
index 683f4c72d2..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep8-2p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-2p1xdep8"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048x4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p1d.yaml
deleted file mode 100644
index bc6a6a1ac4..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p2d.yaml
deleted file mode 100644
index 5d7072ea5f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p2d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4-1p2d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4x8x16x32x64"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d-hi-conc.yaml
deleted file mode 100644
index 23ec9444c8..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d-hi-conc.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4ep-1p1d-hi-conc"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 4a56ab27ee..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4ep-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64x128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p2d.yaml
deleted file mode 100644
index 87c928c63e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p2d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4ep-1p2d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "128x256"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d-hi-conc.yaml
deleted file mode 100644
index e828387150..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d-hi-conc.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4ep-1p3d-hi-conc"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d.yaml
deleted file mode 100644
index 268a585359..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-decode-focus-tp4ep-1p3d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "64x128x256x512"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-2p3d.yaml
deleted file mode 100644
index 0d83e2e63a..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-2p3d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1k1k-2p3xtp4ep"
-
-# Better-matched tp4ep at 1k/1k.
-# Measured X_tp4ep/P = 24.1k / 38k = 0.63; 2P:3D ratio = 0.67 ✓
-
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256x1024"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/dep4-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/dep4-2p1d.yaml
deleted file mode 100644
index 0a867e508f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/dep4-2p1d.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-8k1k-2p1xdep4"
-
-# Rate-matched dep4 at 8k/1k.
-# Measured X_dep4_8k = 13.6k tok/s; rate-match ratio = X*8/P_8k = 13.6*8/58 = 1.88
-# 2P:1D = 2.0, much closer to optimum than 4P:1D (2× over-prefilled).
-
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512x1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4-1p1d.yaml
deleted file mode 100644
index 75c7b9d737..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-8k1k-1p1xtp4"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4x8x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d-hi-conc.yaml
deleted file mode 100644
index c43abe5958..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d-hi-conc.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-8k1k-1p1xtp4ep-hi-conc"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512x1024"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 3d295e2904..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-8k1k-1p1xtp4ep"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32x64"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/dep8.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/dep8.yaml
deleted file mode 100644
index 504da7e4d0..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/dep8.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-fp8-decode-focus-dep8"
-
-# Over-prefilled (4P:1D-dep8) at 1k/1k to measure X_dep8_fp8_gb200.
-# 4P × 48k = 192k vs dep8 X ≈ 90k → 2.1× buffer.
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 2
-  decode_nodes: 2
-  prefill_workers: 4
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "512"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p1d-tp4ep.yaml
deleted file mode 100644
index 1003a1c541..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p1d-tp4ep.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p3d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p3d-tp4ep.yaml
deleted file mode 100644
index f79e03c991..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p3d-tp4ep.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1p3d-tp4ep"
-
-# Rate-matched tp4ep for FP8 GB200 1k/1k.
-# X_tp4ep_fp8_gb200 = 17.9k tok/s; P_per_worker = 48k; ideal X/P = 0.37; 1P:3D = 0.33 ✓
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p4d-dep2-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p4d-dep2-hi-conc.yaml
deleted file mode 100644
index 8168374450..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-1p4d-dep2-hi-conc.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1p4d-dep2-hi-conc"
-
-# Rate-matched dep2 for FP8 GB200 1k/1k.
-# X_dep2_fp8_gb200 = 12.7k tok/s; P_per_worker = 48k; ideal X/P = 0.27; 1P:4D = 0.25 ✓
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 4
-  gpus_per_prefill: 2
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p1d-dep8.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p1d-dep8.yaml
deleted file mode 100644
index 92855acbeb..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p1d-dep8.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-2p1d-dep8"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "512x1024"
-  # warmup_prompts: 1
-  # use_chat_template: false
-  # req_rate: "inf"
-  # random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p3d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p3d-dep4-hi-conc.yaml
deleted file mode 100644
index eb66c2041c..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/disagg-b200-2p3d-dep4-hi-conc.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-2p3d-dep4-hi-conc"
-
-# Rate-matched dep4 for FP8 GB200 1k/1k.
-# X_dep4_fp8_gb200 = 30.9k tok/s; P_per_worker = 48k; ideal X/P = 0.64; 2P:3D = 0.67 ✓
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096x8192"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/tp4ep.yaml
deleted file mode 100644
index dc54a69606..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/1k1k/tp4ep.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-fp8-decode-focus-tp4ep"
-
-# Over-prefilled (1P:1D-tp4ep, high conc) at 1k/1k to measure X_tp4ep_fp8_gb200.
-# 1P × 48k = 48k vs tp4ep X ≈ 18-24k → 2-2.7× buffer.
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep-hi-conc.yaml
deleted file mode 100644
index bf89bdc574..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep-hi-conc.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1p1d-tp4ep-hi-conc"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep.yaml
deleted file mode 100644
index 6268370cc0..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-1p1d-tp4ep.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-1p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "16x32x64x128"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-3p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-3p2d-dep4.yaml
deleted file mode 100644
index d67da94d0f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/8k1k/disagg-b200-3p2d-dep4.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b200-8k1k-3p2d-dep4"
-
-# Rate-matched dep4 for FP8 GB200 8k/1k.
-# X_dep4_fp8_gb200_8k ≈ 9.8k tok/s (from 5p2d-dep4 saturation);
-# P_per_worker_8k = 57k; ratio = X*8/P = 78.4/57 = 1.38; 3P:2D = 1.5 ✓ (closest int fit)
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b200"
-  gpus_per_node: 8
-  prefill_nodes: 2
-  decode_nodes: 2
-  prefill_workers: 3
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      max-num-batched-tokens: 16384
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1024x2048"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p1d-tp4.yaml
deleted file mode 100644
index e42e7b9419..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p1d-tp4.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-1p1d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "8x16x32x64x128"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4.yaml
deleted file mode 100644
index 6d8513fecc..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-1p2d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64x128x256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4ep.yaml
deleted file mode 100644
index 927ac087eb..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-1p2d-tp4ep.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-1p2d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256x512x1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p1d-dep8.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p1d-dep8.yaml
deleted file mode 100644
index 20cb76d530..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p1d-dep8.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-2p1d-dep8"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256x512x1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4-hi-conc.yaml
deleted file mode 100644
index f2f23c6b5e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4-hi-conc.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-2p2d-dep4-hi-conc"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096x8192"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4.yaml
deleted file mode 100644
index 9edc59915a..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p2d-dep4.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-2p2d-dep4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "512x1024x2048"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p3d-dep2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p3d-dep2.yaml
deleted file mode 100644
index 38c7c467af..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/1k1k/disagg-b300-2p3d-dep2.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-2p3d-dep2"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep-hi-conc.yaml
deleted file mode 100644
index b42a58dfb5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep-hi-conc.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-1p1d-tp4ep-hi-conc"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep.yaml
deleted file mode 100644
index 095e737761..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-1p1d-tp4ep.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-1p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "16x64x128"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp2.yaml
deleted file mode 100644
index 8efa46ea41..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp2.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-2p1d-tp2"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 2
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp4ep.yaml
deleted file mode 100644
index b1a7d92814..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-2p1d-tp4ep.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-2p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "64x128x256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4-hi-conc.yaml
deleted file mode 100644
index 4e859cd7e6..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4-hi-conc.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-3p1d-dep4-hi-conc"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1024x2048"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4.yaml
deleted file mode 100644
index c6f47d49f8..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-dep4.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-3p1d-dep4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-tp4.yaml
deleted file mode 100644
index 13b073cfb0..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-3p1d-tp4.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-3p1d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "64"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-5p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-5p2d-dep4.yaml
deleted file mode 100644
index 6ece6ec6fc..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8/8k1k/disagg-b300-5p2d-dep4.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-5p2d-dep4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 5
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "512x1024x2048"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
deleted file mode 100644
index d6e6dc53c6..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-2xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
deleted file mode 100644
index 3fd24aa253..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-2p3xdep2-c6144"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "6144x8192"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
deleted file mode 100644
index bc68f6d59e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-2p3xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2048x4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
deleted file mode 100644
index 516e51f113..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-2p1xdep8"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x1536x2048x4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
deleted file mode 100644
index 726b5a63b1..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
deleted file mode 100644
index 77329ffcc9..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4-1p2d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "8x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 4f25aee385..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4ep-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64x128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
deleted file mode 100644
index 8da4cb7ca1..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4ep-1p3d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "64x128x256x1024"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
deleted file mode 100644
index 757eeed973..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
+++ /dev/null
@@ -1,78 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp8-1p1d"
-
-# B300-only: full-node TP=8 decode (the 8 GPUs of a single B300 node).
-# Cousin of tp4-1p1d.yaml but exercises the wider TP that B300's per-node
-# GPU count makes available. Only the smallest concurrencies (1,4,8) —
-# this topology is decode-latency focused, not throughput.
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 8
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
deleted file mode 100644
index 258e9ba4f7..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-8k1k-4p1xdep4"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 4
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "384x512"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
deleted file mode 100644
index 1f41e52e24..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-8k1k-4p1xdep8"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 4
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "384"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
deleted file mode 100644
index 91761b75fc..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp4"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "2x4x8x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 76b000e8c2..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp4ep"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32x128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
deleted file mode 100644
index b34025ee2b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-8k1k-2p1xtp4ep"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "64x128x256x512"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
deleted file mode 100644
index ea276c25ac..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp8"
-
-# B300-only: full-node TP=8 decode at 8k input. Cousin of tp4-1p1d.yaml
-# but exercises the wider TP that B300's per-node GPU count makes
-# available. Smallest concurrencies only (1,4,8).
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-  allow_prefill_decode_colocation: true
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 8
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml
deleted file mode 100644
index 4475c45485..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "8x16x32x64x128"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml
deleted file mode 100644
index 005d3ab451..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-1p2d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64x128x256x512"
-  # warmup_prompts: 1
-  # use_chat_template: false
-  # req_rate: "inf"
-  # random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml
deleted file mode 100644
index 42e2bbff7d..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-1p2d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256x512x1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml
deleted file mode 100644
index dadaea41cd..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-2p1d-dep8"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256x512x1024"
-  # warmup_prompts: 1
-  # use_chat_template: false
-  # req_rate: "inf"
-  # random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml
deleted file mode 100644
index 95a6f4032f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-2p2d-dep4-hi-conc"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096x8192"
-  # use_chat_template: false
-  # req_rate: "inf"
-  # random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml
deleted file mode 100644
index 90d14b5b0b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-2p2d-dep4"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "512x1024x2048"
-  # use_chat_template: false
-  # req_rate: "inf"
-  # random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml
deleted file mode 100644
index ef4bfc846f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-2p3d-dep2"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml
deleted file mode 100644
index f9e9ccf793..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4ep-hi-conc"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml
deleted file mode 100644
index 76e72c229c..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "16x64x128"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml
deleted file mode 100644
index f71458a70b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-2p1d-tp2"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 2
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml
deleted file mode 100644
index 668cf185bd..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-2p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "64x128x256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml
deleted file mode 100644
index 94b866d954..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-3p1d-dep4-hi-conc"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1024x2048"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml
deleted file mode 100644
index 9bb6081db5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-3p1d-dep4"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml
deleted file mode 100644
index b638c03512..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-3p1d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 3
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "64"
-  use_chat_template: false
-  req_rate: "inf"
-  random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml
deleted file mode 100644
index ed2a9cdd4c..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-5p2d-dep4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  decode_nodes: 2
-  prefill_workers: 5
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "512x1024x2048"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p1d-tp4.yaml
deleted file mode 100644
index 120a35e45f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p1d-tp4.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1p1d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1x4x8x16x32x64"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p2d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p2d-tp4.yaml
deleted file mode 100644
index 6b5e76e429..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p2d-tp4.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1p2d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2x32x64x128x256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p3d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p3d-tp4ep.yaml
deleted file mode 100644
index 765562d0c8..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p3d-tp4ep.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1p3d-tp4ep"
-
-# Rate-matched tp4ep for FP8 GB200 1k/1k.
-# X_tp4ep_fp8_gb200 = 17.9k tok/s; P_per_worker = 48k; ideal X/P = 0.37; 1P:3D = 0.33 ✓
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p4d-dep2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p4d-dep2.yaml
deleted file mode 100644
index aeeb8a0125..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-1p4d-dep2.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1p4d-dep2"
-
-# Rate-matched dep2 for FP8 GB200 1k/1k.
-# X_dep2_fp8_gb200 = 12.7k tok/s; P_per_worker = 48k; ideal X/P = 0.27; 1P:4D = 0.25 ✓
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 4
-  gpus_per_prefill: 2
-  gpus_per_decode: 2
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p1d-dep8.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p1d-dep8.yaml
deleted file mode 100644
index 83bc7aeb26..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p1d-dep8.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-2p1d-dep8"
-
-# model:
-#   path: "minimax-m2.5-fp8"
-#   container: "v0.18.1"
-#   precision: "fp8"
-
-# dynamo:
-#   version: 1.0.1
-#   install: true
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "512x1024"
-  # warmup_prompts: 1
-  # use_chat_template: false
-  # req_rate: "inf"
-  # random_range_ratio: 1.0
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p3d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p3d-dep4.yaml
deleted file mode 100644
index 5340192221..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/1k1k/disagg-gb200-2p3d-dep4.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-2p3d-dep4"
-
-# Rate-matched dep4 for FP8 GB200 1k/1k.
-# X_dep4_fp8_gb200 = 30.9k tok/s; P_per_worker = 48k; ideal X/P = 0.64; 2P:3D = 0.67 ✓
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096x8192"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4.yaml
deleted file mode 100644
index 847c4b1386..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-8k1k-1p1d-tp4"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      max-num-batched-tokens: 16384
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1x4x8x16x32x64x128"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4ep.yaml
deleted file mode 100644
index 61497185aa..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-1p1d-tp4ep.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1p1d-tp4ep"
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      max-num-batched-tokens: 16384
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      pipeline-parallel-size: 1
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-3p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-3p2d-dep4.yaml
deleted file mode 100644
index c927571469..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8/8k1k/disagg-gb200-3p2d-dep4.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-8k1k-3p2d-dep4"
-
-# Rate-matched dep4 for FP8 GB200 8k/1k.
-# X_dep4_fp8_gb200_8k ≈ 9.8k tok/s (from 5p2d-dep4 saturation);
-# P_per_worker_8k = 57k; ratio = X*8/P = 78.4/57 = 1.38; 3P:2D = 1.5 ✓ (closest int fit)
-
-model:
-  path: "minimax-m2.5-fp8"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp8"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  decode_nodes: 2
-  prefill_workers: 3
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13346
-      enable-expert-parallel: true
-      max-num-batched-tokens: 16384
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 1
-      pipeline-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      safetensors-load-strategy: "prefetch"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1024x2048x4096"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-1p2d.yaml
deleted file mode 100644
index 4decc38b1e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-1p2d.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-2xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-  spread_workers: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2048x4096x8192"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d-c6144.yaml
deleted file mode 100644
index e99cb27250..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d-c6144.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-2p3xdep2-c6144"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-  spread_workers: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "6144"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d.yaml
deleted file mode 100644
index 50d054f1d9..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep2-2p3d.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-2p3xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-  spread_workers: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep4-3p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep4-3p2d.yaml
deleted file mode 100644
index 8016fa160e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep4-3p2d.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1k1k-3p2xdep4"
-
-# Rate-matched dep4 at 1k/1k.
-# Measured X_dep4/P = 56.8k / 38k = 1.49; 3P:2D ratio = 1.5 ✓
-
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 3
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep8-2p1d.yaml
deleted file mode 100644
index f9b97131c5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/dep8-2p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-2p1xdep8"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048x4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p1d.yaml
deleted file mode 100644
index 7b4a98182e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p2d.yaml
deleted file mode 100644
index 71aafe4e5f..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4-1p2d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4-1p2d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4x8x16x32x64"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d-hi-conc.yaml
deleted file mode 100644
index f8efd4f25b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d-hi-conc.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4ep-1p1d-hi-conc"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 345f34f351..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4ep-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64x128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p2d.yaml
deleted file mode 100644
index 4194388aca..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p2d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4ep-1p2d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "128x256"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d-hi-conc.yaml
deleted file mode 100644
index fe47c7834b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d-hi-conc.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4ep-1p3d-hi-conc"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d.yaml
deleted file mode 100644
index 05cbf316c5..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-1p3d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-decode-focus-tp4ep-1p3d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "64x128x256x512"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-2p3d.yaml
deleted file mode 100644
index 60a6639f56..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/1k1k/tp4ep-2p3d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-1k1k-2p3xtp4ep"
-
-# Better-matched tp4ep at 1k/1k.
-# Measured X_tp4ep/P = 24.1k / 38k = 0.63; 2P:3D ratio = 0.67 ✓
-
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "256x1024"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/dep4-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/dep4-2p1d.yaml
deleted file mode 100644
index 07bab8f319..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/dep4-2p1d.yaml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-8k1k-2p1xdep4"
-
-# Rate-matched dep4 at 8k/1k.
-# Measured X_dep4_8k = 13.6k tok/s; rate-match ratio = X*8/P_8k = 13.6*8/58 = 1.88
-# 2P:1D = 2.0, much closer to optimum than 4P:1D (2× over-prefilled).
-
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512x1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4-1p1d.yaml
deleted file mode 100644
index a03d6b69ab..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-8k1k-1p1xtp4"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4x8x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d-hi-conc.yaml
deleted file mode 100644
index f38890f1ee..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d-hi-conc.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-8k1k-1p1xtp4ep-hi-conc"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256x512x1024"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 946bc64b00..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200/8k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb200-8k1k-1p1xtp4ep"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb200"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32x64"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml
deleted file mode 100644
index c7f7e28afb..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-2p3xdep2-c6144"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-  spread_workers: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "6144x8192"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml
deleted file mode 100644
index adaf6f271e..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-2p3xdep2"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 2
-  decode_nodes: 3
-  prefill_workers: 2
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 2
-  spread_workers: true
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml
deleted file mode 100644
index 28427e002d..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-2p1xdep8"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 128
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 128
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "1024x2048x4096"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml
deleted file mode 100644
index eee93c9f8b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-    UCX_RCACHE_MAX_UNRELEASED: "1024"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "2x4x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml
deleted file mode 100644
index 10ba980cac..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4-1p2d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "4x8x16x64"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index ebff26fb00..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4ep-1p1d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "32x64x128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml
deleted file mode 100644
index 5353e4dd02..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4ep-1p3d"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 3
-  prefill_workers: 1
-  decode_workers: 3
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 2048
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 1024
-  osl: 1024
-  concurrencies: "64x128x256x512x1024"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml
deleted file mode 100644
index d3c777618d..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-8k1k-4p1xdep4"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 4
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "256"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml
deleted file mode 100644
index a56c095afd..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-8k1k-4p1xdep8"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 4
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 8
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      max-num-seqs: 864
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "1024x2048"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml
deleted file mode 100644
index a92975c57a..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-8k1k-1p1xtp4"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: false
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "2x4x8x16"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml
deleted file mode 100644
index 53daeafbdb..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-8k1k-1p1xtp4ep"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 1
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32x64x128x256"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml
deleted file mode 100644
index 163d412f5b..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: "minimax-m2.5-vllm-disagg-gb300-8k1k-2p1xtp4ep"
-
-model:
-  path: "minimax-m2.5-nvfp4"
-  container: "vllm/vllm-openai:v0.20.1"
-  precision: "fp4"
-
-dynamo:
-  install: true
-  wheel: "1.2.0.dev20260526"
-
-setup_script: install-deps.sh
-
-resources:
-  gpu_type: "gb300"
-  gpus_per_node: 4
-  prefill_nodes: 1
-  decode_nodes: 1
-  prefill_workers: 2
-  decode_workers: 1
-  gpus_per_prefill: 1
-  gpus_per_decode: 4
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  decode_environment:
-    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
-    VLLM_FLOAT32_MATMUL_PRECISION: "high"
-
-  vllm_config:
-    prefill:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-      stream-interval: 32
-
-    decode:
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      kv-cache-dtype: "fp8"
-      tensor-parallel-size: 4
-      enable-expert-parallel: true
-      no-enable-prefix-caching: true
-      max-model-len: 9280
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 2048
-      gpu-memory-utilization: 0.90
-      stream-interval: 32
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "64x128"
-  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml
deleted file mode 100644
index bc4c449b29..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/1p2d-dep2-dep8-8k1k.yaml
+++ /dev/null
@@ -1,81 +0,0 @@
-name: "minimax-m3-vllm-disagg-b300-1p2d-fp8-dep2-dep8-8k1k"
-
-model:
-  path: "MiniMaxAI/MiniMax-M3-MXFP8"
-  container: "vllm/vllm-openai:minimax-m3-0618-x86_64-cu130"
-  precision: "fp8"
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 1
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-dynamo:
-  install: true
-  version: 1.3.0.dev20260614
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      tensor-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-
-    decode:
-      tensor-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-num-seqs: 1024
-      max-num-batched-tokens: 16384
-      max-cudagraph-capture-size: 4096
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "128"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml
deleted file mode 100644
index cf8736e143..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-dep8-8k1k.yaml
+++ /dev/null
@@ -1,81 +0,0 @@
-name: "minimax-m3-vllm-disagg-b300-3p2d-fp8-dep2-dep8-8k1k"
-
-model:
-  path: "MiniMaxAI/MiniMax-M3-MXFP8"
-  container: "vllm/vllm-openai:minimax-m3-0618-x86_64-cu130"
-  precision: "fp8"
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 3
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-dynamo:
-  install: true
-  version: 1.3.0.dev20260614
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      tensor-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-
-    decode:
-      tensor-parallel-size: 1
-      data-parallel-size: 8
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-num-seqs: 1024
-      max-num-batched-tokens: 16384
-      max-cudagraph-capture-size: 4096
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "512"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml
deleted file mode 100644
index 9572688b2c..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/3p2d-dep2-tep8-8k1k.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-name: "minimax-m3-vllm-disagg-b300-3p2d-fp8-dep2-tep8-8k1k"
-
-model:
-  path: "MiniMaxAI/MiniMax-M3-MXFP8"
-  container: "vllm/vllm-openai:minimax-m3-0618-x86_64-cu130"
-  precision: "fp8"
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 3
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-dynamo:
-  install: true
-  version: 1.3.0.dev20260614
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      tensor-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-
-    decode:
-      tensor-parallel-size: 8
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-num-seqs: 1024
-      max-num-batched-tokens: 16384
-      max-cudagraph-capture-size: 4096
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "32"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml
deleted file mode 100644
index 6f765ab746..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/4p3d-dep2-dep4-8k1k.yaml
+++ /dev/null
@@ -1,81 +0,0 @@
-name: "minimax-m3-vllm-disagg-b300-4p3d-fp8-dep2-dep4-8k1k"
-
-model:
-  path: "MiniMaxAI/MiniMax-M3-MXFP8"
-  container: "vllm/vllm-openai:minimax-m3-0618-x86_64-cu130"
-  precision: "fp8"
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 1
-  decode_nodes: 2
-  prefill_workers: 4
-  decode_workers: 3
-  gpus_per_prefill: 2
-  gpus_per_decode: 4
-
-dynamo:
-  install: true
-  version: 1.3.0.dev20260614
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      tensor-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-
-    decode:
-      tensor-parallel-size: 1
-      data-parallel-size: 4
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-num-seqs: 512  # Per DP rank: 3 workers x DP4 = 12 ranks.
-      max-num-batched-tokens: 16384
-      max-cudagraph-capture-size: 4096
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4096"
-  req_rate: "inf"
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml
deleted file mode 100644
index d40a335829..0000000000
--- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3/b300-fp8/8k1k/5p2d-dep2-tep8-8k1k.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-name: "minimax-m3-vllm-disagg-b300-5p2d-fp8-dep2-tep8-8k1k"
-
-model:
-  path: "MiniMaxAI/MiniMax-M3-MXFP8"
-  container: "vllm/vllm-openai:minimax-m3-0618-x86_64-cu130"
-  precision: "fp8"
-
-resources:
-  gpu_type: "b300"
-  gpus_per_node: 8
-  prefill_nodes: 2
-  decode_nodes: 2
-  prefill_workers: 5
-  decode_workers: 2
-  gpus_per_prefill: 2
-  gpus_per_decode: 8
-
-dynamo:
-  install: true
-  version: 1.3.0.dev20260614
-
-frontend:
-  type: dynamo
-  enable_multiple_frontends: false
-
-backend:
-  type: vllm
-  connector: null
-
-  prefill_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  decode_environment:
-    VLLM_FLOAT32_MATMUL_PRECISION: high
-    UCX_TLS: "cuda_copy,rc"
-
-  vllm_config:
-    prefill:
-      tensor-parallel-size: 1
-      data-parallel-size: 2
-      data-parallel-rpc-port: 13345
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-cudagraph-capture-size: 2048
-      max-num-batched-tokens: 16384
-
-    decode:
-      tensor-parallel-size: 8
-      enable-expert-parallel: true
-      trust-remote-code: true
-      no-enable-prefix-caching: true
-      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
-      block-size: 128
-      gpu-memory-utilization: 0.90
-      max-model-len: 9472
-      language-model-only: true
-      stream-interval: 32
-      max-num-seqs: 1024
-      max-num-batched-tokens: 16384
-      max-cudagraph-capture-size: 4096
-
-health_check:
-  max_attempts: 360
-  interval_seconds: 10
-
-benchmark:
-  type: "sa-bench"
-  isl: 8192
-  osl: 1024
-  concurrencies: "4x64"
-  req_rate: "inf"
diff --git a/configs/nvidia-master.yaml b/configs/nvidia-master.yaml
index 46154341c5..bdb6a46029 100644
--- a/configs/nvidia-master.yaml
+++ b/configs/nvidia-master.yaml
@@ -7902,7 +7902,7 @@ dsr1-fp4-b200-dynamo-sglang-mtp:
           ep: 4
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_0.yaml"
+          - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[0]"
         decode:
           num-worker: 5
           tp: 8
@@ -7916,7 +7916,7 @@ dsr1-fp4-b200-dynamo-sglang-mtp:
           ep: 4
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_lowlat_1.yaml"
+          - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_lowlat[1]"
         decode:
           num-worker: 6
           tp: 8
@@ -7930,7 +7930,7 @@ dsr1-fp4-b200-dynamo-sglang-mtp:
           ep: 4
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_0.yaml"
+          - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[0]"
         decode:
           num-worker: 1
           tp: 8
@@ -7944,7 +7944,7 @@ dsr1-fp4-b200-dynamo-sglang-mtp:
           ep: 4
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/dsr1/b200-fp4/1k1k/disagg/mtp/1k1k_mtp_maxtpt_1.yaml"
+          - "CONFIG_FILE=recipes/b200-fp4/1k1k.yaml:zip_override_mtp_maxtpt[1]"
         decode:
           num-worker: 2
           tp: 8
@@ -10618,7 +10618,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_1k1k_hightpt[0]"
         decode:
           num-worker: 1
           tp: 8
@@ -10631,7 +10631,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_1k1k_hightpt[1]"
         decode:
           num-worker: 2
           tp: 8
@@ -10644,7 +10644,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_1k1k_hightpt[2]"
         decode:
           num-worker: 3
           tp: 8
@@ -10657,7 +10657,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_maxtpt_3.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_1k1k_hightpt[3]"
         decode:
           num-worker: 4
           tp: 8
@@ -10670,7 +10670,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_1k1k_lowlat[0]"
         decode:
           num-worker: 8
           tp: 8
@@ -10683,7 +10683,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_1k1k_lowlat[1]"
         decode:
           num-worker: 8
           tp: 8
@@ -10699,7 +10699,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_hightpt[0]"
         decode:
           num-worker: 1
           tp: 8
@@ -10712,7 +10712,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_hightpt[1]"
         decode:
           num-worker: 1
           tp: 8
@@ -10725,7 +10725,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_hightpt[2]"
         decode:
           num-worker: 2
           tp: 8
@@ -10738,7 +10738,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_lowlat[0]"
         decode:
           num-worker: 2
           tp: 8
@@ -10751,7 +10751,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_lowlat[1]"
         decode:
           num-worker: 3
           tp: 8
@@ -10764,7 +10764,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_lowlat[2]"
         decode:
           num-worker: 4
           tp: 8
@@ -10777,7 +10777,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_lowlat[3]"
         decode:
           num-worker: 5
           tp: 8
@@ -10790,7 +10790,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_4.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_lowlat[4]"
         decode:
           num-worker: 7
           tp: 8
@@ -10803,7 +10803,7 @@ glm5-fp8-b200-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/b200-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_5.yaml"
+          - "CONFIG_FILE=recipes/b200-fp8/glm5.yaml:zip_override_8k1k_lowlat[5]"
         decode:
           num-worker: 8
           tp: 8
@@ -11109,7 +11109,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_8k1k_hightpt[0]"
         decode:
           num-worker: 1
           tp: 32
@@ -11123,7 +11123,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_8k1k_hightpt[1]"
         decode:
           num-worker: 1
           tp: 32
@@ -11137,7 +11137,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_8k1k_hightpt[2]"
         decode:
           num-worker: 1
           tp: 32
@@ -11229,7 +11229,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_1k1k_hightpt[0]"
         decode:
           num-worker: 1
           tp: 32
@@ -11243,7 +11243,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_1k1k_hightpt[1]"
         decode:
           num-worker: 1
           tp: 32
@@ -11257,7 +11257,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_1k1k_hightpt[2]"
         decode:
           num-worker: 1
           tp: 32
@@ -11275,7 +11275,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_1k1k_lowlat[0]"
         decode:
           num-worker: 17
           tp: 4
@@ -11289,7 +11289,7 @@ glm5-fp4-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp4/glm5.yaml:zip_override_1k1k_lowlat[1]"
         decode:
           num-worker: 17
           tp: 4
@@ -11318,7 +11318,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_hightpt[0]"
         decode:
           num-worker: 1
           tp: 16
@@ -11331,7 +11331,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_hightpt[1]"
         decode:
           num-worker: 1
           tp: 24
@@ -11344,7 +11344,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_2.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_hightpt[2]"
         decode:
           num-worker: 1
           tp: 32
@@ -11357,7 +11357,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_hightpt_3.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_hightpt[3]"
         decode:
           num-worker: 1
           tp: 40
@@ -11374,7 +11374,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_lowlat[0]"
         decode:
           num-worker: 9
           tp: 4
@@ -11387,7 +11387,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_lowlat[1]"
         decode:
           num-worker: 17
           tp: 4
@@ -11400,7 +11400,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_8k1k_lowlat[2]"
         decode:
           num-worker: 17
           tp: 4
@@ -11417,7 +11417,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_hightpt[0]"
         decode:
           num-worker: 1
           tp: 24
@@ -11430,7 +11430,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_hightpt[1]"
         decode:
           num-worker: 1
           tp: 32
@@ -11443,7 +11443,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_2.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_hightpt[2]"
         decode:
           num-worker: 1
           tp: 40
@@ -11456,7 +11456,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_3.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_hightpt[3]"
         decode:
           num-worker: 1
           tp: 48
@@ -11469,7 +11469,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_hightpt_4.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_hightpt[4]"
         decode:
           num-worker: 1
           tp: 56
@@ -11486,7 +11486,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_lowlat[0]"
         decode:
           num-worker: 17
           tp: 4
@@ -11499,7 +11499,7 @@ glm5-fp8-gb300-dynamo-sglang:
           ep: 1
           dp-attn: true
           additional-settings:
-          - "CONFIG_FILE=recipes/sglang/glm5/gb300-fp8/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml"
+          - "CONFIG_FILE=recipes/gb300-fp8/glm5.yaml:zip_override_1k1k_lowlat[1]"
         decode:
           num-worker: 17
           tp: 4
diff --git a/runners/launch_b200-dgxc.sh b/runners/launch_b200-dgxc.sh
index 72de4fc2df..7fe590cb32 100644
--- a/runners/launch_b200-dgxc.sh
+++ b/runners/launch_b200-dgxc.sh
@@ -113,24 +113,10 @@ if [[ "$IS_MULTINODE" == "true" ]]; then
         git checkout aflowers/vllm-gb200-v0.20.0
         mkdir -p recipes/vllm/deepseek-v4
         cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4" recipes/vllm/deepseek-v4
-    elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" ]]; then
-        git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-        cd "$SRT_REPO_DIR" || exit 1
-        git checkout main
-        mkdir -p recipes/vllm/minimax-m2.5-b200-fp4
-        cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp4" recipes/vllm/minimax-m2.5-b200-fp4
-    elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp8" ]]; then
-        git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-        cd "$SRT_REPO_DIR" || exit 1
-        git checkout main
-        mkdir -p recipes/vllm/minimax-m2.5-b200-fp8
-        cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8" recipes/vllm/minimax-m2.5-b200-fp8
     elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "glm5" && $PRECISION == "fp8" ]]; then
         git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
         cd "$SRT_REPO_DIR" || exit 1
-        git checkout sa-submission-q2-2026
-        mkdir -p recipes/sglang/glm5/b200-fp8
-        cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8" recipes/sglang/glm5/b200-fp8
+        git checkout main
     elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "dsr1" && $PRECISION == "fp4" ]]; then
         git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
         cd "$SRT_REPO_DIR" || exit 1
@@ -233,6 +219,7 @@ containers:
   dynamo-trtllm: "${SQUASH_FILE}"
   dynamo-sglang: "${SQUASH_FILE}"
   dynamo-vllm: "${SQUASH_FILE}"
+  sglang-v0.5.11-cu130: "${SQUASH_FILE}"
   "${IMAGE}": "${SQUASH_FILE}"
   nginx-sqsh: "${NGINX_SQUASH_FILE}"
 use_exclusive_sbatch_directive: true
diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh
index bc94b22712..d8b3e3d86e 100644
--- a/runners/launch_b300-nv.sh
+++ b/runners/launch_b300-nv.sh
@@ -76,18 +76,6 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "dsv4" ]]; then
     git checkout aflowers/vllm-gb200-v0.20.0
     mkdir -p recipes/vllm/deepseek-v4
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4" recipes/vllm/deepseek-v4
-elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR" || exit 1
-    git checkout main
-    mkdir -p recipes/vllm/minimax-m2.5
-    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300" recipes/vllm/minimax-m2.5
-elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp8" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR" || exit 1
-    git checkout main
-    mkdir -p recipes/vllm/minimax-m2.5-fp8
-    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300-fp8" recipes/vllm/minimax-m2.5-fp8
 elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm3" && ( $PRECISION == "fp4" || $PRECISION == "fp8" ) ]]; then
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR" || exit 1
diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
index a179f7f4fc..ba5fffa83e 100755
--- a/runners/launch_gb200-nv.sh
+++ b/runners/launch_gb200-nv.sh
@@ -307,20 +307,6 @@ elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "qwen3.5" ]]; then
     cd "$SRT_REPO_DIR"
     mkdir -p recipes/sglang/qwen3.5
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/qwen3.5" recipes/sglang/qwen3.5
-elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" || exit 1
-    cd "$SRT_REPO_DIR" || exit 1
-    git checkout main || exit 1
-    if [[ $PRECISION == "fp8" ]]; then
-        mkdir -p recipes/vllm/minimax-m2.5-gb200-fp8 || exit 1
-        cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200-fp8" recipes/vllm/minimax-m2.5-gb200-fp8 || exit 1
-    elif [[ $PRECISION == "fp4" ]]; then
-        mkdir -p recipes/vllm/minimax-m2.5-gb200 || exit 1
-        cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-gb200" recipes/vllm/minimax-m2.5-gb200 || exit 1
-    else
-        echo "Unsupported minimaxm2.5 precision for GB200 dynamo-vllm: $PRECISION" >&2
-        exit 1
-    fi
 elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm3" && $PRECISION == "fp8" ]]; then
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" || exit 1
     cd "$SRT_REPO_DIR" || exit 1
diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh
index 10d1b19287..aaf48134fa 100644
--- a/runners/launch_gb300-nv.sh
+++ b/runners/launch_gb300-nv.sh
@@ -159,29 +159,18 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "dsv4" ]]; then
 elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "glm5" ]]; then
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR"
-    git checkout sa-submission-q2-2026
-    mkdir -p recipes/sglang/glm5
-    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5" recipes/sglang/glm5
+    git checkout main
+    if [[ $PRECISION == "fp4" ]]; then
+        mkdir -p recipes/sglang/glm5/gb300-fp4
+        cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4" recipes/sglang/glm5/gb300-fp4
+    fi
 elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "qwen3.5" ]]; then
-    # Same srt-slurm tooling as glm5: NVIDIA/srt-slurm @ sa-submission-q2-2026.
-    # Overlay our version-controlled Qwen3.5 recipes on top (upstream has none).
+    # Overlay our version-controlled Qwen3.5 recipes onto the submission branch.
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR"
     git checkout sa-submission-q2-2026
     mkdir -p recipes/sglang/qwen3.5
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/qwen3.5" recipes/sglang/qwen3.5
-elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp8" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR"
-    git checkout main
-    mkdir -p recipes/vllm/minimax-m2.5-fp8
-    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8" recipes/vllm/minimax-m2.5-fp8
-elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" ]]; then
-    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
-    cd "$SRT_REPO_DIR"
-    git checkout main
-    mkdir -p recipes/vllm/minimax-m2.5
-    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5" recipes/vllm/minimax-m2.5
 elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm3" ]]; then
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR"
@@ -265,6 +254,7 @@ model_paths:
 containers:
   dynamo-trtllm: ${SQUASH_FILE}
   dynamo-sglang: ${SQUASH_FILE}
+  v0.5.11: ${SQUASH_FILE}
   "${IMAGE}": ${SQUASH_FILE}
   nginx-sqsh: ${NGINX_SQUASH_FILE}
 use_segment_sbatch_directive: false