From cc531c253d419b3e5626e4b1e2c0d8fbbf4cbcea Mon Sep 17 00:00:00 2001 From: wzhao18 Date: Thu, 2 Jul 2026 14:53:23 -0700 Subject: [PATCH 1/4] update --- configs/nvidia-master.yaml | 16 +++++++++------- perf-changelog.yaml | 5 ++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/configs/nvidia-master.yaml b/configs/nvidia-master.yaml index a23ebe349..521fe1606 100644 --- a/configs/nvidia-master.yaml +++ b/configs/nvidia-master.yaml @@ -12822,18 +12822,20 @@ minimaxm3-fp4-b300-vllm: osl: 1024 search-space: - { tp: 8, conc-start: 1, conc-end: 2 } - - { tp: 4, conc-start: 1, conc-end: 2 } + - { tp: 4, conc-start: 2, conc-end: 2 } + - { tp: 4, conc-start: 8, conc-end: 8 } + - { tp: 4, conc-start: 32, conc-end: 32 } - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 4, conc-start: 64, conc-end: 64 } - - { tp: 4, ep: 4, conc-start: 64, conc-end: 512 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 512 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 1024 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 4096, conc-end: 4096 } - isl: 8192 osl: 1024 search-space: - { tp: 8, conc-start: 1, conc-end: 2 } - - { tp: 4, conc-start: 1, conc-end: 2 } - - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 512 } + - { tp: 4, conc-start: 2, conc-end: 2 } + - { tp: 2, conc-start: 4, conc-end: 64 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 128, conc-end: 128 } + - { tp: 2, conc-start: 256, conc-end: 256 } # EAGLE3 speculative-decoding (spec-decoding: mtp) variant of MiniMax-M3 NVFP4 # (nvidia/MiniMax-M3-NVFP4) B300 single-node vLLM, pairing the target with the diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 943f4b390..6e67725d0 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -4437,6 +4437,5 @@ - config-keys: - minimaxm3-fp4-b200-vllm description: - - "Update Minimax M3 b200 vllm image tag" - - "Update search space to cover more configs" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1978 + - "Add high concurrency configs" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1994 From 9fad2557ab99472c283123fb4d2ed8c87c41116a Mon Sep 17 00:00:00 2001 From: "Wei Zhao (Engrg-Hardware 1)" Date: Thu, 2 Jul 2026 18:54:52 -0700 Subject: [PATCH 2/4] update --- perf-changelog.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 6e67725d0..c95f8d132 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -4434,6 +4434,13 @@ - "Update search space to cover more configs" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1990 +- config-keys: + - minimaxm3-fp4-b200-vllm + description: + - "Update Minimax M3 b200 vllm image tag" + - "Update search space to cover more configs" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1978 + - config-keys: - minimaxm3-fp4-b200-vllm description: From 105b7823db5cefd14b0ddc44ec98865b10331983 Mon Sep 17 00:00:00 2001 From: Wei Zhao <51183510+wzhao18@users.noreply.github.com> Date: Fri, 3 Jul 2026 11:56:20 -0400 Subject: [PATCH 3/4] Modify nvidia-master.yaml for model configurations Updated image reference and modified search-space parameters for fixed-seq-len scenarios. --- configs/nvidia-master.yaml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/configs/nvidia-master.yaml b/configs/nvidia-master.yaml index ff6c0274a..61bc6bba7 100644 --- a/configs/nvidia-master.yaml +++ b/configs/nvidia-master.yaml @@ -12603,26 +12603,33 @@ minimaxm3-fp4-b300-vllm: precision: fp4 framework: vllm multinode: false + scenarios: + fixed-seq-len: + image: vllm/vllm-openai:nightly-93d8f834dd8acf33eb0e2a75b2711b628cb6e226 + model: nvidia/MiniMax-M3-NVFP4 + model-prefix: minimaxm3 + runner: b300 + precision: fp4 + framework: vllm + multinode: false scenarios: fixed-seq-len: - isl: 1024 osl: 1024 search-space: - { tp: 8, conc-start: 1, conc-end: 2 } - - { tp: 4, conc-start: 2, conc-end: 2 } - - { tp: 4, conc-start: 8, conc-end: 8 } - - { tp: 4, conc-start: 32, conc-end: 32 } + - { tp: 4, conc-start: 1, conc-end: 64 } - { tp: 2, conc-start: 4, conc-end: 256 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 1024 } + - { tp: 4, ep: 4, conc-start: 64, conc-end: 512 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 512 } - { tp: 2, ep: 2, dp-attn: true, conc-start: 4096, conc-end: 4096 } - isl: 8192 osl: 1024 search-space: - { tp: 8, conc-start: 1, conc-end: 2 } - - { tp: 4, conc-start: 2, conc-end: 2 } - - { tp: 2, conc-start: 4, conc-end: 64 } - - { tp: 2, ep: 2, dp-attn: true, conc-start: 128, conc-end: 128 } - - { tp: 2, conc-start: 256, conc-end: 256 } + - { tp: 4, conc-start: 1, conc-end: 2 } + - { tp: 2, conc-start: 4, conc-end: 256 } + - { tp: 2, ep: 2, dp-attn: true, conc-start: 512, conc-end: 512 } # EAGLE3 speculative-decoding (spec-decoding: mtp) variant of MiniMax-M3 NVFP4 # (nvidia/MiniMax-M3-NVFP4) B300 single-node vLLM, pairing the target with the From 040d8cc2e8f8db0da7c20305ee2ca1f6ba119ecc Mon Sep 17 00:00:00 2001 From: Wei Zhao <51183510+wzhao18@users.noreply.github.com> Date: Fri, 3 Jul 2026 11:57:43 -0400 Subject: [PATCH 4/4] Remove MiniMax-M3-NVFP4 model configuration Removed deprecated model configuration for MiniMax-M3-NVFP4. --- configs/nvidia-master.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/configs/nvidia-master.yaml b/configs/nvidia-master.yaml index 61bc6bba7..486264648 100644 --- a/configs/nvidia-master.yaml +++ b/configs/nvidia-master.yaml @@ -12603,15 +12603,6 @@ minimaxm3-fp4-b300-vllm: precision: fp4 framework: vllm multinode: false - scenarios: - fixed-seq-len: - image: vllm/vllm-openai:nightly-93d8f834dd8acf33eb0e2a75b2711b628cb6e226 - model: nvidia/MiniMax-M3-NVFP4 - model-prefix: minimaxm3 - runner: b300 - precision: fp4 - framework: vllm - multinode: false scenarios: fixed-seq-len: - isl: 1024