From 2234a2da49fc5b3980d7a73001ab2135e3c1e4a7 Mon Sep 17 00:00:00 2001
From: functionstackx <47992694+functionstackx@users.noreply.github.com>
Date: Fri, 3 Jul 2026 21:34:20 -0400
Subject: [PATCH] Test: add hf-overrides indexer cache to MiniMax M3 H200 vLLM (do not merge)

Co-Authored-By: Claude Fable 5
---
 benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_h200.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_h200.sh b/benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_h200.sh
index 057c0c230b..e745541db8 100755
--- a/benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_h200.sh
+++ b/benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_h200.sh
@@ -85,6 +85,7 @@ $PARALLEL_ARGS \
 --max-cudagraph-capture-size $CAPTURE_SIZE \
 --max-num-batched-tokens "$((ISL * 2 ))" \
 --stream-interval 20 --no-enable-prefix-caching \
+--hf-overrides '{"use_index_cache": true, "index_topk_freq": 4}' \
 --trust-remote-code > $SERVER_LOG 2>&1 &
 
 SERVER_PID=$!