From 9be8f9686d210893dd7ee5252d0ba5c7fce19a50 Mon Sep 17 00:00:00 2001 From: Sanbao Su Date: Mon, 15 Jun 2026 20:04:41 +0000 Subject: [PATCH 1/2] [RL] Fix GPT-OSS 20B dimension mismatch error in vLLM adapter by resolving intermediate_size fallback --- src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py b/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py index bff36397af..394435e16b 100644 --- a/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py +++ b/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py @@ -112,7 +112,10 @@ def generate_maxtext_config(vllm_config: VllmConfig) -> pyconfig.HyperParameters if hasattr(vllm_config.model_config.hf_config, "text_config") else vllm_config.model_config.hf_config ) - hidden_size = getattr(hf_config, "moe_intermediate_size", None) + hidden_size = ( + getattr(hf_config, "moe_intermediate_size", None) + or getattr(hf_config, "intermediate_size", None) + ) num_lanes = pltpu.get_tpu_info().num_lanes num_kv_heads = hf_config.num_key_value_heads From 3c07d86d703b8d3568c918d7f341799ff1286a14 Mon Sep 17 00:00:00 2001 From: Sanbao Su Date: Wed, 17 Jun 2026 07:17:15 +0000 Subject: [PATCH 2/2] Map sigmoid activation to swigluoai for fused MoE in vLLM TPU path --- src/maxtext/layers/moe.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/maxtext/layers/moe.py b/src/maxtext/layers/moe.py index 020956098c..c0f7c58a49 100644 --- a/src/maxtext/layers/moe.py +++ b/src/maxtext/layers/moe.py @@ -2448,6 +2448,8 @@ def fused_moe_matmul( # Map MaxText config fields to fused_moe_func args activation = self.config.mlp_activations[0] # e.g. "silu" + if activation == "sigmoid": + activation = "swigluoai" scoring_fn = self.config.routed_score_func if self.config.routed_score_func else "softmax" # Check if the model architecture intrinsically renormalizes weights