diff --git a/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py b/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py
index bff36397af..394435e16b 100644
--- a/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py
+++ b/src/maxtext/integration/vllm/maxtext_vllm_adapter/adapter.py
@@ -112,7 +112,10 @@ def generate_maxtext_config(vllm_config: VllmConfig) -> pyconfig.HyperParameters
       if hasattr(vllm_config.model_config.hf_config, "text_config")
       else vllm_config.model_config.hf_config
   )
-  hidden_size = getattr(hf_config, "moe_intermediate_size", None)
+  hidden_size = (
+      getattr(hf_config, "moe_intermediate_size", None)
+      or getattr(hf_config, "intermediate_size", None)
+  )
   num_lanes = pltpu.get_tpu_info().num_lanes
   num_kv_heads = hf_config.num_key_value_heads
 
diff --git a/src/maxtext/layers/moe.py b/src/maxtext/layers/moe.py
index 020956098c..c0f7c58a49 100644
--- a/src/maxtext/layers/moe.py
+++ b/src/maxtext/layers/moe.py
@@ -2448,6 +2448,8 @@ def fused_moe_matmul(
 
     # Map MaxText config fields to fused_moe_func args
     activation = self.config.mlp_activations[0]  # e.g. "silu"
+    if activation == "sigmoid":
+      activation = "swigluoai"
     scoring_fn = self.config.routed_score_func if self.config.routed_score_func else "softmax"
 
     # Check if the model architecture intrinsically renormalizes weights