From 6bae15f11228a63ce1023c8c46824ed587fc4211 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Sat, 2 May 2026 03:17:51 +0430 Subject: [PATCH] fix: remove invalid deepstack boundary check for Qwen3-VL Signed-off-by: AlpinDale --- .../model_executor/models/qwen3_omni_moe_thinker.py | 11 ----------- aphrodite/model_executor/models/qwen3_vl.py | 11 ----------- 2 files changed, 22 deletions(-) diff --git a/aphrodite/model_executor/models/qwen3_omni_moe_thinker.py b/aphrodite/model_executor/models/qwen3_omni_moe_thinker.py index aa07bd352f..0ea7b7c80d 100755 --- a/aphrodite/model_executor/models/qwen3_omni_moe_thinker.py +++ b/aphrodite/model_executor/models/qwen3_omni_moe_thinker.py @@ -1666,11 +1666,6 @@ def _get_deepstack_input_embeds( return None # If vision tower is skipped if getattr(self, "deepstack_input_embeds_num_tokens", 0) == 0: return None - if num_tokens > self.deepstack_input_embeds_num_tokens: - raise ValueError( - "Requested more deepstack tokens than available in buffer: " - f"{num_tokens=} > {self.deepstack_input_embeds_num_tokens=}" - ) # get deepstack_input_embeds from buffer, and clear the buffer return IntermediateTensors( @@ -1709,12 +1704,6 @@ def _clear_deepstack_input_embeds(self, num_tokens: int) -> None: # clear deepstack_input_embeds in buffer if num_tokens > 0: - if num_tokens > self.deepstack_input_embeds_num_tokens: - raise ValueError( - "Requested to clear more deepstack tokens than available in " - "buffer: " - f"{num_tokens=} > {self.deepstack_input_embeds_num_tokens=}" - ) for idx in range(self.deepstack_num_level): self.deepstack_input_embeds[idx][:num_tokens].zero_() self.deepstack_input_embeds_num_tokens = 0 diff --git a/aphrodite/model_executor/models/qwen3_vl.py b/aphrodite/model_executor/models/qwen3_vl.py index 0a16ae7498..46b295ab53 100644 --- a/aphrodite/model_executor/models/qwen3_vl.py +++ b/aphrodite/model_executor/models/qwen3_vl.py @@ -1624,11 +1624,6 @@ def _get_deepstack_input_embeds( return None # If vision tower is skipped if getattr(self, "deepstack_input_embeds_num_tokens", 0) == 0: return None - if num_tokens > self.deepstack_input_embeds_num_tokens: - raise ValueError( - "Requested more deepstack tokens than available in buffer: " - f"{num_tokens=} > {self.deepstack_input_embeds_num_tokens=}" - ) # get deepstack_input_embeds from buffer, and clear the buffer return IntermediateTensors( @@ -1666,12 +1661,6 @@ def _clear_deepstack_input_embeds(self, num_tokens: int) -> None: # clear deepstack_input_embeds in buffer if num_tokens > 0: - if num_tokens > self.deepstack_input_embeds_num_tokens: - raise ValueError( - "Requested to clear more deepstack tokens than available in " - "buffer: " - f"{num_tokens=} > {self.deepstack_input_embeds_num_tokens=}" - ) for idx in range(self.deepstack_num_level): self.deepstack_input_embeds[idx][:num_tokens].zero_() self.deepstack_input_embeds_num_tokens = 0