Merged
2 changes: 1 addition & 1 deletion src/transformers/generation/utils.py
@@ -2006,7 +2006,7 @@ def _valid_auto_compile_criteria(
         cache = model_kwargs.get("past_key_values", model_kwargs.get("cache_params"))
 
         # Base logic
-        valid_hardware = self.device.type in ["cuda", "xpu"] or bool(
+        valid_hardware = self.device.type in ["cuda", "xpu", "neuron"] or bool(
Member


isn't it dependent on adding a full static-shape generation loop first?

Contributor


Hmm, not sure what you mean? I think this is just the general list of devices that support compile out of the box - you can also hack in CPU etc. with some private flag, iirc

The static shapes etc come later in the input preparation

Member


Will it not auto-compile, and then error out down the line due to dynamic inputs? From what I understood, this device cannot support full compile without completely static shapes.

Contributor

@vasqu Apr 14, 2026


See the line below: the condition requires valid hardware + a compileable cache --> if you don't set a static cache (and hence all the static prep), you are out of luck either way

There is no real dynamic thing going on
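The gate being described can be sketched roughly as follows. This is a simplified illustration of the logic, not the actual `_valid_auto_compile_criteria` implementation, and the cache stubs are made up for the example:

```python
# Illustrative sketch only (not the actual transformers code): auto-compile
# requires BOTH supported hardware AND a compileable (static) cache, so a
# dynamic cache never triggers compilation on its own.
def valid_auto_compile_criteria(device_type, cache, compile_config=None):
    valid_hardware = device_type in ["cuda", "xpu", "neuron"] or bool(
        compile_config is not None
        and getattr(compile_config, "_compile_all_devices", False)
    )
    using_compilable_cache = getattr(cache, "is_compileable", False)
    return valid_hardware and using_compilable_cache


class StaticCacheStub:
    is_compileable = True  # stands in for a static, compile-friendly cache

class DynamicCacheStub:
    is_compileable = False  # stands in for a dynamic cache
```

With a dynamic cache the gate stays closed even on supported hardware, which is the "out of luck either way" point above.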

Contributor


Ok, discussed internally, now I understand it: with this we enable compile for neuron when a static cache is set, but there are still dynamic traces within the whole generate loop, so it potentially doesn't make sense to add this yet - we should rather wait for feature completeness first. That's at least my current understanding.

Contributor


For testing purposes, we can enable via the private flags within the compile config
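The private escape hatch mentioned here is the `_compile_all_devices` flag visible in the diff. A toy sketch of how such a flag forces the hardware gate open for testing (`SimpleCompileConfig` is hypothetical, only the flag from the diff is modeled):

```python
from dataclasses import dataclass

# Hypothetical stand-in for the compile config; only the private override
# flag seen in the diff is modeled here.
@dataclass
class SimpleCompileConfig:
    _compile_all_devices: bool = False

def hardware_gate(device_type, compile_config=None):
    # Mirrors the shape of the `valid_hardware` expression in the diff.
    return device_type in ["cuda", "xpu", "neuron"] or bool(
        compile_config is not None and compile_config._compile_all_devices
    )
```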

Contributor Author


Sorry, I think there might be a misunderstanding. The compilation is applied to the forward call, not the generation loop, right? So having a compile-friendly generation loop is not a prerequisite (quite the opposite, actually - the loop is far from compile-friendly, even for CUDA and XPU).

Contributor Author


And the forward IS compile-friendly, even for neuron.

Member


> And the forward IS compile-friendly, even for neuron.

Ahh right, we compile the forward of the decoding stage with 1 new token, sorry

Contributor


Oops, thanks for clarifying. We messed up 😬 it's true that we only compile the forward on decode nowadays (this was different before, when we also compiled more parts).

             generation_config.compile_config is not None and generation_config.compile_config._compile_all_devices
         )
         # Note: for some models that only use linear attention (e.g. Mamba), even a DynamicCache is compileable since all