音频推理报错

**Describe the bug**
安装好 vllm 后，通过 `vllm serve` 命令部署 MiniCPM-o-4_5 模型，进行音频推理时报如下错误：
```
(APIServer pid=111789) ERROR:    Exception in ASGI application
(APIServer pid=111789) Traceback (most recent call last):
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 416, in run_asgi
(APIServer pid=111789)     result = await app(  # type: ignore[func-returns-value]
(APIServer pid=111789)              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
(APIServer pid=111789)     return await self.app(scope, receive, send)
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1160, in __call__
(APIServer pid=111789)     await super().__call__(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 107, in __call__
(APIServer pid=111789)     await self.middleware_stack(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 186, in __call__
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 164, in __call__
(APIServer pid=111789)     await self.app(scope, receive, _send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/cors.py", line 87, in __call__
(APIServer pid=111789)     await self.app(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/prometheus_fastapi_instrumentator/middleware.py", line 177, in __call__
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/prometheus_fastapi_instrumentator/middleware.py", line 175, in __call__
(APIServer pid=111789)     await self.app(scope, receive, send_wrapper)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/exceptions.py", line 63, in __call__
(APIServer pid=111789)     await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(APIServer pid=111789)     await app(scope, receive, sender)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
(APIServer pid=111789)     await self.app(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 716, in __call__
(APIServer pid=111789)     await self.middleware_stack(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 736, in app
(APIServer pid=111789)     await route.handle(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 290, in handle
(APIServer pid=111789)     await self.app(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 130, in app
(APIServer pid=111789)     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
(APIServer pid=111789)     raise exc
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
(APIServer pid=111789)     await app(scope, receive, sender)
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 116, in app
(APIServer pid=111789)     response = await f(request)
(APIServer pid=111789)                ^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 670, in app
(APIServer pid=111789)     raw_response = await run_endpoint_function(
(APIServer pid=111789)                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 324, in run_endpoint_function
(APIServer pid=111789)     return await dependant.call(**values)
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/utils.py", line 95, in wrapper
(APIServer pid=111789)     return handler_task.result()
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/utils.py", line 116, in wrapper
(APIServer pid=111789)     return await func(*args, **kwargs)
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/chat_completion/api_router.py", line 55, in create_chat_completion
(APIServer pid=111789)     generator = await handler.create_chat_completion(request, raw_request)
(APIServer pid=111789)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/chat_completion/serving.py", line 305, in create_chat_completion
(APIServer pid=111789)     result = await self.render_chat_request(request)
(APIServer pid=111789)              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/chat_completion/serving.py", line 261, in render_chat_request
(APIServer pid=111789)     conversation, engine_prompts = await self._preprocess_chat(
(APIServer pid=111789)                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/entrypoints/openai/engine/serving.py", line 898, in _preprocess_chat
(APIServer pid=111789)     (conversation,), (engine_prompt,) = await renderer.render_chat_async(
(APIServer pid=111789)                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 823, in render_chat_async
(APIServer pid=111789)     self.process_for_engine(prompt, arrival_time) for prompt in tok_prompts
(APIServer pid=111789)     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 715, in process_for_engine
(APIServer pid=111789)     engine_prompt = self._process_singleton(prompt)
(APIServer pid=111789)                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 691, in _process_singleton
(APIServer pid=111789)     return self._process_tokens(prompt)  # type: ignore[arg-type]
(APIServer pid=111789)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 636, in _process_tokens
(APIServer pid=111789)     inputs = self._process_multimodal(
(APIServer pid=111789)              ^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/renderers/base.py", line 622, in _process_multimodal
(APIServer pid=111789)     mm_inputs = mm_processor.apply(mm_processor_inputs, mm_timing_ctx)
(APIServer pid=111789)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1663, in apply
(APIServer pid=111789)     ) = self._cached_apply_hf_processor(inputs, timing_ctx)
(APIServer pid=111789)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1452, in _cached_apply_hf_processor
(APIServer pid=111789)     ) = self._apply_hf_processor_main(
(APIServer pid=111789)         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1269, in _apply_hf_processor_main
(APIServer pid=111789)     mm_processed_data = self._apply_hf_processor_mm_only(
(APIServer pid=111789)                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1227, in _apply_hf_processor_mm_only
(APIServer pid=111789)     _, mm_processed_data, _ = self._apply_hf_processor_text_mm(
(APIServer pid=111789)                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/multimodal/processing/processor.py", line 1154, in _apply_hf_processor_text_mm
(APIServer pid=111789)     processed_data = self._call_hf_processor(
(APIServer pid=111789)                      ^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/model_executor/models/minicpmv.py", line 880, in _call_hf_processor
(APIServer pid=111789)     mm_inputs = self.process_mm_inputs(mm_data, mm_kwargs, tok_kwargs)
(APIServer pid=111789)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 427, in process_mm_inputs
(APIServer pid=111789)     **self.process_audios(mm_data, mm_kwargs, tok_kwargs),
(APIServer pid=111789)       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=111789)   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 402, in process_audios
(APIServer pid=111789)     feat[:, :feature_len]
(APIServer pid=111789)     ~~~~^^^^^^^^^^^^^^^^^
(APIServer pid=111789) TypeError: only integer tensors of a single element can be converted to an index
```

**问题定位**
根据报错信息，我定位到 `vllm/model_executor/models/minicpmo.py` 文件中 `MiniCPMOMultiModalProcessor` 类的 `process_audios` 方法，报错代码如下：
```
unpadded_audio_features = [
                feat[:, :feature_len]
                for feat, feature_len in zip(
                    audio_inputs["audio_features"],
                    audio_inputs["audio_feature_lens"],
                )
            ]
            audio_inputs["audio_features"] = unpadded_audio_features
```

打印audio_inputs["audio_feature_lens"]结果如下：
```
[tensor([3000, 3000, 3000, 3000, 1187])]
```

我按照如下方式将audio_inputs["audio_feature_lens"]转换成list：
```
audio_inputs["audio_feature_lens"] = audio_inputs["audio_feature_lens"][0].cpu().tolist()
```

重新运行后出现了新的报错：
```
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100] EngineCore encountered a fatal error.
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100] Traceback (most recent call last):
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 1091, in run_engine_core
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     engine_core.run_busy_loop()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 1126, in run_busy_loop
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self._process_engine_step()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 1163, in _process_engine_step
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     outputs, model_executed = self.step_fn()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                               ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/engine/core.py", line 447, in step_with_batch_queue
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     exec_future = self.model_executor.execute_model(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/executor/uniproc_executor.py", line 112, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     output.result()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/lib/python3.12/concurrent/futures/_base.py", line 449, in result
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return self.__get_result()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     raise self._exception
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/executor/uniproc_executor.py", line 82, in collective_rpc
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     result = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/serial_utils.py", line 459, in run_method
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return func(*args, **kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/worker_base.py", line 332, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return self.worker.execute_model(scheduler_output)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 124, in decorate_context
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return func(*args, **kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_worker.py", line 816, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     output = self.model_runner.execute_model(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 124, in decorate_context
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return func(*args, **kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_model_runner.py", line 3740, in execute_model
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     ) = self._preprocess(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]         ^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_model_runner.py", line 2992, in _preprocess
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self._execute_mm_encoder(scheduler_output)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/v1/worker/gpu_model_runner.py", line 2645, in _execute_mm_encoder
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     batch_outputs = model.embed_multimodal(**mm_kwargs_batch)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/model_executor/models/minicpmv.py", line 1149, in embed_multimodal
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     modalities = self._parse_and_validate_multimodal_inputs(**kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 838, in _parse_and_validate_multimodal_inputs
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     modalities["audios"] = self._parse_and_validate_audio_input(**kwargs)
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/model_executor/models/minicpmo.py", line 820, in _parse_and_validate_audio_input
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     return MiniCPMOAudioFeatureInputs(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/utils/tensor_schema.py", line 70, in __init__
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self.validate()
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/utils/tensor_schema.py", line 249, in validate
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     self._validate_tensor_shape_expected(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]   File "/workpace/vllm-main/vllm/utils/tensor_schema.py", line 173, in _validate_tensor_shape_expected
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100]     raise ValueError(
(EngineCore_DP0 pid=114924) ERROR 03-17 06:39:53 [core.py:1100] ValueError: audio_feature_lens has rank 1 but expected 2. Expected shape: ('bn', 's'), but got (1,)
```


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

音频推理报错 #113

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

音频推理报错 #113

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions