11import { writeFileSync } from "fs" ;
2+ import { extname } from "path" ;
23import {
34 defineCommand ,
45 request ,
56 chatEndpoint ,
67 parseSSE ,
78 detectOutputFormat ,
9+ BailianError ,
10+ ExitCode ,
811 type Config ,
912 type GlobalFlags ,
1013 type ChatMessage ,
@@ -20,6 +23,46 @@ import { resolveOutputDir, resolveCredential } from "bailian-cli-core";
2023
2124const OMNI_VOICES = [ "Chelsie" , "Cherry" , "Ethan" , "Serena" , "Tina" ] ;
2225
/**
 * Maps a lowercase file extension (without the leading dot) to the audio
 * `format` string returned by {@link inferInputAudioFormat}.
 *
 * Several extensions may share one format: `m4a` is reported as `aac`.
 */
const OMNI_INPUT_AUDIO_EXT: Record<string, string> = {
  wav: "wav",
  mp3: "mp3",
  amr: "amr",
  aac: "aac",
  m4a: "aac",
  ogg: "ogg",
  "3gp": "3gp",
  "3gpp": "3gpp",
};

/** Accepted audio extensions, in declaration order; listed in error messages. */
const audioExts = Object.keys(OMNI_INPUT_AUDIO_EXT);

/**
 * Infer the input audio format from the source URL or local file path.
 *
 * Only the path portion is inspected: everything from the first `?` or `#`
 * onwards (query string / fragment) is ignored, and the extension is matched
 * case-insensitively.
 *
 * @param source - The URL or local path exactly as supplied by the user.
 * @returns The format string registered for the extension in
 *   `OMNI_INPUT_AUDIO_EXT`.
 * @throws BailianError with `ExitCode.USAGE` when the path has no extension
 *   or the extension is not a supported audio type.
 */
function inferInputAudioFormat(source: string): string {
  // Drop the query string and fragment so "a.mp3?sig=..." still resolves to "mp3".
  const cut = source.search(/[?#]/);
  const pathPart = cut === -1 ? source : source.slice(0, cut);

  const ext = extname(pathPart).slice(1).toLowerCase();
  if (!ext) {
    throw new BailianError(
      `Cannot infer audio format from "${source}". ` +
        `Use a file/URL whose path ends with: ${audioExts.join(", ")}.`,
      ExitCode.USAGE,
    );
  }

  // Own-property check: a bare `OMNI_INPUT_AUDIO_EXT[ext]` would also resolve
  // inherited keys such as "constructor" or "__proto__" to truthy non-string
  // values and return them as if they were valid formats.
  const format = Object.prototype.hasOwnProperty.call(OMNI_INPUT_AUDIO_EXT, ext)
    ? OMNI_INPUT_AUDIO_EXT[ext]
    : undefined;
  if (!format) {
    throw new BailianError(
      `Unsupported audio extension ".${ext}" for "${source}". ` +
        `Supported extensions: ${audioExts.join(", ")}.`,
      ExitCode.USAGE,
    );
  }
  return format;
}
65+
2366/**
2467 * Build a standard WAV file header for PCM 16-bit mono 24kHz audio.
2568 */
@@ -55,7 +98,11 @@ export default defineCommand({
5598 { flag : "--model <model>" , description : "Model ID (default: qwen3.5-omni-plus)" } ,
5699 { flag : "--system <text>" , description : "System prompt" } ,
57100 { flag : "--image <url>" , description : "Image URL or local file (repeatable)" , type : "array" } ,
58- { flag : "--audio <url>" , description : "Audio URL or local file (repeatable)" , type : "array" } ,
101+ {
102+ flag : "--audio <url>" ,
103+ description : "Audio URL or local file (.wav/.mp3/.amr/.aac/.m4a/.3gp/.3gpp)" ,
104+ type : "array" ,
105+ } ,
59106 {
60107 flag : "--video <url>" ,
61108 description : "Video file URL / local path, or comma-separated frame URLs" ,
@@ -138,7 +185,7 @@ export default defineCommand({
138185
139186 // Auto-upload local files
140187 const imageUrls : string [ ] = [ ] ;
141- const audioUrls : string [ ] = [ ] ;
188+ const audioInputs : Array < { source : string ; data : string } > = [ ] ;
142189 const videoUrls : string [ ] = [ ] ;
143190
144191 const needsResolve =
@@ -151,7 +198,7 @@ export default defineCommand({
151198 }
152199 for ( const u of rawAudioUrls ) {
153200 const resolved = await resolveFileUrl ( u , credential . token , model ) ;
154- audioUrls . push ( resolved ) ;
201+ audioInputs . push ( { source : u , data : resolved } ) ;
155202 }
156203 for ( const u of rawVideoUrls ) {
157204 // Detect: comma-separated = frame list, otherwise single video URL/file
@@ -173,7 +220,7 @@ export default defineCommand({
173220 }
174221 }
175222
176- if ( imageUrls . length > 0 || audioUrls . length > 0 || videoUrls . length > 0 ) {
223+ if ( imageUrls . length > 0 || audioInputs . length > 0 || videoUrls . length > 0 ) {
177224 // Find last user message and convert to multimodal content array
178225 for ( let i = allMessages . length - 1 ; i >= 0 ; i -- ) {
179226 if ( allMessages [ i ] . role === "user" ) {
@@ -192,9 +239,11 @@ export default defineCommand({
192239 contentArray . push ( { type : "image_url" , image_url : { url } } ) ;
193240 }
194241
195- // Add audio URLs
196- for ( const url of audioUrls ) {
197- contentArray . push ( { type : "audio_url" , audio_url : { url } } ) ;
242+ for ( const { source, data } of audioInputs ) {
243+ contentArray . push ( {
244+ type : "input_audio" ,
245+ input_audio : { data, format : inferInputAudioFormat ( source ) } ,
246+ } ) ;
198247 }
199248
200249 // Add video URLs: frame:xxx are frame list items, others are direct video URLs
0 commit comments