Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added examples/output.wav
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ public final class LiveAudioConversationAsync {
private static SourceDataLine speakerLine;
private static AsyncSession session;
private static ExecutorService micExecutor = Executors.newSingleThreadExecutor();
private static String promptString = null;
private static java.io.ByteArrayOutputStream audioResponseBytes = new java.io.ByteArrayOutputStream();

/** Creates the parameters for sending an audio chunk. */
public static LiveSendRealtimeInputParameters createAudioContent(byte[] audioData) {
Expand Down Expand Up @@ -155,31 +157,95 @@ public static void main(String[] args) throws LineUnavailableException {
System.out.println("Using Gemini Developer API");
}

String getModelFromArgs = null;
String voiceSamplePath = null;
String voiceConsentPath = null;
String voiceSignature = null;
promptString = null;

for (String arg : args) {
if (arg.startsWith("--model=")) {
getModelFromArgs = arg.substring("--model=".length());
} else if (arg.startsWith("--voice-sample=")) {
voiceSamplePath = arg.substring("--voice-sample=".length());
} else if (arg.startsWith("--voice-consent=")) {
voiceConsentPath = arg.substring("--voice-consent=".length());
} else if (arg.startsWith("--voice-signature=")) {
voiceSignature = arg.substring("--voice-signature=".length());
} else if (arg.startsWith("--prompt=")) {
promptString = arg.substring("--prompt=".length());
} else if (!arg.startsWith("--") && getModelFromArgs == null) {
getModelFromArgs = arg;
}
}

byte[] voiceSampleAudio = null;
byte[] consentAudio = null;

if (voiceSamplePath != null) {
try {
voiceSampleAudio = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(voiceSamplePath));
} catch (java.io.IOException e) {
throw new RuntimeException("Failed to read voice sample: " + e.getMessage());
}
if (voiceConsentPath == null && voiceSignature == null) {
throw new IllegalArgumentException(
"Either --voice-consent or --voice-signature must be provided when --voice-sample is"
+ " used.");
}
}
if (voiceConsentPath != null) {
try {
consentAudio = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(voiceConsentPath));
} catch (java.io.IOException e) {
throw new RuntimeException("Failed to read voice consent: " + e.getMessage());
}
}

final String modelId;
if (args.length != 0) {
modelId = args[0];
if (getModelFromArgs != null) {
modelId = getModelFromArgs;
} else if (client.vertexAI()) {
modelId = Constants.GEMINI_LIVE_MODEL_NAME;
} else {
modelId = Constants.GEMINI_LIVE_MODEL_NAME_PREVIEW;
}

// --- Audio Line Setup ---
microphoneLine = getMicrophoneLine();
speakerLine = getSpeakerLine();
if (promptString == null) {
microphoneLine = getMicrophoneLine();
speakerLine = getSpeakerLine();
}

// --- Live API Config for Audio ---
// Choice of ["Aoede", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Zephyr"]
String voiceName = "Aoede";

VoiceConfig.Builder voiceConfigBuilder = VoiceConfig.builder();
if (voiceSampleAudio != null) {
com.google.genai.types.ReplicatedVoiceConfig.Builder repBuilder =
com.google.genai.types.ReplicatedVoiceConfig.builder()
.mimeType("audio/wav")
.voiceSampleAudio(voiceSampleAudio);
if (consentAudio != null) {
repBuilder.consentAudio(consentAudio);
}
if (voiceSignature != null) {
repBuilder.voiceConsentSignature(
com.google.genai.types.VoiceConsentSignature.builder().signature(voiceSignature));
}
voiceConfigBuilder.replicatedVoiceConfig(repBuilder);
} else {
voiceConfigBuilder.prebuiltVoiceConfig(
PrebuiltVoiceConfig.builder().voiceName(voiceName));
}

LiveConnectConfig config =
LiveConnectConfig.builder()
.responseModalities(Modality.Known.AUDIO)
.speechConfig(
SpeechConfig.builder()
.voiceConfig(
VoiceConfig.builder()
.prebuiltVoiceConfig(
PrebuiltVoiceConfig.builder().voiceName(voiceName)))
.voiceConfig(voiceConfigBuilder)
.languageCode("en-US"))
.realtimeInputConfig(
RealtimeInputConfig.builder()
Expand Down Expand Up @@ -232,19 +298,33 @@ public static void main(String[] args) throws LineUnavailableException {
session = client.async.live.connect(modelId, config).get();
System.out.println("Connected.");

// --- Start Audio Lines ---
microphoneLine.start();
speakerLine.start();
System.out.println("Microphone and speakers started. Speak now (Press Ctrl+C to exit)...");
if (session.setupComplete() != null && session.setupComplete().voiceConsentSignature().isPresent()) {
System.out.println(
"\n=== Voice Consent Signature Received ===\n"
+ session.setupComplete().voiceConsentSignature().get().signature().orElse("")
+ "\n========================================\n");
}

// --- Start Receiving Audio Responses ---
CompletableFuture<Void> receiveFuture =
session.receive(LiveAudioConversationAsync::handleAudioResponse);
System.err.println("Receive stream started."); // Add this line

// --- Start Sending Microphone Audio ---
CompletableFuture<Void> sendFuture =
CompletableFuture.runAsync(LiveAudioConversationAsync::sendMicrophoneAudio, micExecutor);
System.err.println("Receive stream started.");

CompletableFuture<Void> sendFuture;
if (promptString == null) {
// --- Start Audio Lines ---
microphoneLine.start();
speakerLine.start();
System.out.println("Microphone and speakers started. Speak now (Press Ctrl+C to exit)...");

// --- Start Sending Microphone Audio ---
sendFuture = CompletableFuture.runAsync(LiveAudioConversationAsync::sendMicrophoneAudio, micExecutor);
} else {
System.out.println("Sending prompt: " + promptString);
session.sendRealtimeInput(
LiveSendRealtimeInputParameters.builder().text(promptString).build()).get();
sendFuture = CompletableFuture.completedFuture(null);
}

// Keep the main thread alive. Wait for sending or receiving to finish (or
// error).
Expand Down Expand Up @@ -313,13 +393,19 @@ public static void handleAudioResponse(LiveServerMessage message) {
content -> {
// Handle interruptions from Gemini.
if (content.interrupted().orElse(false)) {
speakerLine.flush();
if (speakerLine != null && speakerLine.isOpen()) {
speakerLine.flush();
}
return; // Skip processing the rest of this message's audio.
}

// Handle Model turn completion.
if (content.turnComplete().orElse(false)) {
// The turn is over, no more audio will be sent for this turn.
if (promptString != null) {
saveWavFile();
System.out.println("Response received, exiting.");
System.exit(0);
}
return;
}

Expand All @@ -334,15 +420,45 @@ public static void handleAudioResponse(LiveServerMessage message) {
if (speakerLine != null && speakerLine.isOpen()) {
// Write audio data to the speaker
speakerLine.write(audioBytes, 0, audioBytes.length);
} else {
System.out.println(
"Received audio response chunk: " + audioBytes.length + " bytes.");
}
try {
audioResponseBytes.write(audioBytes);
} catch (java.io.IOException e) {
System.err.println("Failed to accumulate audio bytes: " + e.getMessage());
}
});

// If this is the last message of a generation, drain the buffer.
if (content.generationComplete().orElse(false)) {
speakerLine.drain();
if (speakerLine != null && speakerLine.isOpen()) {
speakerLine.drain();
}
}
});
}

private static void saveWavFile() {
byte[] audioData = audioResponseBytes.toByteArray();
if (audioData.length == 0) {
System.out.println("No audio data received to save.");
return;
}
try {
javax.sound.sampled.AudioInputStream ais = new javax.sound.sampled.AudioInputStream(
new java.io.ByteArrayInputStream(audioData),
SPEAKER_AUDIO_FORMAT,
audioData.length / SPEAKER_AUDIO_FORMAT.getFrameSize()
);
java.io.File outputFile = new java.io.File("output.wav");
javax.sound.sampled.AudioSystem.write(ais, javax.sound.sampled.AudioFileFormat.Type.WAVE, outputFile);
System.out.println("Saved audio response to " + outputFile.getAbsolutePath());
} catch (Exception e) {
System.err.println("Failed to save WAV file: " + e.getMessage());
}
}

private LiveAudioConversationAsync() {}
}
5 changes: 4 additions & 1 deletion src/main/java/com/google/genai/AsyncLive.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.google.genai.types.LiveConnectConfig;
import com.google.genai.types.LiveConnectParameters;
import com.google.genai.types.LiveServerMessage;
import com.google.genai.types.LiveServerSetupComplete;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
Expand Down Expand Up @@ -283,11 +284,13 @@ private void handleIncomingMessage(String message) {
try {
LiveServerMessage initialResponse = LiveServerMessage.fromJson(message);
if (initialResponse.setupComplete().isPresent()) {
LiveServerSetupComplete setupComplete = initialResponse.setupComplete().get();
sessionFuture.complete(
new AsyncSession(
apiClient,
this,
initialResponse.setupComplete().get().sessionId().orElse(null)));
setupComplete.sessionId().orElse(null),
setupComplete));
} else {
sessionFuture.completeExceptionally(
new GenAiIOException(
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/com/google/genai/AsyncSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.genai.types.LiveClientContent;
import com.google.genai.types.LiveClientMessage;
import com.google.genai.types.LiveClientToolResponse;
import com.google.genai.types.LiveServerSetupComplete;
import com.google.genai.types.LiveSendClientContentParameters;
import com.google.genai.types.LiveSendRealtimeInputParameters;
import com.google.genai.types.LiveSendToolResponseParameters;
Expand All @@ -40,11 +41,17 @@ public final class AsyncSession {

private final AsyncLive.GenAiWebSocketClient websocket;
final String sessionId;
private final LiveServerSetupComplete setupComplete;

AsyncSession(ApiClient apiClient, AsyncLive.GenAiWebSocketClient websocket, String sessionId) {
AsyncSession(
ApiClient apiClient,
AsyncLive.GenAiWebSocketClient websocket,
String sessionId,
LiveServerSetupComplete setupComplete) {
this.apiClient = apiClient;
this.websocket = websocket;
this.sessionId = sessionId;
this.setupComplete = setupComplete;
}

/**
Expand Down Expand Up @@ -145,4 +152,8 @@ public CompletableFuture<Void> close() {
public String sessionId() {
return sessionId;
}

public LiveServerSetupComplete setupComplete() {
return setupComplete;
}
}
Loading
Loading