diff --git a/examples/output.wav b/examples/output.wav new file mode 100644 index 00000000000..64e21955fa0 Binary files /dev/null and b/examples/output.wav differ diff --git a/examples/src/main/java/com/google/genai/examples/LiveAudioConversationAsync.java b/examples/src/main/java/com/google/genai/examples/LiveAudioConversationAsync.java index e1a6c82e0a9..077f01ff1cd 100644 --- a/examples/src/main/java/com/google/genai/examples/LiveAudioConversationAsync.java +++ b/examples/src/main/java/com/google/genai/examples/LiveAudioConversationAsync.java @@ -94,6 +94,8 @@ public final class LiveAudioConversationAsync { private static SourceDataLine speakerLine; private static AsyncSession session; private static ExecutorService micExecutor = Executors.newSingleThreadExecutor(); + private static String promptString = null; + private static java.io.ByteArrayOutputStream audioResponseBytes = new java.io.ByteArrayOutputStream(); /** Creates the parameters for sending an audio chunk. */ public static LiveSendRealtimeInputParameters createAudioContent(byte[] audioData) { @@ -155,9 +157,54 @@ public static void main(String[] args) throws LineUnavailableException { System.out.println("Using Gemini Developer API"); } + String getModelFromArgs = null; + String voiceSamplePath = null; + String voiceConsentPath = null; + String voiceSignature = null; + promptString = null; + + for (String arg : args) { + if (arg.startsWith("--model=")) { + getModelFromArgs = arg.substring("--model=".length()); + } else if (arg.startsWith("--voice-sample=")) { + voiceSamplePath = arg.substring("--voice-sample=".length()); + } else if (arg.startsWith("--voice-consent=")) { + voiceConsentPath = arg.substring("--voice-consent=".length()); + } else if (arg.startsWith("--voice-signature=")) { + voiceSignature = arg.substring("--voice-signature=".length()); + } else if (arg.startsWith("--prompt=")) { + promptString = arg.substring("--prompt=".length()); + } else if (!arg.startsWith("--") && getModelFromArgs 
== null) {
+        getModelFromArgs = arg;
+      }
+    }
+
+    byte[] voiceSampleAudio = null;
+    byte[] consentAudio = null;
+
+    if (voiceSamplePath != null) {
+      try {
+        voiceSampleAudio = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(voiceSamplePath));
+      } catch (java.io.IOException e) { // Chain e so the underlying I/O failure stays in the trace.
+        throw new RuntimeException("Failed to read voice sample: " + e.getMessage(), e);
+      }
+      if (voiceConsentPath == null && voiceSignature == null) {
+        throw new IllegalArgumentException(
+            "Either --voice-consent or --voice-signature must be provided when --voice-sample is"
+                + " used.");
+      }
+    }
+    if (voiceConsentPath != null) {
+      try {
+        consentAudio = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(voiceConsentPath));
+      } catch (java.io.IOException e) { // Chain e so the underlying I/O failure stays in the trace.
+        throw new RuntimeException("Failed to read voice consent: " + e.getMessage(), e);
+      }
+    }
+
     final String modelId;
-    if (args.length != 0) {
-      modelId = args[0];
+    if (getModelFromArgs != null) {
+      modelId = getModelFromArgs;
     } else if (client.vertexAI()) {
       modelId = Constants.GEMINI_LIVE_MODEL_NAME;
     } else {
@@ -165,21 +212,40 @@ public static void main(String[] args) throws LineUnavailableException {
     }
 
     // --- Audio Line Setup ---
-    microphoneLine = getMicrophoneLine();
-    speakerLine = getSpeakerLine();
+    if (promptString == null) {
+      microphoneLine = getMicrophoneLine();
+      speakerLine = getSpeakerLine();
+    }
 
     // --- Live API Config for Audio ---
     // Choice of ["Aoede", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Zephyr"]
     String voiceName = "Aoede";
+
+    VoiceConfig.Builder voiceConfigBuilder = VoiceConfig.builder();
+    if (voiceSampleAudio != null) {
+      com.google.genai.types.ReplicatedVoiceConfig.Builder repBuilder =
+          com.google.genai.types.ReplicatedVoiceConfig.builder()
+              .mimeType("audio/wav")
+              .voiceSampleAudio(voiceSampleAudio);
+      if (consentAudio != null) {
+        repBuilder.consentAudio(consentAudio);
+      }
+      if (voiceSignature != null) {
+        repBuilder.voiceConsentSignature(
+ 
com.google.genai.types.VoiceConsentSignature.builder().signature(voiceSignature)); + } + voiceConfigBuilder.replicatedVoiceConfig(repBuilder); + } else { + voiceConfigBuilder.prebuiltVoiceConfig( + PrebuiltVoiceConfig.builder().voiceName(voiceName)); + } + LiveConnectConfig config = LiveConnectConfig.builder() .responseModalities(Modality.Known.AUDIO) .speechConfig( SpeechConfig.builder() - .voiceConfig( - VoiceConfig.builder() - .prebuiltVoiceConfig( - PrebuiltVoiceConfig.builder().voiceName(voiceName))) + .voiceConfig(voiceConfigBuilder) .languageCode("en-US")) .realtimeInputConfig( RealtimeInputConfig.builder() @@ -232,19 +298,33 @@ public static void main(String[] args) throws LineUnavailableException { session = client.async.live.connect(modelId, config).get(); System.out.println("Connected."); - // --- Start Audio Lines --- - microphoneLine.start(); - speakerLine.start(); - System.out.println("Microphone and speakers started. Speak now (Press Ctrl+C to exit)..."); + if (session.setupComplete() != null && session.setupComplete().voiceConsentSignature().isPresent()) { + System.out.println( + "\n=== Voice Consent Signature Received ===\n" + + session.setupComplete().voiceConsentSignature().get().signature().orElse("") + + "\n========================================\n"); + } // --- Start Receiving Audio Responses --- CompletableFuture receiveFuture = session.receive(LiveAudioConversationAsync::handleAudioResponse); - System.err.println("Receive stream started."); // Add this line - - // --- Start Sending Microphone Audio --- - CompletableFuture sendFuture = - CompletableFuture.runAsync(LiveAudioConversationAsync::sendMicrophoneAudio, micExecutor); + System.err.println("Receive stream started."); + + CompletableFuture sendFuture; + if (promptString == null) { + // --- Start Audio Lines --- + microphoneLine.start(); + speakerLine.start(); + System.out.println("Microphone and speakers started. 
Speak now (Press Ctrl+C to exit)...");
+
+        // --- Start Sending Microphone Audio ---
+        sendFuture = CompletableFuture.runAsync(LiveAudioConversationAsync::sendMicrophoneAudio, micExecutor);
+      } else {
+        System.out.println("Sending prompt: " + promptString);
+        session.sendRealtimeInput(
+            LiveSendRealtimeInputParameters.builder().text(promptString).build()).get();
+        sendFuture = CompletableFuture.completedFuture(null);
+      }
 
       // Keep the main thread alive. Wait for sending or receiving to finish (or
       // error).
@@ -313,13 +393,19 @@ public static void handleAudioResponse(LiveServerMessage message) {
         content -> {
           // Handle interruptions from Gemini.
           if (content.interrupted().orElse(false)) {
-            speakerLine.flush();
+            if (speakerLine != null && speakerLine.isOpen()) {
+              speakerLine.flush();
+            }
             return; // Skip processing the rest of this message's audio.
           }
 
           // Handle Model turn completion.
           if (content.turnComplete().orElse(false)) {
-            // The turn is over, no more audio will be sent for this turn.
+            if (promptString != null) {
+              saveWavFile();
+              System.out.println("Response received, exiting.");
+              System.exit(0);
+            }
             return;
           }
 
@@ -334,15 +420,45 @@ public static void handleAudioResponse(LiveServerMessage message) {
                   if (speakerLine != null && speakerLine.isOpen()) {
                     // Write audio data to the speaker
                     speakerLine.write(audioBytes, 0, audioBytes.length);
+                  } else {
+                    System.out.println(
+                        "Received audio response chunk: " + audioBytes.length + " bytes.");
+                  }
+                  // Buffer audio only in --prompt mode, where saveWavFile() consumes it; an
+                  // interactive session would otherwise grow this buffer without bound.
+                  if (promptString != null) {
+                    audioResponseBytes.write(audioBytes, 0, audioBytes.length);
                   }
                 });
 
           // If this is the last message of a generation, drain the buffer.
if (content.generationComplete().orElse(false)) { - speakerLine.drain(); + if (speakerLine != null && speakerLine.isOpen()) { + speakerLine.drain(); + } } }); } + private static void saveWavFile() { + byte[] audioData = audioResponseBytes.toByteArray(); + if (audioData.length == 0) { + System.out.println("No audio data received to save."); + return; + } + try { + javax.sound.sampled.AudioInputStream ais = new javax.sound.sampled.AudioInputStream( + new java.io.ByteArrayInputStream(audioData), + SPEAKER_AUDIO_FORMAT, + audioData.length / SPEAKER_AUDIO_FORMAT.getFrameSize() + ); + java.io.File outputFile = new java.io.File("output.wav"); + javax.sound.sampled.AudioSystem.write(ais, javax.sound.sampled.AudioFileFormat.Type.WAVE, outputFile); + System.out.println("Saved audio response to " + outputFile.getAbsolutePath()); + } catch (Exception e) { + System.err.println("Failed to save WAV file: " + e.getMessage()); + } + } + private LiveAudioConversationAsync() {} } diff --git a/src/main/java/com/google/genai/AsyncLive.java b/src/main/java/com/google/genai/AsyncLive.java index 0c1b577d297..fdebcaa8b5d 100644 --- a/src/main/java/com/google/genai/AsyncLive.java +++ b/src/main/java/com/google/genai/AsyncLive.java @@ -25,6 +25,7 @@ import com.google.genai.types.LiveConnectConfig; import com.google.genai.types.LiveConnectParameters; import com.google.genai.types.LiveServerMessage; +import com.google.genai.types.LiveServerSetupComplete; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; @@ -283,11 +284,13 @@ private void handleIncomingMessage(String message) { try { LiveServerMessage initialResponse = LiveServerMessage.fromJson(message); if (initialResponse.setupComplete().isPresent()) { + LiveServerSetupComplete setupComplete = initialResponse.setupComplete().get(); sessionFuture.complete( new AsyncSession( apiClient, this, - initialResponse.setupComplete().get().sessionId().orElse(null))); + setupComplete.sessionId().orElse(null), + 
setupComplete)); } else { sessionFuture.completeExceptionally( new GenAiIOException( diff --git a/src/main/java/com/google/genai/AsyncSession.java b/src/main/java/com/google/genai/AsyncSession.java index 8047efc068c..737369f7d10 100644 --- a/src/main/java/com/google/genai/AsyncSession.java +++ b/src/main/java/com/google/genai/AsyncSession.java @@ -21,6 +21,7 @@ import com.google.genai.types.LiveClientContent; import com.google.genai.types.LiveClientMessage; import com.google.genai.types.LiveClientToolResponse; +import com.google.genai.types.LiveServerSetupComplete; import com.google.genai.types.LiveSendClientContentParameters; import com.google.genai.types.LiveSendRealtimeInputParameters; import com.google.genai.types.LiveSendToolResponseParameters; @@ -40,11 +41,17 @@ public final class AsyncSession { private final AsyncLive.GenAiWebSocketClient websocket; final String sessionId; + private final LiveServerSetupComplete setupComplete; - AsyncSession(ApiClient apiClient, AsyncLive.GenAiWebSocketClient websocket, String sessionId) { + AsyncSession( + ApiClient apiClient, + AsyncLive.GenAiWebSocketClient websocket, + String sessionId, + LiveServerSetupComplete setupComplete) { this.apiClient = apiClient; this.websocket = websocket; this.sessionId = sessionId; + this.setupComplete = setupComplete; } /** @@ -145,4 +152,8 @@ public CompletableFuture close() { public String sessionId() { return sessionId; } + + public LiveServerSetupComplete setupComplete() { + return setupComplete; + } } diff --git a/src/main/java/com/google/genai/LiveConverters.java b/src/main/java/com/google/genai/LiveConverters.java index febb094e1e5..caf86fa2c2d 100644 --- a/src/main/java/com/google/genai/LiveConverters.java +++ b/src/main/java/com/google/genai/LiveConverters.java @@ -382,7 +382,10 @@ ObjectNode generationConfigToVertex(JsonNode fromObject, ObjectNode parentObject Common.setValueByPath( toObject, new String[] {"speechConfig"}, - Common.getValueByPath(fromObject, new String[] 
{"speechConfig"})); + speechConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"speechConfig"})), + toObject)); } if (Common.getValueByPath(fromObject, new String[] {"stopSequences"}) != null) { @@ -1219,8 +1222,11 @@ ObjectNode liveConnectConfigToVertex(JsonNode fromObject, ObjectNode parentObjec Common.setValueByPath( parentObject, new String[] {"setup", "generationConfig", "speechConfig"}, - Transformers.tLiveSpeechConfig( - Common.getValueByPath(fromObject, new String[] {"speechConfig"}))); + speechConfigToVertex( + JsonSerializable.toJsonNode( + Transformers.tLiveSpeechConfig( + Common.getValueByPath(fromObject, new String[] {"speechConfig"}))), + toObject)); } if (Common.getValueByPath(fromObject, new String[] {"thinkingConfig"}) != null) { @@ -1648,6 +1654,24 @@ ObjectNode liveServerMessageFromVertex(JsonNode fromObject, ObjectNode parentObj return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode multiSpeakerVoiceConfigToVertex(JsonNode fromObject, ObjectNode parentObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"speakerVoiceConfigs"}) != null) { + ArrayNode keyArray = + (ArrayNode) Common.getValueByPath(fromObject, new String[] {"speakerVoiceConfigs"}); + ObjectMapper objectMapper = new ObjectMapper(); + ArrayNode result = objectMapper.createArrayNode(); + + for (JsonNode item : keyArray) { + result.add(speakerVoiceConfigToVertex(JsonSerializable.toJsonNode(item), toObject)); + } + Common.setValueByPath(toObject, new String[] {"speakerVoiceConfigs"}, result); + } + + return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode partToMldev(JsonNode fromObject, ObjectNode parentObject) { ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); @@ -1859,6 +1883,36 @@ ObjectNode partToVertex(JsonNode fromObject, ObjectNode parentObject) { return toObject; } + 
@ExcludeFromGeneratedCoverageReport + ObjectNode replicatedVoiceConfigToVertex(JsonNode fromObject, ObjectNode parentObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"mimeType"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"mimeType"}, + Common.getValueByPath(fromObject, new String[] {"mimeType"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"voiceSampleAudio"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceSampleAudio"}, + Common.getValueByPath(fromObject, new String[] {"voiceSampleAudio"})); + } + + if (!Common.isZero(Common.getValueByPath(fromObject, new String[] {"consentAudio"}))) { + throw new IllegalArgumentException( + "consentAudio parameter is not supported in Gemini Enterprise Agent Platform."); + } + + if (!Common.isZero(Common.getValueByPath(fromObject, new String[] {"voiceConsentSignature"}))) { + throw new IllegalArgumentException( + "voiceConsentSignature parameter is not supported in Gemini Enterprise Agent Platform."); + } + + return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode safetySettingToMldev(JsonNode fromObject, ObjectNode parentObject) { ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); @@ -1900,6 +1954,62 @@ ObjectNode sessionResumptionConfigToMldev(JsonNode fromObject, ObjectNode parent return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode speakerVoiceConfigToVertex(JsonNode fromObject, ObjectNode parentObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"speaker"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"speaker"}, + Common.getValueByPath(fromObject, new String[] {"speaker"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"voiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] 
{"voiceConfig"}, + voiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"voiceConfig"})), + toObject)); + } + + return toObject; + } + + @ExcludeFromGeneratedCoverageReport + ObjectNode speechConfigToVertex(JsonNode fromObject, ObjectNode parentObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"voiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceConfig"}, + voiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"voiceConfig"})), + toObject)); + } + + if (Common.getValueByPath(fromObject, new String[] {"languageCode"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"languageCode"}, + Common.getValueByPath(fromObject, new String[] {"languageCode"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"multiSpeakerVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"multiSpeakerVoiceConfig"}, + multiSpeakerVoiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"multiSpeakerVoiceConfig"})), + toObject)); + } + + return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode toolToMldev(JsonNode fromObject, ObjectNode parentObject) { ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); @@ -2188,4 +2298,27 @@ ObjectNode voiceActivityFromVertex(JsonNode fromObject, ObjectNode parentObject) return toObject; } + + @ExcludeFromGeneratedCoverageReport + ObjectNode voiceConfigToVertex(JsonNode fromObject, ObjectNode parentObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"replicatedVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"replicatedVoiceConfig"}, + replicatedVoiceConfigToVertex( + JsonSerializable.toJsonNode( 
+ Common.getValueByPath(fromObject, new String[] {"replicatedVoiceConfig"})), + toObject)); + } + + if (Common.getValueByPath(fromObject, new String[] {"prebuiltVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"prebuiltVoiceConfig"}, + Common.getValueByPath(fromObject, new String[] {"prebuiltVoiceConfig"})); + } + + return toObject; + } } diff --git a/src/main/java/com/google/genai/Models.java b/src/main/java/com/google/genai/Models.java index a6fb051345d..16f89763ca3 100644 --- a/src/main/java/com/google/genai/Models.java +++ b/src/main/java/com/google/genai/Models.java @@ -1863,8 +1863,12 @@ ObjectNode generateContentConfigToVertex( Common.setValueByPath( toObject, new String[] {"speechConfig"}, - Transformers.tSpeechConfig( - Common.getValueByPath(fromObject, new String[] {"speechConfig"}))); + speechConfigToVertex( + JsonSerializable.toJsonNode( + Transformers.tSpeechConfig( + Common.getValueByPath(fromObject, new String[] {"speechConfig"}))), + toObject, + rootObject)); } if (Common.getValueByPath(fromObject, new String[] {"audioTimestamp"}) != null) { @@ -3392,7 +3396,11 @@ ObjectNode generationConfigToVertex( Common.setValueByPath( toObject, new String[] {"speechConfig"}, - Common.getValueByPath(fromObject, new String[] {"speechConfig"})); + speechConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"speechConfig"})), + toObject, + rootObject)); } if (Common.getValueByPath(fromObject, new String[] {"stopSequences"}) != null) { @@ -4109,6 +4117,26 @@ ObjectNode modelFromVertex(JsonNode fromObject, ObjectNode parentObject, JsonNod return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode multiSpeakerVoiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"speakerVoiceConfigs"}) != null) { + ArrayNode keyArray = + 
(ArrayNode) Common.getValueByPath(fromObject, new String[] {"speakerVoiceConfigs"}); + ObjectMapper objectMapper = new ObjectMapper(); + ArrayNode result = objectMapper.createArrayNode(); + + for (JsonNode item : keyArray) { + result.add( + speakerVoiceConfigToVertex(JsonSerializable.toJsonNode(item), toObject, rootObject)); + } + Common.setValueByPath(toObject, new String[] {"speakerVoiceConfigs"}, result); + } + + return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode partToMldev(JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); @@ -4584,6 +4612,37 @@ ObjectNode referenceImageAPIToVertex( return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode replicatedVoiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"mimeType"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"mimeType"}, + Common.getValueByPath(fromObject, new String[] {"mimeType"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"voiceSampleAudio"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceSampleAudio"}, + Common.getValueByPath(fromObject, new String[] {"voiceSampleAudio"})); + } + + if (!Common.isZero(Common.getValueByPath(fromObject, new String[] {"consentAudio"}))) { + throw new IllegalArgumentException( + "consentAudio parameter is not supported in Gemini Enterprise Agent Platform."); + } + + if (!Common.isZero(Common.getValueByPath(fromObject, new String[] {"voiceConsentSignature"}))) { + throw new IllegalArgumentException( + "voiceConsentSignature parameter is not supported in Gemini Enterprise Agent Platform."); + } + + return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode safetyAttributesFromMldev( JsonNode fromObject, ObjectNode 
parentObject, JsonNode rootObject) { @@ -4824,6 +4883,67 @@ ObjectNode segmentImageSourceToVertex( return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode speakerVoiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"speaker"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"speaker"}, + Common.getValueByPath(fromObject, new String[] {"speaker"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"voiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceConfig"}, + voiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"voiceConfig"})), + toObject, + rootObject)); + } + + return toObject; + } + + @ExcludeFromGeneratedCoverageReport + ObjectNode speechConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"voiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceConfig"}, + voiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"voiceConfig"})), + toObject, + rootObject)); + } + + if (Common.getValueByPath(fromObject, new String[] {"languageCode"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"languageCode"}, + Common.getValueByPath(fromObject, new String[] {"languageCode"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"multiSpeakerVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"multiSpeakerVoiceConfig"}, + multiSpeakerVoiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"multiSpeakerVoiceConfig"})), + toObject, + rootObject)); + } + + 
return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode toolConfigToMldev(JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); @@ -5539,6 +5659,31 @@ ObjectNode videoToVertex(JsonNode fromObject, ObjectNode parentObject, JsonNode return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode voiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"replicatedVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"replicatedVoiceConfig"}, + replicatedVoiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"replicatedVoiceConfig"})), + toObject, + rootObject)); + } + + if (Common.getValueByPath(fromObject, new String[] {"prebuiltVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"prebuiltVoiceConfig"}, + Common.getValueByPath(fromObject, new String[] {"prebuiltVoiceConfig"})); + } + + return toObject; + } + /** A shared buildRequest method for both sync and async methods. 
*/ BuiltRequest buildRequestForPrivateGenerateContent( String model, List contents, GenerateContentConfig config) { diff --git a/src/main/java/com/google/genai/Tunings.java b/src/main/java/com/google/genai/Tunings.java index f9b6bc2b74f..5c6970b0b93 100644 --- a/src/main/java/com/google/genai/Tunings.java +++ b/src/main/java/com/google/genai/Tunings.java @@ -1005,7 +1005,11 @@ ObjectNode generationConfigToVertex( Common.setValueByPath( toObject, new String[] {"speechConfig"}, - Common.getValueByPath(fromObject, new String[] {"speechConfig"})); + speechConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"speechConfig"})), + toObject, + rootObject)); } if (Common.getValueByPath(fromObject, new String[] {"stopSequences"}) != null) { @@ -1159,6 +1163,118 @@ ObjectNode listTuningJobsResponseFromVertex( return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode multiSpeakerVoiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"speakerVoiceConfigs"}) != null) { + ArrayNode keyArray = + (ArrayNode) Common.getValueByPath(fromObject, new String[] {"speakerVoiceConfigs"}); + ObjectMapper objectMapper = new ObjectMapper(); + ArrayNode result = objectMapper.createArrayNode(); + + for (JsonNode item : keyArray) { + result.add( + speakerVoiceConfigToVertex(JsonSerializable.toJsonNode(item), toObject, rootObject)); + } + Common.setValueByPath(toObject, new String[] {"speakerVoiceConfigs"}, result); + } + + return toObject; + } + + @ExcludeFromGeneratedCoverageReport + ObjectNode replicatedVoiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"mimeType"}) != null) { + Common.setValueByPath( + 
toObject, + new String[] {"mimeType"}, + Common.getValueByPath(fromObject, new String[] {"mimeType"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"voiceSampleAudio"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceSampleAudio"}, + Common.getValueByPath(fromObject, new String[] {"voiceSampleAudio"})); + } + + if (!Common.isZero(Common.getValueByPath(fromObject, new String[] {"consentAudio"}))) { + throw new IllegalArgumentException( + "consentAudio parameter is not supported in Gemini Enterprise Agent Platform."); + } + + if (!Common.isZero(Common.getValueByPath(fromObject, new String[] {"voiceConsentSignature"}))) { + throw new IllegalArgumentException( + "voiceConsentSignature parameter is not supported in Gemini Enterprise Agent Platform."); + } + + return toObject; + } + + @ExcludeFromGeneratedCoverageReport + ObjectNode speakerVoiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"speaker"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"speaker"}, + Common.getValueByPath(fromObject, new String[] {"speaker"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"voiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceConfig"}, + voiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"voiceConfig"})), + toObject, + rootObject)); + } + + return toObject; + } + + @ExcludeFromGeneratedCoverageReport + ObjectNode speechConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"voiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"voiceConfig"}, + voiceConfigToVertex( + 
JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"voiceConfig"})), + toObject, + rootObject)); + } + + if (Common.getValueByPath(fromObject, new String[] {"languageCode"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"languageCode"}, + Common.getValueByPath(fromObject, new String[] {"languageCode"})); + } + + if (Common.getValueByPath(fromObject, new String[] {"multiSpeakerVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"multiSpeakerVoiceConfig"}, + multiSpeakerVoiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"multiSpeakerVoiceConfig"})), + toObject, + rootObject)); + } + + return toObject; + } + @ExcludeFromGeneratedCoverageReport ObjectNode tunedModelFromMldev( JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { @@ -1649,6 +1765,31 @@ ObjectNode tuningValidationDatasetToVertex( return toObject; } + @ExcludeFromGeneratedCoverageReport + ObjectNode voiceConfigToVertex( + JsonNode fromObject, ObjectNode parentObject, JsonNode rootObject) { + ObjectNode toObject = JsonSerializable.objectMapper().createObjectNode(); + if (Common.getValueByPath(fromObject, new String[] {"replicatedVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"replicatedVoiceConfig"}, + replicatedVoiceConfigToVertex( + JsonSerializable.toJsonNode( + Common.getValueByPath(fromObject, new String[] {"replicatedVoiceConfig"})), + toObject, + rootObject)); + } + + if (Common.getValueByPath(fromObject, new String[] {"prebuiltVoiceConfig"}) != null) { + Common.setValueByPath( + toObject, + new String[] {"prebuiltVoiceConfig"}, + Common.getValueByPath(fromObject, new String[] {"prebuiltVoiceConfig"})); + } + + return toObject; + } + /** A shared buildRequest method for both sync and async methods. 
*/ BuiltRequest buildRequestForPrivateGet(String name, GetTuningJobConfig config) { diff --git a/src/main/java/com/google/genai/types/LiveServerSetupComplete.java b/src/main/java/com/google/genai/types/LiveServerSetupComplete.java index fc50c7ef3b9..516487a0e6d 100644 --- a/src/main/java/com/google/genai/types/LiveServerSetupComplete.java +++ b/src/main/java/com/google/genai/types/LiveServerSetupComplete.java @@ -34,6 +34,14 @@ public abstract class LiveServerSetupComplete extends JsonSerializable { @JsonProperty("sessionId") public abstract Optional sessionId(); + /** + * Signature of the verified consent audio. This is populated when the request has a + * ReplicatedVoiceConfig with consent_audio set, if the consent verification was successful. This + * may be used in a subsequent request instead of the consent_audio to verify the same consent. + */ + @JsonProperty("voiceConsentSignature") + public abstract Optional voiceConsentSignature(); + /** Instantiates a builder for LiveServerSetupComplete. */ @ExcludeFromGeneratedCoverageReport public static Builder builder() { @@ -70,6 +78,41 @@ public Builder clearSessionId() { return sessionId(Optional.empty()); } + /** + * Setter for voiceConsentSignature. + * + *

voiceConsentSignature: Signature of the verified consent audio. This is populated when the + * request has a ReplicatedVoiceConfig with consent_audio set, if the consent verification was + * successful. This may be used in a subsequent request instead of the consent_audio to verify + * the same consent. + */ + @JsonProperty("voiceConsentSignature") + public abstract Builder voiceConsentSignature(VoiceConsentSignature voiceConsentSignature); + + /** + * Setter for voiceConsentSignature builder. + * + *

voiceConsentSignature: Signature of the verified consent audio. This is populated when the + * request has a ReplicatedVoiceConfig with consent_audio set, if the consent verification was + * successful. This may be used in a subsequent request instead of the consent_audio to verify + * the same consent. + */ + @CanIgnoreReturnValue + public Builder voiceConsentSignature( + VoiceConsentSignature.Builder voiceConsentSignatureBuilder) { + return voiceConsentSignature(voiceConsentSignatureBuilder.build()); + } + + @ExcludeFromGeneratedCoverageReport + abstract Builder voiceConsentSignature(Optional voiceConsentSignature); + + /** Clears the value of voiceConsentSignature field. */ + @ExcludeFromGeneratedCoverageReport + @CanIgnoreReturnValue + public Builder clearVoiceConsentSignature() { + return voiceConsentSignature(Optional.empty()); + } + public abstract LiveServerSetupComplete build(); } diff --git a/src/main/java/com/google/genai/types/ReplicatedVoiceConfig.java b/src/main/java/com/google/genai/types/ReplicatedVoiceConfig.java index 64ce622b96b..7a277091a06 100644 --- a/src/main/java/com/google/genai/types/ReplicatedVoiceConfig.java +++ b/src/main/java/com/google/genai/types/ReplicatedVoiceConfig.java @@ -41,6 +41,22 @@ public abstract class ReplicatedVoiceConfig extends JsonSerializable { @JsonProperty("voiceSampleAudio") public abstract Optional voiceSampleAudio(); + /** + * Recorded consent verifying ownership of the voice. This represents 16-bit signed little-endian + * wav data, with a 24kHz sampling rate. + */ + @JsonProperty("consentAudio") + public abstract Optional consentAudio(); + + /** + * Signature of a previously verified consent audio. This should be populated with a signature + * generated by the server for a previous request containing the consent_audio field. When + * provided, the signature is verified instead of the consent_audio field to reduce latency. + * Requests will fail if the signature is invalid or expired. 
+ */ + @JsonProperty("voiceConsentSignature") + public abstract Optional voiceConsentSignature(); + /** Instantiates a builder for ReplicatedVoiceConfig. */ @ExcludeFromGeneratedCoverageReport public static Builder builder() { @@ -97,6 +113,60 @@ public Builder clearVoiceSampleAudio() { return voiceSampleAudio(Optional.empty()); } + /** + * Setter for consentAudio. + * + *

consentAudio: Recorded consent verifying ownership of the voice. This represents 16-bit + * signed little-endian wav data, with a 24kHz sampling rate. + */ + @JsonProperty("consentAudio") + public abstract Builder consentAudio(byte[] consentAudio); + + @ExcludeFromGeneratedCoverageReport + abstract Builder consentAudio(Optional consentAudio); + + /** Clears the value of consentAudio field. */ + @ExcludeFromGeneratedCoverageReport + @CanIgnoreReturnValue + public Builder clearConsentAudio() { + return consentAudio(Optional.empty()); + } + + /** + * Setter for voiceConsentSignature. + * + *

voiceConsentSignature: Signature of a previously verified consent audio. This should be + * populated with a signature generated by the server for a previous request containing the + * consent_audio field. When provided, the signature is verified instead of the consent_audio + * field to reduce latency. Requests will fail if the signature is invalid or expired. + */ + @JsonProperty("voiceConsentSignature") + public abstract Builder voiceConsentSignature(VoiceConsentSignature voiceConsentSignature); + + /** + * Setter for voiceConsentSignature builder. + * + *

voiceConsentSignature: Signature of a previously verified consent audio. This should be + * populated with a signature generated by the server for a previous request containing the + * consent_audio field. When provided, the signature is verified instead of the consent_audio + * field to reduce latency. Requests will fail if the signature is invalid or expired. + */ + @CanIgnoreReturnValue + public Builder voiceConsentSignature( + VoiceConsentSignature.Builder voiceConsentSignatureBuilder) { + return voiceConsentSignature(voiceConsentSignatureBuilder.build()); + } + + @ExcludeFromGeneratedCoverageReport + abstract Builder voiceConsentSignature(Optional voiceConsentSignature); + + /** Clears the value of voiceConsentSignature field. */ + @ExcludeFromGeneratedCoverageReport + @CanIgnoreReturnValue + public Builder clearVoiceConsentSignature() { + return voiceConsentSignature(Optional.empty()); + } + public abstract ReplicatedVoiceConfig build(); } diff --git a/src/main/java/com/google/genai/types/VoiceConsentSignature.java b/src/main/java/com/google/genai/types/VoiceConsentSignature.java new file mode 100644 index 00000000000..216994504ac --- /dev/null +++ b/src/main/java/com/google/genai/types/VoiceConsentSignature.java @@ -0,0 +1,81 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Auto-generated code. Do not edit. 
+ +package com.google.genai.types; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.google.auto.value.AutoValue; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import com.google.genai.JsonSerializable; +import java.util.Optional; + +/** The signature of the voice consent check. */ +@AutoValue +@JsonDeserialize(builder = VoiceConsentSignature.Builder.class) +public abstract class VoiceConsentSignature extends JsonSerializable { + /** The signature string. */ + @JsonProperty("signature") + public abstract Optional signature(); + + /** Instantiates a builder for VoiceConsentSignature. */ + @ExcludeFromGeneratedCoverageReport + public static Builder builder() { + return new AutoValue_VoiceConsentSignature.Builder(); + } + + /** Creates a builder with the same values as this instance. */ + public abstract Builder toBuilder(); + + /** Builder for VoiceConsentSignature. */ + @AutoValue.Builder + public abstract static class Builder { + /** For internal usage. Please use `VoiceConsentSignature.builder()` for instantiation. */ + @JsonCreator + private static Builder create() { + return new AutoValue_VoiceConsentSignature.Builder(); + } + + /** + * Setter for signature. + * + *

signature: The signature string. + */ + @JsonProperty("signature") + public abstract Builder signature(String signature); + + @ExcludeFromGeneratedCoverageReport + abstract Builder signature(Optional signature); + + /** Clears the value of signature field. */ + @ExcludeFromGeneratedCoverageReport + @CanIgnoreReturnValue + public Builder clearSignature() { + return signature(Optional.empty()); + } + + public abstract VoiceConsentSignature build(); + } + + /** Deserializes a JSON string to a VoiceConsentSignature object. */ + @ExcludeFromGeneratedCoverageReport + public static VoiceConsentSignature fromJson(String jsonString) { + return JsonSerializable.fromJsonString(jsonString, VoiceConsentSignature.class); + } +} diff --git a/src/test/java/com/google/genai/AsyncLiveTest.java b/src/test/java/com/google/genai/AsyncLiveTest.java index 01907b1df5f..106b4eeab16 100644 --- a/src/test/java/com/google/genai/AsyncLiveTest.java +++ b/src/test/java/com/google/genai/AsyncLiveTest.java @@ -26,8 +26,10 @@ import com.google.genai.types.HttpOptions; import java.lang.reflect.Method; import java.net.URI; +import java.util.HashMap; import java.util.Map; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -118,4 +120,25 @@ public void testGetWebSocketHeaders_GoogleAiEphemeralToken() throws Exception { assertEquals("Token auth_tokens/ephemeral-token", headers.get("Authorization")); } + @Test + public void testOnMessage_PopulatesSetupCompleteWithVoiceConsent() throws Exception { + CompletableFuture future = new CompletableFuture<>(); + URI uri = new URI("wss://test"); + Map headers = new HashMap<>(); + String setupRequest = "{}"; + + AsyncLive.GenAiWebSocketClient client = + new AsyncLive.GenAiWebSocketClient(uri, headers, setupRequest, future, apiClient); + + String message = + "{\"setupComplete\":{\"voiceConsentSignature\":{\"signature\":\"test_sig\"}}}"; + + 
client.onMessage(message); + + AsyncSession session = future.get(); + assertTrue(session != null); + assertTrue(session.setupComplete() != null); + assertTrue(session.setupComplete().voiceConsentSignature().isPresent()); + assertEquals("test_sig", session.setupComplete().voiceConsentSignature().get().signature().get()); + } }