diff --git a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java index 29713a6bdf4f..e03762dbe151 100644 --- a/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java +++ b/sdk/voicelive/azure-ai-voicelive/src/samples/java/com/azure/ai/voicelive/AgentV2Sample.java @@ -28,6 +28,9 @@ import com.azure.ai.voicelive.models.VoiceLiveSessionOptions; import com.azure.core.util.BinaryData; import com.azure.identity.DefaultAzureCredentialBuilder; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.core.publisher.Sinks; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioSystem; @@ -57,6 +60,15 @@ * using AgentSessionConfig, rather than as a tool in the session. This allows the agent to be * the primary responder for the voice session.
* + * <p>Use this sample when you already have an Azure AI Foundry agent and want VoiceLive to talk to + * that agent directly instead of registering local tools or writing response orchestration logic in + * the sample itself.
+ * + * <p>When you run it, the sample creates an {@code AgentSessionConfig}, opens a realtime session, + * sends the session configuration, waits for the service to report the session as ready, and then + * starts full-duplex microphone / speaker streaming while also writing a simple conversation log to + * the local {@code logs} directory.
+ * * <p>Features demonstrated:
* <p>Use this sample when you want to understand downstream audio playback only. It is a good next + * step after the basic sample because it avoids microphone capture and focuses on speaker output.
+ * + * <p>When you run it, the sample sends a fixed text prompt, asks the model to generate an audio + * response, and plays the returned PCM audio through your default speaker or headphones.
+ * * <p>This sample shows how to:
* <p>Environment Variables Required:
* <p>Audio Requirements:
@@ -87,12 +96,11 @@ public final class AudioPlaybackSample { * @param args Unused command line arguments */ public static void main(String[] args) { - // Get credentials from environment variables + // Get endpoint from environment variable String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); - String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); - if (endpoint == null || apiKey == null) { - System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + if (endpoint == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT environment variable"); return; } @@ -102,10 +110,12 @@ public static void main(String[] args) { return; } - // Create the VoiceLive client + // Create the VoiceLive client using DefaultAzureCredential (recommended). + // To use an API key instead: + // .credential(new KeyCredential(System.getenv("AZURE_VOICELIVE_API_KEY"))) VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) - .credential(new KeyCredential(apiKey)) + .credential(new DefaultAzureCredentialBuilder().build()) .buildAsyncClient(); System.out.println("Starting audio playback sample..."); @@ -123,51 +133,57 @@ public static void main(String[] args) { .setInputAudioSamplingRate(SAMPLE_RATE); // Audio playback components - final BlockingQueueUse this sample first when you are not sure whether connection failures are caused by auth or + * by session logic. It gives you a small, low-noise way to verify that your chosen credential can + * open a VoiceLive session successfully.
+ * + * <p>When you run it, the sample builds a client with either API key auth or token-based auth, + * opens a short-lived session, applies a minimal configuration, prints a few server events, and + * then exits after a brief validation window.
+ * * <p>This sample shows two authentication approaches:
* <p>Start here if you're new to VoiceLive! This is the simplest sample showing core concepts.
* + * <p>Use this sample to verify that your endpoint, credential, and basic realtime session setup are + * working before you add microphone input, speaker output, tool calls, or tracing.
+ * + * <p>When you run it, the sample opens a realtime session, sends a minimal {@code session.update} + * request, and prints the server events that come back so you can see the connection lifecycle end + * to end.
+ * * <p>This sample shows the simplest way to:
* <p>Environment Variables Required:
* <p>How to Run:
@@ -56,19 +65,20 @@ public final class BasicVoiceConversationSample { * @param args Unused command line arguments */ public static void main(String[] args) { - // Get credentials from environment variables + // Get endpoint from environment variable String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); - String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); - if (endpoint == null || apiKey == null) { - System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + if (endpoint == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT environment variable"); return; } - // Create the VoiceLive client + // Create the VoiceLive client using DefaultAzureCredential (recommended). + // To use an API key instead: + // .credential(new KeyCredential(System.getenv("AZURE_VOICELIVE_API_KEY"))) VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) - .credential(new KeyCredential(apiKey)) + .credential(new DefaultAzureCredentialBuilder().build()) .buildAsyncClient(); System.out.println("Starting basic voice conversation..."); @@ -92,12 +102,19 @@ public static void main(String[] args) { // Send session configuration, then listen for events. ClientEventSessionUpdate updateEvent = new ClientEventSessionUpdate(sessionOptions); - return session.sendEvent(updateEvent) - .doOnSuccess(v -> System.out.println("ā Session configured")) - .thenMany(session.receiveEvents() - .doOnNext(event -> handleEvent(event)) - .doOnError(error -> System.err.println("Error: " + error.getMessage())) - .doOnComplete(() -> System.out.println("Event stream completed"))) + Sinks.OneUse this sample when you want the model to call your local business logic instead of only + * producing text or audio. It is the best sample for understanding how tool schemas, tool calls, + * and tool outputs fit into a realtime conversation.
+ * + * <p>When you run it, the sample registers a small set of demo functions, waits for the model to + * request them, executes the matching Java method locally, sends the result back to the session, + * and then continues the conversation with the tool output in context.
+ * * <p>This sample shows how to:
* <p>Environment Variables Required:
* <p>How to Run:
@@ -90,7 +100,6 @@ public final class FunctionCallingSample { // Service configuration private static final String DEFAULT_MODEL = "gpt-realtime"; private static final String ENV_ENDPOINT = "AZURE_VOICELIVE_ENDPOINT"; - private static final String ENV_API_KEY = "AZURE_VOICELIVE_API_KEY"; // Audio format constants private static final int SAMPLE_RATE = 24000; @@ -111,29 +120,24 @@ private FunctionCallingSample() { public static void main(String[] args) { // Load configuration String endpoint = System.getenv(ENV_ENDPOINT); - String apiKey = System.getenv(ENV_API_KEY); if (endpoint == null || endpoint.isEmpty()) { System.err.println("Error: AZURE_VOICELIVE_ENDPOINT environment variable is not set."); System.exit(1); } - if (apiKey == null || apiKey.isEmpty()) { - System.err.println("Error: AZURE_VOICELIVE_API_KEY environment variable is not set."); - System.exit(1); - } - String separator = new String(new char[70]).replace("\0", "="); System.out.println(separator); System.out.println("š¤ļø Voice Assistant with Function Calling - Azure VoiceLive SDK"); System.out.println(separator); try { - // Create client - KeyCredential credential = new KeyCredential(apiKey); + // Create client using DefaultAzureCredential (recommended). + // To use an API key instead: + // .credential(new KeyCredential(System.getenv("AZURE_VOICELIVE_API_KEY"))) VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) - .credential(credential) + .credential(new DefaultAzureCredentialBuilder().build()) .buildAsyncClient(); runFunctionCallingSession(client); @@ -170,43 +174,50 @@ private static void runFunctionCallingSession(VoiceLiveAsyncClient client) throw // Send session configuration with function tools, then listen for events. 
System.out.println("š¤ Sending session configuration with function tools..."); ClientEventSessionUpdate sessionConfig = createSessionConfigWithFunctions(); - return session.sendEvent(sessionConfig) - .doOnSuccess(v -> { - System.out.println("ā Session configured with function tools"); - - // Start audio playback - audioProcessor.startPlayback(); - - String separator = new String(new char[70]).replace("\0", "="); - System.out.println("\n" + separator); - System.out.println("š¤ VOICE ASSISTANT WITH FUNCTION CALLING READY"); - System.out.println("Try saying:"); - System.out.println(" ⢠'What's the current time?'"); - System.out.println(" ⢠'What's the weather in Seattle?'"); - System.out.println(" ⢠'What time is it in UTC?'"); - System.out.println("Press Ctrl+C to exit"); - System.out.println(separator + "\n"); - - // Add shutdown hook - Runtime.getRuntime().addShutdownHook(new Thread(() -> { - System.out.println("\nš Shutting down voice assistant..."); - running.set(false); - audioProcessor.cleanup(); - try { - session.closeAsync().block(Duration.ofSeconds(5)); - } catch (Exception e) { - // Suppress errors during forced JVM shutdown - } - })); + Sinks.OneUse this sample when your assistant needs tools that live outside the current process, such as + * documentation search or repo analysis exposed through an MCP server. It is the right sample for + * learning approval flows and external tool execution.
+ * + *When you run it, the sample configures one or more MCP-backed tools, starts a voice session, + * listens for MCP call and approval events, and forwards the user's choices and tool outputs back + * into the realtime conversation.
+ * *This sample shows how to:
*Environment Variables Required:
*How to Run:
@@ -83,7 +93,6 @@ public final class MCPSample { // Service configuration private static final String DEFAULT_MODEL = "gpt-realtime"; private static final String ENV_ENDPOINT = "AZURE_VOICELIVE_ENDPOINT"; - private static final String ENV_API_KEY = "AZURE_VOICELIVE_API_KEY"; // Audio format constants private static final int SAMPLE_RATE = 24000; @@ -109,15 +118,8 @@ public static void main(String[] args) { System.exit(1); } - String apiKey = System.getenv(ENV_API_KEY); - if (apiKey == null || apiKey.trim().isEmpty()) { - System.err.println("ā Error: No API key provided"); - System.err.println("Please set the " + ENV_API_KEY + " environment variable."); - System.exit(1); - } - try { - runMCPSample(endpoint, apiKey); + runMCPSample(endpoint); } catch (Exception e) { System.err.println("ā Error: " + e.getMessage()); e.printStackTrace(); @@ -128,21 +130,22 @@ public static void main(String[] args) { /** * Run the MCP sample. */ - private static void runMCPSample(String endpoint, String apiKey) { + private static void runMCPSample(String endpoint) { System.out.println("š Connecting to VoiceLive API with MCP support..."); System.out.println("š” Endpoint: " + endpoint); System.out.println("š¤ Model: " + DEFAULT_MODEL); - KeyCredential credential = new KeyCredential(apiKey); AtomicReferenceUse this sample when you want to validate microphone capture and upstream audio streaming + * without the extra moving parts of local speaker playback or function/tool integration.
+ * + * <p>When you run it, the sample opens a realtime session, starts reading PCM audio from your + * default microphone, streams that audio to the service, and prints speech / response events so + * you can confirm the service is receiving your input.
+ * * <p>This sample shows how to:
* <p>Environment Variables Required:
* <p>Audio Requirements:
@@ -74,12 +84,11 @@ public final class MicrophoneInputSample { * @param args Unused command line arguments */ public static void main(String[] args) { - // Get credentials from environment variables + // Get endpoint from environment variable String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); - String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); - if (endpoint == null || apiKey == null) { - System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + if (endpoint == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT environment variable"); return; } @@ -89,10 +98,12 @@ public static void main(String[] args) { return; } - // Create the VoiceLive client + // Create the VoiceLive client using DefaultAzureCredential (recommended). + // To use an API key instead: + // .credential(new KeyCredential(System.getenv("AZURE_VOICELIVE_API_KEY"))) VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) - .credential(new KeyCredential(apiKey)) + .credential(new DefaultAzureCredentialBuilder().build()) .buildAsyncClient(); System.out.println("Starting microphone input sample..."); @@ -108,7 +119,8 @@ public static void main(String[] args) { .setInputAudioSamplingRate(SAMPLE_RATE); final AtomicBoolean isCapturing = new AtomicBoolean(false); - final TargetDataLine[] microphoneRef = new TargetDataLine[1]; + final AtomicReferenceNOTE: This is a comprehensive sample showing all features together. * For easier understanding, see these focused samples:
+ * + * <p>Use this sample when you want the closest thing to an end-to-end assistant experience in this + * package. It combines session configuration, microphone capture, speaker playback, interruption + * handling, and authentication in one place.
+ * + * <p>When you run it, the sample opens a realtime session, configures the assistant, starts local + * playback, waits for the session to be ready, and then begins streaming microphone audio while + * playing the model's responses through your speakers.
* <p>Environment Variables Required:
* <p>Audio Requirements:
@@ -78,11 +88,11 @@ * * <p>How to Run:
* <pre>{@code
- * # With API Key (default):
+ * # With DefaultAzureCredential (default):
* mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test
*
- * # With Token Credential:
- * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test -Dexec.args="--use-token-credential"
+ * # With API Key (requires AZURE_VOICELIVE_API_KEY):
+ * mvn exec:java -Dexec.mainClass="com.azure.ai.voicelive.VoiceAssistantSample" -Dexec.classpathScope=test -Dexec.args="--use-api-key"
* }</pre>
*/
public final class VoiceAssistantSample {
@@ -136,12 +146,14 @@ private static class AudioProcessor {
private final AudioFormat audioFormat;
// Audio capture components
- private TargetDataLine microphone;
+ // volatile: written by reactor thread (startCapture), read/closed by shutdown-hook thread
+ private volatile TargetDataLine microphone;
private final AtomicBoolean isCapturing = new AtomicBoolean(false);
// Audio playback components
- private SourceDataLine speaker;
- private final BlockingQueueSupports two authentication methods:
* <p>Use this sample when you want to confirm that the VoiceLive client emits OpenTelemetry spans + * automatically without adding manual tracing calls around each SDK operation.
+ * + * <p>When you run it, the sample registers a simple console span exporter, opens a short text-only + * VoiceLive session, waits for one model response to complete, closes the session, and then flushes + * the spans so you can inspect the emitted telemetry immediately.
+ * * <p>This sample registers a global OpenTelemetry instance with * {@code OpenTelemetrySdk.builder().buildAndRegisterGlobal()}. The VoiceLive client picks it * up automatically via {@code GlobalOpenTelemetry.getOrNoop()}.
@@ -36,7 +45,7 @@ * <p>Environment Variables Required:
* <p>How to Run:
@@ -48,10 +57,9 @@ public final class GlobalTracingSample { public static void main(String[] args) throws InterruptedException { String endpoint = System.getenv("AZURE_VOICELIVE_ENDPOINT"); - String apiKey = System.getenv("AZURE_VOICELIVE_API_KEY"); - if (endpoint == null || apiKey == null) { - System.err.println("Please set AZURE_VOICELIVE_ENDPOINT and AZURE_VOICELIVE_API_KEY environment variables"); + if (endpoint == null) { + System.err.println("Please set AZURE_VOICELIVE_ENDPOINT environment variable"); return; } @@ -68,9 +76,11 @@ public static void main(String[] args) throws InterruptedException { System.out.println("GlobalOpenTelemetry registered (console exporter)"); // 2. Build client ā it picks up GlobalOpenTelemetry automatically. + // Uses DefaultAzureCredential (recommended). To use an API key instead: + // .credential(new KeyCredential(System.getenv("AZURE_VOICELIVE_API_KEY"))) VoiceLiveAsyncClient client = new VoiceLiveClientBuilder() .endpoint(endpoint) - .credential(new KeyCredential(apiKey)) + .credential(new DefaultAzureCredentialBuilder().build()) .buildAsyncClient(); System.out.println("Starting voice session (automatic tracing)..."); @@ -86,12 +96,21 @@ public static void main(String[] args) throws InterruptedException { // Configure the session, trigger a response, then wait for response.done. // Uses a single reactive chain: send config ā start response ā wait for done ā close. - return session.sendEvent(new ClientEventSessionUpdate(options)) - .then(session.startResponse()) - .thenMany(session.receiveEvents() - .doOnNext(event -> System.out.println("Event: " + event.getType())) - .filter(event -> event instanceof SessionUpdateResponseDone) - .take(1)) + Sinks.One