From 74bfb0d6c2c539c05b98b88f4446a96642e5a952 Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 11:09:54 -0700 Subject: [PATCH 01/12] azure-ai-voicelive 1.2.0 GA: Update API version to 2026-04-10 with new features - Web Search & File Search support (ResponseWebSearchCallItem, ResponseFileSearchCallItem) - Avatar enhancements (voice sync, idle/speaking states, video delta, output buffer) - Transcription improvements (TranscriptionPhrase, TranscriptionWord, new models) - New SessionIncludeOption enum - Personal voice model updates (DragonHDOmniLatestNeural, MAI-Voice-1) - Fix ServerEvent.deserialize -> _deserialize for new model_base - Update tests for new models, enums, and breaking changes - Consolidate CHANGELOG for 1.2.0 GA release --- sdk/voicelive/azure-ai-voicelive/CHANGELOG.md | 58 +- sdk/voicelive/azure-ai-voicelive/MANIFEST.in | 4 +- .../azure-ai-voicelive/_metadata.json | 4 +- .../apiview-properties.json | 79 +- .../azure-ai-voicelive/azure/_types.py | 15 + .../azure/_utils/__init__.py | 6 + .../azure/_utils/model_base.py | 1441 ++++++++++++ .../azure/_utils/serialization.py | 2041 +++++++++++++++++ .../azure-ai-voicelive/azure/_version.py | 9 + .../azure/ai/voicelive/aio/_patch.py | 5 +- .../azure/ai/voicelive/models/__init__.py | 46 + .../azure/ai/voicelive/models/_enums.py | 47 +- .../azure/ai/voicelive/models/_models.py | 1664 +++++++++++++- .../telemetry/_voicelive_instrumentor.py | 55 +- .../azure-ai-voicelive/azure/py.typed | 1 + .../azure-ai-voicelive/pyproject.toml | 9 +- .../samples/basic_voice_assistant_async.py | 3 + .../tests/test_live_realtime_service.py | 30 +- .../tests/test_unit_enums.py | 65 +- .../tests/test_unit_models.py | 106 + .../tests/test_unit_smoke_validation.py | 10 + .../tests/test_unit_telemetry.py | 62 +- .../tests/test_unit_voice_config.py | 9 +- 23 files changed, 5539 insertions(+), 230 deletions(-) create mode 100644 sdk/voicelive/azure-ai-voicelive/azure/_types.py create mode 100644 
sdk/voicelive/azure-ai-voicelive/azure/_utils/__init__.py create mode 100644 sdk/voicelive/azure-ai-voicelive/azure/_utils/model_base.py create mode 100644 sdk/voicelive/azure-ai-voicelive/azure/_utils/serialization.py create mode 100644 sdk/voicelive/azure-ai-voicelive/azure/_version.py create mode 100644 sdk/voicelive/azure-ai-voicelive/azure/py.typed diff --git a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md index 2a3baa1d7ae9..26924c94f63d 100644 --- a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md +++ b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md @@ -1,15 +1,69 @@ # Release History -## 1.2.0b6 (Unreleased) +## 1.2.0 (Unreleased) ### Features Added +- **Web Search & File Search**: Added support for built-in web search and file search tools: + - New item types: `ResponseWebSearchCallItem`, `ResponseFileSearchCallItem` + - New server events for web/file search lifecycle (`searching`, `in_progress`, `completed`) + - New models: `ActionFind`, `ActionOpenPage`, `ActionSearch`, `ActionSearchSource`, `FileSearchResult` + - New enum values: `ItemType.WEB_SEARCH_CALL`, `ItemType.FILE_SEARCH_CALL` + - New `SessionIncludeOption` enum for controlling what data is included in session responses +- **MCP (Model Context Protocol) Support**: Added comprehensive support for Model Context Protocol integration: + - `MCPServer` tool type for defining MCP server configurations with authorization, headers, and approval requirements + - `MCPTool` model for representing MCP tool definitions with input schemas and annotations + - `MCPApprovalType` enum for controlling approval workflows (`never`, `always`, or tool-specific) + - New item types for MCP approval and call workflows + - New server events for MCP tool listing, call lifecycle, and approval flows +- **Avatar Enhancements**: + - Added `AzureAvatarVoiceSyncVoice` for avatar voice sync configuration + - Added `ServerEventSessionAvatarSwitchToIdle` and 
`ServerEventSessionAvatarSwitchToSpeaking` events + - Added `ServerEventResponseVideoDelta` for avatar video frame streaming + - Added `ClientEventOutputAudioBufferClear` and `ServerEventOutputAudioBufferCleared` for output buffer management + - Added `AvatarConfigTypes` enum with support for `video-avatar` and `photo-avatar` types + - Added `AvatarOutputProtocol` enum for avatar streaming protocols (`webrtc`, `websocket`) + - Added `Scene` model for controlling avatar zoom, position, rotation, and movement amplitude + - Added `output_audit_audio` field to `AvatarConfig` +- **OpenTelemetry Tracing Support**: Added `VoiceLiveInstrumentor` for opt-in OpenTelemetry-based + tracing of VoiceLive WebSocket connections, following Azure SDK and GenAI semantic conventions. + - Enable via `AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true` environment variable + - Content recording controlled by `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` + - Comprehensive session-level telemetry: session ID, audio format, first-token latency, + turn count, interruption count, audio bytes sent/received, message size + - Response & function call ID tracking for end-to-end tracing + - Agent v2 telemetry with agent identity and configuration tracking + - MCP telemetry with tool call and approval flow tracking +- **Agent Session Configuration**: Added `AgentSessionConfig` for configuring Azure AI Foundry agents + at connection time with `agent_name`, `project_name`, `agent_version`, `conversation_id`, and more +- **Transcription Improvements**: + - Added `TranscriptionPhrase` and `TranscriptionWord` models for detailed transcription data + - Added `ServerEventResponseAudioTranscriptAnnotationAdded` event + - Added `gpt-4o-transcribe-diarize` and `mai-transcribe-1` transcription model support +- **Interim Response Configuration**: Added `StaticInterimResponseConfig` and `LlmInterimResponseConfig` + for generating interim responses during latency or tool calls +- **Image Content Support**: 
Added `RequestImageContentPart` for image inputs in conversations +- **Reasoning Effort Control**: Added `reasoning_effort` field with `ReasoningEffort` enum +- **Response Metadata**: Added `metadata` field to `Response` and `ResponseCreateParams` +- **Server Warning Events**: Added `ServerEventWarning` for handling non-fatal warnings +- **Personal Voice Models**: Added `DragonHDOmniLatestNeural` and `MAI-Voice-1` model options +- **Enhanced OpenAI Voices**: Added `marin` and `cedar` voices to `OpenAIVoiceName` enum +- **Enhanced Azure Personal Voice**: Added `custom_lexicon_url`, `prefer_locales`, `locale`, `style`, + `pitch`, `rate`, and `volume` properties +- **Pre-generated Assistant Messages**: Added `pre_generated_assistant_message` in `ResponseCreateParams` +- **Explicit Null Values**: Enhanced `RequestSession` to properly serialize explicitly set `None` values + ### Breaking Changes -### Bugs Fixed +- Removed `PersonalVoiceModels.PHOENIX_V2_NEURAL` enum value (replaced by `DRAGON_HD_OMNI_LATEST_NEURAL` + and `MAI_VOICE1`) +- Removed Foundry Agent Tool classes (`FoundryAgentTool`, `ResponseFoundryAgentCallItem`, etc.) 
— + use `AgentSessionConfig` with `connect()` instead ### Other Changes +- Updated default API version to `2026-04-10` + ## 1.2.0b5 (2026-04-06) ### Features Added diff --git a/sdk/voicelive/azure-ai-voicelive/MANIFEST.in b/sdk/voicelive/azure-ai-voicelive/MANIFEST.in index d5dfcd9480c3..b22f2b88bb43 100644 --- a/sdk/voicelive/azure-ai-voicelive/MANIFEST.in +++ b/sdk/voicelive/azure-ai-voicelive/MANIFEST.in @@ -1,7 +1,5 @@ include *.md include LICENSE -include azure/ai/voicelive/py.typed +include azure/py.typed recursive-include tests *.py recursive-include samples *.py *.md -include azure/__init__.py -include azure/ai/__init__.py diff --git a/sdk/voicelive/azure-ai-voicelive/_metadata.json b/sdk/voicelive/azure-ai-voicelive/_metadata.json index 677ee476511a..5786f7fc266c 100644 --- a/sdk/voicelive/azure-ai-voicelive/_metadata.json +++ b/sdk/voicelive/azure-ai-voicelive/_metadata.json @@ -1,6 +1,6 @@ { - "apiVersion": "2026-01-01-preview", + "apiVersion": "2026-04-10", "apiVersions": { - "VoiceLive": "2026-01-01-preview" + "VoiceLive": "2026-04-10" } } \ No newline at end of file diff --git a/sdk/voicelive/azure-ai-voicelive/apiview-properties.json b/sdk/voicelive/azure-ai-voicelive/apiview-properties.json index 6de4f607c504..d2a3979800b9 100644 --- a/sdk/voicelive/azure-ai-voicelive/apiview-properties.json +++ b/sdk/voicelive/azure-ai-voicelive/apiview-properties.json @@ -1,6 +1,10 @@ { "CrossLanguagePackageId": "VoiceLive", "CrossLanguageDefinitionId": { + "azure.ai.voicelive.models.ActionFind": "VoiceLive.ActionFind", + "azure.ai.voicelive.models.ActionOpenPage": "VoiceLive.ActionOpenPage", + "azure.ai.voicelive.models.ActionSearch": "VoiceLive.ActionSearch", + "azure.ai.voicelive.models.ActionSearchSource": "VoiceLive.ActionSearchSource", "azure.ai.voicelive.models.AgentConfig": "VoiceLive.AgentConfig", "azure.ai.voicelive.models.Animation": "VoiceLive.Animation", "azure.ai.voicelive.models.ConversationRequestItem": "VoiceLive.ConversationRequestItem", @@ -11,6 
+15,7 @@ "azure.ai.voicelive.models.AudioNoiseReduction": "VoiceLive.AudioNoiseReduction", "azure.ai.voicelive.models.AvatarConfig": "VoiceLive.AvatarConfig", "azure.ai.voicelive.models.AzureVoice": "VoiceLive.AzureVoice", + "azure.ai.voicelive.models.AzureAvatarVoiceSyncVoice": "VoiceLive.AzureAvatarVoiceSyncVoice", "azure.ai.voicelive.models.AzureCustomVoice": "VoiceLive.AzureCustomVoice", "azure.ai.voicelive.models.AzurePersonalVoice": "VoiceLive.AzurePersonalVoice", "azure.ai.voicelive.models.EouDetection": "VoiceLive.EouDetection", @@ -37,6 +42,7 @@ "azure.ai.voicelive.models.ClientEventInputAudioTurnCancel": "VoiceLive.ClientEventInputAudioTurnCancel", "azure.ai.voicelive.models.ClientEventInputAudioTurnEnd": "VoiceLive.ClientEventInputAudioTurnEnd", "azure.ai.voicelive.models.ClientEventInputAudioTurnStart": "VoiceLive.ClientEventInputAudioTurnStart", + "azure.ai.voicelive.models.ClientEventOutputAudioBufferClear": "VoiceLive.ClientEventOutputAudioBufferClear", "azure.ai.voicelive.models.ClientEventResponseCancel": "VoiceLive.ClientEventResponseCancel", "azure.ai.voicelive.models.ClientEventResponseCreate": "VoiceLive.ClientEventResponseCreate", "azure.ai.voicelive.models.ClientEventSessionAvatarConnect": "VoiceLive.ClientEventSessionAvatarConnect", @@ -44,6 +50,7 @@ "azure.ai.voicelive.models.ContentPart": "VoiceLive.ContentPart", "azure.ai.voicelive.models.ConversationItemBase": "VoiceLive.ConversationItemBase", "azure.ai.voicelive.models.ErrorResponse": "VoiceLive.ErrorResponse", + "azure.ai.voicelive.models.FileSearchResult": "VoiceLive.FileSearchResult", "azure.ai.voicelive.models.FunctionCallItem": "VoiceLive.FunctionCallItem", "azure.ai.voicelive.models.FunctionCallOutputItem": "VoiceLive.FunctionCallOutputItem", "azure.ai.voicelive.models.Tool": "VoiceLive.Tool", @@ -73,6 +80,7 @@ "azure.ai.voicelive.models.ResponseCreateParams": "VoiceLive.ResponseCreateParams", "azure.ai.voicelive.models.ResponseFailedDetails": "VoiceLive.ResponseFailedDetails", 
"azure.ai.voicelive.models.ResponseItem": "VoiceLive.ResponseItem", + "azure.ai.voicelive.models.ResponseFileSearchCallItem": "VoiceLive.ResponseFileSearchCallItem", "azure.ai.voicelive.models.ResponseFunctionCallItem": "VoiceLive.ResponseFunctionCallItem", "azure.ai.voicelive.models.ResponseFunctionCallOutputItem": "VoiceLive.ResponseFunctionCallOutputItem", "azure.ai.voicelive.models.ResponseIncompleteDetails": "VoiceLive.ResponseIncompleteDetails", @@ -83,6 +91,7 @@ "azure.ai.voicelive.models.ResponseMessageItem": "VoiceLive.ResponseMessageItem", "azure.ai.voicelive.models.ResponseSession": "VoiceLive.ResponseSession", "azure.ai.voicelive.models.ResponseTextContentPart": "VoiceLive.ResponseTextContentPart", + "azure.ai.voicelive.models.ResponseWebSearchCallItem": "VoiceLive.ResponseWebSearchCallItem", "azure.ai.voicelive.models.Scene": "VoiceLive.Scene", "azure.ai.voicelive.models.ServerEvent": "VoiceLive.ServerEvent", "azure.ai.voicelive.models.ServerEventConversationItemCreated": "VoiceLive.ServerEventConversationItemCreated", @@ -101,6 +110,7 @@ "azure.ai.voicelive.models.ServerEventMcpListToolsCompleted": "VoiceLive.ServerEventMcpListToolsCompleted", "azure.ai.voicelive.models.ServerEventMcpListToolsFailed": "VoiceLive.ServerEventMcpListToolsFailed", "azure.ai.voicelive.models.ServerEventMcpListToolsInProgress": "VoiceLive.ServerEventMcpListToolsInProgress", + "azure.ai.voicelive.models.ServerEventOutputAudioBufferCleared": "VoiceLive.ServerEventOutputAudioBufferCleared", "azure.ai.voicelive.models.ServerEventResponseAnimationBlendshapeDelta": "VoiceLive.ServerEventResponseAnimationBlendshapeDelta", "azure.ai.voicelive.models.ServerEventResponseAnimationBlendshapeDone": "VoiceLive.ServerEventResponseAnimationBlendshapeDone", "azure.ai.voicelive.models.ServerEventResponseAnimationVisemeDelta": "VoiceLive.ServerEventResponseAnimationVisemeDelta", @@ -109,12 +119,16 @@ "azure.ai.voicelive.models.ServerEventResponseAudioDone": 
"VoiceLive.ServerEventResponseAudioDone", "azure.ai.voicelive.models.ServerEventResponseAudioTimestampDelta": "VoiceLive.ServerEventResponseAudioTimestampDelta", "azure.ai.voicelive.models.ServerEventResponseAudioTimestampDone": "VoiceLive.ServerEventResponseAudioTimestampDone", + "azure.ai.voicelive.models.ServerEventResponseAudioTranscriptAnnotationAdded": "VoiceLive.ServerEventResponseAudioTranscriptAnnotationAdded", "azure.ai.voicelive.models.ServerEventResponseAudioTranscriptDelta": "VoiceLive.ServerEventResponseAudioTranscriptDelta", "azure.ai.voicelive.models.ServerEventResponseAudioTranscriptDone": "VoiceLive.ServerEventResponseAudioTranscriptDone", "azure.ai.voicelive.models.ServerEventResponseContentPartAdded": "VoiceLive.ServerEventResponseContentPartAdded", "azure.ai.voicelive.models.ServerEventResponseContentPartDone": "VoiceLive.ServerEventResponseContentPartDone", "azure.ai.voicelive.models.ServerEventResponseCreated": "VoiceLive.ServerEventResponseCreated", "azure.ai.voicelive.models.ServerEventResponseDone": "VoiceLive.ServerEventResponseDone", + "azure.ai.voicelive.models.ServerEventResponseFileSearchCallCompleted": "VoiceLive.ServerEventResponseFileSearchCallCompleted", + "azure.ai.voicelive.models.ServerEventResponseFileSearchCallInProgress": "VoiceLive.ServerEventResponseFileSearchCallInProgress", + "azure.ai.voicelive.models.ServerEventResponseFileSearchCallSearching": "VoiceLive.ServerEventResponseFileSearchCallSearching", "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta", "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDone": "VoiceLive.ServerEventResponseFunctionCallArgumentsDone", "azure.ai.voicelive.models.ServerEventResponseMcpCallArgumentsDelta": "VoiceLive.ServerEventResponseMcpCallArgumentsDelta", @@ -126,7 +140,13 @@ "azure.ai.voicelive.models.ServerEventResponseOutputItemDone": "VoiceLive.ServerEventResponseOutputItemDone", 
"azure.ai.voicelive.models.ServerEventResponseTextDelta": "VoiceLive.ServerEventResponseTextDelta", "azure.ai.voicelive.models.ServerEventResponseTextDone": "VoiceLive.ServerEventResponseTextDone", + "azure.ai.voicelive.models.ServerEventResponseVideoDelta": "VoiceLive.ServerEventResponseVideoDelta", + "azure.ai.voicelive.models.ServerEventResponseWebSearchCallCompleted": "VoiceLive.ServerEventResponseWebSearchCallCompleted", + "azure.ai.voicelive.models.ServerEventResponseWebSearchCallInProgress": "VoiceLive.ServerEventResponseWebSearchCallInProgress", + "azure.ai.voicelive.models.ServerEventResponseWebSearchCallSearching": "VoiceLive.ServerEventResponseWebSearchCallSearching", "azure.ai.voicelive.models.ServerEventSessionAvatarConnecting": "VoiceLive.ServerEventSessionAvatarConnecting", + "azure.ai.voicelive.models.ServerEventSessionAvatarSwitchToIdle": "VoiceLive.ServerEventSessionAvatarSwitchToIdle", + "azure.ai.voicelive.models.ServerEventSessionAvatarSwitchToSpeaking": "VoiceLive.ServerEventSessionAvatarSwitchToSpeaking", "azure.ai.voicelive.models.ServerEventSessionCreated": "VoiceLive.ServerEventSessionCreated", "azure.ai.voicelive.models.ServerEventSessionUpdated": "VoiceLive.ServerEventSessionUpdated", "azure.ai.voicelive.models.ServerEventWarning": "VoiceLive.ServerEventWarning", @@ -138,38 +158,41 @@ "azure.ai.voicelive.models.TokenUsage": "VoiceLive.TokenUsage", "azure.ai.voicelive.models.ToolChoiceSelection": "VoiceLive.ToolChoiceObject", "azure.ai.voicelive.models.ToolChoiceFunctionSelection": "VoiceLive.ToolChoiceFunctionObject", + "azure.ai.voicelive.models.TranscriptionPhrase": "VoiceLive.TranscriptionPhrase", + "azure.ai.voicelive.models.TranscriptionWord": "VoiceLive.TranscriptionWord", "azure.ai.voicelive.models.UserMessageItem": "VoiceLive.UserMessageItem", "azure.ai.voicelive.models.VideoCrop": "VoiceLive.VideoCrop", "azure.ai.voicelive.models.VideoParams": "VoiceLive.VideoParams", "azure.ai.voicelive.models.VideoResolution": 
"VoiceLive.VideoResolution", "azure.ai.voicelive.models.VoiceLiveErrorDetails": "VoiceLive.VoiceLiveErrorDetails", - "azure.ai.voicelive.models.ClientEventType": "VoiceLive.ClientEventType", - "azure.ai.voicelive.models.ItemType": "VoiceLive.ItemType", - "azure.ai.voicelive.models.ItemParamStatus": "VoiceLive.ItemParamStatus", - "azure.ai.voicelive.models.MessageRole": "VoiceLive.MessageRole", - "azure.ai.voicelive.models.ContentPartType": "VoiceLive.ContentPartType", - "azure.ai.voicelive.models.Modality": "VoiceLive.Modality", - "azure.ai.voicelive.models.OpenAIVoiceName": "VoiceLive.OAIVoice", - "azure.ai.voicelive.models.AzureVoiceType": "VoiceLive.AzureVoiceType", - "azure.ai.voicelive.models.PersonalVoiceModels": "VoiceLive.PersonalVoiceModels", - "azure.ai.voicelive.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat", - "azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType", - "azure.ai.voicelive.models.MCPApprovalType": "VoiceLive.MCPApprovalType", - "azure.ai.voicelive.models.ReasoningEffort": "VoiceLive.ReasoningEffort", - "azure.ai.voicelive.models.AnimationOutputType": "VoiceLive.AnimationOutputType", - "azure.ai.voicelive.models.InputAudioFormat": "VoiceLive.InputAudioFormat", - "azure.ai.voicelive.models.TurnDetectionType": "VoiceLive.TurnDetectionType", - "azure.ai.voicelive.models.EouThresholdLevel": "VoiceLive.EouThresholdLevel", - "azure.ai.voicelive.models.AvatarConfigTypes": "VoiceLive.AvatarConfigTypes", - "azure.ai.voicelive.models.PhotoAvatarBaseModes": "VoiceLive.PhotoAvatarBaseModes", - "azure.ai.voicelive.models.AvatarOutputProtocol": "VoiceLive.AvatarOutputProtocol", - "azure.ai.voicelive.models.AudioTimestampType": "VoiceLive.AudioTimestampType", - "azure.ai.voicelive.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral", - "azure.ai.voicelive.models.InterimResponseConfigType": "VoiceLive.InterimResponseConfigType", - "azure.ai.voicelive.models.InterimResponseTrigger": "VoiceLive.InterimResponseTrigger", - 
"azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus", - "azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus", - "azure.ai.voicelive.models.RequestImageContentPartDetail": "VoiceLive.RequestImageContentPartDetail", - "azure.ai.voicelive.models.ServerEventType": "VoiceLive.ServerEventType" + "azure.models.ClientEventType": "VoiceLive.ClientEventType", + "azure.models.ItemType": "VoiceLive.ItemType", + "azure.models.ItemParamStatus": "VoiceLive.ItemParamStatus", + "azure.models.MessageRole": "VoiceLive.MessageRole", + "azure.models.ContentPartType": "VoiceLive.ContentPartType", + "azure.models.Modality": "VoiceLive.Modality", + "azure.models.OpenAIVoiceName": "VoiceLive.OAIVoice", + "azure.models.AzureVoiceType": "VoiceLive.AzureVoiceType", + "azure.models.PersonalVoiceModels": "VoiceLive.PersonalVoiceModels", + "azure.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat", + "azure.models.ToolType": "VoiceLive.ToolType", + "azure.models.MCPApprovalType": "VoiceLive.MCPApprovalType", + "azure.models.ReasoningEffort": "VoiceLive.ReasoningEffort", + "azure.models.InterimResponseConfigType": "VoiceLive.InterimResponseConfigType", + "azure.models.InterimResponseTrigger": "VoiceLive.InterimResponseTrigger", + "azure.models.AnimationOutputType": "VoiceLive.AnimationOutputType", + "azure.models.InputAudioFormat": "VoiceLive.InputAudioFormat", + "azure.models.TurnDetectionType": "VoiceLive.TurnDetectionType", + "azure.models.EouThresholdLevel": "VoiceLive.EouThresholdLevel", + "azure.models.AvatarConfigTypes": "VoiceLive.AvatarConfigTypes", + "azure.models.PhotoAvatarBaseModes": "VoiceLive.PhotoAvatarBaseModes", + "azure.models.AvatarOutputProtocol": "VoiceLive.AvatarOutputProtocol", + "azure.models.AudioTimestampType": "VoiceLive.AudioTimestampType", + "azure.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral", + "azure.models.SessionIncludeOption": "VoiceLive.SessionIncludeOption", + "azure.models.ResponseStatus": 
"VoiceLive.ResponseStatus", + "azure.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus", + "azure.models.RequestImageContentPartDetail": "VoiceLive.RequestImageContentPartDetail", + "azure.models.ServerEventType": "VoiceLive.ServerEventType" } } \ No newline at end of file diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_types.py b/sdk/voicelive/azure-ai-voicelive/azure/_types.py new file mode 100644 index 000000000000..11b6be01d6e2 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/azure/_types.py @@ -0,0 +1,15 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- + +from typing import TYPE_CHECKING, Union + +if TYPE_CHECKING: + from .ai.voicelive import models as _models +Voice = Union[str, "_models.OpenAIVoiceName", "_models.OpenAIVoice", "_models.AzureVoice"] +InterimResponseConfig = Union["_models.StaticInterimResponseConfig", "_models.LlmInterimResponseConfig"] +ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceSelection"] diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_utils/__init__.py b/sdk/voicelive/azure-ai-voicelive/azure/_utils/__init__.py new file mode 100644 index 000000000000..8026245c2abc --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/azure/_utils/__init__.py @@ -0,0 +1,6 @@ +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. 
+# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_utils/model_base.py b/sdk/voicelive/azure-ai-voicelive/azure/_utils/model_base.py new file mode 100644 index 000000000000..db24930fdca9 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/azure/_utils/model_base.py @@ -0,0 +1,1441 @@ +# pylint: disable=line-too-long,useless-suppression,too-many-lines +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. +# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- +# pylint: disable=protected-access, broad-except + +import copy +import calendar +import decimal +import functools +import sys +import logging +import base64 +import re +import typing +import enum +import email.utils +from datetime import datetime, date, time, timedelta, timezone +from json import JSONEncoder +import xml.etree.ElementTree as ET +from collections.abc import MutableMapping +from typing_extensions import Self +import isodate +from azure.core.exceptions import DeserializationError +from azure.core import CaseInsensitiveEnumMeta +from azure.core.pipeline import PipelineResponse +from azure.core.serialization import _Null +from azure.core.rest import HttpResponse + +_LOGGER = logging.getLogger(__name__) + +__all__ = ["SdkJSONEncoder", "Model", "rest_field", "rest_discriminator"] + +TZ_UTC = timezone.utc +_T = typing.TypeVar("_T") +_NONE_TYPE = type(None) + + +def _timedelta_as_isostr(td: timedelta) -> str: + """Converts a datetime.timedelta object into an ISO 8601 formatted string, 
e.g. 'P4DT12H30M05S' + + Function adapted from the Tin Can Python project: https://github.com/RusticiSoftware/TinCanPython + + :param timedelta td: The timedelta to convert + :rtype: str + :return: ISO8601 version of this timedelta + """ + + # Split seconds to larger units + seconds = td.total_seconds() + minutes, seconds = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + days, hours = divmod(hours, 24) + + days, hours, minutes = list(map(int, (days, hours, minutes))) + seconds = round(seconds, 6) + + # Build date + date_str = "" + if days: + date_str = "%sD" % days + + if hours or minutes or seconds: + # Build time + time_str = "T" + + # Hours + bigger_exists = date_str or hours + if bigger_exists: + time_str += "{:02}H".format(hours) + + # Minutes + bigger_exists = bigger_exists or minutes + if bigger_exists: + time_str += "{:02}M".format(minutes) + + # Seconds + try: + if seconds.is_integer(): + seconds_string = "{:02}".format(int(seconds)) + else: + # 9 chars long w/ leading 0, 6 digits after decimal + seconds_string = "%09.6f" % seconds + # Remove trailing zeros + seconds_string = seconds_string.rstrip("0") + except AttributeError: # int.is_integer() raises + seconds_string = "{:02}".format(seconds) + + time_str += "{}S".format(seconds_string) + else: + time_str = "" + + return "P" + date_str + time_str + + +def _serialize_bytes(o, format: typing.Optional[str] = None) -> str: + encoded = base64.b64encode(o).decode() + if format == "base64url": + return encoded.strip("=").replace("+", "-").replace("/", "_") + return encoded + + +def _serialize_datetime(o, format: typing.Optional[str] = None): + if hasattr(o, "year") and hasattr(o, "hour"): + if format == "rfc7231": + return email.utils.format_datetime(o, usegmt=True) + if format == "unix-timestamp": + return int(calendar.timegm(o.utctimetuple())) + + # astimezone() fails for naive times in Python 2.7, so make make sure o is aware (tzinfo is set) + if not o.tzinfo: + iso_formatted = 
o.replace(tzinfo=TZ_UTC).isoformat() + else: + iso_formatted = o.astimezone(TZ_UTC).isoformat() + # Replace the trailing "+00:00" UTC offset with "Z" (RFC 3339: https://www.ietf.org/rfc/rfc3339.txt) + return iso_formatted.replace("+00:00", "Z") + # Next try datetime.date or datetime.time + return o.isoformat() + + +def _is_readonly(p): + try: + return p._visibility == ["read"] + except AttributeError: + return False + + +class SdkJSONEncoder(JSONEncoder): + """A JSON encoder that's capable of serializing datetime objects and bytes.""" + + def __init__(self, *args, exclude_readonly: bool = False, format: typing.Optional[str] = None, **kwargs): + super().__init__(*args, **kwargs) + self.exclude_readonly = exclude_readonly + self.format = format + + def default(self, o): # pylint: disable=too-many-return-statements + if _is_model(o): + if self.exclude_readonly: + readonly_props = [p._rest_name for p in o._attr_to_rest_field.values() if _is_readonly(p)] + return {k: v for k, v in o.items() if k not in readonly_props} + return dict(o.items()) + try: + return super(SdkJSONEncoder, self).default(o) + except TypeError: + if isinstance(o, _Null): + return None + if isinstance(o, decimal.Decimal): + return float(o) + if isinstance(o, (bytes, bytearray)): + return _serialize_bytes(o, self.format) + try: + # First try datetime.datetime + return _serialize_datetime(o, self.format) + except AttributeError: + pass + # Last, try datetime.timedelta + try: + return _timedelta_as_isostr(o) + except AttributeError: + # This will be raised when it hits value.total_seconds in the method above + pass + return super(SdkJSONEncoder, self).default(o) + + +_VALID_DATE = re.compile(r"\d{4}[-]\d{2}[-]\d{2}T\d{2}:\d{2}:\d{2}" + r"\.?\d*Z?[-+]?[\d{2}]?:?[\d{2}]?") +_VALID_RFC7231 = re.compile( + r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s\d{2}\s" + r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{4}\s\d{2}:\d{2}:\d{2}\sGMT" +) + +_ARRAY_ENCODE_MAPPING = { + "pipeDelimited": "|", + 
"spaceDelimited": " ", + "commaDelimited": ",", + "newlineDelimited": "\n", +} + + +def _deserialize_array_encoded(delimit: str, attr): + if isinstance(attr, str): + if attr == "": + return [] + return attr.split(delimit) + return attr + + +def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime: + """Deserialize ISO-8601 formatted string into Datetime object. + + :param str attr: response string to be deserialized. + :rtype: ~datetime.datetime + :returns: The datetime object from that input + """ + if isinstance(attr, datetime): + # i'm already deserialized + return attr + attr = attr.upper() + match = _VALID_DATE.match(attr) + if not match: + raise ValueError("Invalid datetime string: " + attr) + + check_decimal = attr.split(".") + if len(check_decimal) > 1: + decimal_str = "" + for digit in check_decimal[1]: + if digit.isdigit(): + decimal_str += digit + else: + break + if len(decimal_str) > 6: + attr = attr.replace(decimal_str, decimal_str[0:6]) + + date_obj = isodate.parse_datetime(attr) + test_utc = date_obj.utctimetuple() + if test_utc.tm_year > 9999 or test_utc.tm_year < 1: + raise OverflowError("Hit max or min date") + return date_obj # type: ignore[no-any-return] + + +def _deserialize_datetime_rfc7231(attr: typing.Union[str, datetime]) -> datetime: + """Deserialize RFC7231 formatted string into Datetime object. + + :param str attr: response string to be deserialized. + :rtype: ~datetime.datetime + :returns: The datetime object from that input + """ + if isinstance(attr, datetime): + # i'm already deserialized + return attr + match = _VALID_RFC7231.match(attr) + if not match: + raise ValueError("Invalid datetime string: " + attr) + + return email.utils.parsedate_to_datetime(attr) + + +def _deserialize_datetime_unix_timestamp(attr: typing.Union[float, datetime]) -> datetime: + """Deserialize unix timestamp into Datetime object. + + :param str attr: response string to be deserialized. 
+ :rtype: ~datetime.datetime + :returns: The datetime object from that input + """ + if isinstance(attr, datetime): + # i'm already deserialized + return attr + return datetime.fromtimestamp(attr, TZ_UTC) + + +def _deserialize_date(attr: typing.Union[str, date]) -> date: + """Deserialize ISO-8601 formatted string into Date object. + :param str attr: response string to be deserialized. + :rtype: date + :returns: The date object from that input + """ + # This must NOT use defaultmonth/defaultday. Using None ensure this raises an exception. + if isinstance(attr, date): + return attr + return isodate.parse_date(attr, defaultmonth=None, defaultday=None) # type: ignore + + +def _deserialize_time(attr: typing.Union[str, time]) -> time: + """Deserialize ISO-8601 formatted string into time object. + + :param str attr: response string to be deserialized. + :rtype: datetime.time + :returns: The time object from that input + """ + if isinstance(attr, time): + return attr + return isodate.parse_time(attr) # type: ignore[no-any-return] + + +def _deserialize_bytes(attr): + if isinstance(attr, (bytes, bytearray)): + return attr + return bytes(base64.b64decode(attr)) + + +def _deserialize_bytes_base64(attr): + if isinstance(attr, (bytes, bytearray)): + return attr + padding = "=" * (3 - (len(attr) + 3) % 4) # type: ignore + attr = attr + padding # type: ignore + encoded = attr.replace("-", "+").replace("_", "/") + return bytes(base64.b64decode(encoded)) + + +def _deserialize_duration(attr): + if isinstance(attr, timedelta): + return attr + return isodate.parse_duration(attr) + + +def _deserialize_decimal(attr): + if isinstance(attr, decimal.Decimal): + return attr + return decimal.Decimal(str(attr)) + + +def _deserialize_int_as_str(attr): + if isinstance(attr, int): + return attr + return int(attr) + + +_DESERIALIZE_MAPPING = { + datetime: _deserialize_datetime, + date: _deserialize_date, + time: _deserialize_time, + bytes: _deserialize_bytes, + bytearray: _deserialize_bytes, + 
timedelta: _deserialize_duration, + typing.Any: lambda x: x, + decimal.Decimal: _deserialize_decimal, +} + +_DESERIALIZE_MAPPING_WITHFORMAT = { + "rfc3339": _deserialize_datetime, + "rfc7231": _deserialize_datetime_rfc7231, + "unix-timestamp": _deserialize_datetime_unix_timestamp, + "base64": _deserialize_bytes, + "base64url": _deserialize_bytes_base64, +} + + +def get_deserializer(annotation: typing.Any, rf: typing.Optional["_RestField"] = None): + if annotation is int and rf and rf._format == "str": + return _deserialize_int_as_str + if annotation is str and rf and rf._format in _ARRAY_ENCODE_MAPPING: + return functools.partial(_deserialize_array_encoded, _ARRAY_ENCODE_MAPPING[rf._format]) + if rf and rf._format: + return _DESERIALIZE_MAPPING_WITHFORMAT.get(rf._format) + return _DESERIALIZE_MAPPING.get(annotation) # pyright: ignore + + +def _get_type_alias_type(module_name: str, alias_name: str): + types = { + k: v + for k, v in sys.modules[module_name].__dict__.items() + if isinstance(v, typing._GenericAlias) # type: ignore + } + if alias_name not in types: + return alias_name + return types[alias_name] + + +def _get_model(module_name: str, model_name: str): + models = {k: v for k, v in sys.modules[module_name].__dict__.items() if isinstance(v, type)} + module_end = module_name.rsplit(".", 1)[0] + models.update({k: v for k, v in sys.modules[module_end].__dict__.items() if isinstance(v, type)}) + if isinstance(model_name, str): + model_name = model_name.split(".")[-1] + if model_name not in models: + return model_name + return models[model_name] + + +_UNSET = object() + + +class _MyMutableMapping(MutableMapping[str, typing.Any]): + def __init__(self, data: dict[str, typing.Any]) -> None: + self._data = data + + def __contains__(self, key: typing.Any) -> bool: + return key in self._data + + def __getitem__(self, key: str) -> typing.Any: + # If this key has been deserialized (for mutable types), we need to handle serialization + if hasattr(self, 
"_attr_to_rest_field"): + cache_attr = f"_deserialized_{key}" + if hasattr(self, cache_attr): + rf = _get_rest_field(getattr(self, "_attr_to_rest_field"), key) + if rf: + value = self._data.get(key) + if isinstance(value, (dict, list, set)): + # For mutable types, serialize and return + # But also update _data with serialized form and clear flag + # so mutations via this returned value affect _data + serialized = _serialize(value, rf._format) + # If serialized form is same type (no transformation needed), + # return _data directly so mutations work + if isinstance(serialized, type(value)) and serialized == value: + return self._data.get(key) + # Otherwise return serialized copy and clear flag + try: + object.__delattr__(self, cache_attr) + except AttributeError: + pass + # Store serialized form back + self._data[key] = serialized + return serialized + return self._data.__getitem__(key) + + def __setitem__(self, key: str, value: typing.Any) -> None: + # Clear any cached deserialized value when setting through dictionary access + cache_attr = f"_deserialized_{key}" + try: + object.__delattr__(self, cache_attr) + except AttributeError: + pass + self._data.__setitem__(key, value) + + def __delitem__(self, key: str) -> None: + self._data.__delitem__(key) + + def __iter__(self) -> typing.Iterator[typing.Any]: + return self._data.__iter__() + + def __len__(self) -> int: + return self._data.__len__() + + def __ne__(self, other: typing.Any) -> bool: + return not self.__eq__(other) + + def keys(self) -> typing.KeysView[str]: + """ + :returns: a set-like object providing a view on D's keys + :rtype: ~typing.KeysView + """ + return self._data.keys() + + def values(self) -> typing.ValuesView[typing.Any]: + """ + :returns: an object providing a view on D's values + :rtype: ~typing.ValuesView + """ + return self._data.values() + + def items(self) -> typing.ItemsView[str, typing.Any]: + """ + :returns: set-like object providing a view on D's items + :rtype: ~typing.ItemsView + """ 
+ return self._data.items() + + def get(self, key: str, default: typing.Any = None) -> typing.Any: + """ + Get the value for key if key is in the dictionary, else default. + :param str key: The key to look up. + :param any default: The value to return if key is not in the dictionary. Defaults to None + :returns: D[k] if k in D, else d. + :rtype: any + """ + try: + return self[key] + except KeyError: + return default + + @typing.overload + def pop(self, key: str) -> typing.Any: ... # pylint: disable=arguments-differ + + @typing.overload + def pop(self, key: str, default: _T) -> _T: ... # pylint: disable=signature-differs + + @typing.overload + def pop(self, key: str, default: typing.Any) -> typing.Any: ... # pylint: disable=signature-differs + + def pop(self, key: str, default: typing.Any = _UNSET) -> typing.Any: + """ + Removes specified key and return the corresponding value. + :param str key: The key to pop. + :param any default: The value to return if key is not in the dictionary + :returns: The value corresponding to the key. + :rtype: any + :raises KeyError: If key is not found and default is not given. + """ + if default is _UNSET: + return self._data.pop(key) + return self._data.pop(key, default) + + def popitem(self) -> tuple[str, typing.Any]: + """ + Removes and returns some (key, value) pair + :returns: The (key, value) pair. + :rtype: tuple + :raises KeyError: if D is empty. + """ + return self._data.popitem() + + def clear(self) -> None: + """ + Remove all items from D. + """ + self._data.clear() + + def update(self, *args: typing.Any, **kwargs: typing.Any) -> None: # pylint: disable=arguments-differ + """ + Updates D from mapping/iterable E and F. + :param any args: Either a mapping object or an iterable of key-value pairs. + """ + self._data.update(*args, **kwargs) + + @typing.overload + def setdefault(self, key: str, default: None = None) -> None: ... + + @typing.overload + def setdefault(self, key: str, default: typing.Any) -> typing.Any: ... 
# pylint: disable=signature-differs + + def setdefault(self, key: str, default: typing.Any = _UNSET) -> typing.Any: + """ + Same as calling D.get(k, d), and setting D[k]=d if k not found + :param str key: The key to look up. + :param any default: The value to set if key is not in the dictionary + :returns: D[k] if k in D, else d. + :rtype: any + """ + if default is _UNSET: + return self._data.setdefault(key) + return self._data.setdefault(key, default) + + def __eq__(self, other: typing.Any) -> bool: + if isinstance(other, _MyMutableMapping): + return self._data == other._data + try: + other_model = self.__class__(other) + except Exception: + return False + return self._data == other_model._data + + def __repr__(self) -> str: + return str(self._data) + + +def _is_model(obj: typing.Any) -> bool: + return getattr(obj, "_is_model", False) + + +def _serialize(o, format: typing.Optional[str] = None): # pylint: disable=too-many-return-statements + if isinstance(o, list): + if format in _ARRAY_ENCODE_MAPPING and all(isinstance(x, str) for x in o): + return _ARRAY_ENCODE_MAPPING[format].join(o) + return [_serialize(x, format) for x in o] + if isinstance(o, dict): + return {k: _serialize(v, format) for k, v in o.items()} + if isinstance(o, set): + return {_serialize(x, format) for x in o} + if isinstance(o, tuple): + return tuple(_serialize(x, format) for x in o) + if isinstance(o, (bytes, bytearray)): + return _serialize_bytes(o, format) + if isinstance(o, decimal.Decimal): + return float(o) + if isinstance(o, enum.Enum): + return o.value + if isinstance(o, int): + if format == "str": + return str(o) + return o + try: + # First try datetime.datetime + return _serialize_datetime(o, format) + except AttributeError: + pass + # Last, try datetime.timedelta + try: + return _timedelta_as_isostr(o) + except AttributeError: + # This will be raised when it hits value.total_seconds in the method above + pass + return o + + +def _get_rest_field(attr_to_rest_field: dict[str, 
"_RestField"], rest_name: str) -> typing.Optional["_RestField"]: + try: + return next(rf for rf in attr_to_rest_field.values() if rf._rest_name == rest_name) + except StopIteration: + return None + + +def _create_value(rf: typing.Optional["_RestField"], value: typing.Any) -> typing.Any: + if not rf: + return _serialize(value, None) + if rf._is_multipart_file_input: + return value + if rf._is_model: + return _deserialize(rf._type, value) + if isinstance(value, ET.Element): + value = _deserialize(rf._type, value) + return _serialize(value, rf._format) + + +class Model(_MyMutableMapping): + _is_model = True + # label whether current class's _attr_to_rest_field has been calculated + # could not see _attr_to_rest_field directly because subclass inherits it from parent class + _calculated: set[str] = set() + + def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None: + class_name = self.__class__.__name__ + if len(args) > 1: + raise TypeError(f"{class_name}.__init__() takes 2 positional arguments but {len(args) + 1} were given") + dict_to_pass = { + rest_field._rest_name: rest_field._default + for rest_field in self._attr_to_rest_field.values() + if rest_field._default is not _UNSET + } + if args: + if isinstance(args[0], ET.Element): + dict_to_pass.update(self._init_from_xml(args[0])) + else: + dict_to_pass.update( + {k: _create_value(_get_rest_field(self._attr_to_rest_field, k), v) for k, v in args[0].items()} + ) + else: + non_attr_kwargs = [k for k in kwargs if k not in self._attr_to_rest_field] + if non_attr_kwargs: + # actual type errors only throw the first wrong keyword arg they see, so following that. 
+ raise TypeError(f"{class_name}.__init__() got an unexpected keyword argument '{non_attr_kwargs[0]}'") + dict_to_pass.update( + { + self._attr_to_rest_field[k]._rest_name: _create_value(self._attr_to_rest_field[k], v) + for k, v in kwargs.items() + if v is not None + } + ) + super().__init__(dict_to_pass) + + def _init_from_xml(self, element: ET.Element) -> dict[str, typing.Any]: + """Deserialize an XML element into a dict mapping rest field names to values. + + :param ET.Element element: The XML element to deserialize from. + :returns: A dictionary of rest_name to deserialized value pairs. + :rtype: dict + """ + result: dict[str, typing.Any] = {} + model_meta = getattr(self, "_xml", {}) + existed_attr_keys: list[str] = [] + + for rf in self._attr_to_rest_field.values(): + prop_meta = getattr(rf, "_xml", {}) + xml_name = prop_meta.get("name", rf._rest_name) + xml_ns = _resolve_xml_ns(prop_meta, model_meta) + if xml_ns: + xml_name = "{" + xml_ns + "}" + xml_name + + # attribute + if prop_meta.get("attribute", False) and element.get(xml_name) is not None: + existed_attr_keys.append(xml_name) + result[rf._rest_name] = _deserialize(rf._type, element.get(xml_name)) + continue + + # unwrapped element is array + if prop_meta.get("unwrapped", False): + # unwrapped array could either use prop items meta/prop meta + _items_name = prop_meta.get("itemsName") + if _items_name: + xml_name = _items_name + _items_ns = prop_meta.get("itemsNs") + if _items_ns is not None: + xml_ns = _items_ns + if xml_ns: + xml_name = "{" + xml_ns + "}" + xml_name + items = element.findall(xml_name) # pyright: ignore + if len(items) > 0: + existed_attr_keys.append(xml_name) + result[rf._rest_name] = _deserialize(rf._type, items) + elif not rf._is_optional: + existed_attr_keys.append(xml_name) + result[rf._rest_name] = [] + continue + + # text element is primitive type + if prop_meta.get("text", False): + if element.text is not None: + result[rf._rest_name] = _deserialize(rf._type, element.text) + 
continue + + # wrapped element could be normal property or array, it should only have one element + item = element.find(xml_name) + if item is not None: + existed_attr_keys.append(xml_name) + result[rf._rest_name] = _deserialize(rf._type, item) + + # rest thing is additional properties + for e in element: + if e.tag not in existed_attr_keys: + result[e.tag] = _convert_element(e) + + return result + + def copy(self) -> "Model": + return Model(self.__dict__) + + def __new__(cls, *args: typing.Any, **kwargs: typing.Any) -> Self: + if f"{cls.__module__}.{cls.__qualname__}" not in cls._calculated: + # we know the last nine classes in mro are going to be 'Model', '_MyMutableMapping', 'MutableMapping', + # 'Mapping', 'Collection', 'Sized', 'Iterable', 'Container' and 'object' + mros = cls.__mro__[:-9][::-1] # ignore parents, and reverse the mro order + attr_to_rest_field: dict[str, _RestField] = { # map attribute name to rest_field property + k: v for mro_class in mros for k, v in mro_class.__dict__.items() if k[0] != "_" and hasattr(v, "_type") + } + annotations = { + k: v + for mro_class in mros + if hasattr(mro_class, "__annotations__") + for k, v in mro_class.__annotations__.items() + } + for attr, rf in attr_to_rest_field.items(): + rf._module = cls.__module__ + if not rf._type: + rf._type = rf._get_deserialize_callable_from_annotation(annotations.get(attr, None)) + if not rf._rest_name_input: + rf._rest_name_input = attr + cls._attr_to_rest_field: dict[str, _RestField] = dict(attr_to_rest_field.items()) + cls._calculated.add(f"{cls.__module__}.{cls.__qualname__}") + + return super().__new__(cls) + + def __init_subclass__(cls, discriminator: typing.Optional[str] = None) -> None: + for base in cls.__bases__: + if hasattr(base, "__mapping__"): + base.__mapping__[discriminator or cls.__name__] = cls # type: ignore + + @classmethod + def _get_discriminator(cls, exist_discriminators) -> typing.Optional["_RestField"]: + for v in cls.__dict__.values(): + if isinstance(v, 
_RestField) and v._is_discriminator and v._rest_name not in exist_discriminators: + return v + return None + + @classmethod + def _deserialize(cls, data, exist_discriminators): + if not hasattr(cls, "__mapping__"): + return cls(data) + discriminator = cls._get_discriminator(exist_discriminators) + if discriminator is None: + return cls(data) + exist_discriminators.append(discriminator._rest_name) + if isinstance(data, ET.Element): + model_meta = getattr(cls, "_xml", {}) + prop_meta = getattr(discriminator, "_xml", {}) + xml_name = prop_meta.get("name", discriminator._rest_name) + xml_ns = _resolve_xml_ns(prop_meta, model_meta) + if xml_ns: + xml_name = "{" + xml_ns + "}" + xml_name + + if data.get(xml_name) is not None: + discriminator_value = data.get(xml_name) + else: + discriminator_value = data.find(xml_name).text # pyright: ignore + else: + discriminator_value = data.get(discriminator._rest_name) + mapped_cls = cls.__mapping__.get(discriminator_value, cls) # pyright: ignore # pylint: disable=no-member + return mapped_cls._deserialize(data, exist_discriminators) + + def as_dict(self, *, exclude_readonly: bool = False) -> dict[str, typing.Any]: + """Return a dict that can be turned into json using json.dump. + + :keyword bool exclude_readonly: Whether to remove the readonly properties. 
+ :returns: A dict JSON compatible object + :rtype: dict + """ + + result = {} + readonly_props = [] + if exclude_readonly: + readonly_props = [p._rest_name for p in self._attr_to_rest_field.values() if _is_readonly(p)] + for k, v in self.items(): + if exclude_readonly and k in readonly_props: # pyright: ignore + continue + is_multipart_file_input = False + try: + is_multipart_file_input = next( + rf for rf in self._attr_to_rest_field.values() if rf._rest_name == k + )._is_multipart_file_input + except StopIteration: + pass + result[k] = v if is_multipart_file_input else Model._as_dict_value(v, exclude_readonly=exclude_readonly) + return result + + @staticmethod + def _as_dict_value(v: typing.Any, exclude_readonly: bool = False) -> typing.Any: + if v is None or isinstance(v, _Null): + return None + if isinstance(v, (list, tuple, set)): + return type(v)(Model._as_dict_value(x, exclude_readonly=exclude_readonly) for x in v) + if isinstance(v, dict): + return {dk: Model._as_dict_value(dv, exclude_readonly=exclude_readonly) for dk, dv in v.items()} + return v.as_dict(exclude_readonly=exclude_readonly) if hasattr(v, "as_dict") else v + + +def _deserialize_model(model_deserializer: typing.Optional[typing.Callable], obj): + if _is_model(obj): + return obj + return _deserialize(model_deserializer, obj) + + +def _deserialize_with_optional(if_obj_deserializer: typing.Optional[typing.Callable], obj): + if obj is None: + return obj + return _deserialize_with_callable(if_obj_deserializer, obj) + + +def _deserialize_with_union(deserializers, obj): + for deserializer in deserializers: + try: + return _deserialize(deserializer, obj) + except DeserializationError: + pass + raise DeserializationError() + + +def _deserialize_dict( + value_deserializer: typing.Optional[typing.Callable], + module: typing.Optional[str], + obj: dict[typing.Any, typing.Any], +): + if obj is None: + return obj + if isinstance(obj, ET.Element): + obj = {child.tag: child for child in obj} + return {k: 
_deserialize(value_deserializer, v, module) for k, v in obj.items()} + + +def _deserialize_multiple_sequence( + entry_deserializers: list[typing.Optional[typing.Callable]], + module: typing.Optional[str], + obj, +): + if obj is None: + return obj + return type(obj)(_deserialize(deserializer, entry, module) for entry, deserializer in zip(obj, entry_deserializers)) + + +def _is_array_encoded_deserializer(deserializer: functools.partial) -> bool: + return ( + isinstance(deserializer, functools.partial) + and isinstance(deserializer.args[0], functools.partial) + and deserializer.args[0].func == _deserialize_array_encoded # pylint: disable=comparison-with-callable + ) + + +def _deserialize_sequence( + deserializer: typing.Optional[typing.Callable], + module: typing.Optional[str], + obj, +): + if obj is None: + return obj + if isinstance(obj, ET.Element): + obj = list(obj) + + # encoded string may be deserialized to sequence + if isinstance(obj, str) and isinstance(deserializer, functools.partial): + # for list[str] + if _is_array_encoded_deserializer(deserializer): + return deserializer(obj) + + # for list[Union[...]] + if isinstance(deserializer.args[0], list): + for sub_deserializer in deserializer.args[0]: + if _is_array_encoded_deserializer(sub_deserializer): + return sub_deserializer(obj) + + return type(obj)(_deserialize(deserializer, entry, module) for entry in obj) + + +def _sorted_annotations(types: list[typing.Any]) -> list[typing.Any]: + return sorted( + types, + key=lambda x: hasattr(x, "__name__") and x.__name__.lower() in ("str", "float", "int", "bool"), + ) + + +def _get_deserialize_callable_from_annotation( # pylint: disable=too-many-return-statements, too-many-statements, too-many-branches + annotation: typing.Any, + module: typing.Optional[str], + rf: typing.Optional["_RestField"] = None, +) -> typing.Optional[typing.Callable[[typing.Any], typing.Any]]: + if not annotation: + return None + + # is it a type alias? 
+ if isinstance(annotation, str): + if module is not None: + annotation = _get_type_alias_type(module, annotation) + + # is it a forward ref / in quotes? + if isinstance(annotation, (str, typing.ForwardRef)): + try: + model_name = annotation.__forward_arg__ # type: ignore + except AttributeError: + model_name = annotation + if module is not None: + annotation = _get_model(module, model_name) # type: ignore + + try: + if module and _is_model(annotation): + if rf: + rf._is_model = True + + return functools.partial(_deserialize_model, annotation) # pyright: ignore + except Exception: + pass + + # is it a literal? + try: + if annotation.__origin__ is typing.Literal: # pyright: ignore + return None + except AttributeError: + pass + + # is it optional? + try: + if any(a is _NONE_TYPE for a in annotation.__args__): # pyright: ignore + if rf: + rf._is_optional = True + if len(annotation.__args__) <= 2: # pyright: ignore + if_obj_deserializer = _get_deserialize_callable_from_annotation( + next(a for a in annotation.__args__ if a is not _NONE_TYPE), module, rf # pyright: ignore + ) + + return functools.partial(_deserialize_with_optional, if_obj_deserializer) + # the type is Optional[Union[...]], we need to remove the None type from the Union + annotation_copy = copy.copy(annotation) + annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a is not _NONE_TYPE] # pyright: ignore + return _get_deserialize_callable_from_annotation(annotation_copy, module, rf) + except AttributeError: + pass + + # is it union? 
+ if getattr(annotation, "__origin__", None) is typing.Union: + # initial ordering is we make `string` the last deserialization option, because it is often them most generic + deserializers = [ + _get_deserialize_callable_from_annotation(arg, module, rf) + for arg in _sorted_annotations(annotation.__args__) # pyright: ignore + ] + + return functools.partial(_deserialize_with_union, deserializers) + + try: + annotation_name = ( + annotation.__name__ if hasattr(annotation, "__name__") else annotation._name # pyright: ignore + ) + if annotation_name.lower() == "dict": + value_deserializer = _get_deserialize_callable_from_annotation( + annotation.__args__[1], module, rf # pyright: ignore + ) + + return functools.partial( + _deserialize_dict, + value_deserializer, + module, + ) + except (AttributeError, IndexError): + pass + try: + annotation_name = ( + annotation.__name__ if hasattr(annotation, "__name__") else annotation._name # pyright: ignore + ) + if annotation_name.lower() in ["list", "set", "tuple", "sequence"]: + if len(annotation.__args__) > 1: # pyright: ignore + entry_deserializers = [ + _get_deserialize_callable_from_annotation(dt, module, rf) + for dt in annotation.__args__ # pyright: ignore + ] + return functools.partial(_deserialize_multiple_sequence, entry_deserializers, module) + deserializer = _get_deserialize_callable_from_annotation( + annotation.__args__[0], module, rf # pyright: ignore + ) + + return functools.partial(_deserialize_sequence, deserializer, module) + except (TypeError, IndexError, AttributeError, SyntaxError): + pass + + def _deserialize_default( + deserializer, + obj, + ): + if obj is None: + return obj + try: + return _deserialize_with_callable(deserializer, obj) + except Exception: + pass + return obj + + if get_deserializer(annotation, rf): + return functools.partial(_deserialize_default, get_deserializer(annotation, rf)) + + return functools.partial(_deserialize_default, annotation) + + +def _deserialize_with_callable( + 
deserializer: typing.Optional[typing.Callable[[typing.Any], typing.Any]], + value: typing.Any, +): # pylint: disable=too-many-return-statements + try: + if value is None or isinstance(value, _Null): + return None + if isinstance(value, ET.Element): + if deserializer is str: + return value.text or "" + if deserializer is int: + return int(value.text) if value.text else None + if deserializer is float: + return float(value.text) if value.text else None + if deserializer is bool: + return value.text == "true" if value.text else None + if deserializer and deserializer in _DESERIALIZE_MAPPING.values(): + return deserializer(value.text) if value.text else None + if deserializer and deserializer in _DESERIALIZE_MAPPING_WITHFORMAT.values(): + return deserializer(value.text) if value.text else None + if deserializer is None: + return value + if deserializer in [int, float, bool]: + return deserializer(value) + if isinstance(deserializer, CaseInsensitiveEnumMeta): + try: + return deserializer(value.text if isinstance(value, ET.Element) else value) + except ValueError: + # for unknown value, return raw value + return value.text if isinstance(value, ET.Element) else value + if isinstance(deserializer, type) and issubclass(deserializer, Model): + return deserializer._deserialize(value, []) + return typing.cast(typing.Callable[[typing.Any], typing.Any], deserializer)(value) + except Exception as e: + raise DeserializationError() from e + + +def _deserialize( + deserializer: typing.Any, + value: typing.Any, + module: typing.Optional[str] = None, + rf: typing.Optional["_RestField"] = None, + format: typing.Optional[str] = None, +) -> typing.Any: + if isinstance(value, PipelineResponse): + value = value.http_response.json() + if rf is None and format: + rf = _RestField(format=format) + if not isinstance(deserializer, functools.partial): + deserializer = _get_deserialize_callable_from_annotation(deserializer, module, rf) + return _deserialize_with_callable(deserializer, value) + + 
+def _failsafe_deserialize( + deserializer: typing.Any, + response: HttpResponse, + module: typing.Optional[str] = None, + rf: typing.Optional["_RestField"] = None, + format: typing.Optional[str] = None, +) -> typing.Any: + try: + return _deserialize(deserializer, response.json(), module, rf, format) + except Exception: # pylint: disable=broad-except + _LOGGER.warning( + "Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True + ) + return None + + +def _failsafe_deserialize_xml( + deserializer: typing.Any, + response: HttpResponse, +) -> typing.Any: + try: + return _deserialize_xml(deserializer, response.text()) + except Exception: # pylint: disable=broad-except + _LOGGER.warning( + "Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True + ) + return None + + +# pylint: disable=too-many-instance-attributes +class _RestField: + def __init__( + self, + *, + name: typing.Optional[str] = None, + type: typing.Optional[typing.Callable] = None, # pylint: disable=redefined-builtin + is_discriminator: bool = False, + visibility: typing.Optional[list[str]] = None, + default: typing.Any = _UNSET, + format: typing.Optional[str] = None, + is_multipart_file_input: bool = False, + xml: typing.Optional[dict[str, typing.Any]] = None, + ): + self._type = type + self._rest_name_input = name + self._module: typing.Optional[str] = None + self._is_discriminator = is_discriminator + self._visibility = visibility + self._is_model = False + self._is_optional = False + self._default = default + self._format = format + self._is_multipart_file_input = is_multipart_file_input + self._xml = xml if xml is not None else {} + + @property + def _class_type(self) -> typing.Any: + result = getattr(self._type, "args", [None])[0] + # type may be wrapped by nested functools.partial so we need to check for that + if isinstance(result, functools.partial): + return getattr(result, "args", [None])[0] + return result + + 
@property + def _rest_name(self) -> str: + if self._rest_name_input is None: + raise ValueError("Rest name was never set") + return self._rest_name_input + + def __get__(self, obj: Model, type=None): # pylint: disable=redefined-builtin + # by this point, type and rest_name will have a value bc we default + # them in __new__ of the Model class + # Use _data.get() directly to avoid triggering __getitem__ which clears the cache + item = obj._data.get(self._rest_name) + if item is None: + return item + if self._is_model: + return item + + # For mutable types, we want mutations to directly affect _data + # Check if we've already deserialized this value + cache_attr = f"_deserialized_{self._rest_name}" + if hasattr(obj, cache_attr): + # Return the value from _data directly (it's been deserialized in place) + return obj._data.get(self._rest_name) + + deserialized = _deserialize(self._type, _serialize(item, self._format), rf=self) + + # For mutable types, store the deserialized value back in _data + # so mutations directly affect _data + if isinstance(deserialized, (dict, list, set)): + obj._data[self._rest_name] = deserialized + object.__setattr__(obj, cache_attr, True) # Mark as deserialized + return deserialized + + return deserialized + + def __set__(self, obj: Model, value) -> None: + # Clear the cached deserialized object when setting a new value + cache_attr = f"_deserialized_{self._rest_name}" + if hasattr(obj, cache_attr): + object.__delattr__(obj, cache_attr) + + if value is None: + # we want to wipe out entries if users set attr to None + try: + obj.__delitem__(self._rest_name) + except KeyError: + pass + return + if self._is_model: + if not _is_model(value): + value = _deserialize(self._type, value) + obj.__setitem__(self._rest_name, value) + return + obj.__setitem__(self._rest_name, _serialize(value, self._format)) + + def _get_deserialize_callable_from_annotation( + self, annotation: typing.Any + ) -> typing.Optional[typing.Callable[[typing.Any], 
typing.Any]]: + return _get_deserialize_callable_from_annotation(annotation, self._module, self) + + +def rest_field( + *, + name: typing.Optional[str] = None, + type: typing.Optional[typing.Callable] = None, # pylint: disable=redefined-builtin + visibility: typing.Optional[list[str]] = None, + default: typing.Any = _UNSET, + format: typing.Optional[str] = None, + is_multipart_file_input: bool = False, + xml: typing.Optional[dict[str, typing.Any]] = None, +) -> typing.Any: + return _RestField( + name=name, + type=type, + visibility=visibility, + default=default, + format=format, + is_multipart_file_input=is_multipart_file_input, + xml=xml, + ) + + +def rest_discriminator( + *, + name: typing.Optional[str] = None, + type: typing.Optional[typing.Callable] = None, # pylint: disable=redefined-builtin + visibility: typing.Optional[list[str]] = None, + xml: typing.Optional[dict[str, typing.Any]] = None, +) -> typing.Any: + return _RestField(name=name, type=type, is_discriminator=True, visibility=visibility, xml=xml) + + +def serialize_xml(model: Model, exclude_readonly: bool = False) -> str: + """Serialize a model to XML. + + :param Model model: The model to serialize. + :param bool exclude_readonly: Whether to exclude readonly properties. + :returns: The XML representation of the model. + :rtype: str + """ + return ET.tostring(_get_element(model, exclude_readonly), encoding="unicode") # type: ignore + + +def _get_xml_ns(meta: dict[str, typing.Any]) -> typing.Optional[str]: + """Return the XML namespace from a metadata dict, checking both 'ns' (old-style) and 'namespace' (DPG) keys. + + :param dict meta: The metadata dictionary to extract namespace from. + :returns: The namespace string if 'ns' or 'namespace' key is present, None otherwise. 
+ :rtype: str or None + """ + ns = meta.get("ns") + if ns is None: + ns = meta.get("namespace") + return ns + + +def _resolve_xml_ns( + prop_meta: dict[str, typing.Any], model_meta: typing.Optional[dict[str, typing.Any]] = None +) -> typing.Optional[str]: + """Resolve XML namespace for a property, falling back to model namespace when appropriate. + + Checks the property metadata first; if no namespace is found and the model does not declare + an explicit prefix, falls back to the model-level namespace. + + :param dict prop_meta: The property metadata dictionary. + :param dict model_meta: The model metadata dictionary, used as fallback. + :returns: The resolved namespace string, or None. + :rtype: str or None + """ + ns = _get_xml_ns(prop_meta) + if ns is None and model_meta is not None and not model_meta.get("prefix"): + ns = _get_xml_ns(model_meta) + return ns + + +def _set_xml_attribute(element: ET.Element, name: str, value: typing.Any, prop_meta: dict[str, typing.Any]) -> None: + """Set an XML attribute on an element, handling namespace prefix registration. + + :param ET.Element element: The element to set the attribute on. + :param str name: The default attribute name (wire name). + :param any value: The attribute value. + :param dict prop_meta: The property metadata dictionary. 
def _get_element(
    o: typing.Any,
    exclude_readonly: bool = False,
    parent_meta: typing.Optional[dict[str, typing.Any]] = None,
    wrapped_element: typing.Optional[ET.Element] = None,
) -> typing.Union[ET.Element, list[ET.Element]]:
    """Convert a model, list, dict or primitive into XML element(s).

    :param any o: The value to serialize.
    :param bool exclude_readonly: When True, properties for which ``_is_readonly`` is truthy
     are left out of a serialized model.
    :param dict parent_meta: XML metadata of the enclosing property, if any. It supplies the
     item name / prefix / namespace for array items, dict entries and primitives.
    :param ET.Element wrapped_element: An existing element to populate when ``o`` is a model;
     when omitted a new element is created for the model.
    :returns: A single element for a model or a primitive, or a list of elements for a
     list or dict.
    :rtype: ET.Element or list[ET.Element]
    :raises ValueError: If ``o`` is a primitive and no ``parent_meta`` is available to name
     its element.
    """
    if _is_model(o):
        model_meta = getattr(o, "_xml", {})

        # if prop is a model, then use the prop element directly, else generate a wrapper of model
        if wrapped_element is None:
            # When serializing as an array item (parent_meta is set), check if the parent has an
            # explicit itemsName. This ensures correct element names for unwrapped arrays (where
            # the element tag is the property/items name, not the model type name).
            _items_name = parent_meta.get("itemsName") if parent_meta is not None else None
            element_name = _items_name if _items_name else (model_meta.get("name") or o.__class__.__name__)
            _model_ns = _get_xml_ns(model_meta)
            wrapped_element = _create_xml_element(
                element_name,
                model_meta.get("prefix"),
                _model_ns,
            )

        readonly_props = []
        if exclude_readonly:
            readonly_props = [p._rest_name for p in o._attr_to_rest_field.values() if _is_readonly(p)]

        for k, v in o.items():
            # do not serialize readonly properties
            if exclude_readonly and k in readonly_props:
                continue

            prop_rest_field = _get_rest_field(o._attr_to_rest_field, k)
            if prop_rest_field:
                prop_meta = getattr(prop_rest_field, "_xml").copy()
                # use the wire name as xml name if no specific name is set
                if prop_meta.get("name") is None:
                    prop_meta["name"] = k
            else:
                # additional properties will not have rest field, use the wire name as xml name
                prop_meta = {"name": k}

            # Propagate model namespace to properties only for old-style "ns"-keyed models.
            # DPG-generated models use the "namespace" key and explicitly declare namespace on
            # each property that needs it, so propagation is intentionally skipped for them.
            if prop_meta.get("ns") is None and model_meta.get("ns"):
                prop_meta["ns"] = model_meta.get("ns")
                prop_meta["prefix"] = model_meta.get("prefix")

            if prop_meta.get("unwrapped", False):
                # unwrapped could only set on array
                wrapped_element.extend(_get_element(v, exclude_readonly, prop_meta))
            elif prop_meta.get("text", False):
                # text could only set on primitive type
                wrapped_element.text = _get_primitive_type_value(v)
            elif prop_meta.get("attribute", False):
                _set_xml_attribute(wrapped_element, k, v, prop_meta)
            else:
                # other wrapped prop element
                wrapped_element.append(_get_wrapped_element(v, exclude_readonly, prop_meta))
        return wrapped_element
    if isinstance(o, list):
        return [_get_element(x, exclude_readonly, parent_meta) for x in o]  # type: ignore
    if isinstance(o, dict):
        result = []
        _dict_ns = _get_xml_ns(parent_meta) if parent_meta else None
        for k, v in o.items():
            result.append(
                _get_wrapped_element(
                    v,
                    exclude_readonly,
                    {
                        "name": k,
                        "ns": _dict_ns,
                        "prefix": parent_meta.get("prefix") if parent_meta else None,
                    },
                )
            )
        return result

    # primitive case need to create element based on parent_meta
    if parent_meta:
        _items_ns = parent_meta.get("itemsNs")
        if _items_ns is None:
            _items_ns = _get_xml_ns(parent_meta)
        return _get_wrapped_element(
            o,
            exclude_readonly,
            {
                "name": parent_meta.get("itemsName", parent_meta.get("name")),
                "prefix": parent_meta.get("itemsPrefix", parent_meta.get("prefix")),
                "ns": _items_ns,
            },
        )

    # str() is required: concatenating a non-str primitive (int, float, None, ...) directly
    # would raise TypeError and hide the intended ValueError from the caller.
    raise ValueError("Could not serialize value into xml: " + str(o))
_create_xml_element( + meta.get("name") if meta else None, meta.get("prefix") if meta else None, _meta_ns + ) + if isinstance(v, (dict, list)): + wrapped_element.extend(_get_element(v, exclude_readonly, meta)) + elif _is_model(v): + _get_element(v, exclude_readonly, meta, wrapped_element) + else: + wrapped_element.text = _get_primitive_type_value(v) + return wrapped_element # type: ignore[no-any-return] + + +def _get_primitive_type_value(v) -> str: + if v is True: + return "true" + if v is False: + return "false" + if isinstance(v, _Null): + return "" + return str(v) + + +def _safe_register_namespace(prefix: str, ns: str) -> None: + """Register an XML namespace prefix, handling reserved prefix patterns. + + Some prefixes (e.g. 'ns2') match Python's reserved 'ns\\d+' pattern used for + auto-generated prefixes, causing register_namespace to raise ValueError. + Falls back to directly registering in the internal namespace map. + + :param str prefix: The namespace prefix to register. + :param str ns: The namespace URI. 
+ """ + try: + ET.register_namespace(prefix, ns) + except ValueError: + _ns_map = getattr(ET, "_namespace_map", None) + if _ns_map is not None: + _ns_map[ns] = prefix + + +def _create_xml_element( + tag: typing.Any, prefix: typing.Optional[str] = None, ns: typing.Optional[str] = None +) -> ET.Element: + if prefix and ns: + _safe_register_namespace(prefix, ns) + if ns: + return ET.Element("{" + ns + "}" + tag) + return ET.Element(tag) + + +def _deserialize_xml( + deserializer: typing.Any, + value: str, +) -> typing.Any: + element = ET.fromstring(value) # nosec + return _deserialize(deserializer, element) + + +def _convert_element(e: ET.Element): + # dict case + if len(e.attrib) > 0 or len({child.tag for child in e}) > 1: + dict_result: dict[str, typing.Any] = {} + for child in e: + if dict_result.get(child.tag) is not None: + if isinstance(dict_result[child.tag], list): + dict_result[child.tag].append(_convert_element(child)) + else: + dict_result[child.tag] = [dict_result[child.tag], _convert_element(child)] + else: + dict_result[child.tag] = _convert_element(child) + dict_result.update(e.attrib) + return dict_result + # array case + if len(e) > 0: + array_result: list[typing.Any] = [] + for child in e: + array_result.append(_convert_element(child)) + return array_result + # primitive case + return e.text diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_utils/serialization.py b/sdk/voicelive/azure-ai-voicelive/azure/_utils/serialization.py new file mode 100644 index 000000000000..81ec1de5922b --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/azure/_utils/serialization.py @@ -0,0 +1,2041 @@ +# pylint: disable=line-too-long,useless-suppression,too-many-lines +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
class RawDeserializer:
    """Decode a raw payload (JSON, XML or plain text) according to its content type."""

    # Accept "text" because we're open minded people...
    JSON_REGEXP = re.compile(r"^(application|text)/([a-z+.]+\+)?json$")

    # Name used in context
    CONTEXT_NAME = "deserialized_data"

    @classmethod
    def deserialize_from_text(cls, data: Optional[Union[AnyStr, IO]], content_type: Optional[str] = None) -> Any:
        """Decode data according to content-type.

        A stream is accepted as well, but it is read into memory in one go.

        When no content type is given, the data is returned as read: a stream has been
        consumed, but bytes are not decoded and no byte order mark is stripped.

        :param data: Input, could be bytes or stream (will be decoded with UTF8) or text
        :type data: str or bytes or IO
        :param str content_type: The content type.
        :return: The deserialized data.
        :rtype: object
        :raises DeserializationError: if the payload cannot be parsed for the given content
         type, or the content type is not supported.
        """
        if hasattr(data, "read"):
            # Assume a stream
            data = cast(IO, data).read()

        if isinstance(data, bytes):
            data_as_str = data.decode(encoding="utf-8-sig")
        else:
            # Explain to mypy the correct type.
            data_as_str = cast(str, data)

        # Remove Byte Order Mark if present in string
        data_as_str = data_as_str.lstrip(_BOM)

        if content_type is None:
            return data

        if cls.JSON_REGEXP.match(content_type):
            try:
                return json.loads(data_as_str)
            except ValueError as err:
                raise DeserializationError("JSON is invalid: {}".format(err), err) from err

        if "xml" in content_type:
            try:
                return ET.fromstring(data_as_str)  # nosec
            except ET.ParseError as err:
                # It might be because the server has an issue, and returned JSON with
                # content-type XML, so try a JSON load before giving up.
                try:
                    return json.loads(data)
                except ValueError:
                    pass  # Not JSON either; report the original XML failure below.
                _LOGGER.critical("Wasn't XML not JSON, failing")
                raise DeserializationError("XML is invalid") from err

        if content_type.startswith("text/"):
            return data_as_str
        raise DeserializationError("Cannot deserialize content-type: {}".format(content_type))

    @classmethod
    def deserialize_from_http_generics(cls, body_bytes: Optional[Union[AnyStr, IO]], headers: Mapping) -> Any:
        """Deserialize from HTTP response.

        Works from the body and headers only, so it does not depend on requests/aiohttp
        or any other specific transport. Headers are checked for "content-type"; any
        parameters after ";" are dropped and the value is lower-cased.

        :param bytes body_bytes: The body of the response.
        :param dict headers: The headers of the response.
        :returns: The deserialized data, or None when the body is empty.
        :rtype: object
        """
        if "content-type" in headers:
            content_type = headers["content-type"].split(";")[0].strip().lower()
        else:
            # Ouch, this server did not declare what it sent...
            # Let's guess it's JSON...
            # Also, since Autorest was considering that an empty body was a valid JSON,
            # need that test as well....
            content_type = "application/json"

        if not body_bytes:
            return None
        return cls.deserialize_from_text(body_bytes, content_type)
+ + :param object other: The object to compare + :returns: True if objects are not equal + :rtype: bool + """ + return not self.__eq__(other) + + def __str__(self) -> str: + return str(self.__dict__) + + @classmethod + def enable_additional_properties_sending(cls) -> None: + cls._attribute_map["additional_properties"] = {"key": "", "type": "{object}"} + + @classmethod + def is_xml_model(cls) -> bool: + try: + cls._xml_map # type: ignore + except AttributeError: + return False + return True + + @classmethod + def _create_xml_node(cls): + """Create XML node. + + :returns: The XML node + :rtype: xml.etree.ElementTree.Element + """ + try: + xml_map = cls._xml_map # type: ignore + except AttributeError: + xml_map = {} + + return _create_xml_node(xml_map.get("name", cls.__name__), xml_map.get("prefix", None), xml_map.get("ns", None)) + + def serialize(self, keep_readonly: bool = False, **kwargs: Any) -> JSON: + """Return the JSON that would be sent to server from this model. + + This is an alias to `as_dict(full_restapi_key_transformer, keep_readonly=False)`. + + If you want XML serialization, you can pass the kwargs is_xml=True. + + :param bool keep_readonly: If you want to serialize the readonly attributes + :returns: A dict JSON compatible object + :rtype: dict + """ + serializer = Serializer(self._infer_class_models()) + return serializer._serialize( # type: ignore # pylint: disable=protected-access + self, keep_readonly=keep_readonly, **kwargs + ) + + def as_dict( + self, + keep_readonly: bool = True, + key_transformer: Callable[[str, dict[str, Any], Any], Any] = attribute_transformer, + **kwargs: Any + ) -> JSON: + """Return a dict that can be serialized using json.dump. + + Advanced usage might optionally use a callback as parameter: + + .. code::python + + def my_key_transformer(key, attr_desc, value): + return key + + Key is the attribute name used in Python. Attr_desc + is a dict of metadata. 
Currently contains 'type' with the + msrest type and 'key' with the RestAPI encoded key. + Value is the current value in this object. + + The string returned will be used to serialize the key. + If the return type is a list, this is considered hierarchical + result dict. + + See the three examples in this file: + + - attribute_transformer + - full_restapi_key_transformer + - last_restapi_key_transformer + + If you want XML serialization, you can pass the kwargs is_xml=True. + + :param bool keep_readonly: If you want to serialize the readonly attributes + :param function key_transformer: A key transformer function. + :returns: A dict JSON compatible object + :rtype: dict + """ + serializer = Serializer(self._infer_class_models()) + return serializer._serialize( # type: ignore # pylint: disable=protected-access + self, key_transformer=key_transformer, keep_readonly=keep_readonly, **kwargs + ) + + @classmethod + def _infer_class_models(cls): + try: + str_models = cls.__module__.rsplit(".", 1)[0] + models = sys.modules[str_models] + client_models = {k: v for k, v in models.__dict__.items() if isinstance(v, type)} + if cls.__name__ not in client_models: + raise ValueError("Not Autorest generated code") + except Exception: # pylint: disable=broad-exception-caught + # Assume it's not Autorest generated (tests?). Add ourselves as dependencies. + client_models = {cls.__name__: cls} + return client_models + + @classmethod + def deserialize(cls, data: Any, content_type: Optional[str] = None) -> Self: + """Parse a str using the RestAPI syntax and return a model. + + :param str data: A str using RestAPI structure. JSON by default. + :param str content_type: JSON by default, set application/xml if XML. 
+ :returns: An instance of this model + :raises DeserializationError: if something went wrong + :rtype: Self + """ + deserializer = Deserializer(cls._infer_class_models()) + return deserializer(cls.__name__, data, content_type=content_type) # type: ignore + + @classmethod + def from_dict( + cls, + data: Any, + key_extractors: Optional[Callable[[str, dict[str, Any], Any], Any]] = None, + content_type: Optional[str] = None, + ) -> Self: + """Parse a dict using given key extractor return a model. + + By default consider key + extractors (rest_key_case_insensitive_extractor, attribute_key_case_insensitive_extractor + and last_rest_key_case_insensitive_extractor) + + :param dict data: A dict using RestAPI structure + :param function key_extractors: A key extractor function. + :param str content_type: JSON by default, set application/xml if XML. + :returns: An instance of this model + :raises DeserializationError: if something went wrong + :rtype: Self + """ + deserializer = Deserializer(cls._infer_class_models()) + deserializer.key_extractors = ( # type: ignore + [ # type: ignore + attribute_key_case_insensitive_extractor, + rest_key_case_insensitive_extractor, + last_rest_key_case_insensitive_extractor, + ] + if key_extractors is None + else key_extractors + ) + return deserializer(cls.__name__, data, content_type=content_type) # type: ignore + + @classmethod + def _flatten_subtype(cls, key, objects): + if "_subtype_map" not in cls.__dict__: + return {} + result = dict(cls._subtype_map[key]) + for valuetype in cls._subtype_map[key].values(): + result |= objects[valuetype]._flatten_subtype(key, objects) # pylint: disable=protected-access + return result + + @classmethod + def _classify(cls, response, objects): + """Check the class _subtype_map for any child classes. + We want to ignore any inherited _subtype_maps. 
+ + :param dict response: The initial data + :param dict objects: The class objects + :returns: The class to be used + :rtype: class + """ + for subtype_key in cls.__dict__.get("_subtype_map", {}).keys(): + subtype_value = None + + if not isinstance(response, ET.Element): + rest_api_response_key = cls._get_rest_key_parts(subtype_key)[-1] + subtype_value = response.get(rest_api_response_key, None) or response.get(subtype_key, None) + else: + subtype_value = xml_key_extractor(subtype_key, cls._attribute_map[subtype_key], response) + if subtype_value: + # Try to match base class. Can be class name only + # (bug to fix in Autorest to support x-ms-discriminator-name) + if cls.__name__ == subtype_value: + return cls + flatten_mapping_type = cls._flatten_subtype(subtype_key, objects) + try: + return objects[flatten_mapping_type[subtype_value]] # type: ignore + except KeyError: + _LOGGER.warning( + "Subtype value %s has no mapping, use base class %s.", + subtype_value, + cls.__name__, + ) + break + else: + _LOGGER.warning("Discriminator %s is absent or null, use base class %s.", subtype_key, cls.__name__) + break + return cls + + @classmethod + def _get_rest_key_parts(cls, attr_key): + """Get the RestAPI key of this attr, split it and decode part + :param str attr_key: Attribute key must be in attribute_map. + :returns: A list of RestAPI part + :rtype: list + """ + rest_split_key = _FLATTEN.split(cls._attribute_map[attr_key]["key"]) + return [_decode_attribute_map_key(key_part) for key_part in rest_split_key] + + +def _decode_attribute_map_key(key): + """This decode a key in an _attribute_map to the actual key we want to look at + inside the received data. 
+ + :param str key: A key string from the generated code + :returns: The decoded key + :rtype: str + """ + return key.replace("\\.", ".") + + +class Serializer: # pylint: disable=too-many-public-methods + """Request object model serializer.""" + + basic_types = {str: "str", int: "int", bool: "bool", float: "float"} + + _xml_basic_types_serializers = {"bool": lambda x: str(x).lower()} + days = {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"} + months = { + 1: "Jan", + 2: "Feb", + 3: "Mar", + 4: "Apr", + 5: "May", + 6: "Jun", + 7: "Jul", + 8: "Aug", + 9: "Sep", + 10: "Oct", + 11: "Nov", + 12: "Dec", + } + validation = { + "min_length": lambda x, y: len(x) < y, + "max_length": lambda x, y: len(x) > y, + "minimum": lambda x, y: x < y, + "maximum": lambda x, y: x > y, + "minimum_ex": lambda x, y: x <= y, + "maximum_ex": lambda x, y: x >= y, + "min_items": lambda x, y: len(x) < y, + "max_items": lambda x, y: len(x) > y, + "pattern": lambda x, y: not re.match(y, x, re.UNICODE), + "unique": lambda x, y: len(x) != len(set(x)), + "multiple": lambda x, y: x % y != 0, + } + + def __init__(self, classes: Optional[Mapping[str, type]] = None) -> None: + self.serialize_type = { + "iso-8601": Serializer.serialize_iso, + "rfc-1123": Serializer.serialize_rfc, + "unix-time": Serializer.serialize_unix, + "duration": Serializer.serialize_duration, + "date": Serializer.serialize_date, + "time": Serializer.serialize_time, + "decimal": Serializer.serialize_decimal, + "long": Serializer.serialize_long, + "bytearray": Serializer.serialize_bytearray, + "base64": Serializer.serialize_base64, + "object": self.serialize_object, + "[]": self.serialize_iter, + "{}": self.serialize_dict, + } + self.dependencies: dict[str, type] = dict(classes) if classes else {} + self.key_transformer = full_restapi_key_transformer + self.client_side_validation = True + + def _serialize( # pylint: disable=too-many-nested-blocks, too-many-branches, too-many-statements, too-many-locals + self, 
target_obj, data_type=None, **kwargs + ): + """Serialize data into a string according to type. + + :param object target_obj: The data to be serialized. + :param str data_type: The type to be serialized from. + :rtype: str, dict + :raises SerializationError: if serialization fails. + :returns: The serialized data. + """ + key_transformer = kwargs.get("key_transformer", self.key_transformer) + keep_readonly = kwargs.get("keep_readonly", False) + if target_obj is None: + return None + + attr_name = None + class_name = target_obj.__class__.__name__ + + if data_type: + return self.serialize_data(target_obj, data_type, **kwargs) + + if not hasattr(target_obj, "_attribute_map"): + data_type = type(target_obj).__name__ + if data_type in self.basic_types.values(): + return self.serialize_data(target_obj, data_type, **kwargs) + + # Force "is_xml" kwargs if we detect a XML model + try: + is_xml_model_serialization = kwargs["is_xml"] + except KeyError: + is_xml_model_serialization = kwargs.setdefault("is_xml", target_obj.is_xml_model()) + + serialized = {} + if is_xml_model_serialization: + serialized = target_obj._create_xml_node() # pylint: disable=protected-access + try: + attributes = target_obj._attribute_map # pylint: disable=protected-access + for attr, attr_desc in attributes.items(): + attr_name = attr + if not keep_readonly and target_obj._validation.get( # pylint: disable=protected-access + attr_name, {} + ).get("readonly", False): + continue + + if attr_name == "additional_properties" and attr_desc["key"] == "": + if target_obj.additional_properties is not None: + serialized |= target_obj.additional_properties + continue + try: + + orig_attr = getattr(target_obj, attr) + if is_xml_model_serialization: + pass # Don't provide "transformer" for XML for now. 
Keep "orig_attr" + else: # JSON + keys, orig_attr = key_transformer(attr, attr_desc.copy(), orig_attr) + keys = keys if isinstance(keys, list) else [keys] + + kwargs["serialization_ctxt"] = attr_desc + new_attr = self.serialize_data(orig_attr, attr_desc["type"], **kwargs) + + if is_xml_model_serialization: + xml_desc = attr_desc.get("xml", {}) + xml_name = xml_desc.get("name", attr_desc["key"]) + xml_prefix = xml_desc.get("prefix", None) + xml_ns = xml_desc.get("ns", None) + if xml_desc.get("attr", False): + if xml_ns: + ET.register_namespace(xml_prefix, xml_ns) + xml_name = "{{{}}}{}".format(xml_ns, xml_name) + serialized.set(xml_name, new_attr) # type: ignore + continue + if xml_desc.get("text", False): + serialized.text = new_attr # type: ignore + continue + if isinstance(new_attr, list): + serialized.extend(new_attr) # type: ignore + elif isinstance(new_attr, ET.Element): + # If the down XML has no XML/Name, + # we MUST replace the tag with the local tag. But keeping the namespaces. 
def body(self, data, data_type, **kwargs):
    """Serialize data intended for a request body.

    When ``data_type`` (with any surrounding ``[]`` / ``{}`` stripped) names a registered
    dependency, ``data`` is first run through a ``Deserializer`` to build model instances,
    and the result is then serialized.

    :param object data: The data to be serialized.
    :param str data_type: The type to be serialized from.
    :rtype: dict
    :raises SerializationError: if serialization fails, including when the model
     cannot be built from ``data``.
    :returns: The serialized request body
    """

    # Just in case this is a dict
    internal_data_type_str = data_type.strip("[]{}")
    internal_data_type = self.dependencies.get(internal_data_type_str, None)
    try:
        is_xml_model_serialization = kwargs["is_xml"]
    except KeyError:
        # Caller did not say: infer XML mode from the model class and record it in kwargs
        # so the downstream _serialize call sees the same decision.
        if internal_data_type and issubclass(internal_data_type, Model):
            is_xml_model_serialization = kwargs.setdefault("is_xml", internal_data_type.is_xml_model())
        else:
            is_xml_model_serialization = False
    # NOTE(review): internal_data_type is looked up in self.dependencies, i.e. it is a class
    # object, so isinstance(<class>, Enum) looks like it can never be True for an Enum
    # subclass; issubclass may have been intended. Confirm before changing, since enum
    # types currently go through the deserializer pre-pass below.
    if internal_data_type and not isinstance(internal_data_type, Enum):
        try:
            deserializer = Deserializer(self.dependencies)
            # Since it's on serialization, it's almost sure that format is not JSON REST
            # We're not able to deal with additional properties for now.
            deserializer.additional_properties_detection = False
            if is_xml_model_serialization:
                deserializer.key_extractors = [  # type: ignore
                    attribute_key_case_insensitive_extractor,
                ]
            else:
                deserializer.key_extractors = [
                    rest_key_case_insensitive_extractor,
                    attribute_key_case_insensitive_extractor,
                    last_rest_key_case_insensitive_extractor,
                ]
            data = deserializer._deserialize(data_type, data)  # pylint: disable=protected-access
        except DeserializationError as err:
            raise SerializationError("Unable to build a model: " + str(err)) from err

    return self._serialize(data, data_type, **kwargs)
def query(self, name, data, data_type, **kwargs):
    """Serialize data intended for a URL query.

    List types (``data_type`` starting with ``[``) are delegated to ``serialize_iter`` and
    quoted per item unless ``skip_quote`` is truthy. Any other value is serialized,
    booleans are rendered through ``json.dumps``, and the result is percent-quoted unless
    ``skip_quote`` is exactly ``True``.

    :param str name: The name of the query parameter.
    :param object data: The data to be serialized.
    :param str data_type: The type to be serialized from.
    :rtype: str, list
    :raises TypeError: if serialization fails.
    :returns: The serialized query parameter
    """
    try:
        # Treat the list aside, since we don't want to encode the div separator
        if data_type.startswith("["):
            return self.serialize_iter(
                data,
                data_type[1:-1],
                do_quote=not kwargs.get("skip_quote", False),
                **kwargs
            )

        # Not a list, regular serialization
        value = self.serialize_data(data, data_type, **kwargs)
        if data_type == "bool":
            value = json.dumps(value)
        text = str(value)
        if kwargs.get("skip_quote") is not True:
            text = quote(text, safe="")
    except SerializationError as exc:
        raise TypeError("{} must be type {}.".format(name, data_type)) from exc
    return text
+ :raises ValueError: if data is None + :returns: The serialized header + """ + try: + if data_type in ["[str]"]: + data = ["" if d is None else d for d in data] + + output = self.serialize_data(data, data_type, **kwargs) + if data_type == "bool": + output = json.dumps(output) + except SerializationError as exc: + raise TypeError("{} must be type {}.".format(name, data_type)) from exc + return str(output) + + def serialize_data(self, data, data_type, **kwargs): + """Serialize generic data according to supplied data type. + + :param object data: The data to be serialized. + :param str data_type: The type to be serialized from. + :raises AttributeError: if required data is None. + :raises ValueError: if data is None + :raises SerializationError: if serialization fails. + :returns: The serialized data. + :rtype: str, int, float, bool, dict, list + """ + if data is None: + raise ValueError("No value for given attribute") + + try: + if data is CoreNull: + return None + if data_type in self.basic_types.values(): + return self.serialize_basic(data, data_type, **kwargs) + + if data_type in self.serialize_type: + return self.serialize_type[data_type](data, **kwargs) + + # If dependencies is empty, try with current data class + # It has to be a subclass of Enum anyway + enum_type = self.dependencies.get(data_type, cast(type, data.__class__)) + if issubclass(enum_type, Enum): + return Serializer.serialize_enum(data, enum_obj=enum_type) + + iter_type = data_type[0] + data_type[-1] + if iter_type in self.serialize_type: + return self.serialize_type[iter_type](data, data_type[1:-1], **kwargs) + + except (ValueError, TypeError) as err: + msg = "Unable to serialize value: {!r} as type: {!r}." 
+ raise SerializationError(msg.format(data, data_type)) from err + return self._serialize(data, **kwargs) + + @classmethod + def _get_custom_serializers(cls, data_type, **kwargs): # pylint: disable=inconsistent-return-statements + custom_serializer = kwargs.get("basic_types_serializers", {}).get(data_type) + if custom_serializer: + return custom_serializer + if kwargs.get("is_xml", False): + return cls._xml_basic_types_serializers.get(data_type) + + @classmethod + def serialize_basic(cls, data, data_type, **kwargs): + """Serialize basic builting data type. + Serializes objects to str, int, float or bool. + + Possible kwargs: + - basic_types_serializers dict[str, callable] : If set, use the callable as serializer + - is_xml bool : If set, use xml_basic_types_serializers + + :param obj data: Object to be serialized. + :param str data_type: Type of object in the iterable. + :rtype: str, int, float, bool + :return: serialized object + :raises TypeError: raise if data_type is not one of str, int, float, bool. + """ + custom_serializer = cls._get_custom_serializers(data_type, **kwargs) + if custom_serializer: + return custom_serializer(data) + if data_type == "str": + return cls.serialize_unicode(data) + if data_type == "int": + return int(data) + if data_type == "float": + return float(data) + if data_type == "bool": + return bool(data) + raise TypeError("Unknown basic data type: {}".format(data_type)) + + @classmethod + def serialize_unicode(cls, data): + """Special handling for serializing unicode strings in Py2. + Encode to UTF-8 if unicode, otherwise handle as a str. + + :param str data: Object to be serialized. 
def serialize_iter(self, data, iter_type, div=None, **kwargs):
    """Serialize an iterable, element by element, into a list, a joined string or XML nodes.

    Supported kwargs:
    - serialization_ctxt dict : The current entry of _attribute_map, or same format.
      serialization_ctxt['type'] should be same as data_type.
    - is_xml bool : If set, serialize as XML
    - do_quote bool : If set, every serialized element is passed through
      ``quote(str(element), safe="")`` (``None`` becomes an empty string).

    :param list data: Object to be serialized.
    :param str iter_type: Type of object in the iterable.
    :param str div: If set, this str will be used to combine the elements
     in the iterable into a combined string. Defaults to None (no joining).
    :rtype: list, str
    :return: serialized iterable
    :raises SerializationError: if ``data`` is a str, or if serializing an element
     raises a SerializationError.
    """
    # A str is iterable, but iterating its characters is never what the caller meant.
    if isinstance(data, str):
        raise SerializationError("Refuse str type as a valid iter type.")

    serialization_ctxt = kwargs.get("serialization_ctxt", {})
    is_xml = kwargs.get("is_xml", False)

    serialized = []
    for d in data:
        try:
            serialized.append(self.serialize_data(d, iter_type, **kwargs))
        except ValueError as err:
            # SerializationError is itself checked as a ValueError here: propagate it,
            # but turn any other ValueError into a None placeholder for that element.
            if isinstance(err, SerializationError):
                raise
            serialized.append(None)

    if kwargs.get("do_quote", False):
        serialized = ["" if s is None else quote(str(s), safe="") for s in serialized]

    if div:
        # After this step "serialized" is a single str, no longer a list.
        serialized = ["" if s is None else str(s) for s in serialized]
        serialized = div.join(serialized)

    if "xml" in serialization_ctxt or is_xml:
        # XML serialization is more complicated
        xml_desc = serialization_ctxt.get("xml", {})
        xml_name = xml_desc.get("name")
        if not xml_name:
            # No explicit XML name: fall back to the attribute key.
            xml_name = serialization_ctxt["key"]

        # Create a wrap node if necessary (use the fact that Element and list have "append")
        is_wrapped = xml_desc.get("wrapped", False)
        node_name = xml_desc.get("itemsName", xml_name)
        if is_wrapped:
            final_result = _create_xml_node(xml_name, xml_desc.get("prefix", None), xml_desc.get("ns", None))
        else:
            final_result = []
        # Append every element to the wrapper node (or to the plain list when not wrapped)
        for el in serialized:
            if isinstance(el, ET.Element):
                # Already an XML node: use it as-is.
                el_node = el
            else:
                el_node = _create_xml_node(node_name, xml_desc.get("prefix", None), xml_desc.get("ns", None))
                if el is not None:  # Otherwise it writes "None" :-p
                    el_node.text = str(el)
            final_result.append(el_node)
        return final_result
    return serialized
+ :rtype: dict or str + :return: serialized object + """ + if attr is None: + return None + if isinstance(attr, ET.Element): + return attr + obj_type = type(attr) + if obj_type in self.basic_types: + return self.serialize_basic(attr, self.basic_types[obj_type], **kwargs) + if obj_type is _long_type: + return self.serialize_long(attr) + if obj_type is str: + return self.serialize_unicode(attr) + if obj_type is datetime.datetime: + return self.serialize_iso(attr) + if obj_type is datetime.date: + return self.serialize_date(attr) + if obj_type is datetime.time: + return self.serialize_time(attr) + if obj_type is datetime.timedelta: + return self.serialize_duration(attr) + if obj_type is decimal.Decimal: + return self.serialize_decimal(attr) + + # If it's a model or I know this dependency, serialize as a Model + if obj_type in self.dependencies.values() or isinstance(attr, Model): + return self._serialize(attr) + + if obj_type == dict: + serialized = {} + for key, value in attr.items(): + try: + serialized[self.serialize_unicode(key)] = self.serialize_object(value, **kwargs) + except ValueError: + serialized[self.serialize_unicode(key)] = None + return serialized + + if obj_type == list: + serialized = [] + for obj in attr: + try: + serialized.append(self.serialize_object(obj, **kwargs)) + except ValueError: + pass + return serialized + return str(attr) + + @staticmethod + def serialize_enum(attr, enum_obj=None): + try: + result = attr.value + except AttributeError: + result = attr + try: + enum_obj(result) # type: ignore + return result + except ValueError as exc: + for enum_value in enum_obj: # type: ignore + if enum_value.value.lower() == str(attr).lower(): + return enum_value.value + error = "{!r} is not valid value for enum {!r}" + raise SerializationError(error.format(attr, enum_obj)) from exc + + @staticmethod + def serialize_bytearray(attr, **kwargs): # pylint: disable=unused-argument + """Serialize bytearray into base-64 string. 
+ + :param str attr: Object to be serialized. + :rtype: str + :return: serialized base64 + """ + return b64encode(attr).decode() + + @staticmethod + def serialize_base64(attr, **kwargs): # pylint: disable=unused-argument + """Serialize str into base-64 string. + + :param str attr: Object to be serialized. + :rtype: str + :return: serialized base64 + """ + encoded = b64encode(attr).decode("ascii") + return encoded.strip("=").replace("+", "-").replace("/", "_") + + @staticmethod + def serialize_decimal(attr, **kwargs): # pylint: disable=unused-argument + """Serialize Decimal object to float. + + :param decimal attr: Object to be serialized. + :rtype: float + :return: serialized decimal + """ + return float(attr) + + @staticmethod + def serialize_long(attr, **kwargs): # pylint: disable=unused-argument + """Serialize long (Py2) or int (Py3). + + :param int attr: Object to be serialized. + :rtype: int/long + :return: serialized long + """ + return _long_type(attr) + + @staticmethod + def serialize_date(attr, **kwargs): # pylint: disable=unused-argument + """Serialize Date object into ISO-8601 formatted string. + + :param Date attr: Object to be serialized. + :rtype: str + :return: serialized date + """ + if isinstance(attr, str): + attr = isodate.parse_date(attr) + t = "{:04}-{:02}-{:02}".format(attr.year, attr.month, attr.day) + return t + + @staticmethod + def serialize_time(attr, **kwargs): # pylint: disable=unused-argument + """Serialize Time object into ISO-8601 formatted string. + + :param datetime.time attr: Object to be serialized. + :rtype: str + :return: serialized time + """ + if isinstance(attr, str): + attr = isodate.parse_time(attr) + t = "{:02}:{:02}:{:02}".format(attr.hour, attr.minute, attr.second) + if attr.microsecond: + t += ".{:02}".format(attr.microsecond) + return t + + @staticmethod + def serialize_duration(attr, **kwargs): # pylint: disable=unused-argument + """Serialize TimeDelta object into ISO-8601 formatted string. 
@staticmethod
def serialize_rfc(attr, **kwargs):  # pylint: disable=unused-argument
    """Format a datetime as an RFC-1123 string expressed in GMT.

    A warning is logged when the datetime carries no tzinfo.

    :param Datetime attr: Object to be serialized.
    :rtype: str
    :raises TypeError: if ``attr`` does not behave like a datetime.
    :return: serialized rfc
    """
    try:
        if not attr.tzinfo:
            _LOGGER.warning("Datetime with no tzinfo will be considered UTC.")
        stamp = attr.utctimetuple()
    except AttributeError as exc:
        raise TypeError("RFC1123 object must be valid Datetime object.") from exc

    # Day and month names come from the Serializer lookup tables.
    day_name = Serializer.days[stamp.tm_wday]
    month_name = Serializer.months[stamp.tm_mon]
    template = "{}, {:02} {} {:04} {:02}:{:02}:{:02} GMT"
    return template.format(
        day_name,
        stamp.tm_mday,
        month_name,
        stamp.tm_year,
        stamp.tm_hour,
        stamp.tm_min,
        stamp.tm_sec,
    )
+ raise SerializationError(msg) from err + except AttributeError as err: + msg = "ISO-8601 object must be valid Datetime object." + raise TypeError(msg) from err + + @staticmethod + def serialize_unix(attr, **kwargs): # pylint: disable=unused-argument + """Serialize Datetime object into IntTime format. + This is represented as seconds. + + :param Datetime attr: Object to be serialized. + :rtype: int + :raises SerializationError: if format invalid + :return: serialied unix + """ + if isinstance(attr, int): + return attr + try: + if not attr.tzinfo: + _LOGGER.warning("Datetime with no tzinfo will be considered UTC.") + return int(calendar.timegm(attr.utctimetuple())) + except AttributeError as exc: + raise TypeError("Unix time object must be valid Datetime object.") from exc + + +def rest_key_extractor(attr, attr_desc, data): # pylint: disable=unused-argument + key = attr_desc["key"] + working_data = data + + while "." in key: + # Need the cast, as for some reasons "split" is typed as list[str | Any] + dict_keys = cast(list[str], _FLATTEN.split(key)) + if len(dict_keys) == 1: + key = _decode_attribute_map_key(dict_keys[0]) + break + working_key = _decode_attribute_map_key(dict_keys[0]) + working_data = working_data.get(working_key, data) + if working_data is None: + # If at any point while following flatten JSON path see None, it means + # that all properties under are None as well + return None + key = ".".join(dict_keys[1:]) + + return working_data.get(key) + + +def rest_key_case_insensitive_extractor( # pylint: disable=unused-argument, inconsistent-return-statements + attr, attr_desc, data +): + key = attr_desc["key"] + working_data = data + + while "." 
def last_rest_key_extractor(attr, attr_desc, data):  # pylint: disable=unused-argument
    """Extract the attribute in "data" using only the last segment of the JSON path key.

    :param str attr: The attribute to extract
    :param dict attr_desc: The attribute description
    :param dict data: The data to extract from
    :rtype: object
    :returns: The extracted attribute
    """
    path_segments = _FLATTEN.split(attr_desc["key"])
    leaf_key = path_segments[-1]
    return attribute_key_extractor(leaf_key, None, data)
def attribute_key_case_insensitive_extractor(attr, _, data):
    """Look up ``attr`` in ``data`` ignoring the case of the keys.

    The first key (in iteration order) whose lowercase form equals the
    lowercase form of ``attr`` is used.

    :param str attr: The attribute name to look for.
    :param _: Unused attribute description.
    :param dict data: The data to extract from.
    :rtype: object
    :returns: The value stored under the matching key, or the result of
     ``data.get(None)`` when no key matches.
    """
    wanted = attr.lower()
    matching_key = next((candidate for candidate in data if candidate.lower() == wanted), None)
    return data.get(matching_key)
attribute, that's simple + if xml_desc.get("attr", False): + return data.get(xml_name) + + # If it's x-ms-text, that's simple too + if xml_desc.get("text", False): + return data.text + + # Scenario where I take the local name: + # - Wrapped node + # - Internal type is an enum (considered basic types) + # - Internal type has no XML/Name node + if is_wrapped or (internal_type and (issubclass(internal_type, Enum) or "name" not in internal_type_xml_map)): + children = data.findall(xml_name) + # If internal type has a local name and it's not a list, I use that name + elif not is_iter_type and internal_type and "name" in internal_type_xml_map: + xml_name = _extract_name_from_internal_type(internal_type) + children = data.findall(xml_name) + # That's an array + else: + if internal_type: # Complex type, ignore itemsName and use the complex type name + items_name = _extract_name_from_internal_type(internal_type) + else: + items_name = xml_desc.get("itemsName", xml_name) + children = data.findall(items_name) + + if len(children) == 0: + if is_iter_type: + if is_wrapped: + return None # is_wrapped no node, we want None + return [] # not wrapped, assume empty list + return None # Assume it's not there, maybe an optional node. + + # If is_iter_type and not wrapped, return all found children + if is_iter_type: + if not is_wrapped: + return children + # Iter and wrapped, should have found one node only (the wrap one) + if len(children) != 1: + raise DeserializationError( + "Tried to deserialize an array not wrapped, and found several nodes '{}'. Maybe you should declare this array as wrapped?".format( + xml_name + ) + ) + return list(children[0]) # Might be empty list and that's ok. + + # Here it's not a itertype, we should have found one element only or empty + if len(children) > 1: + raise DeserializationError("Find several XML '{}' where it was not expected".format(xml_name)) + return children[0] + + +class Deserializer: + """Response object model deserializer. 
def __init__(self, classes: Optional[Mapping[str, type]] = None) -> None:
    """Set up the type dispatch tables, known model classes and key extractors.

    :param dict classes: Class type dictionary for deserializing complex types.
    """
    # Handlers that need no instance state.
    static_handlers = {
        "iso-8601": Deserializer.deserialize_iso,
        "rfc-1123": Deserializer.deserialize_rfc,
        "unix-time": Deserializer.deserialize_unix,
        "duration": Deserializer.deserialize_duration,
        "date": Deserializer.deserialize_date,
        "time": Deserializer.deserialize_time,
        "decimal": Deserializer.deserialize_decimal,
        "long": Deserializer.deserialize_long,
        "bytearray": Deserializer.deserialize_bytearray,
        "base64": Deserializer.deserialize_base64,
    }
    # Handlers bound to this instance.
    bound_handlers = {
        "object": self.deserialize_object,
        "[]": self.deserialize_iter,
        "{}": self.deserialize_dict,
    }
    self.deserialize_type = {**static_handlers, **bound_handlers}
    # Values already of these types are returned untouched by deserialize_data.
    self.deserialize_expected_types = {
        "duration": (isodate.Duration, datetime.timedelta),
        "iso-8601": (datetime.datetime),
    }
    if classes:
        known_classes: dict[str, type] = dict(classes)
    else:
        known_classes = {}
    self.dependencies: dict[str, type] = known_classes
    self.key_extractors = [rest_key_extractor, xml_key_extractor]
    # Additional properties only works if the "rest_key_extractor" is used to
    # extract the keys. Making it work with any key extractor would be too
    # complicated, with no real scenario for now.
    # So this flag allows disabling additional properties detection. It should be
    # turned off if you expect the deserialization to NOT come from a JSON REST syntax.
    # Otherwise, results are unexpected.
    self.additional_properties_detection = True
def _deserialize(self, target_obj, data):  # pylint: disable=inconsistent-return-statements
    """Call the deserializer on a model.

    Data needs to be already deserialized as JSON or XML ElementTree

    When ``data`` already exposes ``_attribute_map`` it is updated in place and
    returned. Otherwise the target is classified, each mapped attribute is
    extracted with the configured key extractors and the model is instantiated.

    :param str target_obj: Target data type to deserialize to.
    :param object data: Object to deserialize.
    :raises DeserializationError: if deserialization fails.
    :return: Deserialized object.
    :rtype: object
    """
    # This is already a model, go recursive just in case
    if hasattr(data, "_attribute_map"):
        # Attributes flagged "constant" in _validation are never re-deserialized.
        constants = [name for name, config in getattr(data, "_validation", {}).items() if config.get("constant")]
        try:
            for attr, mapconfig in data._attribute_map.items():  # pylint: disable=protected-access
                if attr in constants:
                    continue
                value = getattr(data, attr)
                if value is None:
                    continue
                local_type = mapconfig["type"]
                # Drop list/dict markers to get the element type name.
                internal_data_type = local_type.strip("[]{}")
                # NOTE(review): internal_data_type is the result of str.strip, so the
                # isinstance(..., Enum) test looks like it can never be true - confirm.
                if internal_data_type not in self.dependencies or isinstance(internal_data_type, Enum):
                    continue
                setattr(data, attr, self._deserialize(local_type, value))
            return data
        except AttributeError:
            # Implicitly returns None when an attribute access fails.
            return

    response, class_name = self._classify_target(target_obj, data)

    # A str response is a type name to hand over to deserialize_data
    # (_classify_target returns the name unchanged when it is not a known dependency).
    if isinstance(response, str):
        return self.deserialize_data(data, response)
    if isinstance(response, type) and issubclass(response, Enum):
        return self.deserialize_enum(data, response)

    if data is None or data is CoreNull:
        return data
    try:
        attributes = response._attribute_map  # type: ignore # pylint: disable=protected-access
        d_attrs = {}
        for attr, attr_desc in attributes.items():
            # Check empty string. If it's not empty, someone has a real "additionalProperties"...
            if attr == "additional_properties" and attr_desc["key"] == "":
                continue
            raw_value = None
            # Enhance attr_desc with some dynamic data
            attr_desc = attr_desc.copy()  # Do a copy, do not change the real one
            internal_data_type = attr_desc["type"].strip("[]{}")
            if internal_data_type in self.dependencies:
                attr_desc["internalType"] = self.dependencies[internal_data_type]

            # Extractors run in order; the first non-None value wins and any later
            # extractor returning a different value is logged and ignored.
            for key_extractor in self.key_extractors:
                found_value = key_extractor(attr, attr_desc, data)
                if found_value is not None:
                    if raw_value is not None and raw_value != found_value:
                        msg = (
                            "Ignoring extracted value '%s' from %s for key '%s'"
                            " (duplicate extraction, follow extractors order)"
                        )
                        _LOGGER.warning(msg, found_value, key_extractor, attr)
                        continue
                    raw_value = found_value

            value = self.deserialize_data(raw_value, attr_desc["type"])
            d_attrs[attr] = value
    except (AttributeError, TypeError, KeyError) as err:
        msg = "Unable to deserialize to object: " + class_name  # type: ignore
        raise DeserializationError(msg) from err
    additional_properties = self._build_additional_properties(attributes, data)
    return self._instantiate_model(response, d_attrs, additional_properties)
If it's not empty, someone has a real "additionalProperties" + return None + if isinstance(data, ET.Element): + data = {el.tag: el.text for el in data} + + known_keys = { + _decode_attribute_map_key(_FLATTEN.split(desc["key"])[0]) + for desc in attribute_map.values() + if desc["key"] != "" + } + present_keys = set(data.keys()) + missing_keys = present_keys - known_keys + return {key: data[key] for key in missing_keys} + + def _classify_target(self, target, data): + """Check to see whether the deserialization target object can + be classified into a subclass. + Once classification has been determined, initialize object. + + :param str target: The target object type to deserialize to. + :param str/dict data: The response data to deserialize. + :return: The classified target object and its class name. + :rtype: tuple + """ + if target is None: + return None, None + + if isinstance(target, str): + try: + target = self.dependencies[target] + except KeyError: + return target, target + + try: + target = target._classify(data, self.dependencies) # type: ignore # pylint: disable=protected-access + except AttributeError: + pass # Target is not a Model, no classify + return target, target.__class__.__name__ # type: ignore + + def failsafe_deserialize(self, target_obj, data, content_type=None): + """Ignores any errors encountered in deserialization, + and falls back to not deserializing the object. Recommended + for use in error deserialization, as we want to return the + HttpResponseError to users, and not have them deal with + a deserialization error. + + :param str target_obj: The target object type to deserialize to. + :param str/dict data: The response data to deserialize. + :param str content_type: Swagger "produces" if available. + :return: Deserialized object. + :rtype: object + """ + try: + return self(target_obj, data, content_type=content_type) + except: # pylint: disable=bare-except + _LOGGER.debug( + "Ran into a deserialization error. 
Ignoring since this is failsafe deserialization", exc_info=True + ) + return None + + @staticmethod + def _unpack_content(raw_data, content_type=None): + """Extract the correct structure for deserialization. + + If raw_data is a PipelineResponse, try to extract the result of RawDeserializer. + if we can't, raise. Your Pipeline should have a RawDeserializer. + + If not a pipeline response and raw_data is bytes or string, use content-type + to decode it. If no content-type, try JSON. + + If raw_data is something else, bypass all logic and return it directly. + + :param obj raw_data: Data to be processed. + :param str content_type: How to parse if raw_data is a string/bytes. + :raises JSONDecodeError: If JSON is requested and parsing is impossible. + :raises UnicodeDecodeError: If bytes is not UTF8 + :rtype: object + :return: Unpacked content. + """ + # Assume this is enough to detect a Pipeline Response without importing it + context = getattr(raw_data, "context", {}) + if context: + if RawDeserializer.CONTEXT_NAME in context: + return context[RawDeserializer.CONTEXT_NAME] + raise ValueError("This pipeline didn't have the RawDeserializer policy; can't deserialize") + + # Assume this is enough to recognize universal_http.ClientResponse without importing it + if hasattr(raw_data, "body"): + return RawDeserializer.deserialize_from_http_generics(raw_data.text(), raw_data.headers) + + # Assume this enough to recognize requests.Response without importing it. + if hasattr(raw_data, "_content_consumed"): + return RawDeserializer.deserialize_from_http_generics(raw_data.text, raw_data.headers) + + if isinstance(raw_data, (str, bytes)) or hasattr(raw_data, "read"): + return RawDeserializer.deserialize_from_text(raw_data, content_type) # type: ignore + return raw_data + + def _instantiate_model(self, response, attrs, additional_properties=None): + """Instantiate a response model passing in deserialized args. + + :param Response response: The response model class. 
def deserialize_data(self, data, data_type):  # pylint: disable=too-many-return-statements
    """Process data for deserialization according to data type.

    Dispatch order: basic types, registered type handlers, list/dict
    containers (``[T]`` / ``{T}``), then known dependencies (enums and models).

    :param str data: The response string to be deserialized.
    :param str data_type: The type to deserialize to.
    :raises DeserializationError: if deserialization fails.
    :return: Deserialized object.
    :rtype: object
    """
    if data is None:
        return data

    try:
        # No type information: hand the data back untouched.
        if not data_type:
            return data
        if data_type in self.basic_types.values():
            return self.deserialize_basic(data, data_type)
        if data_type in self.deserialize_type:
            # Data already of the expected Python type needs no parsing.
            if isinstance(data, self.deserialize_expected_types.get(data_type, tuple())):
                return data

            is_a_text_parsing_type = lambda x: x not in [  # pylint: disable=unnecessary-lambda-assignment
                "object",
                "[]",
                r"{}",
            ]
            # An XML node without text has nothing to parse for text-based types.
            if isinstance(data, ET.Element) and is_a_text_parsing_type(data_type) and not data.text:
                return None
            data_val = self.deserialize_type[data_type](data)
            return data_val

        # "[T]" -> "[]" and "{T}" -> "{}": container handlers receive the inner type name.
        iter_type = data_type[0] + data_type[-1]
        if iter_type in self.deserialize_type:
            return self.deserialize_type[iter_type](data, data_type[1:-1])

        # A KeyError from this lookup is not caught by the except clause below.
        obj_type = self.dependencies[data_type]
        if issubclass(obj_type, Enum):
            if isinstance(data, ET.Element):
                data = data.text
            return self.deserialize_enum(data, obj_type)

    except (ValueError, TypeError, AttributeError) as err:
        msg = "Unable to deserialize response data."
        msg += " Data: {}, {}".format(data, data_type)
        raise DeserializationError(msg) from err
    # Reached only for non-enum dependencies: deserialize as a model, outside the try.
    return self._deserialize(obj_type, data)
def deserialize_object(self, attr, **kwargs):  # pylint: disable=too-many-return-statements
    """Deserialize a generic object.

    Basic values are converted through the basic-type handlers; dicts and
    lists are walked recursively. XML elements are returned untouched.

    :param dict attr: Dictionary to be deserialized.
    :return: Deserialized object.
    :rtype: dict
    :raises TypeError: if non-builtin datatype encountered.
    """
    if attr is None:
        return None
    if isinstance(attr, ET.Element):
        # Do not recurse on XML, just return the tree as-is
        return attr
    if isinstance(attr, str):
        return self.deserialize_basic(attr, "str")

    kind = type(attr)
    if kind in self.basic_types:
        return self.deserialize_basic(attr, self.basic_types[kind])
    if kind is _long_type:
        return self.deserialize_long(attr)

    if kind == list:
        items = []
        for element in attr:
            try:
                items.append(self.deserialize_object(element, **kwargs))
            except ValueError:
                # Elements that fail with ValueError are dropped from the result.
                pass
        return items

    if kind == dict:
        mapping = {}
        for name, element in attr.items():
            try:
                mapping[name] = self.deserialize_object(element, **kwargs)
            except ValueError:
                # Entries that fail with ValueError are kept, with a None value.
                mapping[name] = None
        return mapping

    error = "Cannot deserialize generic object with type: "
    raise TypeError(error + str(kind))
+ + :param str attr: response string to be deserialized. + :param str data_type: deserialization data type. + :return: Deserialized basic type. + :rtype: str, int, float or bool + :raises TypeError: if string format is not valid or data_type is not one of str, int, float, bool. + """ + # If we're here, data is supposed to be a basic type. + # If it's still an XML node, take the text + if isinstance(attr, ET.Element): + attr = attr.text + if not attr: + if data_type == "str": + # None or '', node is empty string. + return "" + # None or '', node with a strong type is None. + # Don't try to model "empty bool" or "empty int" + return None + + if data_type == "bool": + if attr in [True, False, 1, 0]: + return bool(attr) + if isinstance(attr, str): + if attr.lower() in ["true", "1"]: + return True + if attr.lower() in ["false", "0"]: + return False + raise TypeError("Invalid boolean value: {}".format(attr)) + + if data_type == "str": + return self.deserialize_unicode(attr) + if data_type == "int": + return int(attr) + if data_type == "float": + return float(attr) + raise TypeError("Unknown basic data type: {}".format(data_type)) + + @staticmethod + def deserialize_unicode(data): + """Preserve unicode objects in Python 2, otherwise return data + as a string. + + :param str data: response string to be deserialized. + :return: Deserialized string. + :rtype: str or unicode + """ + # We might be here because we have an enum modeled as string, + # and we try to deserialize a partial dict with enum inside + if isinstance(data, Enum): + return data + + # Consider this is real string + try: + if isinstance(data, unicode): # type: ignore + return data + except NameError: + return str(data) + return str(data) + + @staticmethod + def deserialize_enum(data, enum_obj): + """Deserialize string into enum object. + + If the string is not a valid enum value it will be returned as-is + and a warning will be logged. + + :param str data: Response string to be deserialized. 
If this value is + None or invalid it will be returned as-is. + :param Enum enum_obj: Enum object to deserialize to. + :return: Deserialized enum object. + :rtype: Enum + """ + if isinstance(data, enum_obj) or data is None: + return data + if isinstance(data, Enum): + data = data.value + if isinstance(data, int): + # Workaround. We might consider remove it in the future. + try: + return list(enum_obj.__members__.values())[data] + except IndexError as exc: + error = "{!r} is not a valid index for enum {!r}" + raise DeserializationError(error.format(data, enum_obj)) from exc + try: + return enum_obj(str(data)) + except ValueError: + for enum_value in enum_obj: + if enum_value.value.lower() == str(data).lower(): + return enum_value + # We don't fail anymore for unknown value, we deserialize as a string + _LOGGER.warning("Deserializer is not able to find %s as valid enum in %s", data, enum_obj) + return Deserializer.deserialize_unicode(data) + + @staticmethod + def deserialize_bytearray(attr): + """Deserialize string into bytearray. + + :param str attr: response string to be deserialized. + :return: Deserialized bytearray + :rtype: bytearray + :raises TypeError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + return bytearray(b64decode(attr)) # type: ignore + + @staticmethod + def deserialize_base64(attr): + """Deserialize base64 encoded string into string. + + :param str attr: response string to be deserialized. + :return: Deserialized base64 string + :rtype: bytearray + :raises TypeError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + padding = "=" * (3 - (len(attr) + 3) % 4) # type: ignore + attr = attr + padding # type: ignore + encoded = attr.replace("-", "+").replace("_", "/") + return b64decode(encoded) + + @staticmethod + def deserialize_decimal(attr): + """Deserialize string into Decimal object. + + :param str attr: response string to be deserialized. 
+ :return: Deserialized decimal + :raises DeserializationError: if string format invalid. + :rtype: decimal + """ + if isinstance(attr, ET.Element): + attr = attr.text + try: + return decimal.Decimal(str(attr)) # type: ignore + except decimal.DecimalException as err: + msg = "Invalid decimal {}".format(attr) + raise DeserializationError(msg) from err + + @staticmethod + def deserialize_long(attr): + """Deserialize string into long (Py2) or int (Py3). + + :param str attr: response string to be deserialized. + :return: Deserialized int + :rtype: long or int + :raises ValueError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + return _long_type(attr) # type: ignore + + @staticmethod + def deserialize_duration(attr): + """Deserialize ISO-8601 formatted string into TimeDelta object. + + :param str attr: response string to be deserialized. + :return: Deserialized duration + :rtype: TimeDelta + :raises DeserializationError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + try: + duration = isodate.parse_duration(attr) + except (ValueError, OverflowError, AttributeError) as err: + msg = "Cannot deserialize duration object." + raise DeserializationError(msg) from err + return duration + + @staticmethod + def deserialize_date(attr): + """Deserialize ISO-8601 formatted string into Date object. + + :param str attr: response string to be deserialized. + :return: Deserialized date + :rtype: Date + :raises DeserializationError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + if re.search(r"[^\W\d_]", attr, re.I + re.U): # type: ignore + raise DeserializationError("Date must have only digits and -. Received: %s" % attr) + # This must NOT use defaultmonth/defaultday. Using None ensure this raises an exception. 
+ return isodate.parse_date(attr, defaultmonth=0, defaultday=0) + + @staticmethod + def deserialize_time(attr): + """Deserialize ISO-8601 formatted string into time object. + + :param str attr: response string to be deserialized. + :return: Deserialized time + :rtype: datetime.time + :raises DeserializationError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + if re.search(r"[^\W\d_]", attr, re.I + re.U): # type: ignore + raise DeserializationError("Date must have only digits and -. Received: %s" % attr) + return isodate.parse_time(attr) + + @staticmethod + def deserialize_rfc(attr): + """Deserialize RFC-1123 formatted string into Datetime object. + + :param str attr: response string to be deserialized. + :return: Deserialized RFC datetime + :rtype: Datetime + :raises DeserializationError: if string format invalid. + """ + if isinstance(attr, ET.Element): + attr = attr.text + try: + parsed_date = email.utils.parsedate_tz(attr) # type: ignore + date_obj = datetime.datetime( + *parsed_date[:6], tzinfo=datetime.timezone(datetime.timedelta(minutes=(parsed_date[9] or 0) / 60)) + ) + if not date_obj.tzinfo: + date_obj = date_obj.astimezone(tz=TZ_UTC) + except ValueError as err: + msg = "Cannot deserialize to rfc datetime object." + raise DeserializationError(msg) from err + return date_obj + + @staticmethod + def deserialize_iso(attr): + """Deserialize ISO-8601 formatted string into Datetime object. + + :param str attr: response string to be deserialized. + :return: Deserialized ISO datetime + :rtype: Datetime + :raises DeserializationError: if string format invalid. 
+ """ + if isinstance(attr, ET.Element): + attr = attr.text + try: + attr = attr.upper() # type: ignore + match = Deserializer.valid_date.match(attr) + if not match: + raise ValueError("Invalid datetime string: " + attr) + + check_decimal = attr.split(".") + if len(check_decimal) > 1: + decimal_str = "" + for digit in check_decimal[1]: + if digit.isdigit(): + decimal_str += digit + else: + break + if len(decimal_str) > 6: + attr = attr.replace(decimal_str, decimal_str[0:6]) + + date_obj = isodate.parse_datetime(attr) + test_utc = date_obj.utctimetuple() + if test_utc.tm_year > 9999 or test_utc.tm_year < 1: + raise OverflowError("Hit max or min date") + except (ValueError, OverflowError, AttributeError) as err: + msg = "Cannot deserialize datetime object." + raise DeserializationError(msg) from err + return date_obj + + @staticmethod + def deserialize_unix(attr): + """Serialize Datetime object into IntTime format. + This is represented as seconds. + + :param int attr: Object to be serialized. + :return: Deserialized datetime + :rtype: Datetime + :raises DeserializationError: if format invalid + """ + if isinstance(attr, ET.Element): + attr = int(attr.text) # type: ignore + try: + attr = int(attr) + date_obj = datetime.datetime.fromtimestamp(attr, TZ_UTC) + except ValueError as err: + msg = "Cannot deserialize to unix datetime object." + raise DeserializationError(msg) from err + return date_obj diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_version.py b/sdk/voicelive/azure-ai-voicelive/azure/_version.py new file mode 100644 index 000000000000..be71c81bd282 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/azure/_version.py @@ -0,0 +1,9 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# Code generated by Microsoft (R) Python Code Generator. 
+# Changes may cause incorrect behavior and will be lost if the code is regenerated. +# -------------------------------------------------------------------------- + +VERSION = "1.0.0b1" diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py index 98d54bfd1880..52a39f70b9e2 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py @@ -22,8 +22,7 @@ import aiohttp except ImportError as exc: raise ImportError( - "aiohttp is required for azure-ai-voicelive. " - "Install it with: pip install azure-ai-voicelive[aiohttp]" + "aiohttp is required for azure-ai-voicelive. " "Install it with: pip install azure-ai-voicelive[aiohttp]" ) from exc from azure.ai.voicelive.models._models import ( ClientEventConversationItemCreate, @@ -512,7 +511,7 @@ async def recv(self) -> ServerEvent: raise ConnectionClosed(1006, "Empty WebSocket frame") payload = json.loads(raw.decode("utf-8")) - event = cast("ServerEvent", ServerEvent.deserialize(payload)) + event = cast("ServerEvent", ServerEvent._deserialize(payload, [])) return event except (ValueError, TypeError) as e: log.error("Error parsing message: %s", e) diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py index 74cea898bcad..9b2114496b5e 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/__init__.py @@ -14,6 +14,10 @@ from ._models import ( # type: ignore + ActionFind, + ActionOpenPage, + ActionSearch, + ActionSearchSource, AgentConfig, Animation, AssistantMessageItem, @@ -21,6 +25,7 @@ AudioInputTranscriptionOptions, AudioNoiseReduction, AvatarConfig, + AzureAvatarVoiceSyncVoice, AzureCustomVoice, AzurePersonalVoice, AzureSemanticDetection, @@ -46,6 
+51,7 @@ ClientEventInputAudioTurnCancel, ClientEventInputAudioTurnEnd, ClientEventInputAudioTurnStart, + ClientEventOutputAudioBufferClear, ClientEventResponseCancel, ClientEventResponseCreate, ClientEventSessionAvatarConnect, @@ -55,6 +61,7 @@ ConversationRequestItem, EouDetection, ErrorResponse, + FileSearchResult, FunctionCallItem, FunctionCallOutputItem, FunctionTool, @@ -82,6 +89,7 @@ ResponseCancelledDetails, ResponseCreateParams, ResponseFailedDetails, + ResponseFileSearchCallItem, ResponseFunctionCallItem, ResponseFunctionCallOutputItem, ResponseIncompleteDetails, @@ -94,6 +102,7 @@ ResponseSession, ResponseStatusDetails, ResponseTextContentPart, + ResponseWebSearchCallItem, Scene, ServerEvent, ServerEventConversationItemCreated, @@ -112,6 +121,7 @@ ServerEventMcpListToolsCompleted, ServerEventMcpListToolsFailed, ServerEventMcpListToolsInProgress, + ServerEventOutputAudioBufferCleared, ServerEventResponseAnimationBlendshapeDelta, ServerEventResponseAnimationBlendshapeDone, ServerEventResponseAnimationVisemeDelta, @@ -120,12 +130,16 @@ ServerEventResponseAudioDone, ServerEventResponseAudioTimestampDelta, ServerEventResponseAudioTimestampDone, + ServerEventResponseAudioTranscriptAnnotationAdded, ServerEventResponseAudioTranscriptDelta, ServerEventResponseAudioTranscriptDone, ServerEventResponseContentPartAdded, ServerEventResponseContentPartDone, ServerEventResponseCreated, ServerEventResponseDone, + ServerEventResponseFileSearchCallCompleted, + ServerEventResponseFileSearchCallInProgress, + ServerEventResponseFileSearchCallSearching, ServerEventResponseFunctionCallArgumentsDelta, ServerEventResponseFunctionCallArgumentsDone, ServerEventResponseMcpCallArgumentsDelta, @@ -137,7 +151,13 @@ ServerEventResponseOutputItemDone, ServerEventResponseTextDelta, ServerEventResponseTextDone, + ServerEventResponseVideoDelta, + ServerEventResponseWebSearchCallCompleted, + ServerEventResponseWebSearchCallInProgress, + ServerEventResponseWebSearchCallSearching, 
ServerEventSessionAvatarConnecting, + ServerEventSessionAvatarSwitchToIdle, + ServerEventSessionAvatarSwitchToSpeaking, ServerEventSessionCreated, ServerEventSessionUpdated, ServerEventWarning, @@ -150,6 +170,8 @@ Tool, ToolChoiceFunctionSelection, ToolChoiceSelection, + TranscriptionPhrase, + TranscriptionWord, TurnDetection, UserMessageItem, VideoCrop, @@ -184,6 +206,7 @@ ResponseItemStatus, ResponseStatus, ServerEventType, + SessionIncludeOption, ToolChoiceLiteral, ToolType, TurnDetectionType, @@ -193,6 +216,10 @@ from ._patch import patch_sdk as _patch_sdk __all__ = [ + "ActionFind", + "ActionOpenPage", + "ActionSearch", + "ActionSearchSource", "AgentConfig", "Animation", "AssistantMessageItem", @@ -200,6 +227,7 @@ "AudioInputTranscriptionOptions", "AudioNoiseReduction", "AvatarConfig", + "AzureAvatarVoiceSyncVoice", "AzureCustomVoice", "AzurePersonalVoice", "AzureSemanticDetection", @@ -225,6 +253,7 @@ "ClientEventInputAudioTurnCancel", "ClientEventInputAudioTurnEnd", "ClientEventInputAudioTurnStart", + "ClientEventOutputAudioBufferClear", "ClientEventResponseCancel", "ClientEventResponseCreate", "ClientEventSessionAvatarConnect", @@ -234,6 +263,7 @@ "ConversationRequestItem", "EouDetection", "ErrorResponse", + "FileSearchResult", "FunctionCallItem", "FunctionCallOutputItem", "FunctionTool", @@ -261,6 +291,7 @@ "ResponseCancelledDetails", "ResponseCreateParams", "ResponseFailedDetails", + "ResponseFileSearchCallItem", "ResponseFunctionCallItem", "ResponseFunctionCallOutputItem", "ResponseIncompleteDetails", @@ -273,6 +304,7 @@ "ResponseSession", "ResponseStatusDetails", "ResponseTextContentPart", + "ResponseWebSearchCallItem", "Scene", "ServerEvent", "ServerEventConversationItemCreated", @@ -291,6 +323,7 @@ "ServerEventMcpListToolsCompleted", "ServerEventMcpListToolsFailed", "ServerEventMcpListToolsInProgress", + "ServerEventOutputAudioBufferCleared", "ServerEventResponseAnimationBlendshapeDelta", "ServerEventResponseAnimationBlendshapeDone", 
"ServerEventResponseAnimationVisemeDelta", @@ -299,12 +332,16 @@ "ServerEventResponseAudioDone", "ServerEventResponseAudioTimestampDelta", "ServerEventResponseAudioTimestampDone", + "ServerEventResponseAudioTranscriptAnnotationAdded", "ServerEventResponseAudioTranscriptDelta", "ServerEventResponseAudioTranscriptDone", "ServerEventResponseContentPartAdded", "ServerEventResponseContentPartDone", "ServerEventResponseCreated", "ServerEventResponseDone", + "ServerEventResponseFileSearchCallCompleted", + "ServerEventResponseFileSearchCallInProgress", + "ServerEventResponseFileSearchCallSearching", "ServerEventResponseFunctionCallArgumentsDelta", "ServerEventResponseFunctionCallArgumentsDone", "ServerEventResponseMcpCallArgumentsDelta", @@ -316,7 +353,13 @@ "ServerEventResponseOutputItemDone", "ServerEventResponseTextDelta", "ServerEventResponseTextDone", + "ServerEventResponseVideoDelta", + "ServerEventResponseWebSearchCallCompleted", + "ServerEventResponseWebSearchCallInProgress", + "ServerEventResponseWebSearchCallSearching", "ServerEventSessionAvatarConnecting", + "ServerEventSessionAvatarSwitchToIdle", + "ServerEventSessionAvatarSwitchToSpeaking", "ServerEventSessionCreated", "ServerEventSessionUpdated", "ServerEventWarning", @@ -329,6 +372,8 @@ "Tool", "ToolChoiceFunctionSelection", "ToolChoiceSelection", + "TranscriptionPhrase", + "TranscriptionWord", "TurnDetection", "UserMessageItem", "VideoCrop", @@ -360,6 +405,7 @@ "ResponseItemStatus", "ResponseStatus", "ServerEventType", + "SessionIncludeOption", "ToolChoiceLiteral", "ToolType", "TurnDetectionType", diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py index 6512161f6f78..99f13766f574 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_enums.py @@ -53,6 +53,8 @@ class AzureVoiceType(str, Enum, 
metaclass=CaseInsensitiveEnumMeta): """Azure standard voice.""" AZURE_PERSONAL = "azure-personal" """Azure personal voice.""" + AVATAR_VOICE_SYNC = "avatar-voice-sync" + """Azure avatar voice sync.""" class ClientEventType(str, Enum, metaclass=CaseInsensitiveEnumMeta): @@ -92,6 +94,8 @@ class ClientEventType(str, Enum, metaclass=CaseInsensitiveEnumMeta): """SESSION_AVATAR_CONNECT.""" MCP_APPROVAL_RESPONSE = "mcp_approval_response" """MCP_APPROVAL_RESPONSE.""" + OUTPUT_AUDIO_BUFFER_CLEAR = "output_audio_buffer.clear" + """Client request to clear the avatar output buffer.""" class ContentPartType(str, Enum, metaclass=CaseInsensitiveEnumMeta): @@ -177,6 +181,10 @@ class ItemType(str, Enum, metaclass=CaseInsensitiveEnumMeta): """MCP_APPROVAL_REQUEST.""" MCP_APPROVAL_RESPONSE = "mcp_approval_response" """MCP_APPROVAL_RESPONSE.""" + WEB_SEARCH_CALL = "web_search_call" + """Web search call item.""" + FILE_SEARCH_CALL = "file_search_call" + """File search call item.""" class MCPApprovalType(str, Enum, metaclass=CaseInsensitiveEnumMeta): @@ -259,8 +267,10 @@ class PersonalVoiceModels(str, Enum, metaclass=CaseInsensitiveEnumMeta): """Use the latest Dragon model.""" PHOENIX_LATEST_NEURAL = "PhoenixLatestNeural" """Use the latest Phoenix model.""" - PHOENIX_V2_NEURAL = "PhoenixV2Neural" - """Use the Phoenix V2 model.""" + DRAGON_HD_OMNI_LATEST_NEURAL = "DragonHDOmniLatestNeural" + """Use the latest Dragon HD Omni model.""" + MAI_VOICE1 = "MAI-Voice-1" + """Use the MAI-Voice-1 model.""" class PhotoAvatarBaseModes(str, Enum, metaclass=CaseInsensitiveEnumMeta): @@ -418,6 +428,39 @@ class ServerEventType(str, Enum, metaclass=CaseInsensitiveEnumMeta): """RESPONSE_MCP_CALL_COMPLETED.""" RESPONSE_MCP_CALL_FAILED = "response.mcp_call.failed" """RESPONSE_MCP_CALL_FAILED.""" + SESSION_AVATAR_SWITCH_TO_SPEAKING = "session.avatar.switch_to_speaking" + """Avatar switches to speaking state.""" + SESSION_AVATAR_SWITCH_TO_IDLE = "session.avatar.switch_to_idle" + """Avatar switches to idle 
state.""" + RESPONSE_VIDEO_DELTA = "response.video.delta" + """Delta update for avatar video frames.""" + RESPONSE_WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching" + """Web search call is searching.""" + RESPONSE_WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress" + """Web search call is in progress.""" + RESPONSE_WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed" + """Web search call completed.""" + RESPONSE_FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching" + """File search call is searching.""" + RESPONSE_FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress" + """File search call is in progress.""" + RESPONSE_FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed" + """File search call completed.""" + OUTPUT_AUDIO_BUFFER_CLEARED = "output_audio_buffer.cleared" + """Output audio buffer has been cleared.""" + RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED = "response.audio_transcript.annotation.added" + """Audio transcript annotation added.""" + + +class SessionIncludeOption(str, Enum, metaclass=CaseInsensitiveEnumMeta): + """Options for what additional data to include in session responses.""" + + ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS = "item.input_audio_transcription.logprobs" + """Include log probabilities for input audio transcription.""" + ITEM_INPUT_AUDIO_TRANSCRIPTION_PHRASES = "item.input_audio_transcription.phrases" + """Include phrase-level details for input audio transcription.""" + FILE_SEARCH_CALL_RESULTS = "file_search_call.results" + """Include file search call results.""" class ToolChoiceLiteral(str, Enum, metaclass=CaseInsensitiveEnumMeta): diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py index 2f7200c86799..48377eb3897a 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py +++ 
b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py @@ -10,7 +10,7 @@ from typing import Any, Literal, Mapping, Optional, TYPE_CHECKING, Union, overload -from .._utils.model_base import Model as _Model, rest_discriminator, rest_field +from ...._utils.model_base import Model as _Model, rest_discriminator, rest_field from ._enums import ( AzureVoiceType, ClientEventType, @@ -25,7 +25,152 @@ ) if TYPE_CHECKING: - from .. import _types, models as _models + from .. import models as _models + from .... import _types + + +class ActionFind(_Model): + """A find action to search text within a page. + + :ivar pattern: The pattern or text to search for within the page. Required. + :vartype pattern: str + :ivar type: The action type. Always 'find'. Required. Default value is "find". + :vartype type: str + :ivar url: The URL of the page searched for the pattern. Required. + :vartype url: str + """ + + pattern: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The pattern or text to search for within the page. Required.""" + type: Literal["find"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The action type. Always 'find'. Required. Default value is \"find\".""" + url: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The URL of the page searched for the pattern. Required.""" + + @overload + def __init__( + self, + *, + pattern: str, + url: str, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type: Literal["find"] = "find" + + +class ActionOpenPage(_Model): + """An open page action. + + :ivar type: The action type. Always 'open_page'. Required. Default value is "open_page". 
+ :vartype type: str + :ivar url: The URL opened by the model. Required. + :vartype url: str + """ + + type: Literal["open_page"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The action type. Always 'open_page'. Required. Default value is \"open_page\".""" + url: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The URL opened by the model. Required.""" + + @overload + def __init__( + self, + *, + url: str, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type: Literal["open_page"] = "open_page" + + +class ActionSearch(_Model): + """A web search action. + + :ivar query: The search query. + :vartype query: str + :ivar type: The action type. Always 'search'. Required. Default value is "search". + :vartype type: str + :ivar sources: The sources used in the search. + :vartype sources: list[~azure.ai.voicelive.models.ActionSearchSource] + """ + + query: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The search query.""" + type: Literal["search"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The action type. Always 'search'. Required. Default value is \"search\".""" + sources: Optional[list["_models.ActionSearchSource"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """The sources used in the search.""" + + @overload + def __init__( + self, + *, + query: Optional[str] = None, + sources: Optional[list["_models.ActionSearchSource"]] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. 
+ :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type: Literal["search"] = "search" + + +class ActionSearchSource(_Model): + """A search action source URL. + + :ivar type: The type of source. Always 'url'. Required. Default value is "url". + :vartype type: str + :ivar url: The URL of the source. Required. + :vartype url: str + """ + + type: Literal["url"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The type of source. Always 'url'. Required. Default value is \"url\".""" + url: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The URL of the source. Required.""" + + @overload + def __init__( + self, + *, + url: str, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type: Literal["url"] = "url" class AgentConfig(_Model): @@ -118,7 +263,8 @@ class ConversationRequestItem(_Model): FunctionCallItem, FunctionCallOutputItem, MCPApprovalResponseRequestItem, MessageItem :ivar type: Required. Known values are: "message", "function_call", "function_call_output", - "mcp_list_tools", "mcp_call", "mcp_approval_request", and "mcp_approval_response". + "mcp_list_tools", "mcp_call", "mcp_approval_request", "mcp_approval_response", + "web_search_call", and "file_search_call". :vartype type: str or ~azure.ai.voicelive.models.ItemType :ivar id: :vartype id: str @@ -127,7 +273,8 @@ class ConversationRequestItem(_Model): __mapping__: dict[str, _Model] = {} type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) """Required. 
Known values are: \"message\", \"function_call\", \"function_call_output\", - \"mcp_list_tools\", \"mcp_call\", \"mcp_approval_request\", and \"mcp_approval_response\".""" + \"mcp_list_tools\", \"mcp_call\", \"mcp_approval_request\", \"mcp_approval_response\", + \"web_search_call\", and \"file_search_call\".""" id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) @overload @@ -266,10 +413,11 @@ class AudioInputTranscriptionOptions(_Model): """Configuration for input audio transcription. :ivar model: The transcription model to use. Supported values: 'whisper-1', - 'gpt-4o-transcribe', 'gpt-4o-mini-transcribe', 'azure-speech'. Required. Is one of the - following types: Literal["whisper-1"], Literal["gpt-4o-transcribe"], - Literal["gpt-4o-mini-transcribe"], Literal["azure-speech"], str - :vartype model: str + 'gpt-4o-transcribe', 'gpt-4o-mini-transcribe', 'mai-transcribe-1', 'azure-speech'. Required. Is + one of the following types: Literal["whisper-1"], Literal["gpt-4o-transcribe"], + Literal["gpt-4o-mini-transcribe"], Literal["gpt-4o-transcribe-diarize"], + Literal["mai-transcribe-1"], Literal["azure-speech"], str + :vartype model: str or str or str or str or str or str or str :ivar language: Optional language code in BCP-47 (e.g., 'en-US'), or ISO-639-1 (e.g., 'en'), or multi languages with auto detection, (e.g., 'en,zh'). :vartype language: str @@ -283,13 +431,16 @@ class AudioInputTranscriptionOptions(_Model): Literal["whisper-1"], Literal["gpt-4o-transcribe"], Literal["gpt-4o-mini-transcribe"], + Literal["gpt-4o-transcribe-diarize"], + Literal["mai-transcribe-1"], Literal["azure-speech"], str, ] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The transcription model to use. Supported values: 'whisper-1', 'gpt-4o-transcribe', - 'gpt-4o-mini-transcribe', 'azure-speech'. Required. 
Is one of the following types: - Literal[\"whisper-1\"], Literal[\"gpt-4o-transcribe\"], Literal[\"gpt-4o-mini-transcribe\"], - Literal[\"azure-speech\"], str""" + 'gpt-4o-mini-transcribe', 'mai-transcribe-1', 'azure-speech'. Required. Is one of the following + types: Literal[\"whisper-1\"], Literal[\"gpt-4o-transcribe\"], + Literal[\"gpt-4o-mini-transcribe\"], Literal[\"gpt-4o-transcribe-diarize\"], + Literal[\"mai-transcribe-1\"], Literal[\"azure-speech\"], str""" language: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Optional language code in BCP-47 (e.g., 'en-US'), or ISO-639-1 (e.g., 'en'), or multi languages with auto detection, (e.g., 'en,zh').""" @@ -306,6 +457,8 @@ def __init__( Literal["whisper-1"], Literal["gpt-4o-transcribe"], Literal["gpt-4o-mini-transcribe"], + Literal["gpt-4o-transcribe-diarize"], + Literal["mai-transcribe-1"], Literal["azure-speech"], str, ], @@ -330,7 +483,7 @@ class AudioNoiseReduction(_Model): :ivar type: The type of noise reduction model. Required. Is one of the following types: Literal["azure_deep_noise_suppression"], Literal["near_field"], Literal["far_field"], str - :vartype type: str + :vartype type: str or str or str or str """ type: Union[Literal["azure_deep_noise_suppression"], Literal["near_field"], Literal["far_field"], str] = rest_field( @@ -448,17 +601,17 @@ class AzureVoice(_Model): """Base for Azure voice configurations. You probably want to use the sub-classes and not this class directly. Known sub-classes are: - AzureCustomVoice, AzurePersonalVoice, AzureStandardVoice + AzureAvatarVoiceSyncVoice, AzureCustomVoice, AzurePersonalVoice, AzureStandardVoice :ivar type: The type of the Azure voice. Required. Known values are: "azure-custom", - "azure-standard", and "azure-personal". + "azure-standard", "azure-personal", and "avatar-voice-sync". 
:vartype type: str or ~azure.ai.voicelive.models.AzureVoiceType """ __mapping__: dict[str, _Model] = {} type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) """The type of the Azure voice. Required. Known values are: \"azure-custom\", \"azure-standard\", - and \"azure-personal\".""" + \"azure-personal\", and \"avatar-voice-sync\".""" @overload def __init__( @@ -478,6 +631,142 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class AzureAvatarVoiceSyncVoice(AzureVoice, discriminator="avatar-voice-sync"): + """Azure avatar voice sync configuration. Uses personal voice synthesis with avatar character. + + :ivar type: Required. Azure avatar voice sync. + :vartype type: str or ~azure.ai.voicelive.models.AVATAR_VOICE_SYNC + :ivar model: Underlying neural model to use. Required. Known values are: "DragonLatestNeural", + "PhoenixLatestNeural", "DragonHDOmniLatestNeural", and "MAI-Voice-1". + :vartype model: str or ~azure.ai.voicelive.models.PersonalVoiceModels + :ivar temperature: Temperature must be between 0.0 and 1.0. + :vartype temperature: float + :ivar custom_lexicon_url: URL of a custom lexicon file for pronunciation customization. + :vartype custom_lexicon_url: str + :ivar custom_text_normalization_url: URL of a custom text normalization endpoint. + :vartype custom_text_normalization_url: str + :ivar prefer_locales: Preferred locales in BCP-47 format that change the accents of languages. + If not set, TTS uses the default accent for each language (e.g., American English for English, + Mexican Spanish for Spanish). Setting this to ``["en-GB", "es-ES"]`` changes the English accent + to British English and the Spanish accent to European Spanish, while TTS can still speak other + languages like French or Chinese with their default accents. + :vartype prefer_locales: list[str] + :ivar locale: Enforced locale in BCP-47 format for TTS output. 
If set, TTS will always use the + specified locale to speak. For example, setting locale to ``en-US`` forces American English + accent for all text content, even if the text is in another language, and TTS will output + silence for unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS + automatically detects the language from the text content. + :vartype locale: str + :ivar style: Speaking style for the voice (e.g., 'cheerful', 'sad'). + :vartype style: str + :ivar pitch: Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``). + :vartype pitch: str + :ivar rate: Speaking rate adjustment for the voice output. Follows the same rules as the + ``rate`` attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``). + :vartype rate: str + :ivar volume: Volume adjustment for the voice output. Follows the same rules as the ``volume`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``). 
+ :vartype volume: str + """ + + type: Literal[AzureVoiceType.AVATAR_VOICE_SYNC] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """Required. Azure avatar voice sync.""" + model: Union[str, "_models.PersonalVoiceModels"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """Underlying neural model to use. Required. Known values are: \"DragonLatestNeural\", + \"PhoenixLatestNeural\", \"DragonHDOmniLatestNeural\", and \"MAI-Voice-1\".""" + temperature: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Temperature must be between 0.0 and 1.0.""" + custom_lexicon_url: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """URL of a custom lexicon file for pronunciation customization.""" + custom_text_normalization_url: Optional[str] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """URL of a custom text normalization endpoint.""" + prefer_locales: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Preferred locales in BCP-47 format that change the accents of languages. If not set, TTS uses + the default accent for each language (e.g., American English for English, Mexican Spanish for + Spanish). Setting this to ``[\"en-GB\", \"es-ES\"]`` changes the English accent to British + English and the Spanish accent to European Spanish, while TTS can still speak other languages + like French or Chinese with their default accents.""" + locale: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the specified + locale to speak. 
For example, setting locale to ``en-US`` forces American English accent for + all text content, even if the text is in another language, and TTS will output silence for + unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS automatically + detects the language from the text content.""" + style: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking style for the voice (e.g., 'cheerful', 'sad').""" + pitch: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` attribute of the + SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``).""" + rate: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking rate adjustment for the voice output. Follows the same rules as the ``rate`` attribute + of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``).""" + volume: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Volume adjustment for the voice output. Follows the same rules as the ``volume`` attribute of + the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). 
+ Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``).""" + + @overload + def __init__( + self, + *, + model: Union[str, "_models.PersonalVoiceModels"], + temperature: Optional[float] = None, + custom_lexicon_url: Optional[str] = None, + custom_text_normalization_url: Optional[str] = None, + prefer_locales: Optional[list[str]] = None, + locale: Optional[str] = None, + style: Optional[str] = None, + pitch: Optional[str] = None, + rate: Optional[str] = None, + volume: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = AzureVoiceType.AVATAR_VOICE_SYNC # type: ignore + + class AzureCustomVoice(AzureVoice, discriminator="azure-custom"): """Azure custom voice configuration. @@ -489,21 +778,47 @@ class AzureCustomVoice(AzureVoice, discriminator="azure-custom"): :vartype endpoint_id: str :ivar temperature: Temperature must be between 0.0 and 1.0. :vartype temperature: float - :ivar custom_lexicon_url: + :ivar custom_lexicon_url: URL of a custom lexicon file for pronunciation customization. :vartype custom_lexicon_url: str - :ivar custom_text_normalization_url: + :ivar custom_text_normalization_url: URL of a custom text normalization endpoint. :vartype custom_text_normalization_url: str - :ivar prefer_locales: + :ivar prefer_locales: Preferred locales in BCP-47 format that change the accents of languages. + If not set, TTS uses the default accent for each language (e.g., American English for English, + Mexican Spanish for Spanish). 
Setting this to ``["en-GB", "es-ES"]`` changes the English accent + to British English and the Spanish accent to European Spanish, while TTS can still speak other + languages like French or Chinese with their default accents. :vartype prefer_locales: list[str] - :ivar locale: + :ivar locale: Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the + specified locale to speak. For example, setting locale to ``en-US`` forces American English + accent for all text content, even if the text is in another language, and TTS will output + silence for unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS + automatically detects the language from the text content. :vartype locale: str - :ivar style: + :ivar style: Speaking style for the voice (e.g., 'cheerful', 'sad'). :vartype style: str - :ivar pitch: + :ivar pitch: Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``). :vartype pitch: str - :ivar rate: + :ivar rate: Speaking rate adjustment for the voice output. Follows the same rules as the + ``rate`` attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``). :vartype rate: str - :ivar volume: + :ivar volume: Volume adjustment for the voice output. 
Follows the same rules as the ``volume`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``). :vartype volume: str """ @@ -516,15 +831,49 @@ class AzureCustomVoice(AzureVoice, discriminator="azure-custom"): temperature: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Temperature must be between 0.0 and 1.0.""" custom_lexicon_url: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """URL of a custom lexicon file for pronunciation customization.""" custom_text_normalization_url: Optional[str] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """URL of a custom text normalization endpoint.""" prefer_locales: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Preferred locales in BCP-47 format that change the accents of languages. If not set, TTS uses + the default accent for each language (e.g., American English for English, Mexican Spanish for + Spanish). Setting this to ``[\"en-GB\", \"es-ES\"]`` changes the English accent to British + English and the Spanish accent to European Spanish, while TTS can still speak other languages + like French or Chinese with their default accents.""" locale: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the specified + locale to speak. 
For example, setting locale to ``en-US`` forces American English accent for + all text content, even if the text is in another language, and TTS will output silence for + unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS automatically + detects the language from the text content.""" style: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking style for the voice (e.g., 'cheerful', 'sad').""" pitch: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` attribute of the + SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``).""" rate: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking rate adjustment for the voice output. Follows the same rules as the ``rate`` attribute + of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``).""" volume: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Volume adjustment for the voice output. Follows the same rules as the ``volume`` attribute of + the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). 
+ Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``).""" @overload def __init__( @@ -565,23 +914,49 @@ class AzurePersonalVoice(AzureVoice, discriminator="azure-personal"): :ivar temperature: Temperature must be between 0.0 and 1.0. :vartype temperature: float :ivar model: Underlying neural model to use for personal voice. Required. Known values are: - "DragonLatestNeural", "PhoenixLatestNeural", and "PhoenixV2Neural". + "DragonLatestNeural", "PhoenixLatestNeural", "DragonHDOmniLatestNeural", and "MAI-Voice-1". :vartype model: str or ~azure.ai.voicelive.models.PersonalVoiceModels - :ivar custom_lexicon_url: + :ivar custom_lexicon_url: URL of a custom lexicon file for pronunciation customization. :vartype custom_lexicon_url: str - :ivar custom_text_normalization_url: + :ivar custom_text_normalization_url: URL of a custom text normalization endpoint. :vartype custom_text_normalization_url: str - :ivar prefer_locales: + :ivar prefer_locales: Preferred locales in BCP-47 format that change the accents of languages. + If not set, TTS uses the default accent for each language (e.g., American English for English, + Mexican Spanish for Spanish). Setting this to ``["en-GB", "es-ES"]`` changes the English accent + to British English and the Spanish accent to European Spanish, while TTS can still speak other + languages like French or Chinese with their default accents. :vartype prefer_locales: list[str] - :ivar locale: + :ivar locale: Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the + specified locale to speak. For example, setting locale to ``en-US`` forces American English + accent for all text content, even if the text is in another language, and TTS will output + silence for unsupported languages (e.g., Chinese text with ``en-US`` locale). 
If not set, TTS + automatically detects the language from the text content. :vartype locale: str - :ivar style: + :ivar style: Speaking style for the voice (e.g., 'cheerful', 'sad'). :vartype style: str - :ivar pitch: + :ivar pitch: Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``). :vartype pitch: str - :ivar rate: + :ivar rate: Speaking rate adjustment for the voice output. Follows the same rules as the + ``rate`` attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``). :vartype rate: str - :ivar volume: + :ivar volume: Volume adjustment for the voice output. Follows the same rules as the ``volume`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``). :vartype volume: str """ @@ -595,17 +970,52 @@ class AzurePersonalVoice(AzureVoice, discriminator="azure-personal"): visibility=["read", "create", "update", "delete", "query"] ) """Underlying neural model to use for personal voice. Required. 
Known values are: - \"DragonLatestNeural\", \"PhoenixLatestNeural\", and \"PhoenixV2Neural\".""" + \"DragonLatestNeural\", \"PhoenixLatestNeural\", \"DragonHDOmniLatestNeural\", and + \"MAI-Voice-1\".""" custom_lexicon_url: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """URL of a custom lexicon file for pronunciation customization.""" custom_text_normalization_url: Optional[str] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """URL of a custom text normalization endpoint.""" prefer_locales: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Preferred locales in BCP-47 format that change the accents of languages. If not set, TTS uses + the default accent for each language (e.g., American English for English, Mexican Spanish for + Spanish). Setting this to ``[\"en-GB\", \"es-ES\"]`` changes the English accent to British + English and the Spanish accent to European Spanish, while TTS can still speak other languages + like French or Chinese with their default accents.""" locale: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the specified + locale to speak. For example, setting locale to ``en-US`` forces American English accent for + all text content, even if the text is in another language, and TTS will output silence for + unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS automatically + detects the language from the text content.""" style: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking style for the voice (e.g., 'cheerful', 'sad').""" pitch: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Pitch adjustment for the voice output. 
Follows the same rules as the ``pitch`` attribute of the + SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``).""" rate: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking rate adjustment for the voice output. Follows the same rules as the ``rate`` attribute + of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``).""" volume: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Volume adjustment for the voice output. Follows the same rules as the ``volume`` attribute of + the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``).""" @overload def __init__( @@ -644,7 +1054,7 @@ class EouDetection(_Model): :ivar model: Required. 
Is one of the following types: Literal["semantic_detection_v1"], Literal["semantic_detection_v1_en"], Literal["semantic_detection_v1_multilingual"], str - :vartype model: str + :vartype model: str or str or str or str """ __mapping__: dict[str, _Model] = {} @@ -838,42 +1248,55 @@ class AzureSemanticVad(TurnDetection, discriminator="azure_semantic_vad"): :ivar type: Required. AZURE_SEMANTIC_VAD. :vartype type: str or ~azure.ai.voicelive.models.AZURE_SEMANTIC_VAD - :ivar threshold: + :ivar threshold: Activation threshold for VAD detection. Range: 0.0 to 1.0. :vartype threshold: float - :ivar prefix_padding_ms: + :ivar prefix_padding_ms: Amount of audio to include before speech is detected, in milliseconds. :vartype prefix_padding_ms: int - :ivar silence_duration_ms: + :ivar silence_duration_ms: Duration of silence required to end speech detection, in + milliseconds. :vartype silence_duration_ms: int - :ivar end_of_utterance_detection: + :ivar end_of_utterance_detection: Configuration for end-of-utterance detection. :vartype end_of_utterance_detection: ~azure.ai.voicelive.models.EouDetection - :ivar speech_duration_ms: + :ivar speech_duration_ms: Minimum speech duration in milliseconds to trigger detection. :vartype speech_duration_ms: int - :ivar remove_filler_words: + :ivar remove_filler_words: Whether to remove filler words (e.g., 'um', 'uh') from + transcription. :vartype remove_filler_words: bool - :ivar languages: + :ivar languages: List of BCP-47 language codes for speech detection. :vartype languages: list[str] - :ivar auto_truncate: + :ivar auto_truncate: Whether to automatically truncate the audio buffer when speech stops. :vartype auto_truncate: bool - :ivar create_response: + :ivar create_response: Whether to automatically create a response when speech stops. :vartype create_response: bool - :ivar interrupt_response: + :ivar interrupt_response: Whether to allow the user's speech to interrupt the assistant's + response. 
:vartype interrupt_response: bool """ type: Literal[TurnDetectionType.AZURE_SEMANTIC_VAD] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """Required. AZURE_SEMANTIC_VAD.""" threshold: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Activation threshold for VAD detection. Range: 0.0 to 1.0.""" prefix_padding_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Amount of audio to include before speech is detected, in milliseconds.""" silence_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Duration of silence required to end speech detection, in milliseconds.""" end_of_utterance_detection: Optional["_models.EouDetection"] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """Configuration for end-of-utterance detection.""" speech_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Minimum speech duration in milliseconds to trigger detection.""" remove_filler_words: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to remove filler words (e.g., 'um', 'uh') from transcription.""" languages: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """List of BCP-47 language codes for speech detection.""" auto_truncate: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically truncate the audio buffer when speech stops.""" create_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically create a response when speech stops.""" interrupt_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to allow the user's speech to 
interrupt the assistant's response.""" @overload def __init__( @@ -908,39 +1331,51 @@ class AzureSemanticVadEn(TurnDetection, discriminator="azure_semantic_vad_en"): :ivar type: Required. AZURE_SEMANTIC_VAD_EN. :vartype type: str or ~azure.ai.voicelive.models.AZURE_SEMANTIC_VAD_EN - :ivar threshold: + :ivar threshold: Activation threshold for VAD detection. Range: 0.0 to 1.0. :vartype threshold: float - :ivar prefix_padding_ms: + :ivar prefix_padding_ms: Amount of audio to include before speech is detected, in milliseconds. :vartype prefix_padding_ms: int - :ivar silence_duration_ms: + :ivar silence_duration_ms: Duration of silence required to end speech detection, in + milliseconds. :vartype silence_duration_ms: int - :ivar end_of_utterance_detection: + :ivar end_of_utterance_detection: Configuration for end-of-utterance detection. :vartype end_of_utterance_detection: ~azure.ai.voicelive.models.EouDetection - :ivar speech_duration_ms: + :ivar speech_duration_ms: Minimum speech duration in milliseconds to trigger detection. :vartype speech_duration_ms: int - :ivar remove_filler_words: + :ivar remove_filler_words: Whether to remove filler words (e.g., 'um', 'uh') from + transcription. :vartype remove_filler_words: bool - :ivar auto_truncate: + :ivar auto_truncate: Whether to automatically truncate the audio buffer when speech stops. :vartype auto_truncate: bool - :ivar create_response: + :ivar create_response: Whether to automatically create a response when speech stops. :vartype create_response: bool - :ivar interrupt_response: + :ivar interrupt_response: Whether to allow the user's speech to interrupt the assistant's + response. :vartype interrupt_response: bool """ type: Literal[TurnDetectionType.AZURE_SEMANTIC_VAD_EN] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """Required. 
AZURE_SEMANTIC_VAD_EN.""" threshold: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Activation threshold for VAD detection. Range: 0.0 to 1.0.""" prefix_padding_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Amount of audio to include before speech is detected, in milliseconds.""" silence_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Duration of silence required to end speech detection, in milliseconds.""" end_of_utterance_detection: Optional["_models.EouDetection"] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """Configuration for end-of-utterance detection.""" speech_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Minimum speech duration in milliseconds to trigger detection.""" remove_filler_words: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to remove filler words (e.g., 'um', 'uh') from transcription.""" auto_truncate: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically truncate the audio buffer when speech stops.""" create_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically create a response when speech stops.""" interrupt_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to allow the user's speech to interrupt the assistant's response.""" @overload def __init__( @@ -974,42 +1409,55 @@ class AzureSemanticVadMultilingual(TurnDetection, discriminator="azure_semantic_ :ivar type: Required. AZURE_SEMANTIC_VAD_MULTILINGUAL. 
:vartype type: str or ~azure.ai.voicelive.models.AZURE_SEMANTIC_VAD_MULTILINGUAL - :ivar threshold: + :ivar threshold: Activation threshold for VAD detection. Range: 0.0 to 1.0. :vartype threshold: float - :ivar prefix_padding_ms: + :ivar prefix_padding_ms: Amount of audio to include before speech is detected, in milliseconds. :vartype prefix_padding_ms: int - :ivar silence_duration_ms: + :ivar silence_duration_ms: Duration of silence required to end speech detection, in + milliseconds. :vartype silence_duration_ms: int - :ivar end_of_utterance_detection: + :ivar end_of_utterance_detection: Configuration for end-of-utterance detection. :vartype end_of_utterance_detection: ~azure.ai.voicelive.models.EouDetection - :ivar speech_duration_ms: + :ivar speech_duration_ms: Minimum speech duration in milliseconds to trigger detection. :vartype speech_duration_ms: int - :ivar remove_filler_words: + :ivar remove_filler_words: Whether to remove filler words (e.g., 'um', 'uh') from + transcription. :vartype remove_filler_words: bool - :ivar languages: + :ivar languages: List of BCP-47 language codes for speech detection. :vartype languages: list[str] - :ivar auto_truncate: + :ivar auto_truncate: Whether to automatically truncate the audio buffer when speech stops. :vartype auto_truncate: bool - :ivar create_response: + :ivar create_response: Whether to automatically create a response when speech stops. :vartype create_response: bool - :ivar interrupt_response: + :ivar interrupt_response: Whether to allow the user's speech to interrupt the assistant's + response. :vartype interrupt_response: bool """ type: Literal[TurnDetectionType.AZURE_SEMANTIC_VAD_MULTILINGUAL] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """Required. AZURE_SEMANTIC_VAD_MULTILINGUAL.""" threshold: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Activation threshold for VAD detection. 
Range: 0.0 to 1.0.""" prefix_padding_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Amount of audio to include before speech is detected, in milliseconds.""" silence_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Duration of silence required to end speech detection, in milliseconds.""" end_of_utterance_detection: Optional["_models.EouDetection"] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """Configuration for end-of-utterance detection.""" speech_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Minimum speech duration in milliseconds to trigger detection.""" remove_filler_words: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to remove filler words (e.g., 'um', 'uh') from transcription.""" languages: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """List of BCP-47 language codes for speech detection.""" auto_truncate: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically truncate the audio buffer when speech stops.""" create_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically create a response when speech stops.""" interrupt_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to allow the user's speech to interrupt the assistant's response.""" @overload def __init__( @@ -1048,21 +1496,47 @@ class AzureStandardVoice(AzureVoice, discriminator="azure-standard"): :vartype name: str :ivar temperature: Temperature must be between 0.0 and 1.0. 
:vartype temperature: float - :ivar custom_lexicon_url: + :ivar custom_lexicon_url: URL of a custom lexicon file for pronunciation customization. :vartype custom_lexicon_url: str - :ivar custom_text_normalization_url: + :ivar custom_text_normalization_url: URL of a custom text normalization endpoint. :vartype custom_text_normalization_url: str - :ivar prefer_locales: + :ivar prefer_locales: Preferred locales in BCP-47 format that change the accents of languages. + If not set, TTS uses the default accent for each language (e.g., American English for English, + Mexican Spanish for Spanish). Setting this to ``["en-GB", "es-ES"]`` changes the English accent + to British English and the Spanish accent to European Spanish, while TTS can still speak other + languages like French or Chinese with their default accents. :vartype prefer_locales: list[str] - :ivar locale: + :ivar locale: Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the + specified locale to speak. For example, setting locale to ``en-US`` forces American English + accent for all text content, even if the text is in another language, and TTS will output + silence for unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS + automatically detects the language from the text content. :vartype locale: str - :ivar style: + :ivar style: Speaking style for the voice (e.g., 'cheerful', 'sad'). :vartype style: str - :ivar pitch: + :ivar pitch: Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``). 
:vartype pitch: str - :ivar rate: + :ivar rate: Speaking rate adjustment for the voice output. Follows the same rules as the + ``rate`` attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``). :vartype rate: str - :ivar volume: + :ivar volume: Volume adjustment for the voice output. Follows the same rules as the ``volume`` + attribute of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``). :vartype volume: str """ @@ -1073,15 +1547,49 @@ class AzureStandardVoice(AzureVoice, discriminator="azure-standard"): temperature: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Temperature must be between 0.0 and 1.0.""" custom_lexicon_url: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """URL of a custom lexicon file for pronunciation customization.""" custom_text_normalization_url: Optional[str] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """URL of a custom text normalization endpoint.""" prefer_locales: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Preferred locales in BCP-47 format that change the accents of languages. If not set, TTS uses + the default accent for each language (e.g., American English for English, Mexican Spanish for + Spanish). 
Setting this to ``[\"en-GB\", \"es-ES\"]`` changes the English accent to British + English and the Spanish accent to European Spanish, while TTS can still speak other languages + like French or Chinese with their default accents.""" locale: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Enforced locale in BCP-47 format for TTS output. If set, TTS will always use the specified + locale to speak. For example, setting locale to ``en-US`` forces American English accent for + all text content, even if the text is in another language, and TTS will output silence for + unsupported languages (e.g., Chinese text with ``en-US`` locale). If not set, TTS automatically + detects the language from the text content.""" style: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking style for the voice (e.g., 'cheerful', 'sad').""" pitch: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Pitch adjustment for the voice output. Follows the same rules as the ``pitch`` attribute of the + SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``x-low``, ``low``, ``medium``, ``high``, ``x-high``, + ``default``), a relative change (e.g., ``+10%``, ``-5%``, ``+50Hz``, ``-2st``), or an absolute + frequency (e.g., ``200Hz``).""" rate: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Speaking rate adjustment for the voice output. Follows the same rules as the ``rate`` attribute + of the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). 
+ Typical values: a named level (``x-slow``, ``slow``, ``medium``, ``fast``, ``x-fast``, + ``default``), a relative percentage (e.g., ``+20%``, ``-10%``), or a non-negative multiplier + (e.g., ``0.5``, ``1.5``).""" volume: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Volume adjustment for the voice output. Follows the same rules as the ``volume`` attribute of + the SSML ``prosody`` element (see + `https://learn.microsoft.com/azure/ai-services/speech-service/speech-synthesis-markup-voice#adjust-prosody + `_). + Typical values: a named level (``silent``, ``x-soft``, ``soft``, ``medium``, ``loud``, + ``x-loud``, ``default``), an absolute number from 0.0 to 100.0, or a relative change (e.g., + ``+10``, ``-6dB``).""" @overload def __init__( @@ -1193,15 +1701,17 @@ class ClientEvent(_Model): ClientEventInputAudioClear, ClientEventInputAudioTurnAppend, ClientEventInputAudioTurnCancel, ClientEventInputAudioTurnEnd, ClientEventInputAudioTurnStart, ClientEventInputAudioBufferAppend, ClientEventInputAudioBufferClear, - ClientEventInputAudioBufferCommit, ClientEventResponseCancel, ClientEventResponseCreate, - ClientEventSessionAvatarConnect, ClientEventSessionUpdate + ClientEventInputAudioBufferCommit, ClientEventOutputAudioBufferClear, + ClientEventResponseCancel, ClientEventResponseCreate, ClientEventSessionAvatarConnect, + ClientEventSessionUpdate :ivar type: The type of event. Required. Known values are: "session.update", "input_audio_buffer.append", "input_audio_buffer.commit", "input_audio_buffer.clear", "input_audio.turn.start", "input_audio.turn.append", "input_audio.turn.end", "input_audio.turn.cancel", "input_audio.clear", "conversation.item.create", "conversation.item.retrieve", "conversation.item.truncate", "conversation.item.delete", - "response.create", "response.cancel", "session.avatar.connect", and "mcp_approval_response". 
+ "response.create", "response.cancel", "session.avatar.connect", "mcp_approval_response", and + "output_audio_buffer.clear". :vartype type: str or ~azure.ai.voicelive.models.ClientEventType :ivar event_id: :vartype event_id: str @@ -1214,8 +1724,8 @@ class ClientEvent(_Model): \"input_audio.turn.start\", \"input_audio.turn.append\", \"input_audio.turn.end\", \"input_audio.turn.cancel\", \"input_audio.clear\", \"conversation.item.create\", \"conversation.item.retrieve\", \"conversation.item.truncate\", \"conversation.item.delete\", - \"response.create\", \"response.cancel\", \"session.avatar.connect\", and - \"mcp_approval_response\".""" + \"response.create\", \"response.cancel\", \"session.avatar.connect\", + \"mcp_approval_response\", and \"output_audio_buffer.clear\".""" event_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) @overload @@ -1730,6 +2240,39 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ClientEventType.INPUT_AUDIO_TURN_START # type: ignore +class ClientEventOutputAudioBufferClear(ClientEvent, discriminator="output_audio_buffer.clear"): + """Client request to clear the avatar output buffer. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``output_audio_buffer.clear``. Required. Client request to + clear the avatar output buffer. + :vartype type: str or ~azure.ai.voicelive.models.OUTPUT_AUDIO_BUFFER_CLEAR + """ + + type: Literal[ClientEventType.OUTPUT_AUDIO_BUFFER_CLEAR] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``output_audio_buffer.clear``. Required. Client request to clear the + avatar output buffer.""" + + @overload + def __init__( + self, + *, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. 
+ :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ClientEventType.OUTPUT_AUDIO_BUFFER_CLEAR # type: ignore + + class ClientEventResponseCancel(ClientEvent, discriminator="response.cancel"): """Send this event to cancel an in-progress response. The server will respond with a ``response.cancelled`` event or an error if there is no response to cancel. @@ -1966,6 +2509,54 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class FileSearchResult(_Model): + """A file search result entry. + + :ivar attributes: Key-value pairs for filtering file search results. + :vartype attributes: dict[str, str] + :ivar file_id: The unique ID of the file. + :vartype file_id: str + :ivar filename: The name of the file. + :vartype filename: str + :ivar score: The relevance score of the file search result. + :vartype score: float + :ivar text: The text content of the file that matched the query. 
+ :vartype text: str + """ + + attributes: Optional[dict[str, str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Key-value pairs for filtering file search results.""" + file_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The unique ID of the file.""" + filename: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The name of the file.""" + score: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The relevance score of the file search result.""" + text: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The text content of the file that matched the query.""" + + @overload + def __init__( + self, + *, + attributes: Optional[dict[str, str]] = None, + file_id: Optional[str] = None, + filename: Optional[str] = None, + score: Optional[float] = None, + text: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + class FunctionCallItem(ConversationRequestItem, discriminator="function_call"): """A function call item within a conversation. @@ -2695,12 +3286,16 @@ class OutputTokenDetails(_Model): :vartype text_tokens: int :ivar audio_tokens: Number of audio tokens generated in the output. Required. :vartype audio_tokens: int + :ivar reasoning_tokens: Number of reasoning tokens generated in the output. + :vartype reasoning_tokens: int """ text_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Number of text tokens generated in the output. 
Required.""" audio_tokens: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Number of audio tokens generated in the output. Required.""" + reasoning_tokens: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Number of reasoning tokens generated in the output.""" @overload def __init__( @@ -2708,6 +3303,7 @@ def __init__( *, text_tokens: int, audio_tokens: int, + reasoning_tokens: Optional[int] = None, ) -> None: ... @overload @@ -2866,6 +3462,15 @@ class RequestSession(_Model): calls. Is either a StaticInterimResponseConfig type or a LlmInterimResponseConfig type. :vartype interim_response: ~azure.ai.voicelive.models.StaticInterimResponseConfig or ~azure.ai.voicelive.models.LlmInterimResponseConfig + :ivar include: List of include options for the session (e.g., logprobs, phrases, file search + results). + :vartype include: list[str or ~azure.ai.voicelive.models.SessionIncludeOption] + :ivar metadata: Set of up to 16 key-value pairs that can be attached to the session. This is + useful for storing additional information about the session in a structured format, such as + tracking IDs, user context, or application-specific labels. These key-value pairs are also + included in Foundry resource logs for tracing and diagnostics. Keys can be a maximum of 64 + characters long and values can be a maximum of 512 characters long. + :vartype metadata: dict[str, str] """ model: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) @@ -2943,6 +3548,16 @@ class RequestSession(_Model): ) """Configuration for interim response generation during latency or tool calls. 
Is either a StaticInterimResponseConfig type or a LlmInterimResponseConfig type.""" + include: Optional[list[Union[str, "_models.SessionIncludeOption"]]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """List of include options for the session (e.g., logprobs, phrases, file search results).""" + metadata: Optional[dict[str, str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Set of up to 16 key-value pairs that can be attached to the session. This is useful for storing + additional information about the session in a structured format, such as tracking IDs, user + context, or application-specific labels. These key-value pairs are also included in Foundry + resource logs for tracing and diagnostics. Keys can be a maximum of 64 characters long and + values can be a maximum of 512 characters long.""" @overload def __init__( @@ -2968,6 +3583,8 @@ def __init__( max_response_output_tokens: Optional[Union[int, Literal["inf"]]] = None, reasoning_effort: Optional[Union[str, "_models.ReasoningEffort"]] = None, interim_response: Optional["_types.InterimResponseConfig"] = None, + include: Optional[list[Union[str, "_models.SessionIncludeOption"]]] = None, + metadata: Optional[dict[str, str]] = None, ) -> None: ... @overload @@ -3227,7 +3844,7 @@ class ResponseCancelledDetails(ResponseStatusDetails, discriminator="cancelled") :vartype type: str or ~azure.ai.voicelive.models.CANCELLED :ivar reason: Required. Is one of the following types: Literal["turn_detected"], Literal["client_cancelled"], str - :vartype reason: str + :vartype reason: str or str or str """ type: Literal[ResponseStatus.CANCELLED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore @@ -3320,6 +3937,10 @@ class ResponseCreateParams(_Model): useful for storing additional information about the object in a structured format. 
Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long. :vartype metadata: dict[str, str] + :ivar interim_response: Configuration for interim response generation during latency or tool + calls. Is either a StaticInterimResponseConfig type or a LlmInterimResponseConfig type. + :vartype interim_response: ~azure.ai.voicelive.models.StaticInterimResponseConfig or + ~azure.ai.voicelive.models.LlmInterimResponseConfig """ commit: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) @@ -3390,6 +4011,11 @@ class ResponseCreateParams(_Model): """Set of up to 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.""" + interim_response: Optional["_types.InterimResponseConfig"] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """Configuration for interim response generation during latency or tool calls. Is either a + StaticInterimResponseConfig type or a LlmInterimResponseConfig type.""" @overload def __init__( @@ -3410,6 +4036,7 @@ def __init__( pre_generated_assistant_message: Optional["_models.AssistantMessageItem"] = None, reasoning_effort: Optional[Union[str, "_models.ReasoningEffort"]] = None, metadata: Optional[dict[str, str]] = None, + interim_response: Optional["_types.InterimResponseConfig"] = None, ) -> None: ... @overload @@ -3460,12 +4087,13 @@ class ResponseItem(_Model): """Base for any response item; discriminated by ``type``. You probably want to use the sub-classes and not this class directly. 
Known sub-classes are: - ResponseFunctionCallItem, ResponseFunctionCallOutputItem, ResponseMCPApprovalRequestItem, - ResponseMCPApprovalResponseItem, ResponseMCPCallItem, ResponseMCPListToolItem, - ResponseMessageItem + ResponseFileSearchCallItem, ResponseFunctionCallItem, ResponseFunctionCallOutputItem, + ResponseMCPApprovalRequestItem, ResponseMCPApprovalResponseItem, ResponseMCPCallItem, + ResponseMCPListToolItem, ResponseMessageItem, ResponseWebSearchCallItem :ivar type: Required. Known values are: "message", "function_call", "function_call_output", - "mcp_list_tools", "mcp_call", "mcp_approval_request", and "mcp_approval_response". + "mcp_list_tools", "mcp_call", "mcp_approval_request", "mcp_approval_response", + "web_search_call", and "file_search_call". :vartype type: str or ~azure.ai.voicelive.models.ItemType :ivar id: :vartype id: str @@ -3476,7 +4104,8 @@ class ResponseItem(_Model): __mapping__: dict[str, _Model] = {} type: str = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) """Required. Known values are: \"message\", \"function_call\", \"function_call_output\", - \"mcp_list_tools\", \"mcp_call\", \"mcp_approval_request\", and \"mcp_approval_response\".""" + \"mcp_list_tools\", \"mcp_call\", \"mcp_approval_request\", \"mcp_approval_response\", + \"web_search_call\", and \"file_search_call\".""" id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) object: Optional[Literal["realtime.item"]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """Default value is \"realtime.item\".""" @@ -3501,6 +4130,75 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class ResponseFileSearchCallItem(ResponseItem, discriminator="file_search_call"): + """A response item that represents a file search call. + + :ivar object: Default value is "realtime.item". + :vartype object: str + :ivar type: The type of the item. 
Always 'file_search_call'. Required. File search call item. + :vartype type: str or ~azure.ai.voicelive.models.FILE_SEARCH_CALL + :ivar id: The unique ID of the file search tool call. + :vartype id: str + :ivar queries: The queries used for the file search. + :vartype queries: list[str] + :ivar status: The status of the file search tool call. Required. Is one of the following types: + Literal["in_progress"], Literal["searching"], Literal["completed"], Literal["incomplete"], + Literal["failed"], str + :vartype status: str or str or str or str or str or str + :ivar results: The results of the file search. + :vartype results: list[~azure.ai.voicelive.models.FileSearchResult] + """ + + type: Literal[ItemType.FILE_SEARCH_CALL] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The type of the item. Always 'file_search_call'. Required. File search call item.""" + queries: Optional[list[str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The queries used for the file search.""" + status: Union[ + Literal["in_progress"], + Literal["searching"], + Literal["completed"], + Literal["incomplete"], + Literal["failed"], + str, + ] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The status of the file search tool call. Required. 
Is one of the following types: + Literal[\"in_progress\"], Literal[\"searching\"], Literal[\"completed\"], + Literal[\"incomplete\"], Literal[\"failed\"], str""" + results: Optional[list["_models.FileSearchResult"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """The results of the file search.""" + + @overload + def __init__( + self, + *, + status: Union[ + Literal["in_progress"], + Literal["searching"], + Literal["completed"], + Literal["incomplete"], + Literal["failed"], + str, + ], + object: Optional[Literal["realtime.item"]] = None, + id: Optional[str] = None, # pylint: disable=redefined-builtin + queries: Optional[list[str]] = None, + results: Optional[list["_models.FileSearchResult"]] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ItemType.FILE_SEARCH_CALL # type: ignore + + class ResponseFunctionCallItem(ResponseItem, discriminator="function_call"): """A function call item within a conversation. @@ -3608,7 +4306,7 @@ class ResponseIncompleteDetails(ResponseStatusDetails, discriminator="incomplete :vartype type: str or ~azure.ai.voicelive.models.INCOMPLETE :ivar reason: Required. Is one of the following types: Literal["max_output_tokens"], Literal["content_filter"], str - :vartype reason: str + :vartype reason: str or str or str """ type: Literal[ResponseStatus.INCOMPLETE] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore @@ -3957,6 +4655,15 @@ class ResponseSession(_Model): calls. Is either a StaticInterimResponseConfig type or a LlmInterimResponseConfig type. 
:vartype interim_response: ~azure.ai.voicelive.models.StaticInterimResponseConfig or ~azure.ai.voicelive.models.LlmInterimResponseConfig + :ivar include: List of include options for the session (e.g., logprobs, phrases, file search + results). + :vartype include: list[str or ~azure.ai.voicelive.models.SessionIncludeOption] + :ivar metadata: Set of up to 16 key-value pairs that can be attached to the session. This is + useful for storing additional information about the session in a structured format, such as + tracking IDs, user context, or application-specific labels. These key-value pairs are also + included in Foundry resource logs for tracing and diagnostics. Keys can be a maximum of 64 + characters long and values can be a maximum of 512 characters long. + :vartype metadata: dict[str, str] :ivar agent: The agent configuration for the session, if applicable. :vartype agent: ~azure.ai.voicelive.models.AgentConfig :ivar id: The unique identifier for the session. @@ -4038,6 +4745,16 @@ class ResponseSession(_Model): ) """Configuration for interim response generation during latency or tool calls. Is either a StaticInterimResponseConfig type or a LlmInterimResponseConfig type.""" + include: Optional[list[Union[str, "_models.SessionIncludeOption"]]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """List of include options for the session (e.g., logprobs, phrases, file search results).""" + metadata: Optional[dict[str, str]] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Set of up to 16 key-value pairs that can be attached to the session. This is useful for storing + additional information about the session in a structured format, such as tracking IDs, user + context, or application-specific labels. These key-value pairs are also included in Foundry + resource logs for tracing and diagnostics. 
Keys can be a maximum of 64 characters long and + values can be a maximum of 512 characters long.""" agent: Optional["_models.AgentConfig"] = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The agent configuration for the session, if applicable.""" id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) @@ -4067,6 +4784,8 @@ def __init__( max_response_output_tokens: Optional[Union[int, Literal["inf"]]] = None, reasoning_effort: Optional[Union[str, "_models.ReasoningEffort"]] = None, interim_response: Optional["_types.InterimResponseConfig"] = None, + include: Optional[list[Union[str, "_models.SessionIncludeOption"]]] = None, + metadata: Optional[dict[str, str]] = None, agent: Optional["_models.AgentConfig"] = None, id: Optional[str] = None, # pylint: disable=redefined-builtin ) -> None: ... @@ -4114,6 +4833,50 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ContentPartType.TEXT # type: ignore +class ResponseWebSearchCallItem(ResponseItem, discriminator="web_search_call"): + """A response item that represents a web search call. + + :ivar object: Default value is "realtime.item". + :vartype object: str + :ivar type: The type of the item. Always 'web_search_call'. Required. Web search call item. + :vartype type: str or ~azure.ai.voicelive.models.WEB_SEARCH_CALL + :ivar id: The unique ID of the web search tool call. + :vartype id: str + :ivar status: The status of the web search tool call. Required. Is one of the following types: + Literal["in_progress"], Literal["searching"], Literal["completed"], Literal["failed"], str + :vartype status: str or str or str or str or str + """ + + type: Literal[ItemType.WEB_SEARCH_CALL] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The type of the item. Always 'web_search_call'. Required. 
Web search call item.""" + status: Union[Literal["in_progress"], Literal["searching"], Literal["completed"], Literal["failed"], str] = ( + rest_field(visibility=["read", "create", "update", "delete", "query"]) + ) + """The status of the web search tool call. Required. Is one of the following types: + Literal[\"in_progress\"], Literal[\"searching\"], Literal[\"completed\"], Literal[\"failed\"], + str""" + + @overload + def __init__( + self, + *, + status: Union[Literal["in_progress"], Literal["searching"], Literal["completed"], Literal["failed"], str], + object: Optional[Literal["realtime.item"]] = None, + id: Optional[str] = None, # pylint: disable=redefined-builtin + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ItemType.WEB_SEARCH_CALL # type: ignore + + class Scene(_Model): """Configuration for avatar's zoom level, position, rotation and movement amplitude in the video frame. 
@@ -4199,20 +4962,25 @@ class ServerEvent(_Model): ServerEventInputAudioBufferCommitted, ServerEventInputAudioBufferSpeechStarted, ServerEventInputAudioBufferSpeechStopped, ServerEventMcpListToolsCompleted, ServerEventMcpListToolsFailed, ServerEventMcpListToolsInProgress, - ServerEventResponseAnimationBlendshapeDelta, ServerEventResponseAnimationBlendshapeDone, - ServerEventResponseAnimationVisemeDelta, ServerEventResponseAnimationVisemeDone, - ServerEventResponseAudioDelta, ServerEventResponseAudioDone, - ServerEventResponseAudioTimestampDelta, ServerEventResponseAudioTimestampDone, + ServerEventOutputAudioBufferCleared, ServerEventResponseAnimationBlendshapeDelta, + ServerEventResponseAnimationBlendshapeDone, ServerEventResponseAnimationVisemeDelta, + ServerEventResponseAnimationVisemeDone, ServerEventResponseAudioDelta, + ServerEventResponseAudioDone, ServerEventResponseAudioTimestampDelta, + ServerEventResponseAudioTimestampDone, ServerEventResponseAudioTranscriptAnnotationAdded, ServerEventResponseAudioTranscriptDelta, ServerEventResponseAudioTranscriptDone, ServerEventResponseContentPartAdded, ServerEventResponseContentPartDone, ServerEventResponseCreated, ServerEventResponseDone, - ServerEventResponseFunctionCallArgumentsDelta, ServerEventResponseFunctionCallArgumentsDone, - ServerEventResponseMcpCallCompleted, ServerEventResponseMcpCallFailed, - ServerEventResponseMcpCallInProgress, ServerEventResponseMcpCallArgumentsDelta, - ServerEventResponseMcpCallArgumentsDone, ServerEventResponseOutputItemAdded, - ServerEventResponseOutputItemDone, ServerEventResponseTextDelta, ServerEventResponseTextDone, - ServerEventSessionAvatarConnecting, ServerEventSessionCreated, ServerEventSessionUpdated, - ServerEventWarning + ServerEventResponseFileSearchCallCompleted, ServerEventResponseFileSearchCallInProgress, + ServerEventResponseFileSearchCallSearching, ServerEventResponseFunctionCallArgumentsDelta, + ServerEventResponseFunctionCallArgumentsDone, 
ServerEventResponseMcpCallCompleted, + ServerEventResponseMcpCallFailed, ServerEventResponseMcpCallInProgress, + ServerEventResponseMcpCallArgumentsDelta, ServerEventResponseMcpCallArgumentsDone, + ServerEventResponseOutputItemAdded, ServerEventResponseOutputItemDone, + ServerEventResponseTextDelta, ServerEventResponseTextDone, ServerEventResponseVideoDelta, + ServerEventResponseWebSearchCallCompleted, ServerEventResponseWebSearchCallInProgress, + ServerEventResponseWebSearchCallSearching, ServerEventSessionAvatarConnecting, + ServerEventSessionAvatarSwitchToIdle, ServerEventSessionAvatarSwitchToSpeaking, + ServerEventSessionCreated, ServerEventSessionUpdated, ServerEventWarning :ivar type: The type of event. Required. Known values are: "error", "warning", "session.avatar.connecting", "session.created", "session.updated", @@ -4232,7 +5000,12 @@ class ServerEvent(_Model): "response.function_call_arguments.done", "mcp_list_tools.in_progress", "mcp_list_tools.completed", "mcp_list_tools.failed", "response.mcp_call_arguments.delta", "response.mcp_call_arguments.done", "response.mcp_call.in_progress", - "response.mcp_call.completed", and "response.mcp_call.failed". + "response.mcp_call.completed", "response.mcp_call.failed", "session.avatar.switch_to_speaking", + "session.avatar.switch_to_idle", "response.video.delta", "response.web_search_call.searching", + "response.web_search_call.in_progress", "response.web_search_call.completed", + "response.file_search_call.searching", "response.file_search_call.in_progress", + "response.file_search_call.completed", "output_audio_buffer.cleared", and + "response.audio_transcript.annotation.added". 
:vartype type: str or ~azure.ai.voicelive.models.ServerEventType :ivar event_id: :vartype event_id: str @@ -4259,7 +5032,13 @@ class ServerEvent(_Model): \"response.function_call_arguments.done\", \"mcp_list_tools.in_progress\", \"mcp_list_tools.completed\", \"mcp_list_tools.failed\", \"response.mcp_call_arguments.delta\", \"response.mcp_call_arguments.done\", \"response.mcp_call.in_progress\", - \"response.mcp_call.completed\", and \"response.mcp_call.failed\".""" + \"response.mcp_call.completed\", \"response.mcp_call.failed\", + \"session.avatar.switch_to_speaking\", \"session.avatar.switch_to_idle\", + \"response.video.delta\", \"response.web_search_call.searching\", + \"response.web_search_call.in_progress\", \"response.web_search_call.completed\", + \"response.file_search_call.searching\", \"response.file_search_call.in_progress\", + \"response.file_search_call.completed\", \"output_audio_buffer.cleared\", and + \"response.audio_transcript.annotation.added\".""" event_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) @overload @@ -4280,23 +5059,18 @@ def __init__(self, mapping: Mapping[str, Any]) -> None: def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) - @classmethod - def deserialize(cls, payload: dict[str, Any]) -> "ServerEvent": - # public, linter-friendly entrypoint - # pylint: disable-next=protected-access - return cls._deserialize(payload, []) class ServerEventConversationItemCreated(ServerEvent, discriminator="conversation.item.created"): """Returned when a conversation item is created. There are several scenarios that produce this event: - The server is generating a Response, which if successful will produce + * The server is generating a Response, which if successful will produce either one or two Items, which will be of type `message` (role `assistant`) or type `function_call`. 
- The input audio buffer has been committed, either by the client or the + * The input audio buffer has been committed, either by the client or the server (in `server_vad` mode). The server will take the content of the input audio buffer and add it to a new user message Item. - The client has sent a `conversation.item.create` event to add a new Item + * The client has sent a `conversation.item.create` event to add a new Item to the Conversation. :ivar event_id: @@ -4401,6 +5175,10 @@ class ServerEventConversationItemInputAudioTranscriptionCompleted( :vartype content_index: int :ivar transcript: The transcribed text. Required. :vartype transcript: str + :ivar logprobs: The log probabilities of the transcription tokens. + :vartype logprobs: list[~azure.ai.voicelive.models.LogProbProperties] + :ivar phrases: The transcription phrases with timing information. + :vartype phrases: list[~azure.ai.voicelive.models.TranscriptionPhrase] """ type: Literal[ServerEventType.CONVERSATION_ITEM_INPUT_AUDIO_TRANSCRIPTION_COMPLETED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore @@ -4412,6 +5190,14 @@ class ServerEventConversationItemInputAudioTranscriptionCompleted( """The index of the content part containing the audio. Required.""" transcript: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) """The transcribed text. 
Required.""" + logprobs: Optional[list["_models.LogProbProperties"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """The log probabilities of the transcription tokens.""" + phrases: Optional[list["_models.TranscriptionPhrase"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """The transcription phrases with timing information.""" @overload def __init__( @@ -4421,6 +5207,8 @@ def __init__( content_index: int, transcript: str, event_id: Optional[str] = None, + logprobs: Optional[list["_models.LogProbProperties"]] = None, + phrases: Optional[list["_models.TranscriptionPhrase"]] = None, ) -> None: ... @overload @@ -5010,6 +5798,39 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ServerEventType.MCP_LIST_TOOLS_IN_PROGRESS # type: ignore +class ServerEventOutputAudioBufferCleared(ServerEvent, discriminator="output_audio_buffer.cleared"): + """Returned when the output audio buffer has been cleared. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``output_audio_buffer.cleared``. Required. Output audio + buffer has been cleared. + :vartype type: str or ~azure.ai.voicelive.models.OUTPUT_AUDIO_BUFFER_CLEARED + """ + + type: Literal[ServerEventType.OUTPUT_AUDIO_BUFFER_CLEARED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``output_audio_buffer.cleared``. Required. Output audio buffer has been + cleared.""" + + @overload + def __init__( + self, + *, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. 
+ :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.OUTPUT_AUDIO_BUFFER_CLEARED # type: ignore + + class ServerEventResponseAnimationBlendshapeDelta( ServerEvent, discriminator="response.animation_blendshapes.delta" ): # pylint: disable=name-too-long @@ -5464,6 +6285,71 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ServerEventType.RESPONSE_AUDIO_TIMESTAMP_DONE # type: ignore +class ServerEventResponseAudioTranscriptAnnotationAdded( + ServerEvent, discriminator="response.audio_transcript.annotation.added" +): # pylint: disable=name-too-long + """Returned when an audio transcript annotation is added to a response. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.audio_transcript.annotation.added``. Required. + Audio transcript annotation added. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar content_index: The index of the content part in the item's content array. Required. + :vartype content_index: int + :ivar annotation_index: The index of the annotation. Required. + :vartype annotation_index: int + :ivar annotation: The annotation object. Required. + :vartype annotation: any + """ + + type: Literal[ServerEventType.RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.audio_transcript.annotation.added``. Required. 
Audio + transcript annotation added.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + content_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the content part in the item's content array. Required.""" + annotation_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the annotation. Required.""" + annotation: Any = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The annotation object. Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + content_index: int, + annotation_index: int, + annotation: Any, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED # type: ignore + + class ServerEventResponseAudioTranscriptDelta(ServerEvent, discriminator="response.audio_transcript.delta"): """Returned when the model-generated transcription of audio output is updated. 
@@ -5772,6 +6658,171 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ServerEventType.RESPONSE_DONE # type: ignore +class ServerEventResponseFileSearchCallCompleted( + ServerEvent, discriminator="response.file_search_call.completed" +): # pylint: disable=name-too-long + """Returned when a file search call has completed. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.file_search_call.completed``. Required. File + search call completed. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_FILE_SEARCH_CALL_COMPLETED + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar sequence_number: The sequence number of the file search call. Required. + :vartype sequence_number: int + """ + + type: Literal[ServerEventType.RESPONSE_FILE_SEARCH_CALL_COMPLETED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.file_search_call.completed``. Required. File search call + completed.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + sequence_number: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sequence number of the file search call. 
Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + sequence_number: int, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_FILE_SEARCH_CALL_COMPLETED # type: ignore + + +class ServerEventResponseFileSearchCallInProgress( + ServerEvent, discriminator="response.file_search_call.in_progress" +): # pylint: disable=name-too-long + """Returned when a file search call is in progress. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.file_search_call.in_progress``. Required. File + search call is in progress. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_FILE_SEARCH_CALL_IN_PROGRESS + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar sequence_number: The sequence number of the file search call. Required. + :vartype sequence_number: int + """ + + type: Literal[ServerEventType.RESPONSE_FILE_SEARCH_CALL_IN_PROGRESS] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.file_search_call.in_progress``. Required. File search call + is in progress.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. 
Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + sequence_number: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sequence number of the file search call. Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + sequence_number: int, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_FILE_SEARCH_CALL_IN_PROGRESS # type: ignore + + +class ServerEventResponseFileSearchCallSearching( + ServerEvent, discriminator="response.file_search_call.searching" +): # pylint: disable=name-too-long + """Returned when a file search call is searching. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.file_search_call.searching``. Required. File + search call is searching. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_FILE_SEARCH_CALL_SEARCHING + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar sequence_number: The sequence number of the file search call. Required. + :vartype sequence_number: int + """ + + type: Literal[ServerEventType.RESPONSE_FILE_SEARCH_CALL_SEARCHING] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.file_search_call.searching``. Required. 
File search call is + searching.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + sequence_number: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sequence number of the file search call. Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + sequence_number: int, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_FILE_SEARCH_CALL_SEARCHING # type: ignore + + class ServerEventResponseFunctionCallArgumentsDelta( ServerEvent, discriminator="response.function_call_arguments.delta" ): # pylint: disable=name-too-long @@ -6334,6 +7385,219 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ServerEventType.RESPONSE_TEXT_DONE # type: ignore +class ServerEventResponseVideoDelta(ServerEvent, discriminator="response.video.delta"): + """Returned when avatar video frame data is streamed. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.video.delta``. Required. Delta update for avatar + video frames. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_VIDEO_DELTA + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar codec: The codec used for the video data. Required. 
+ :vartype codec: str + :ivar delta: The base64-encoded video frame data. Required. + :vartype delta: str + """ + + type: Literal[ServerEventType.RESPONSE_VIDEO_DELTA] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.video.delta``. Required. Delta update for avatar video + frames.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + codec: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The codec used for the video data. Required.""" + delta: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The base64-encoded video frame data. Required.""" + + @overload + def __init__( + self, + *, + output_index: int, + codec: str, + delta: str, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_VIDEO_DELTA # type: ignore + + +class ServerEventResponseWebSearchCallCompleted( + ServerEvent, discriminator="response.web_search_call.completed" +): # pylint: disable=name-too-long + """Returned when a web search call has completed. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.web_search_call.completed``. Required. Web + search call completed. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_WEB_SEARCH_CALL_COMPLETED + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. 
+ :vartype output_index: int + :ivar sequence_number: The sequence number of the web search call. Required. + :vartype sequence_number: int + """ + + type: Literal[ServerEventType.RESPONSE_WEB_SEARCH_CALL_COMPLETED] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.web_search_call.completed``. Required. Web search call + completed.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + sequence_number: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sequence number of the web search call. Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + sequence_number: int, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_WEB_SEARCH_CALL_COMPLETED # type: ignore + + +class ServerEventResponseWebSearchCallInProgress( + ServerEvent, discriminator="response.web_search_call.in_progress" +): # pylint: disable=name-too-long + """Returned when a web search call is in progress. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.web_search_call.in_progress``. Required. Web + search call is in progress. 
+ :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_WEB_SEARCH_CALL_IN_PROGRESS + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar sequence_number: The sequence number of the web search call. Required. + :vartype sequence_number: int + """ + + type: Literal[ServerEventType.RESPONSE_WEB_SEARCH_CALL_IN_PROGRESS] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.web_search_call.in_progress``. Required. Web search call is + in progress.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + sequence_number: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sequence number of the web search call. Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + sequence_number: int, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. 
+ :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_WEB_SEARCH_CALL_IN_PROGRESS # type: ignore + + +class ServerEventResponseWebSearchCallSearching( + ServerEvent, discriminator="response.web_search_call.searching" +): # pylint: disable=name-too-long + """Returned when a web search call is searching. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``response.web_search_call.searching``. Required. Web + search call is searching. + :vartype type: str or ~azure.ai.voicelive.models.RESPONSE_WEB_SEARCH_CALL_SEARCHING + :ivar response_id: The ID of the response. Required. + :vartype response_id: str + :ivar item_id: The ID of the item. Required. + :vartype item_id: str + :ivar output_index: The index of the output item in the response. Required. + :vartype output_index: int + :ivar sequence_number: The sequence number of the web search call. Required. + :vartype sequence_number: int + """ + + type: Literal[ServerEventType.RESPONSE_WEB_SEARCH_CALL_SEARCHING] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``response.web_search_call.searching``. Required. Web search call is + searching.""" + response_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the response. Required.""" + item_id: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the item. Required.""" + output_index: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The index of the output item in the response. Required.""" + sequence_number: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The sequence number of the web search call. 
Required.""" + + @overload + def __init__( + self, + *, + response_id: str, + item_id: str, + output_index: int, + sequence_number: int, + event_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.RESPONSE_WEB_SEARCH_CALL_SEARCHING # type: ignore + + class ServerEventSessionAvatarConnecting(ServerEvent, discriminator="session.avatar.connecting"): """Sent when the server is in the process of establishing an avatar media connection and provides its SDP answer. @@ -6372,6 +7636,82 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ServerEventType.SESSION_AVATAR_CONNECTING # type: ignore +class ServerEventSessionAvatarSwitchToIdle(ServerEvent, discriminator="session.avatar.switch_to_idle"): + """Returned when the avatar switches to idle state. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``session.avatar.switch_to_idle``. Required. Avatar + switches to idle state. + :vartype type: str or ~azure.ai.voicelive.models.SESSION_AVATAR_SWITCH_TO_IDLE + :ivar turn_id: The ID of the turn associated with the avatar state change. + :vartype turn_id: str + """ + + type: Literal[ServerEventType.SESSION_AVATAR_SWITCH_TO_IDLE] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``session.avatar.switch_to_idle``. Required. Avatar switches to idle + state.""" + turn_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the turn associated with the avatar state change.""" + + @overload + def __init__( + self, + *, + event_id: Optional[str] = None, + turn_id: Optional[str] = None, + ) -> None: ... 
+ + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.SESSION_AVATAR_SWITCH_TO_IDLE # type: ignore + + +class ServerEventSessionAvatarSwitchToSpeaking(ServerEvent, discriminator="session.avatar.switch_to_speaking"): + """Returned when the avatar switches to speaking state. + + :ivar event_id: + :vartype event_id: str + :ivar type: The event type, must be ``session.avatar.switch_to_speaking``. Required. Avatar + switches to speaking state. + :vartype type: str or ~azure.ai.voicelive.models.SESSION_AVATAR_SWITCH_TO_SPEAKING + :ivar turn_id: The ID of the turn associated with the avatar state change. + :vartype turn_id: str + """ + + type: Literal[ServerEventType.SESSION_AVATAR_SWITCH_TO_SPEAKING] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore + """The event type, must be ``session.avatar.switch_to_speaking``. Required. Avatar switches to + speaking state.""" + turn_id: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The ID of the turn associated with the avatar state change.""" + + @overload + def __init__( + self, + *, + event_id: Optional[str] = None, + turn_id: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.type = ServerEventType.SESSION_AVATAR_SWITCH_TO_SPEAKING # type: ignore + + class ServerEventSessionCreated(ServerEvent, discriminator="session.created"): """Returned when a Session is created. 
Emitted automatically when a new connection is established as the first server event. This event will contain the default Session configuration. @@ -6527,33 +7867,42 @@ class ServerVad(TurnDetection, discriminator="server_vad"): :ivar type: Required. SERVER_VAD. :vartype type: str or ~azure.ai.voicelive.models.SERVER_VAD - :ivar threshold: + :ivar threshold: Activation threshold for VAD detection. Range: 0.0 to 1.0. :vartype threshold: float - :ivar prefix_padding_ms: + :ivar prefix_padding_ms: Amount of audio to include before speech is detected, in milliseconds. :vartype prefix_padding_ms: int - :ivar silence_duration_ms: + :ivar silence_duration_ms: Duration of silence required to end speech detection, in + milliseconds. :vartype silence_duration_ms: int - :ivar end_of_utterance_detection: + :ivar end_of_utterance_detection: Configuration for end-of-utterance detection. :vartype end_of_utterance_detection: ~azure.ai.voicelive.models.EouDetection - :ivar auto_truncate: + :ivar auto_truncate: Whether to automatically truncate the audio buffer when speech stops. :vartype auto_truncate: bool - :ivar create_response: + :ivar create_response: Whether to automatically create a response when speech stops. :vartype create_response: bool - :ivar interrupt_response: + :ivar interrupt_response: Whether to allow the user's speech to interrupt the assistant's + response. :vartype interrupt_response: bool """ type: Literal[TurnDetectionType.SERVER_VAD] = rest_discriminator(name="type", visibility=["read", "create", "update", "delete", "query"]) # type: ignore """Required. SERVER_VAD.""" threshold: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Activation threshold for VAD detection. 
Range: 0.0 to 1.0.""" prefix_padding_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Amount of audio to include before speech is detected, in milliseconds.""" silence_duration_ms: Optional[int] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Duration of silence required to end speech detection, in milliseconds.""" end_of_utterance_detection: Optional["_models.EouDetection"] = rest_field( visibility=["read", "create", "update", "delete", "query"] ) + """Configuration for end-of-utterance detection.""" auto_truncate: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically truncate the audio buffer when speech stops.""" create_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to automatically create a response when speech stops.""" interrupt_response: Optional[bool] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Whether to allow the user's speech to interrupt the assistant's response.""" @overload def __init__( @@ -6784,6 +8133,99 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type = ToolType.FUNCTION # type: ignore +class TranscriptionPhrase(_Model): + """A transcribed phrase with timing information. + + :ivar offset_milliseconds: Offset from the start of the audio in milliseconds. Required. + :vartype offset_milliseconds: int + :ivar duration_milliseconds: Duration of the phrase in milliseconds. Required. + :vartype duration_milliseconds: int + :ivar text: The transcribed text of the phrase. Required. + :vartype text: str + :ivar words: The individual words in the phrase with timing information. + :vartype words: list[~azure.ai.voicelive.models.TranscriptionWord] + :ivar locale: The locale of the transcription (e.g., 'en-US'). + :vartype locale: str + :ivar confidence: The confidence score of the transcription. 
+ :vartype confidence: float + """ + + offset_milliseconds: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Offset from the start of the audio in milliseconds. Required.""" + duration_milliseconds: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Duration of the phrase in milliseconds. Required.""" + text: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The transcribed text of the phrase. Required.""" + words: Optional[list["_models.TranscriptionWord"]] = rest_field( + visibility=["read", "create", "update", "delete", "query"] + ) + """The individual words in the phrase with timing information.""" + locale: Optional[str] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The locale of the transcription (e.g., 'en-US').""" + confidence: Optional[float] = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The confidence score of the transcription.""" + + @overload + def __init__( + self, + *, + offset_milliseconds: int, + duration_milliseconds: int, + text: str, + words: Optional[list["_models.TranscriptionWord"]] = None, + locale: Optional[str] = None, + confidence: Optional[float] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + +class TranscriptionWord(_Model): + """A time-stamped word in the transcription. + + :ivar text: The transcribed word text. Required. + :vartype text: str + :ivar offset_milliseconds: Offset from the start of the audio in milliseconds. Required. + :vartype offset_milliseconds: int + :ivar duration_milliseconds: Duration of the word in milliseconds. Required. 
+ :vartype duration_milliseconds: int + """ + + text: str = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """The transcribed word text. Required.""" + offset_milliseconds: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Offset from the start of the audio in milliseconds. Required.""" + duration_milliseconds: int = rest_field(visibility=["read", "create", "update", "delete", "query"]) + """Duration of the word in milliseconds. Required.""" + + @overload + def __init__( + self, + *, + text: str, + offset_milliseconds: int, + duration_milliseconds: int, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + class UserMessageItem(MessageItem, discriminator="user"): """A user message item within a conversation. diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py index 69895b91d2ed..1be407c32192 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py @@ -173,9 +173,7 @@ def instrument(self, enable_content_recording: Optional[bool] = None) -> None: """ env_gate = os.environ.get("AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING", "").lower() if env_gate != "true": - logger.debug( - "VoiceLive tracing not enabled. Set AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true to enable." - ) + logger.debug("VoiceLive tracing not enabled. 
Set AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING=true to enable.") return self._impl.instrument(enable_content_recording) @@ -749,7 +747,9 @@ def _trace_send(self, original_send: Callable) -> Callable: # pylint: disable=t instrumentor = self @functools.wraps(original_send) - async def wrapper(conn_self, event, *args, **kwargs): # pylint: disable=too-many-branches,too-many-locals,too-many-statements,protected-access + async def wrapper( + conn_self, event, *args, **kwargs + ): # pylint: disable=too-many-branches,too-many-locals,too-many-statements,protected-access span_impl_type = settings.tracing_implementation() # pylint: disable=not-callable if span_impl_type is None: return await original_send(conn_self, event, *args, **kwargs) @@ -829,7 +829,9 @@ def _trace_recv(self, original_recv: Callable) -> Callable: # pylint: disable=t instrumentor = self @functools.wraps(original_recv) - async def wrapper(conn_self, *args, **kwargs): # pylint: disable=too-many-branches,too-many-locals,too-many-statements,protected-access + async def wrapper( + conn_self, *args, **kwargs + ): # pylint: disable=too-many-branches,too-many-locals,too-many-statements,protected-access span_impl_type = settings.tracing_implementation() # pylint: disable=not-callable if span_impl_type is None: return await original_recv(conn_self, *args, **kwargs) @@ -945,14 +947,18 @@ async def wrapper(conn_self, *args, **kwargs): # pylint: disable=too-many-branc if input_tokens is not None: span.add_attribute(GEN_AI_USAGE_INPUT_TOKENS, input_tokens) instrumentor._record_token_usage( - input_tokens, "input", "recv", + input_tokens, + "input", + "recv", server_address=getattr(conn_self, "_telemetry_server_address", None), model=getattr(conn_self, "_telemetry_model", None), ) if output_tokens is not None: span.add_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens) instrumentor._record_token_usage( - output_tokens, "output", "recv", + output_tokens, + "output", + "recv", server_address=getattr(conn_self, 
"_telemetry_server_address", None), model=getattr(conn_self, "_telemetry_model", None), ) @@ -1121,9 +1127,7 @@ def _extract_session_config_from_send(conn_self: Any, event: Any) -> None: # py event_attrs[GEN_AI_EVENT_CONTENT] = json.dumps( [{"role": "system", "content": instructions}], ensure_ascii=False ) - connect_span.span_instance.add_event( - name=GEN_AI_SYSTEM_INSTRUCTION_EVENT, attributes=event_attrs - ) + connect_span.span_instance.add_event(name=GEN_AI_SYSTEM_INSTRUCTION_EVENT, attributes=event_attrs) # Temperature temperature = get(session, "temperature") @@ -1141,9 +1145,12 @@ def _extract_session_config_from_send(conn_self: Any, event: Any) -> None: # py try: if isinstance(tools, list): tools_json = json.dumps( - [t if isinstance(t, dict) else (t.as_dict() if hasattr(t, "as_dict") else str(t)) - for t in tools], - default=str, ensure_ascii=False, + [ + t if isinstance(t, dict) else (t.as_dict() if hasattr(t, "as_dict") else str(t)) + for t in tools + ], + default=str, + ensure_ascii=False, ) else: tools_json = str(tools) @@ -1167,7 +1174,9 @@ def _get_field(obj: Any, field: str) -> Any: return getattr(obj, field, None) @staticmethod - def _extract_event_ids(conn_self: Any, result: Any, span: "AbstractSpan") -> None: # pylint: disable=too-many-branches,too-many-locals + def _extract_event_ids( + conn_self: Any, result: Any, span: "AbstractSpan" + ) -> None: # pylint: disable=too-many-branches,too-many-locals """Extract IDs, MCP fields, and agent fields from any recv event. 
Extracts ``response_id``, ``call_id``, ``item_id``, ``previous_item_id``, @@ -1261,7 +1270,9 @@ def _extract_event_ids(conn_self: Any, result: Any, span: "AbstractSpan") -> Non span.add_attribute(GEN_AI_VOICE_MCP_APPROVE, approve) @staticmethod - def _extract_send_event_ids(conn_self: Any, event: Any, span: "AbstractSpan") -> None: # pylint: disable=unused-argument + def _extract_send_event_ids( + conn_self: Any, event: Any, span: "AbstractSpan" + ) -> None: # pylint: disable=unused-argument """Extract call_id and response_id from send events. For ``conversation.item.create`` events, the nested ``item`` may carry @@ -1303,7 +1314,9 @@ def _extract_send_event_ids(conn_self: Any, event: Any, span: "AbstractSpan") -> span.add_attribute(GEN_AI_VOICE_PREVIOUS_ITEM_ID, previous_item_id) @staticmethod - def _extract_response_done(conn_self: Any, result: Any, span: "AbstractSpan") -> None: # pylint: disable=too-many-branches + def _extract_response_done( + conn_self: Any, result: Any, span: "AbstractSpan" + ) -> None: # pylint: disable=too-many-branches """Extract response metadata from a response.done event. Sets ``gen_ai.response.id``, ``gen_ai.conversation.id``, and @@ -1469,7 +1482,9 @@ def _record_token_usage( logger.debug("Failed to record token usage", exc_info=True) @staticmethod - def _add_rate_limit_event(span: "AbstractSpan", event_type: str, result: Any) -> None: # pylint: disable=too-many-branches + def _add_rate_limit_event( + span: "AbstractSpan", event_type: str, result: Any + ) -> None: # pylint: disable=too-many-branches """Add a span event for rate limit or error events from the server. :param span: The active span. 
@@ -1515,8 +1530,10 @@ def _add_rate_limit_event(span: "AbstractSpan", event_type: str, result: Any) -> try: if isinstance(rate_limits, (list, tuple)): attrs["gen_ai.voice.rate_limits"] = json.dumps( - [rl if isinstance(rl, dict) else (rl.as_dict() if hasattr(rl, "as_dict") else str(rl)) - for rl in rate_limits], + [ + rl if isinstance(rl, dict) else (rl.as_dict() if hasattr(rl, "as_dict") else str(rl)) + for rl in rate_limits + ], default=str, ) else: diff --git a/sdk/voicelive/azure-ai-voicelive/azure/py.typed b/sdk/voicelive/azure-ai-voicelive/azure/py.typed new file mode 100644 index 000000000000..e5aff4f83af8 --- /dev/null +++ b/sdk/voicelive/azure-ai-voicelive/azure/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. \ No newline at end of file diff --git a/sdk/voicelive/azure-ai-voicelive/pyproject.toml b/sdk/voicelive/azure-ai-voicelive/pyproject.toml index 02fb44dc7667..23136835e480 100644 --- a/sdk/voicelive/azure-ai-voicelive/pyproject.toml +++ b/sdk/voicelive/azure-ai-voicelive/pyproject.toml @@ -58,7 +58,7 @@ test = [ repository = "https://github.com/Azure/azure-sdk-for-python" [tool.setuptools.dynamic] -version = {attr = "azure.ai.voicelive._version.VERSION"} +version = {attr = "azure._version.VERSION"} readme = {file = ["README.md", "CHANGELOG.md"], content-type = "text/markdown"} [tool.setuptools.packages.find] @@ -68,15 +68,10 @@ exclude = [ "samples*", "generated_samples*", "doc*", - "azure", - "azure.ai", ] [tool.setuptools.package-data] pytyped = ["py.typed"] -[tool.pytest.ini_options] -asyncio_default_fixture_loop_scope = "function" -asyncio_mode = "auto" [tool.azure-sdk-conda] -in_bundle = false \ No newline at end of file +in_bundle = false diff --git a/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py b/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py index 06ea24f6cd6b..91ee2e350751 100644 --- a/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py +++ 
b/sdk/voicelive/azure-ai-voicelive/samples/basic_voice_assistant_async.py @@ -77,6 +77,7 @@ def setup_telemetry(enable_content_recording: bool = False): """Set up OpenTelemetry tracing with console exporter and VoiceLive instrumentation.""" from azure.core.settings import settings + settings.tracing_implementation = "opentelemetry" from opentelemetry import trace @@ -92,10 +93,12 @@ def setup_telemetry(enable_content_recording: bool = False): os.environ.setdefault("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "true") from azure.ai.voicelive.telemetry import VoiceLiveInstrumentor + VoiceLiveInstrumentor().instrument() return trace.get_tracer(__name__) + ## Change to the directory where this script is located os.chdir(os.path.dirname(os.path.abspath(__file__))) diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py b/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py index b363e2882613..a9a255c69f11 100644 --- a/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py +++ b/sdk/voicelive/azure-ai-voicelive/tests/test_live_realtime_service.py @@ -152,10 +152,7 @@ async def _collect_audio_trans_outputs(conn, duration_s: float) -> int: except asyncio.TimeoutError: break - if ( - event.type == ServerEventType.RESPONSE_AUDIO_DELTA - or event.type == ServerEventType.RESPONSE_AUDIO_DONE - ): + if event.type == ServerEventType.RESPONSE_AUDIO_DELTA or event.type == ServerEventType.RESPONSE_AUDIO_DONE: audio_events += 1 if ( @@ -733,7 +730,10 @@ async def test_realtime_service_live_session_update( function_call_output = await _wait_for_event(conn, {ServerEventType.RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE}) assert isinstance(function_call_output, ServerEventResponseFunctionCallArgumentsDone) assert function_call_output.name == "get_weather" - assert function_call_output.arguments.replace(" ", "").replace("\n", "") in ['{"location":"北京"}', '{"location":"Beijing"}'] + assert function_call_output.arguments.replace(" ", 
"").replace("\n", "") in [ + '{"location":"北京"}', + '{"location":"Beijing"}', + ] await conn.response.create() transcripts, audio_bytes = await _collect_event( @@ -825,7 +825,14 @@ async def test_realtime_service_input_audio_transcription( self, test_data_dir: Path, model: str, - transcription_model: Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe", "gpt-4o-transcribe-diarize", "azure-speech", "mai-transcribe-1"], + transcription_model: Literal[ + "whisper-1", + "gpt-4o-transcribe", + "gpt-4o-mini-transcribe", + "gpt-4o-transcribe-diarize", + "azure-speech", + "mai-transcribe-1", + ], api_version: str, **kwargs, ): @@ -891,7 +898,8 @@ async def test_realtime_service_with_eou( model: str, turn_detection_cls: Type[Union["ServerVad", "AzureSemanticVad", "AzureSemanticVadMultilingual"]], end_of_detection: Type[Union["AzureSemanticDetection", "AzureSemanticDetectionEn"]], - api_version: str, **kwargs, + api_version: str, + **kwargs, ): file = test_data_dir / "4-1.wav" voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") @@ -1189,7 +1197,13 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: ) @pytest.mark.parametrize("api_version", ["2025-10-01", "2026-01-01-preview"]) async def test_realtime_service_with_input_audio_format( - self, test_data_dir: Path, model: str, audio_format: InputAudioFormat, turn_detection: TurnDetection, api_version: str, **kwargs + self, + test_data_dir: Path, + model: str, + audio_format: InputAudioFormat, + turn_detection: TurnDetection, + api_version: str, + **kwargs, ): """Test that all supported input_audio_format values work correctly with all models. 
diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py index d3341a18a3e7..3d9f4267d783 100644 --- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py +++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_enums.py @@ -10,6 +10,7 @@ AzureVoiceType, ClientEventType, ContentPartType, + SessionIncludeOption, InputAudioFormat, ItemParamStatus, ItemType, @@ -20,6 +21,7 @@ PersonalVoiceModels, ResponseItemStatus, ResponseStatus, + ServerEventType, ToolChoiceLiteral, ToolType, TurnDetectionType, @@ -215,7 +217,8 @@ def test_all_values(self): """Test all enum values are accessible.""" assert PersonalVoiceModels.DRAGON_LATEST_NEURAL == "DragonLatestNeural" assert PersonalVoiceModels.PHOENIX_LATEST_NEURAL == "PhoenixLatestNeural" - assert PersonalVoiceModels.PHOENIX_V2_NEURAL == "PhoenixV2Neural" + assert PersonalVoiceModels.DRAGON_HD_OMNI_LATEST_NEURAL == "DragonHDOmniLatestNeural" + assert PersonalVoiceModels.MAI_VOICE1 == "MAI-Voice-1" class TestResponseItemStatus: @@ -228,6 +231,66 @@ def test_all_values(self): assert ResponseItemStatus.INCOMPLETE == "incomplete" +class TestAzureVoiceTypeNew: + """Test new AzureVoiceType enum values.""" + + def test_avatar_voice_sync(self): + assert AzureVoiceType.AVATAR_VOICE_SYNC == "avatar-voice-sync" + + +class TestClientEventTypeNew: + """Test new ClientEventType enum values.""" + + def test_output_audio_buffer_clear(self): + assert ClientEventType.OUTPUT_AUDIO_BUFFER_CLEAR == "output_audio_buffer.clear" + + +class TestItemTypeNew: + """Test new ItemType enum values.""" + + def test_web_search_call(self): + assert ItemType.WEB_SEARCH_CALL == "web_search_call" + + def test_file_search_call(self): + assert ItemType.FILE_SEARCH_CALL == "file_search_call" + + +class TestServerEventTypeNew: + """Test new ServerEventType enum values.""" + + def test_avatar_events(self): + assert ServerEventType.SESSION_AVATAR_SWITCH_TO_SPEAKING == 
"session.avatar.switch_to_speaking" + assert ServerEventType.SESSION_AVATAR_SWITCH_TO_IDLE == "session.avatar.switch_to_idle" + + def test_video_delta(self): + assert ServerEventType.RESPONSE_VIDEO_DELTA == "response.video.delta" + + def test_web_search_events(self): + assert ServerEventType.RESPONSE_WEB_SEARCH_CALL_SEARCHING == "response.web_search_call.searching" + assert ServerEventType.RESPONSE_WEB_SEARCH_CALL_IN_PROGRESS == "response.web_search_call.in_progress" + assert ServerEventType.RESPONSE_WEB_SEARCH_CALL_COMPLETED == "response.web_search_call.completed" + + def test_file_search_events(self): + assert ServerEventType.RESPONSE_FILE_SEARCH_CALL_SEARCHING == "response.file_search_call.searching" + assert ServerEventType.RESPONSE_FILE_SEARCH_CALL_IN_PROGRESS == "response.file_search_call.in_progress" + assert ServerEventType.RESPONSE_FILE_SEARCH_CALL_COMPLETED == "response.file_search_call.completed" + + def test_output_audio_buffer_cleared(self): + assert ServerEventType.OUTPUT_AUDIO_BUFFER_CLEARED == "output_audio_buffer.cleared" + + def test_audio_transcript_annotation(self): + assert ServerEventType.RESPONSE_AUDIO_TRANSCRIPT_ANNOTATION_ADDED == "response.audio_transcript.annotation.added" + + +class TestSessionIncludeOption: + """Test SessionIncludeOption enum.""" + + def test_all_values(self): + assert SessionIncludeOption.ITEM_INPUT_AUDIO_TRANSCRIPTION_LOGPROBS == "item.input_audio_transcription.logprobs" + assert SessionIncludeOption.ITEM_INPUT_AUDIO_TRANSCRIPTION_PHRASES == "item.input_audio_transcription.phrases" + assert SessionIncludeOption.FILE_SEARCH_CALL_RESULTS == "file_search_call.results" + + class TestResponseStatus: """Test ResponseStatus enum.""" diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py index 2c4f9eb79829..ee838dd9a4b9 100644 --- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py +++ 
b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_models.py @@ -5,11 +5,17 @@ # -------------------------------------------------------------------------- from azure.ai.voicelive.models import ( + ActionFind, + ActionOpenPage, + ActionSearch, + ActionSearchSource, AssistantMessageItem, + AzureAvatarVoiceSyncVoice, AzureCustomVoice, AzurePersonalVoice, AzureStandardVoice, AzureVoiceType, + FileSearchResult, InputAudioContentPart, InputTextContentPart, ItemParamStatus, @@ -26,11 +32,13 @@ OutputTextContentPart, PersonalVoiceModels, RequestSession, + ResponseFileSearchCallItem, ResponseMCPApprovalRequestItem, ResponseMCPApprovalResponseItem, ResponseMCPCallItem, ResponseMCPListToolItem, ResponseSession, + ResponseWebSearchCallItem, ServerEventMcpListToolsCompleted, ServerEventMcpListToolsFailed, ServerEventMcpListToolsInProgress, @@ -39,6 +47,8 @@ ServerEventType, SystemMessageItem, ToolType, + TranscriptionPhrase, + TranscriptionWord, UserMessageItem, ) @@ -573,6 +583,102 @@ def test_response_mcp_list_tool_item_empty(self): assert item.server_label == "empty-server" +class TestActionModels: + """Test web search action models.""" + + def test_action_find(self): + action = ActionFind(pattern="test query", url="https://example.com") + assert action.type == "find" + assert action.pattern == "test query" + assert action.url == "https://example.com" + + def test_action_open_page(self): + action = ActionOpenPage(url="https://example.com/page") + assert action.type == "open_page" + assert action.url == "https://example.com/page" + + def test_action_search(self): + source = ActionSearchSource(url="https://example.com") + action = ActionSearch(query="weather", sources=[source]) + assert action.type == "search" + assert action.query == "weather" + assert len(action.sources) == 1 + assert action.sources[0].url == "https://example.com" + + def test_action_search_source(self): + source = ActionSearchSource(url="https://example.com/source") + assert source.type == "url" + assert 
source.url == "https://example.com/source" + + def test_action_search_optional_fields(self): + action = ActionSearch() + assert action.type == "search" + assert action.query is None + assert action.sources is None + + +class TestAzureAvatarVoiceSyncVoice: + """Test AzureAvatarVoiceSyncVoice model.""" + + def test_basic_creation(self): + voice = AzureAvatarVoiceSyncVoice(model=PersonalVoiceModels.DRAGON_LATEST_NEURAL) + assert voice.type == AzureVoiceType.AVATAR_VOICE_SYNC + assert voice.model == PersonalVoiceModels.DRAGON_LATEST_NEURAL + + def test_with_optional_params(self): + voice = AzureAvatarVoiceSyncVoice( + model=PersonalVoiceModels.MAI_VOICE1, + temperature=0.8, + locale="en-US", + style="cheerful", + ) + assert voice.model == PersonalVoiceModels.MAI_VOICE1 + assert voice.temperature == 0.8 + assert voice.locale == "en-US" + assert voice.style == "cheerful" + + +class TestFileSearchResult: + """Test FileSearchResult model.""" + + def test_basic(self): + result = FileSearchResult(file_id="file-123", filename="doc.pdf", score=0.95) + assert result.file_id == "file-123" + assert result.filename == "doc.pdf" + assert result.score == 0.95 + + +class TestResponseWebSearchCallItem: + """Test ResponseWebSearchCallItem model.""" + + def test_basic(self): + item = ResponseWebSearchCallItem(status="completed") + assert item.type == ItemType.WEB_SEARCH_CALL + + +class TestResponseFileSearchCallItem: + """Test ResponseFileSearchCallItem model.""" + + def test_basic(self): + item = ResponseFileSearchCallItem(status="completed") + assert item.type == ItemType.FILE_SEARCH_CALL + + +class TestTranscriptionModels: + """Test transcription-related models.""" + + def test_transcription_word(self): + word = TranscriptionWord(text="hello", offset_milliseconds=0, duration_milliseconds=500) + assert word.text == "hello" + assert word.offset_milliseconds == 0 + assert word.duration_milliseconds == 500 + + def test_transcription_phrase(self): + phrase = 
TranscriptionPhrase(text="hello world", offset_milliseconds=0, duration_milliseconds=1000) + assert phrase.text == "hello world" + assert phrase.offset_milliseconds == 0 + assert phrase.duration_milliseconds == 1000 + class TestMCPServerEvents: """Test MCP-related server event models.""" diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_smoke_validation.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_smoke_validation.py index 151ddb3b5c7e..c1c8614a8ab4 100644 --- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_smoke_validation.py +++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_smoke_validation.py @@ -28,10 +28,16 @@ def test_basic_imports(): InputAudioFormat, OutputAudioFormat, Modality, + SessionIncludeOption, + ServerEventType, ) # Test model imports from azure.ai.voicelive.models import ( + ActionFind, + ActionOpenPage, + ActionSearch, + AzureAvatarVoiceSyncVoice, InputTextContentPart, OutputTextContentPart, UserMessageItem, @@ -41,6 +47,10 @@ def test_basic_imports(): AzureStandardVoice, AzurePersonalVoice, RequestSession, + ResponseWebSearchCallItem, + ResponseFileSearchCallItem, + TranscriptionPhrase, + TranscriptionWord, ) # Test async imports diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_telemetry.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_telemetry.py index a4866c5dbdda..d2f6f1cc6dd7 100644 --- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_telemetry.py +++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_telemetry.py @@ -1,3 +1,4 @@ +# pylint: disable=too-many-lines # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
@@ -520,6 +521,7 @@ def test_extract_function_call_arguments_done_content(self): result, "response.function_call_arguments.done" ) import json as _json + parsed = _json.loads(content) assert parsed["name"] == "get_current_weather" assert parsed["arguments"] == {"location": "Seattle"} @@ -538,6 +540,7 @@ def test_extract_output_item_done_function_call(self): } content = _VoiceLiveInstrumentorPreview._extract_done_event_content(result, "response.output_item.done") import json as _json + parsed = _json.loads(content) assert parsed["messages"][0]["name"] == "get_current_weather" assert parsed["messages"][0]["arguments"] == {"location": "Seattle"} @@ -560,6 +563,7 @@ def test_extract_response_done_function_call_item(self): } content = _VoiceLiveInstrumentorPreview._extract_done_event_content(result, "response.done") import json as _json + parsed = _json.loads(content) assert parsed["messages"][0]["name"] == "get_current_weather" assert parsed["messages"][0]["arguments"] == {"location": "Seattle"} @@ -583,9 +587,7 @@ def test_extract_session_id_from_dict(self): _VoiceLiveInstrumentorPreview._extract_session_id(conn, result) assert conn._telemetry_session_id == "test-session-abc123" - conn._telemetry_span.add_attribute.assert_called_with( - "gen_ai.voice.session_id", "test-session-abc123" - ) + conn._telemetry_span.add_attribute.assert_called_with("gen_ai.voice.session_id", "test-session-abc123") def test_extract_session_id_from_object(self): from azure.ai.voicelive.telemetry._voicelive_instrumentor import _VoiceLiveInstrumentorPreview @@ -639,15 +641,9 @@ def test_extract_audio_format_from_dict(self): assert conn._telemetry_input_audio_format == "pcm16" assert conn._telemetry_output_audio_format == "pcm16" - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.voice.input_audio_format", "pcm16" - ) - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.voice.output_audio_format", "pcm16" - ) - conn._telemetry_span.add_attribute.assert_any_call( - 
"gen_ai.voice.input_sample_rate", 24000 - ) + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.input_audio_format", "pcm16") + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.output_audio_format", "pcm16") + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.input_sample_rate", 24000) def test_extract_audio_format_with_g711_ulaw(self): from azure.ai.voicelive.telemetry._voicelive_instrumentor import _VoiceLiveInstrumentorPreview @@ -667,9 +663,7 @@ def test_extract_audio_format_with_g711_ulaw(self): assert conn._telemetry_input_audio_format == "g711_ulaw" assert conn._telemetry_output_audio_format == "g711_alaw" - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.voice.input_sample_rate", 8000 - ) + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.input_sample_rate", 8000) def test_extract_audio_format_no_sampling_rate(self): """When input_audio_sampling_rate is absent, only formats are set.""" @@ -689,8 +683,9 @@ def test_extract_audio_format_no_sampling_rate(self): assert conn._telemetry_input_audio_format == "pcm16" # input_sample_rate should NOT be set - calls = [c for c in conn._telemetry_span.add_attribute.call_args_list - if c[0][0] == "gen_ai.voice.input_sample_rate"] + calls = [ + c for c in conn._telemetry_span.add_attribute.call_args_list if c[0][0] == "gen_ai.voice.input_sample_rate" + ] assert len(calls) == 0 def test_extract_audio_format_no_session(self): @@ -724,15 +719,9 @@ def test_extract_audio_format_from_recv(self): assert conn._telemetry_input_audio_format == "pcm16" assert conn._telemetry_output_audio_format == "pcm16" - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.voice.input_audio_format", "pcm16" - ) - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.voice.output_audio_format", "pcm16" - ) - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.voice.input_sample_rate", 24000 - ) + 
conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.input_audio_format", "pcm16") + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.output_audio_format", "pcm16") + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.voice.input_sample_rate", 24000) def test_extract_audio_format_from_recv_no_session(self): """Recv extraction should not crash when session field is missing.""" @@ -1103,15 +1092,9 @@ def test_extract_instructions_and_temperature(self): _VoiceLiveInstrumentorPreview._extract_session_config_from_send(conn, event) - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.system_instructions", "You are a helpful assistant." - ) - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.request.temperature", "0.7" - ) - conn._telemetry_span.add_attribute.assert_any_call( - "gen_ai.request.max_output_tokens", 4096 - ) + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.system_instructions", "You are a helpful assistant.") + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.request.temperature", "0.7") + conn._telemetry_span.add_attribute.assert_any_call("gen_ai.request.max_output_tokens", 4096) def test_extract_tools(self): from azure.ai.voicelive.telemetry._voicelive_instrumentor import _VoiceLiveInstrumentorPreview @@ -1246,8 +1229,9 @@ async def fake_aenter(mgr_self, *a, **kw): import asyncio - with patch("azure.ai.voicelive.telemetry._voicelive_instrumentor.settings") as mock_settings, \ - patch("azure.ai.voicelive.telemetry._voicelive_instrumentor.start_span") as mock_start: + with patch("azure.ai.voicelive.telemetry._voicelive_instrumentor.settings") as mock_settings, patch( + "azure.ai.voicelive.telemetry._voicelive_instrumentor.start_span" + ) as mock_start: mock_span = MagicMock() mock_span.__enter__ = MagicMock(return_value=mock_span) mock_span.__exit__ = MagicMock(return_value=False) @@ -1674,8 +1658,7 @@ def test_no_tool_name_for_message_items(self): 
_VoiceLiveInstrumentorPreview._extract_event_ids(conn, result, span) # Should NOT have mcp.tool_name - tool_name_calls = [c for c in span.add_attribute.call_args_list - if c[0][0] == "gen_ai.voice.mcp.tool_name"] + tool_name_calls = [c for c in span.add_attribute.call_args_list if c[0][0] == "gen_ai.voice.mcp.tool_name"] assert len(tool_name_calls) == 0 def test_extract_item_id_from_nested_item(self): @@ -1854,4 +1837,3 @@ def test_agent_version_and_project_name_on_connect(self): span.add_attribute.assert_any_call("gen_ai.conversation.id", "conv_123") span.add_attribute.assert_any_call("gen_ai.agent.version", "v2.1") span.add_attribute.assert_any_call("gen_ai.agent.project_name", "TestProject") - diff --git a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_voice_config.py b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_voice_config.py index 53cd6da8318e..f3b4d06f60e9 100644 --- a/sdk/voicelive/azure-ai-voicelive/tests/test_unit_voice_config.py +++ b/sdk/voicelive/azure-ai-voicelive/tests/test_unit_voice_config.py @@ -184,7 +184,8 @@ def test_all_personal_voice_models(self): models = [ PersonalVoiceModels.DRAGON_LATEST_NEURAL, PersonalVoiceModels.PHOENIX_LATEST_NEURAL, - PersonalVoiceModels.PHOENIX_V2_NEURAL, + PersonalVoiceModels.DRAGON_HD_OMNI_LATEST_NEURAL, + PersonalVoiceModels.MAI_VOICE1, ] for model in models: @@ -195,7 +196,7 @@ def test_all_personal_voice_models(self): def test_azure_personal_voice_inheritance(self): """Test that Azure personal voice inherits from AzureVoice.""" - voice = AzurePersonalVoice(name="personal-voice", model=PersonalVoiceModels.PHOENIX_V2_NEURAL) + voice = AzurePersonalVoice(name="personal-voice", model=PersonalVoiceModels.DRAGON_HD_OMNI_LATEST_NEURAL) assert isinstance(voice, AzureVoice) assert isinstance(voice, AzurePersonalVoice) @@ -277,13 +278,13 @@ def test_request_session_with_azure_standard_voice(self): def test_request_session_with_azure_personal_voice(self): """Test RequestSession with Azure personal voice.""" voice 
= AzurePersonalVoice( - name="my-personal-voice", model=PersonalVoiceModels.PHOENIX_V2_NEURAL, temperature=0.9 + name="my-personal-voice", model=PersonalVoiceModels.DRAGON_HD_OMNI_LATEST_NEURAL, temperature=0.9 ) session = RequestSession(model="gpt-4o-realtime-preview", voice=voice) assert session.voice == voice assert session.voice.type == AzureVoiceType.AZURE_PERSONAL - assert session.voice.model == PersonalVoiceModels.PHOENIX_V2_NEURAL + assert session.voice.model == PersonalVoiceModels.DRAGON_HD_OMNI_LATEST_NEURAL assert session.voice.temperature == 0.9 def test_response_session_with_voice(self): From 723c5caf17c7af25aad06a3ac5fcd7d87bee317a Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 11:15:06 -0700 Subject: [PATCH 02/12] Update version to 1.2.0 GA --- sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py index 772c3a2b9715..a73f358d285a 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/_version.py @@ -6,4 +6,4 @@ # Changes may cause incorrect behavior and will be lost if the code is regenerated. 
# -------------------------------------------------------------------------- -VERSION = "1.2.0b6" +VERSION = "1.2.0" From a2dfd36e79a5148d6ef6d4421710abefe3a1f87f Mon Sep 17 00:00:00 2001 From: xitzhang Date: Tue, 5 May 2026 11:36:13 -0700 Subject: [PATCH 03/12] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- sdk/voicelive/azure-ai-voicelive/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/voicelive/azure-ai-voicelive/pyproject.toml b/sdk/voicelive/azure-ai-voicelive/pyproject.toml index 23136835e480..3727b56db8e8 100644 --- a/sdk/voicelive/azure-ai-voicelive/pyproject.toml +++ b/sdk/voicelive/azure-ai-voicelive/pyproject.toml @@ -58,7 +58,7 @@ test = [ repository = "https://github.com/Azure/azure-sdk-for-python" [tool.setuptools.dynamic] -version = {attr = "azure._version.VERSION"} +version = {attr = "azure.ai.voicelive._version.VERSION"} readme = {file = ["README.md", "CHANGELOG.md"], content-type = "text/markdown"} [tool.setuptools.packages.find] From 4c20f0d0b84ff3d91a355af4e74e109d1955e1df Mon Sep 17 00:00:00 2001 From: xitzhang Date: Tue, 5 May 2026 11:37:46 -0700 Subject: [PATCH 04/12] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- sdk/voicelive/azure-ai-voicelive/pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/voicelive/azure-ai-voicelive/pyproject.toml b/sdk/voicelive/azure-ai-voicelive/pyproject.toml index 3727b56db8e8..9804dabf504f 100644 --- a/sdk/voicelive/azure-ai-voicelive/pyproject.toml +++ b/sdk/voicelive/azure-ai-voicelive/pyproject.toml @@ -73,5 +73,9 @@ exclude = [ [tool.setuptools.package-data] pytyped = ["py.typed"] +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" + [tool.azure-sdk-conda] in_bundle = false From 1c9dde82680c96af0dd27fa5f83e8170e9ddfa7a Mon Sep 17 
00:00:00 2001 From: xitzhang Date: Tue, 5 May 2026 11:38:28 -0700 Subject: [PATCH 05/12] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- sdk/voicelive/azure-ai-voicelive/CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md index 26924c94f63d..81fa472cf9e6 100644 --- a/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md +++ b/sdk/voicelive/azure-ai-voicelive/CHANGELOG.md @@ -55,8 +55,7 @@ ### Breaking Changes -- Removed `PersonalVoiceModels.PHOENIX_V2_NEURAL` enum value (replaced by `DRAGON_HD_OMNI_LATEST_NEURAL` - and `MAI_VOICE1`) +- Removed `PersonalVoiceModels.PHOENIX_V2_NEURAL` enum value (replaced by `DRAGON_HD_OMNI_LATEST_NEURAL` and `MAI_VOICE1`) - Removed Foundry Agent Tool classes (`FoundryAgentTool`, `ResponseFoundryAgentCallItem`, etc.) — use `AgentSessionConfig` with `connect()` instead From 376059a7cca7ccd61af223937c509e83b3e75aa7 Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 13:56:35 -0700 Subject: [PATCH 06/12] Fix azure/_version.py to 1.2.0 GA (was 1.0.0b1) --- sdk/voicelive/azure-ai-voicelive/azure/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_version.py b/sdk/voicelive/azure-ai-voicelive/azure/_version.py index be71c81bd282..a73f358d285a 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/_version.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/_version.py @@ -6,4 +6,4 @@ # Changes may cause incorrect behavior and will be lost if the code is regenerated. 
# -------------------------------------------------------------------------- -VERSION = "1.0.0b1" +VERSION = "1.2.0" From 2675906f45381e750acf37f3d4997c31e80758c4 Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 14:00:40 -0700 Subject: [PATCH 07/12] remove _version --- sdk/voicelive/azure-ai-voicelive/azure/_version.py | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 sdk/voicelive/azure-ai-voicelive/azure/_version.py diff --git a/sdk/voicelive/azure-ai-voicelive/azure/_version.py b/sdk/voicelive/azure-ai-voicelive/azure/_version.py deleted file mode 100644 index a73f358d285a..000000000000 --- a/sdk/voicelive/azure-ai-voicelive/azure/_version.py +++ /dev/null @@ -1,9 +0,0 @@ -# coding=utf-8 -# -------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# Code generated by Microsoft (R) Python Code Generator. -# Changes may cause incorrect behavior and will be lost if the code is regenerated. 
-# -------------------------------------------------------------------------- - -VERSION = "1.2.0" From 19ae78918d9eded551d11eb8df9df706740595dc Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 14:28:13 -0700 Subject: [PATCH 08/12] Fix sphinx docstring formatting in ServerEventConversationItemCreated --- .../azure/ai/voicelive/models/_models.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py index 48377eb3897a..30e154e81df9 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/models/_models.py @@ -5065,13 +5065,13 @@ class ServerEventConversationItemCreated(ServerEvent, discriminator="conversatio event: * The server is generating a Response, which if successful will produce - either one or two Items, which will be of type `message` - (role `assistant`) or type `function_call`. + either one or two Items, which will be of type `message` + (role `assistant`) or type `function_call`. * The input audio buffer has been committed, either by the client or the - server (in `server_vad` mode). The server will take the content of the - input audio buffer and add it to a new user message Item. + server (in `server_vad` mode). The server will take the content of the + input audio buffer and add it to a new user message Item. * The client has sent a `conversation.item.create` event to add a new Item - to the Conversation. + to the Conversation. 
:ivar event_id: :vartype event_id: str From 5d7052610000dd1fbe668c368775941324e2e474 Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 14:34:22 -0700 Subject: [PATCH 09/12] update cspell --- sdk/voicelive/azure-ai-voicelive/cspell.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/voicelive/azure-ai-voicelive/cspell.json b/sdk/voicelive/azure-ai-voicelive/cspell.json index cd7cb8c673c2..4cdc7d4be4d0 100644 --- a/sdk/voicelive/azure-ai-voicelive/cspell.json +++ b/sdk/voicelive/azure-ai-voicelive/cspell.json @@ -13,7 +13,8 @@ "xhigh", "XHIGH", "genai", - "GENAI" + "GENAI", + "SSML" ], "ignorePaths": [ "*.csv", From 07e07d08031bdf4bdb3d9bc910020d81d8f9f598 Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 14:41:47 -0700 Subject: [PATCH 10/12] update cespell file --- sdk/voicelive/azure-ai-voicelive/cspell.json | 21 ++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sdk/voicelive/azure-ai-voicelive/cspell.json b/sdk/voicelive/azure-ai-voicelive/cspell.json index 4cdc7d4be4d0..1dc68d63932d 100644 --- a/sdk/voicelive/azure-ai-voicelive/cspell.json +++ b/sdk/voicelive/azure-ai-voicelive/cspell.json @@ -1,20 +1,21 @@ { "ignoreWords": [ - "viseme", - "VISEME", - "ulaw", - "ULAW", + "GENAI", + "genai", + "HDOMNI", + "libasound", "logprobs", "pyaudio", "PyAudio", - "libasound", - "webrtc", + "SSML", + "ULAW", + "ulaw", + "VISEME", + "viseme", "WEBRTC", - "xhigh", + "webrtc", "XHIGH", - "genai", - "GENAI", - "SSML" + "xhigh" ], "ignorePaths": [ "*.csv", From 68aa4b6be2e4d1387ac49259d751bd1c2421225f Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 15:18:00 -0700 Subject: [PATCH 11/12] update status --- .../azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py | 5 +++-- sdk/voicelive/azure-ai-voicelive/pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py 
b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py index 52a39f70b9e2..ad9de8d19bd5 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/aio/_patch.py @@ -22,7 +22,8 @@ import aiohttp except ImportError as exc: raise ImportError( - "aiohttp is required for azure-ai-voicelive. " "Install it with: pip install azure-ai-voicelive[aiohttp]" + "aiohttp is required for azure-ai-voicelive. " + "Install it with: pip install azure-ai-voicelive[aiohttp]" ) from exc from azure.ai.voicelive.models._models import ( ClientEventConversationItemCreate, @@ -511,7 +512,7 @@ async def recv(self) -> ServerEvent: raise ConnectionClosed(1006, "Empty WebSocket frame") payload = json.loads(raw.decode("utf-8")) - event = cast("ServerEvent", ServerEvent._deserialize(payload, [])) + event = cast("ServerEvent", ServerEvent._deserialize(payload, [])) # pylint: disable=protected-access return event except (ValueError, TypeError) as e: log.error("Error parsing message: %s", e) diff --git a/sdk/voicelive/azure-ai-voicelive/pyproject.toml b/sdk/voicelive/azure-ai-voicelive/pyproject.toml index 9804dabf504f..fc4032c38360 100644 --- a/sdk/voicelive/azure-ai-voicelive/pyproject.toml +++ b/sdk/voicelive/azure-ai-voicelive/pyproject.toml @@ -17,7 +17,7 @@ authors = [ description = "Microsoft Corporation Azure Ai Voicelive Client Library for Python" license = "MIT" classifiers = [ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3", From 25c6fbd67f667a08952cce9e9ed9aa3d7f786da8 Mon Sep 17 00:00:00 2001 From: Xiting Zhang Date: Tue, 5 May 2026 16:23:46 -0700 Subject: [PATCH 12/12] Remove unused conn_self param from _extract_send_event_ids --- .../ai/voicelive/telemetry/_voicelive_instrumentor.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) 
diff --git a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py index 1be407c32192..25561fd72ae6 100644 --- a/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py +++ b/sdk/voicelive/azure-ai-voicelive/azure/ai/voicelive/telemetry/_voicelive_instrumentor.py @@ -808,7 +808,7 @@ async def wrapper( if message_size is not None: span.add_attribute(GEN_AI_VOICE_MESSAGE_SIZE, message_size) # Extract call_id from send events (e.g. conversation.item.create with function_call_output) - instrumentor._extract_send_event_ids(conn_self, event, span) + instrumentor._extract_send_event_ids(event, span) instrumentor._add_send_event(span, event_type, content_str) return await original_send(conn_self, event, *args, **kwargs) except Exception as exc: @@ -1271,16 +1271,14 @@ def _extract_event_ids( @staticmethod def _extract_send_event_ids( - conn_self: Any, event: Any, span: "AbstractSpan" - ) -> None: # pylint: disable=unused-argument + event: Any, span: "AbstractSpan" + ) -> None: """Extract call_id and response_id from send events. For ``conversation.item.create`` events, the nested ``item`` may carry a ``call_id`` (for function_call_output items). For ``response.cancel`` events, ``response_id`` may be present. - :param conn_self: The ``VoiceLiveConnection`` instance. - :type conn_self: ~azure.ai.voicelive.aio.VoiceLiveConnection :param event: The client event being sent. :type event: any :param span: The current send span.