From f6af98c07c1a49625c339030801c2d20ae70088b Mon Sep 17 00:00:00 2001 From: jbolor21 <86250273+jbolor21@users.noreply.github.com> Date: Wed, 17 Jun 2026 22:15:01 -0700 Subject: [PATCH] adds image_url as flag to send string instead of dict to oai --- .../components/Config/CreateTargetDialog.tsx | 21 +++++++++++++++++++ .../openai/openai_response_target.py | 8 ++++++- .../setup/initializers/components/targets.py | 5 ++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/Config/CreateTargetDialog.tsx b/frontend/src/components/Config/CreateTargetDialog.tsx index ba2b13b4c7..1260233a2d 100644 --- a/frontend/src/components/Config/CreateTargetDialog.tsx +++ b/frontend/src/components/Config/CreateTargetDialog.tsx @@ -151,6 +151,7 @@ export default function CreateTargetDialog({ open, onClose, onCreated, existingT const [temperature, setTemperature] = useState('1.0') const [topP, setTopP] = useState('1.0') const [repetitionPenalty, setRepetitionPenalty] = useState('1.0') + const [imageUrlAsString, setImageUrlAsString] = useState(false) const [submitting, setSubmitting] = useState(false) const [error, setError] = useState(null) const [fieldErrors, setFieldErrors] = useState<{ targetType?: string; endpoint?: string }>({}) @@ -165,6 +166,7 @@ export default function CreateTargetDialog({ open, onClose, onCreated, existingT const isRoundRobin = targetConfig?.kind === 'roundrobin' const isAzureML = targetConfig?.kind === 'azureml' const isOpenAi = targetConfig?.kind === 'openai' + const isOpenAiResponse = targetType === 'OpenAIResponseTarget' const supportsEntra = targetConfig?.supportsEntra ?? false const showAuthField = targetType !== '' && supportsEntra const isEntra = showAuthField && authMode === 'entra' @@ -252,6 +254,7 @@ export default function CreateTargetDialog({ open, onClose, onCreated, existingT setTemperature('1.0') setTopP('1.0') setRepetitionPenalty('1.0') + setImageUrlAsString(false) setError(null) setFieldErrors({}) setSelectedInnerTargets([]) @@ -337,6 +340,10 @@ export default function CreateTargetDialog({ open, onClose, onCreated, existingT if (!isNaN(parsedRepetitionPenalty)) params.repetition_penalty = parsedRepetitionPenalty } + if (isOpenAiResponse && imageUrlAsString) { + params.image_url_as_string = true + } + await targetsApi.createTarget({ type: targetType, params, @@ -577,6 +584,20 @@ export default function CreateTargetDialog({ open, onClose, onCreated, existingT )} + {isOpenAiResponse && ( +
+ setImageUrlAsString(data.checked)} + label="Send image_url as bare string" + /> + + Some Responses-API endpoints (e.g. Corvid) require image_url to be a bare data-URL + string rather than an object. Leave off for standard OpenAI / Azure OpenAI. + +
+ )} + {showAuthField && ( None: @@ -169,6 +170,10 @@ def __init__( self._custom_functions: dict[str, ToolExecutor] = custom_functions or {} self._fail_on_missing_function: bool = fail_on_missing_function + # Some Responses API endpoints (e.g. Corvid) require image_url as a bare string + # rather than the {"url": ...} object used by Chat Completions. + self._image_url_as_string: bool = image_url_as_string + # Extract the grammar 'tool' if one is present # See # https://platform.openai.com/docs/guides/function-calling#context-free-grammars @@ -238,7 +243,8 @@ async def _construct_input_item_from_piece_async(self, piece: MessagePiece) -> d } if piece.converted_value_data_type == "image_path": data_url = await convert_local_image_to_data_url_async(piece.converted_value) - return {"type": "input_image", "image_url": {"url": data_url}} + image_url: Any = data_url if self._image_url_as_string else {"url": data_url} + return {"type": "input_image", "image_url": image_url} raise ValueError(f"Unsupported piece type for inline content: {piece.converted_value_data_type}") async def _build_input_for_multi_modal_async(self, conversation: MutableSequence[Message]) -> list[dict[str, Any]]: diff --git a/pyrit/setup/initializers/components/targets.py b/pyrit/setup/initializers/components/targets.py index 098c2997cd..3ce9c12ec2 100644 --- a/pyrit/setup/initializers/components/targets.py +++ b/pyrit/setup/initializers/components/targets.py @@ -289,6 +289,7 @@ class TargetConfig: key_var="AZURE_OPENAI_GPT5_KEY", model_var="AZURE_OPENAI_GPT5_MODEL", underlying_model_var="AZURE_OPENAI_GPT5_UNDERLYING_MODEL", + extra_kwargs={"image_url_as_string": True}, ), TargetConfig( registry_name="azure_openai_gpt5_responses_high_reasoning", @@ -297,7 +298,7 @@ class TargetConfig: key_var="AZURE_OPENAI_GPT5_KEY", model_var="AZURE_OPENAI_GPT5_MODEL", underlying_model_var="AZURE_OPENAI_GPT5_UNDERLYING_MODEL", - extra_kwargs={"extra_body_parameters": {"reasoning": {"effort": "high"}}}, + extra_kwargs={"extra_body_parameters": {"reasoning": {"effort": "high"}}, "image_url_as_string": True}, ), TargetConfig( registry_name="platform_openai_responses", @@ -305,6 +306,7 @@ class TargetConfig: endpoint_var="PLATFORM_OPENAI_RESPONSES_ENDPOINT", key_var="PLATFORM_OPENAI_RESPONSES_KEY", model_var="PLATFORM_OPENAI_RESPONSES_MODEL", + extra_kwargs={"image_url_as_string": True}, ), TargetConfig( registry_name="azure_openai_responses", @@ -313,6 +315,7 @@ class TargetConfig: key_var="AZURE_OPENAI_RESPONSES_KEY", model_var="AZURE_OPENAI_RESPONSES_MODEL", underlying_model_var="AZURE_OPENAI_RESPONSES_UNDERLYING_MODEL", + extra_kwargs={"image_url_as_string": True}, ), # ============================================ # Realtime Targets (RealtimeTarget)