Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 105 additions & 29 deletions doc/code/executor/3_attack_configuration.ipynb

Large diffs are not rendered by default.

28 changes: 24 additions & 4 deletions doc/code/executor/3_attack_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
# |---|---|
# | `objective` | What you are trying to get the **objective target** (the system under test) to do. Drives scoring and multi-turn adversarial prompts. |
# | `memory_labels` | A `dict[str, str]` tagged onto every prompt/response, so you can filter this run later in memory. |
# | `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns (system prompt, prior history). |
# | `system_prompt` | The objective target's system prompt, as a string. The standard one-line way to set it; PyRIT lowers it to a single `system` message at the front of the conversation. Mutually exclusive with a `system` message in `prepended_conversation`. |
# | `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns — a full `system`/`user`/`assistant` history. Use this when you need more than a system prompt. |
# | `next_message` | The exact next message to send, instead of letting the attack derive it from the objective. Useful for multimodal or pre-built seeds. |
#
# Construction-time configuration objects — **adversarial**, **scoring**, and **converter** — are
Expand Down Expand Up @@ -59,12 +60,31 @@
)
await output_attack_async(result)

# %% [markdown]
# ## Setting a system prompt
#
# `system_prompt=` is the standard, one-line way to set the **objective target's** system prompt.
# PyRIT lowers it into a single `system` message at the front of the conversation, so it behaves the
# same across single-turn and multi-turn attacks without any per-attack wiring.
#
# `system_prompt=` and a `system` message inside `prepended_conversation` (next section) are mutually
# exclusive — supplying both raises a `ValueError`. Use `prepended_conversation` only when you need to
# seed more than a system prompt.

# %%
result = await attack.execute_async( # type: ignore
objective="Explain how a saponification reaction works",
system_prompt="You are a helpful chemistry tutor who explains concepts step by step.",
)
await output_attack_async(result)

# %% [markdown]
# ## Prepended conversations
#
# A prepended conversation seeds the exchange before the attack adds its own turn. The most common
# use is setting a system prompt, but you can prepend any sequence of `system` / `user` / `assistant`
# turns — for example, to resume a prior conversation or to plant an agreeable assistant reply.
# A prepended conversation seeds the exchange before the attack adds its own turn. For just a system
# prompt, prefer `system_prompt=` above. Reach for a prepended conversation when you need to seed a
# sequence of `system` / `user` / `assistant` turns — for example, to resume a prior conversation or
# to plant an agreeable assistant reply.

# %%
from pyrit.models import Message, MessagePiece
Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/attack/compound/sequential_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def __init__(
# Inner child attacks expand their own next_message / prepended_conversation
# via their own params_type; the compound takes no per-call message
# overrides. system_prompt is excluded too, because it is lowered into
# prepended_conversation.
params_type=AttackParameters.excluding("next_message", "prepended_conversation"),
params_type=AttackParameters.excluding("next_message", "prepended_conversation", "system_prompt"),
logger=logger,
)
self._child_attacks: list[SequentialChildAttack] = list(child_attacks)
Expand Down
3 changes: 3 additions & 0 deletions pyrit/executor/attack/core/attack_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ class AttackParameters:
# Conversation that is automatically prepended to the target model
prepended_conversation: list[Message] | None = None

# System prompt for the objective target; lowered to a prepended system message
system_prompt: str | None = None

# Additional labels that can be applied to the prompts throughout the attack
memory_labels: dict[str, str] | None = field(default_factory=dict)

Expand Down
46 changes: 46 additions & 0 deletions pyrit/executor/attack/core/attack_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,3 +688,49 @@ async def execute_async(
context = self._context_type(params=params, **context_kwargs)

return await self.execute_with_context_async(context=context)

async def execute_with_context_async(self, *, context: AttackStrategyContextT) -> AttackStrategyResultT:
    """
    Run the attack for ``context``, lowering ``system_prompt=`` beforehand.

    Any ``system_prompt`` carried by the context's params is first turned into a
    leading system-role message of the prepended conversation; execution is then
    handed to the parent implementation. Because the lowering sits in this method
    rather than in ``execute_async``, callers that build a context themselves and
    invoke this method directly receive the same treatment.

    Args:
        context (AttackStrategyContextT): The attack context to execute.

    Returns:
        AttackStrategyResultT: The result of the attack execution.

    Raises:
        ValueError: If ``system_prompt`` is set and the prepended conversation
            already contains a system-role message.
    """
    # Lower first so the parent lifecycle only sees the final prepended conversation.
    self._apply_system_prompt_to_context(context=context)
    result = await super().execute_with_context_async(context=context)
    return result

@staticmethod
def _apply_system_prompt_to_context(*, context: AttackStrategyContextT) -> None:
    """
    Lower ``system_prompt=`` into a single system-role prepended message.

    Reads ``system_prompt`` from ``context.params`` (a params type without that
    field is treated as having no system prompt). When one is set, a system
    message built from it is placed at the front of
    ``context.prepended_conversation``. Only the context attribute is reassigned;
    ``context.params`` is never written to.

    Args:
        context (AttackStrategyContextT): The attack context to mutate.

    Raises:
        ValueError: If a system-role message was also supplied directly in
            prepended_conversation.
    """
    system_prompt = getattr(context.params, "system_prompt", None)
    if system_prompt is None:
        return

    # prepended_conversation is optional on the params (``list[Message] | None``),
    # so treat a missing conversation as empty instead of iterating over None.
    existing = context.prepended_conversation or []
    if any(message.api_role == "system" for message in existing):
        raise ValueError(
            "Cannot supply both system_prompt= and a system-role message in "
            "prepended_conversation; use one or the other."
        )

    context.prepended_conversation = [Message.from_system_prompt(system_prompt), *existing]
4 changes: 3 additions & 1 deletion pyrit/executor/attack/single_turn/context_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@

# ContextComplianceAttack generates prepended_conversation internally
# by building a benign context conversation.
ContextComplianceAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message")
ContextComplianceAttackParameters = AttackParameters.excluding(
"prepended_conversation", "next_message", "system_prompt"
)


class ContextComplianceAttack(PromptSendingAttack):
Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/attack/single_turn/flip_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)

# FlipAttack generates prepended_conversation internally from its system prompt.
FlipAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message")
FlipAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt")


class FlipAttack(PromptSendingAttack):
Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/attack/single_turn/many_shot_jailbreak.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# ManyShotJailbreakAttack does not support prepended conversations
# as it constructs its own prompt format with examples.
ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message")
ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt")

_MANY_SHOT_EXAMPLES_PATH = DATASETS_PATH / "jailbreak" / "many_shot_examples.json"

Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/attack/single_turn/role_play.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

# RolePlayAttack generates next_message and prepended_conversation internally,
# so it does not accept these parameters from callers. system_prompt is excluded
# as well, because it is lowered into prepended_conversation.
RolePlayAttackParameters = AttackParameters.excluding("next_message", "prepended_conversation")
RolePlayAttackParameters = AttackParameters.excluding("next_message", "prepended_conversation", "system_prompt")


class RolePlayPaths(enum.Enum):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ class SingleTurnAttackContext(AttackContext[AttackParamsT]):
# Unique identifier of the main conversation between the attacker and model
conversation_id: str = field(default_factory=lambda: str(uuid.uuid4()))

# System prompt for chat-based targets
system_prompt: str | None = None

# Arbitrary metadata that downstream attacks or scorers may attach
metadata: dict[str, str | int] | None = None

Expand Down
2 changes: 1 addition & 1 deletion pyrit/executor/attack/single_turn/skeleton_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
logger = logging.getLogger(__name__)

# SkeletonKeyAttack generates prepended_conversation internally from the skeleton key prompt and acceptance response.
SkeletonKeyAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message")
SkeletonKeyAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt")


class SkeletonKeyAttack(PromptSendingAttack):
Expand Down
51 changes: 50 additions & 1 deletion tests/unit/executor/attack/core/test_attack_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import asyncio
import dataclasses
import uuid
from unittest.mock import AsyncMock, MagicMock
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

Expand All @@ -28,6 +28,7 @@
SeedObjective,
SeedPrompt,
)
from pyrit.prompt_target import PromptTarget


# Helper to create a properly configured mock attack
Expand Down Expand Up @@ -60,6 +61,25 @@ def create_seed_group(objective: str) -> SeedAttackGroup:
)


class _ConcreteSingleTurnAttack(AttackStrategy):
    """Bare-bones AttackStrategy subclass so tests can run the real execute_with_context_async override."""

    def __init__(self, *, objective_target):
        super().__init__(context_type=SingleTurnAttackContext, objective_target=objective_target)

    def _validate_context(self, *, context):
        """Accept every context without checks."""

    async def _setup_async(self, *, context):
        """Nothing to set up."""

    async def _perform_async(self, *, context):
        """Return whatever create_attack_result builds for the context's objective."""
        return create_attack_result(context.objective)

    async def _teardown_async(self, *, context):
        """Nothing to tear down."""


@pytest.mark.usefixtures("patch_central_database")
class TestAttackExecutorInitialization:
"""Tests for AttackExecutor initialization."""
Expand Down Expand Up @@ -710,3 +730,32 @@ async def test_excluded_params_type_rejects_excluded_fields(self):
assert "prepended_conversation" not in fields
assert "objective" in fields
assert "memory_labels" in fields


@pytest.mark.usefixtures("patch_central_database")
class TestExecutorSystemPromptLowering:
    """Regression coverage for system_prompt= lowering on the AttackExecutor path.

    AttackExecutor hands a ready-made context to execute_with_context_async instead of
    going through execute_async. The lowering is implemented in execute_with_context_async,
    so a system prompt passed to the executor must still end up as a system message in
    the prepended conversation rather than being silently dropped.
    """

    async def test_executor_lowers_broadcast_system_prompt(self):
        """A system_prompt given to the executor arrives as the only prepended system message."""
        target = MagicMock(spec=PromptTarget)
        attack = _ConcreteSingleTurnAttack(objective_target=target)
        executor = AttackExecutor()

        # Patch the base Strategy method (the super() call) so only the override under test runs.
        base_execute = "pyrit.executor.core.strategy.Strategy.execute_with_context_async"
        with patch(base_execute, new_callable=AsyncMock) as mock_super:
            mock_super.return_value = create_attack_result("Test objective")
            await executor.execute_attack_async(
                attack=attack,
                objectives=["Test objective"],
                system_prompt="You are a helpful assistant.",
            )

        lowered = mock_super.call_args.kwargs["context"].prepended_conversation
        roles = [message.api_role for message in lowered]
        assert roles == ["system"]
        assert lowered[0].get_value() == "You are a helpful assistant."
72 changes: 71 additions & 1 deletion tests/unit/executor/attack/core/test_attack_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the MIT license.

import logging
from unittest.mock import MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

Expand Down Expand Up @@ -279,6 +279,76 @@ async def test_execute_async_allows_optional_parameters_as_none(self, mock_attac
assert result is not None


@pytest.mark.usefixtures("patch_central_database")
class TestExecuteAsyncSystemPromptLowering:
    """Checks that system_prompt= is lowered inside execute_with_context_async.

    The lowering is done by ``AttackStrategy.execute_with_context_async``. Each test
    replaces the base ``Strategy.execute_with_context_async`` (the target of the
    ``super()`` call) with a mock, which skips the lifecycle but still lets the
    override that performs the lowering run.
    """

    _SUPER = "pyrit.executor.core.strategy.Strategy.execute_with_context_async"

    async def test_system_prompt_lowered_to_single_system_message(self, mock_attack_strategy):
        """A lone system_prompt becomes exactly one system-role message."""
        with patch(self._SUPER, new_callable=AsyncMock) as mock_super:
            await mock_attack_strategy.execute_async(
                objective="Test objective",
                system_prompt="You are a helpful assistant.",
            )

        lowered = mock_super.call_args.kwargs["context"].prepended_conversation
        assert len(lowered) == 1
        first = lowered[0]
        assert first.api_role == "system"
        assert first.get_value() == "You are a helpful assistant."

    async def test_system_prompt_prepended_before_existing_conversation(self, mock_attack_strategy):
        """The system message is placed ahead of caller-supplied turns."""
        earlier_reply = Message.from_prompt(prompt="Earlier reply", role="assistant")
        with patch(self._SUPER, new_callable=AsyncMock) as mock_super:
            await mock_attack_strategy.execute_async(
                objective="Test objective",
                system_prompt="You are a helpful assistant.",
                prepended_conversation=[earlier_reply],
            )

        lowered = mock_super.call_args.kwargs["context"].prepended_conversation
        roles = [message.api_role for message in lowered]
        assert roles == ["system", "assistant"]
        assert lowered[0].get_value() == "You are a helpful assistant."

    async def test_system_prompt_conflict_with_existing_system_message_raises(self, mock_attack_strategy):
        """Supplying system_prompt= together with a system message is rejected."""
        conflicting = Message.from_system_prompt("Existing system message")
        with pytest.raises(ValueError, match="Cannot supply both system_prompt="):
            await mock_attack_strategy.execute_async(
                objective="Test objective",
                system_prompt="You are a helpful assistant.",
                prepended_conversation=[conflicting],
            )

    async def test_no_system_prompt_leaves_prepended_conversation_unchanged(self, mock_attack_strategy):
        """Without system_prompt= the prepended conversation is passed through as given."""
        greeting = Message.from_prompt(prompt="Hello", role="user")
        with patch(self._SUPER, new_callable=AsyncMock) as mock_super:
            await mock_attack_strategy.execute_async(
                objective="Test objective",
                prepended_conversation=[greeting],
            )

        passed_through = mock_super.call_args.kwargs["context"].prepended_conversation
        assert len(passed_through) == 1
        assert passed_through[0].api_role == "user"

    async def test_lowering_happens_when_context_passed_directly(self, mock_attack_strategy):
        """Lowering also applies when execute_async is bypassed."""
        # Mirrors the AttackExecutor path: the context is constructed by the caller and
        # given straight to execute_with_context_async.
        params = AttackParameters(objective="Test objective", system_prompt="You are a helpful assistant.")
        context = mock_attack_strategy._context_type(params=params)

        with patch(self._SUPER, new_callable=AsyncMock):
            await mock_attack_strategy.execute_with_context_async(context=context)

        lowered = context.prepended_conversation
        assert [message.api_role for message in lowered] == ["system"]
        assert lowered[0].get_value() == "You are a helpful assistant."


@pytest.mark.usefixtures("patch_central_database")
class TestDefaultAttackStrategyEventHandler:
"""Tests for the default attack strategy event handler"""
Expand Down
34 changes: 32 additions & 2 deletions tests/unit/executor/attack/single_turn/test_prompt_sending.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ def test_validate_context_with_additional_optional_fields(self, mock_target):
next_message=Message.from_prompt(prompt="test", role="user"),
),
conversation_id=str(uuid.uuid4()),
system_prompt="System prompt",
metadata={"key": "value"},
)

Expand Down Expand Up @@ -1051,7 +1050,38 @@ async def test_execute_async_with_parameters(self, mock_target, sample_response)
assert context.objective == "Test objective"
assert context.memory_labels == {"test": "label"}
assert context.next_message is not None
assert context.system_prompt == "System prompt"
# system_prompt= is lowered into a leading system-role prepended message
assert context.prepended_conversation[0].api_role == "system"
assert context.prepended_conversation[0].get_value() == "System prompt"
assert context.prepended_conversation[1].api_role == "assistant"

async def test_execute_async_delivers_system_prompt_to_conversation(self, mock_target):
    """The lowered system_prompt= is what the conversation manager receives when seeding the conversation."""
    attack = PromptSendingAttack(objective_target=mock_target)

    captured = {}

    async def record_prepended(*, context, **kwargs):
        # Snapshot the conversation at the moment the manager is asked to initialise the context.
        captured["prepended"] = list(context.prepended_conversation)

    manager = MagicMock()
    manager.initialize_context_async = AsyncMock(side_effect=record_prepended)
    attack._conversation_manager = manager

    canned_result = AttackResult(
        conversation_id="test-id",
        objective="Test objective",
        outcome=AttackOutcome.SUCCESS,
        executed_turns=1,
    )
    attack._perform_async = AsyncMock(return_value=canned_result)

    await attack.execute_async(
        objective="Test objective",
        system_prompt="You are a helpful assistant.",
    )

    seeded = captured["prepended"]
    assert [message.api_role for message in seeded] == ["system"]
    assert seeded[0].get_value() == "You are a helpful assistant."

async def test_execute_async_with_invalid_params_raises_error(self, mock_target):
"""Test execute_async raises error when invalid parameters are passed"""
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/executor/attack/single_turn/test_role_play.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,18 @@ def test_params_type_excludes_prepended_conversation(self, role_play_attack):
fields = {f.name for f in dataclasses.fields(role_play_attack.params_type)}
assert "prepended_conversation" not in fields

def test_params_type_excludes_system_prompt(self, role_play_attack):
    """The attack's params_type must not expose a system_prompt field."""
    import dataclasses

    declared = dataclasses.fields(role_play_attack.params_type)
    field_names = {spec.name for spec in declared}
    assert "system_prompt" not in field_names

async def test_execute_async_rejects_system_prompt(self, role_play_attack):
    """execute_async raises when given system_prompt, which this attack's params_type excludes."""
    call_kwargs = {"objective": "Test objective", "system_prompt": "You are a pirate."}
    with pytest.raises(ValueError, match="does not accept parameters"):
        await role_play_attack.execute_async(**call_kwargs)

def test_params_type_includes_objective(self, role_play_attack):
"""Test that params_type includes objective field"""
import dataclasses
Expand Down
Loading