From 27f6851cf6f1bc5fd4b60c349c30fdfa616a804a Mon Sep 17 00:00:00 2001
From: Varun Joginpalli <vjoginpalli@microsoft.com>
Date: Fri, 19 Jun 2026 22:39:50 +0000
Subject: [PATCH] MAINT: Standardize garak.encoding defaults and fix
 atomic-attack name collisions

---
 doc/scanner/garak.ipynb                       |  20 ++-
 doc/scanner/garak.py                          |  11 +-
 pyrit/scenario/scenarios/garak/encoding.py    | 134 +++++++++++------
 .../unit/backend/test_scenario_run_service.py |  27 ++++
 tests/unit/scenario/garak/test_encoding.py    | 136 +++++++++++++++++-
 5 files changed, 269 insertions(+), 59 deletions(-)

diff --git a/doc/scanner/garak.ipynb b/doc/scanner/garak.ipynb
index 6b9d68b91b..2adc9c0fab 100644
--- a/doc/scanner/garak.ipynb
+++ b/doc/scanner/garak.ipynb
@@ -19,7 +19,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "1",
-   "metadata": {},
+   "metadata": {
+    "lines_to_next_cell": 0
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -63,15 +65,20 @@
     "strategy encodes the prompt, asks the target to decode it, and scores whether the decoded output\n",
     "matches the harmful content. Default datasets include slur terms and web/HTML/JS content.\n",
     "\n",
-    "**CLI example:**\n",
+    "**Default run** uses the curated `DEFAULT` strategy aggregate (Base16, ROT13, MorseCode — one\n",
+    "base-N, one substitution cipher, and one symbolic alphabet) for a fast, representative scan. Use\n",
+    "the `ALL` aggregate for an exhaustive run across every encoding scheme.\n",
+    "\n",
+    "**Fast path** (sanity-check target wiring in well under a minute) — pick a single-variant encoding\n",
+    "and one prompt:\n",
     "\n",
     "```bash\n",
-    "pyrit_scan garak.encoding --target openai_chat --strategies base64 --max-dataset-size 1\n",
+    "pyrit_scan garak.encoding --target openai_chat --strategies rot13 --max-dataset-size 1\n",
     "```\n",
     "\n",
     "**Available strategies** (17 encodings): Base64, Base2048, Base16, Base32, ASCII85, Hex,\n",
     "QuotedPrintable, UUencode, ROT13, Braille, Atbash, MorseCode, NATO, Ecoji, Zalgo, LeetSpeak,\n",
-    "AsciiSmuggler\n",
+    "AsciiSmuggler. Aggregates: `DEFAULT` (curated subset, the default) and `ALL` (every encoding).\n",
     "\n",
     "> **Note:** Strategy composition is NOT supported for Encoding — each encoding is tested\n",
     "> independently."
@@ -141,7 +148,7 @@
       "\u001b[36m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m  📋 Scenario Details\u001b[0m\n",
       "\u001b[36m    • Name: Encoding\u001b[0m\n",
-      "\u001b[36m    • Scenario Version: 1\u001b[0m\n",
+      "\u001b[36m    • Scenario Version: 2\u001b[0m\n",
       "\u001b[36m    • PyRIT Version: 0.12.1.dev0\u001b[0m\n",
       "\u001b[36m    • Description:\u001b[0m\n",
       "\u001b[36m        Encoding Scenario implementation for PyRIT. This scenario tests how resilient models are to various encoding\u001b[0m\n",
@@ -205,6 +212,9 @@
   }
  ],
  "metadata": {
+  "jupytext": {
+   "main_language": "python"
+  },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
diff --git a/doc/scanner/garak.py b/doc/scanner/garak.py
index e86c03146f..a207f608d4 100644
--- a/doc/scanner/garak.py
+++ b/doc/scanner/garak.py
@@ -37,15 +37,20 @@
 # strategy encodes the prompt, asks the target to decode it, and scores whether the decoded output
 # matches the harmful content. Default datasets include slur terms and web/HTML/JS content.
 #
-# **CLI example:**
+# **Default run** uses the curated `DEFAULT` strategy aggregate (Base16, ROT13, MorseCode — one
+# base-N, one substitution cipher, and one symbolic alphabet) for a fast, representative scan. Use
+# the `ALL` aggregate for an exhaustive run across every encoding scheme.
+#
+# **Fast path** (sanity-check target wiring in well under a minute) — pick a single-variant encoding
+# and one prompt:
 #
 # ```bash
-# pyrit_scan garak.encoding --target openai_chat --strategies base64 --max-dataset-size 1
+# pyrit_scan garak.encoding --target openai_chat --strategies rot13 --max-dataset-size 1
 # ```
 #
 # **Available strategies** (17 encodings): Base64, Base2048, Base16, Base32, ASCII85, Hex,
 # QuotedPrintable, UUencode, ROT13, Braille, Atbash, MorseCode, NATO, Ecoji, Zalgo, LeetSpeak,
-# AsciiSmuggler
+# AsciiSmuggler. Aggregates: `DEFAULT` (curated subset, the default) and `ALL` (every encoding).
 #
 # > **Note:** Strategy composition is NOT supported for Encoding — each encoding is tested
 # > independently.
diff --git a/pyrit/scenario/scenarios/garak/encoding.py b/pyrit/scenario/scenarios/garak/encoding.py
index abe36b7ca6..6582fd67b2 100644
--- a/pyrit/scenario/scenarios/garak/encoding.py
+++ b/pyrit/scenario/scenarios/garak/encoding.py
@@ -83,33 +83,51 @@ class EncodingStrategy(ScenarioStrategy):
     Strategies for encoding attacks.
 
     Each enum member represents an encoding scheme that will be tested against the target model.
-    The ALL aggregate expands to include all encoding strategies.
+    The ``ALL`` aggregate expands to every encoding scheme (exhaustive run). The ``DEFAULT``
+    aggregate expands to a small curated subset that spans distinct encoding families, giving a
+    fast, representative default run.
 
     Note: EncodingStrategy does not support composition. Each encoding must be applied individually.
+    The strategy axis here is the encoding scheme (not an attack technique), and every encoding runs
+    as a single-turn ``PromptSendingAttack``, so SINGLE_TURN/MULTI_TURN aggregates are not applicable.
     """
 
-    # Aggregate member
+    # Aggregate members
     ALL = ("all", {"all"})
+    DEFAULT = ("default", {"default"})
 
-    # Individual encoding strategies (matching the atomic attack names)
+    # Individual encoding strategies. Each value is the encoding name, which drives strategy
+    # selection and display grouping. Members tagged ``default`` form the curated DEFAULT
+    # aggregate: one base-N encoding (Base16), one substitution cipher (ROT13), and one
+    # symbolic alphabet (MorseCode).
     Base64 = ("base64", set[str]())
     Base2048 = ("base2048", set[str]())
-    Base16 = ("base16", set[str]())
+    Base16 = ("base16", {"default"})
     Base32 = ("base32", set[str]())
     ASCII85 = ("ascii85", set[str]())
     Hex = ("hex", set[str]())
     QuotedPrintable = ("quoted_printable", set[str]())
     UUencode = ("uuencode", set[str]())
-    ROT13 = ("rot13", set[str]())
+    ROT13 = ("rot13", {"default"})
     Braille = ("braille", set[str]())
     Atbash = ("atbash", set[str]())
-    MorseCode = ("morse_code", set[str]())
+    MorseCode = ("morse_code", {"default"})
     NATO = ("nato", set[str]())
     Ecoji = ("ecoji", set[str]())
     Zalgo = ("zalgo", set[str]())
     LeetSpeak = ("leet_speak", set[str]())
     AsciiSmuggler = ("ascii_smuggler", set[str]())
 
+    @classmethod
+    def get_aggregate_tags(cls) -> set[str]:
+        """
+        Get the set of tags that represent aggregate categories.
+
+        Returns:
+            set[str]: The base ``"all"`` aggregate plus the scenario-specific ``"default"`` aggregate.
+        """
+        return super().get_aggregate_tags() | {"default"}
+
 
 logger = logging.getLogger(__name__)
 
@@ -131,7 +149,7 @@ class Encoding(Scenario):
     By default, this uses the same dataset as Garak: slur terms and web XSS payloads.
     """
 
-    VERSION: int = 1
+    VERSION: int = 2
 
     @apply_defaults
     def __init__(
@@ -163,7 +181,7 @@ def __init__(
         super().__init__(
             version=self.VERSION,
             strategy_class=EncodingStrategy,
-            default_strategy=EncodingStrategy.ALL,
+            default_strategy=EncodingStrategy.DEFAULT,
             default_dataset_config=EncodingDatasetConfiguration(
                 dataset_names=["garak_slur_terms_en", "garak_web_html_js"],
                 max_dataset_size=3,
@@ -228,45 +246,57 @@ def _get_converter_attacks(self) -> list[AtomicAttack]:
         Returns:
             list[AtomicAttack]: List of all atomic attacks to execute.
         """
-        # Map of all available converters with their encoding names
-        all_converters_with_encodings: list[tuple[list[PromptConverter], str]] = [
-            ([Base64Converter()], "base64"),
-            ([Base64Converter(encoding_func="urlsafe_b64encode")], "base64"),
-            ([Base64Converter(encoding_func="standard_b64encode")], "base64"),
-            ([Base64Converter(encoding_func="b2a_base64")], "base64"),
-            ([Base2048Converter()], "base2048"),
-            ([Base64Converter(encoding_func="b16encode")], "base16"),
-            ([Base64Converter(encoding_func="b32encode")], "base32"),
-            ([Base64Converter(encoding_func="a85encode")], "ascii85"),
-            ([Base64Converter(encoding_func="b85encode")], "ascii85"),
-            ([BinAsciiConverter(encoding_func="hex")], "hex"),
-            ([BinAsciiConverter(encoding_func="quoted-printable")], "quoted_printable"),
-            ([BinAsciiConverter(encoding_func="UUencode")], "uuencode"),
-            ([ROT13Converter()], "rot13"),
-            ([BrailleConverter()], "braille"),
-            ([AtbashConverter()], "atbash"),
-            ([MorseConverter()], "morse_code"),
-            ([NatoConverter()], "nato"),
-            ([EcojiConverter()], "ecoji"),
-            ([ZalgoConverter()], "zalgo"),
-            ([LeetspeakConverter()], "leet_speak"),
-            ([AsciiSmugglerConverter()], "ascii_smuggler"),
+        # Map of all available converters with their encoding name and a unique variant slug.
+        # ``encoding_name`` drives strategy selection and user-facing grouping (display_group);
+        # ``variant_slug`` is unique per row so that atomic-attack names stay unique even when one
+        # encoding name maps to multiple converter variants (e.g. base64, ascii85).
+        # NOTE: some base64 variants are near-duplicates (default == standard_b64encode; b2a only
+        # appends a trailing newline). They are retained here to keep the exhaustive ALL run stable
+        # behind the VERSION gate; trimming them is a separate cleanup.
+        all_converters_with_encodings: list[tuple[list[PromptConverter], str, str]] = [
+            ([Base64Converter()], "base64", "base64"),
+            ([Base64Converter(encoding_func="urlsafe_b64encode")], "base64", "base64_urlsafe"),
+            ([Base64Converter(encoding_func="standard_b64encode")], "base64", "base64_standard"),
+            ([Base64Converter(encoding_func="b2a_base64")], "base64", "base64_b2a"),
+            ([Base2048Converter()], "base2048", "base2048"),
+            ([Base64Converter(encoding_func="b16encode")], "base16", "base16"),
+            ([Base64Converter(encoding_func="b32encode")], "base32", "base32"),
+            ([Base64Converter(encoding_func="a85encode")], "ascii85", "ascii85_a85"),
+            ([Base64Converter(encoding_func="b85encode")], "ascii85", "ascii85_b85"),
+            ([BinAsciiConverter(encoding_func="hex")], "hex", "hex"),
+            ([BinAsciiConverter(encoding_func="quoted-printable")], "quoted_printable", "quoted_printable"),
+            ([BinAsciiConverter(encoding_func="UUencode")], "uuencode", "uuencode"),
+            ([ROT13Converter()], "rot13", "rot13"),
+            ([BrailleConverter()], "braille", "braille"),
+            ([AtbashConverter()], "atbash", "atbash"),
+            ([MorseConverter()], "morse_code", "morse_code"),
+            ([NatoConverter()], "nato", "nato"),
+            ([EcojiConverter()], "ecoji", "ecoji"),
+            ([ZalgoConverter()], "zalgo", "zalgo"),
+            ([LeetspeakConverter()], "leet_speak", "leet_speak"),
+            ([AsciiSmugglerConverter()], "ascii_smuggler", "ascii_smuggler"),
         ]
 
         # Filter to only include selected strategies
         selected_encoding_names = {s.value for s in self._scenario_strategies}
         converters_with_encodings = [
-            (conv, name) for conv, name in all_converters_with_encodings if name in selected_encoding_names
+            (conv, name, variant_slug)
+            for conv, name, variant_slug in all_converters_with_encodings
+            if name in selected_encoding_names
         ]
 
         atomic_attacks = []
-        for conv, name in converters_with_encodings:
-            atomic_attacks.extend(self._get_prompt_attacks(converters=conv, encoding_name=name))
+        for conv, name, variant_slug in converters_with_encodings:
+            atomic_attacks.extend(
+                self._get_prompt_attacks(converters=conv, encoding_name=name, variant_slug=variant_slug)
+            )
         return atomic_attacks
 
-    def _get_prompt_attacks(self, *, converters: list[PromptConverter], encoding_name: str) -> list[AtomicAttack]:
+    def _get_prompt_attacks(
+        self, *, converters: list[PromptConverter], encoding_name: str, variant_slug: str
+    ) -> list[AtomicAttack]:
         """
-        Create atomic attacks for a specific encoding scheme.
+        Create atomic attacks for a specific encoding converter variant.
 
         For each seed prompt (the text to be decoded), creates atomic attacks that:
         1. Encode the seed prompt using the specified converter(s)
@@ -276,31 +306,42 @@ def _get_prompt_attacks(self, *, converters: list[PromptConverter], encoding_nam
 
         Args:
             converters (list[PromptConverter]): The list of converters to apply to the seed prompts.
-            encoding_name (str): Human-readable name of the encoding scheme (e.g., "Base64", "ROT13").
+            encoding_name (str): Human-readable name of the encoding scheme (e.g., "base64", "rot13").
+                Used as the ``display_group`` so all variants of an encoding aggregate together in output.
+            variant_slug (str): Unique slug for this converter variant, used to build a unique
+                ``atomic_attack_name`` per converter variant and prompt config.
 
         Returns:
-            list[AtomicAttack]: List of atomic attacks for this encoding scheme.
+            list[AtomicAttack]: List of atomic attacks for this encoding converter variant.
 
         Raises:
             ValueError: If scenario is not properly initialized.
         """
-        converter_configs = [
-            AttackConverterConfig(
-                request_converters=PromptConverterConfiguration.from_converters(converters=converters)
+        # (config_name_suffix, converter_config). The bare "raw" config encodes only; each
+        # decode-template config additionally asks the model to decode.
+        converter_configs: list[tuple[str, AttackConverterConfig]] = [
+            (
+                "raw",
+                AttackConverterConfig(
+                    request_converters=PromptConverterConfiguration.from_converters(converters=converters)
+                ),
             )
         ]
 
-        for decode_type in self._encoding_templates:
+        for decode_index, decode_type in enumerate(self._encoding_templates):
             converters_ = converters[:] + [AskToDecodeConverter(template=decode_type, encoding_name=encoding_name)]
 
             converter_configs.append(
-                AttackConverterConfig(
-                    request_converters=PromptConverterConfiguration.from_converters(converters=converters_)
+                (
+                    f"decode{decode_index}",
+                    AttackConverterConfig(
+                        request_converters=PromptConverterConfiguration.from_converters(converters=converters_)
+                    ),
                 )
             )
 
         atomic_attacks = []
-        for attack_converter_config in converter_configs:
+        for config_suffix, attack_converter_config in converter_configs:
             # objective_target is guaranteed to be non-None by parent class validation
             if self._objective_target is None:
                 raise ValueError(
@@ -313,7 +354,8 @@ def _get_prompt_attacks(self, *, converters: list[PromptConverter], encoding_nam
             )
             atomic_attacks.append(
                 AtomicAttack(
-                    atomic_attack_name=encoding_name,
+                    atomic_attack_name=f"{variant_slug}_{config_suffix}",
+                    display_group=encoding_name,
                     attack_technique=AttackTechnique(attack=attack),
                     seed_groups=self._resolved_seed_groups or [],
                 )
diff --git a/tests/unit/backend/test_scenario_run_service.py b/tests/unit/backend/test_scenario_run_service.py
index 15116a0ac9..0445965c60 100644
--- a/tests/unit/backend/test_scenario_run_service.py
+++ b/tests/unit/backend/test_scenario_run_service.py
@@ -22,6 +22,7 @@
 )
 from pyrit.models import AttackOutcome
 from pyrit.scenario.core import DatasetConfiguration
+from pyrit.scenario.scenarios.garak.encoding import EncodingDatasetConfiguration
 
 _REGISTRY_PATCH_BASE = "pyrit.registry"
 _MEMORY_PATCH = "pyrit.memory.CentralMemory.get_memory_instance"
@@ -314,6 +315,32 @@ class _MarkerDatasetConfiguration(DatasetConfiguration):
         assert default_config.get_default_dataset_names() == ["original"]
         assert default_config.max_dataset_size == 100
 
+    async def test_start_run_dataset_names_preserves_real_encoding_config_type(self, mock_all_registries) -> None:
+        """The real ``EncodingDatasetConfiguration`` round-trips through the backend ``dataset_names`` path.
+
+        Foot-gun guard: on ``TypeError`` the backend's ``_build_init_kwargs`` silently falls back from a
+        ``DatasetConfiguration`` subclass to a plain base config. ``EncodingDatasetConfiguration``
+        must therefore stay backend-constructible (no new *required* ``__init__`` args); otherwise the
+        ``--dataset-names`` path would build a base config and produce wrong seed shaping. This pins the
+        real subclass (not a synthetic marker) so adding a required ctor arg fails loudly here.
+        """
+        default_config = EncodingDatasetConfiguration(
+            dataset_names=["garak_slur_terms_en", "garak_web_html_js"], max_dataset_size=3
+        )
+        scenario_instance = mock_all_registries["scenario_instance"]
+        scenario_instance._default_dataset_config = default_config
+
+        service = ScenarioRunService()
+        await service.start_run_async(request=_make_request(dataset_names=["custom_a", "custom_b"], max_dataset_size=2))
+
+        init_call = scenario_instance.initialize_async.await_args
+        built_config = init_call.kwargs["dataset_config"]
+
+        # Real subclass type is preserved (not degraded to base DatasetConfiguration)
+        assert type(built_config) is EncodingDatasetConfiguration
+        assert built_config.get_default_dataset_names() == ["custom_a", "custom_b"]
+        assert built_config.max_dataset_size == 2
+
     async def test_start_run_dataset_names_without_max_dataset_size_preserves_subclass(
         self, mock_all_registries
     ) -> None:
diff --git a/tests/unit/scenario/garak/test_encoding.py b/tests/unit/scenario/garak/test_encoding.py
index 64b6622ae8..826f1637a0 100644
--- a/tests/unit/scenario/garak/test_encoding.py
+++ b/tests/unit/scenario/garak/test_encoding.py
@@ -104,7 +104,7 @@ def test_init_with_default_seed_prompts(self, mock_objective_target, mock_object
             )
 
             assert scenario.name == "Encoding"
-            assert scenario.VERSION == 1
+            assert scenario.VERSION == 2
 
     def test_init_with_custom_scorer(self, mock_objective_target, mock_objective_scorer, mock_memory_seeds):
         """Test initialization with custom objective scorer."""
@@ -189,12 +189,13 @@ async def test_init_attack_strategies(
 
             await scenario.initialize_async(objective_target=mock_objective_target, dataset_config=mock_dataset_config)
 
-            # By default, EncodingStrategy.ALL is used, which expands to all encoding strategies
+            # By default, EncodingStrategy.DEFAULT is used, which expands to the curated subset
             assert len(scenario._scenario_strategies) > 0
             # Verify all strategies contain EncodingStrategy instances
             assert all(isinstance(s, EncodingStrategy) for s in scenario._scenario_strategies)
-            # Verify none of the strategies are the aggregate "ALL"
+            # Verify none of the strategies are the aggregate members
             assert all(s != EncodingStrategy.ALL for s in scenario._scenario_strategies)
+            assert all(s != EncodingStrategy.DEFAULT for s in scenario._scenario_strategies)
 
 
 @pytest.mark.usefixtures("patch_central_database")
@@ -250,7 +251,9 @@ async def test_get_prompt_attacks_creates_attack_runs(
             )
 
             await scenario.initialize_async(objective_target=mock_objective_target, dataset_config=mock_dataset_config)
-            attack_runs = scenario._get_prompt_attacks(converters=[Base64Converter()], encoding_name="Base64")
+            attack_runs = scenario._get_prompt_attacks(
+                converters=[Base64Converter()], encoding_name="base64", variant_slug="base64"
+            )
 
             # Should create attack runs
             assert len(attack_runs) > 0
@@ -277,7 +280,9 @@ async def test_attack_runs_include_objectives(
             )
 
             await scenario.initialize_async(objective_target=mock_objective_target, dataset_config=mock_dataset_config)
-            attack_runs = scenario._get_prompt_attacks(converters=[Base64Converter()], encoding_name="Base64")
+            attack_runs = scenario._get_prompt_attacks(
+                converters=[Base64Converter()], encoding_name="base64", variant_slug="base64"
+            )
 
             # Check that seed groups contain objectives with the expected format
             for run in attack_runs:
@@ -431,3 +436,124 @@ async def test_one_resolution_call_baseline_matches_strategies(self, mock_object
         baseline_objs = set(scenario._atomic_attacks[0].objectives)
         for attack in scenario._atomic_attacks[1:]:
             assert set(attack.objectives) == baseline_objs
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestEncodingStrategyDefaults:
+    """Tests for the curated DEFAULT aggregate and aggregate-tag wiring."""
+
+    def test_default_is_the_default_strategy(self, mock_objective_scorer):
+        """The scenario's default strategy is the curated DEFAULT aggregate, not ALL."""
+        from unittest.mock import patch
+
+        with patch.object(Encoding, "_resolve_seed_groups", return_value=[]):
+            scenario = Encoding(objective_scorer=mock_objective_scorer)
+        assert scenario._default_strategy == EncodingStrategy.DEFAULT
+
+    def test_default_aggregate_membership(self):
+        """DEFAULT expands to one base-N, one substitution, and one symbolic encoding."""
+        members = EncodingStrategy.get_strategies_by_tag("default")
+        assert members == {EncodingStrategy.Base16, EncodingStrategy.ROT13, EncodingStrategy.MorseCode}
+
+    def test_default_is_subset_of_all(self):
+        """Every DEFAULT member is also part of the exhaustive ALL aggregate."""
+        all_members = set(EncodingStrategy.get_all_strategies())
+        default_members = EncodingStrategy.get_strategies_by_tag("default")
+        assert default_members <= all_members
+
+    def test_get_aggregate_tags_includes_default(self):
+        """``default`` is registered as an aggregate tag alongside ``all``."""
+        tags = EncodingStrategy.get_aggregate_tags()
+        assert "all" in tags
+        assert "default" in tags
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestEncodingAtomicCountsAndNaming:
+    """Atomic-attack counts, name uniqueness, and display-group grouping."""
+
+    @staticmethod
+    def _seed_groups(count: int = 1):
+        return [
+            SeedAttackGroup(seeds=[SeedObjective(value=f"obj{i}"), SeedPrompt(value=f"payload{i}")])
+            for i in range(count)
+        ]
+
+    async def _build(self, target, scorer, strategies=None, *, include_baseline=True, encoding_templates=None):
+        from unittest.mock import patch
+
+        seed_groups = self._seed_groups(1)
+        with patch.object(Encoding, "_resolve_seed_groups", return_value=seed_groups):
+            ctor_kwargs = {"objective_scorer": scorer}
+            if encoding_templates is not None:
+                ctor_kwargs["encoding_templates"] = encoding_templates
+            scenario = Encoding(**ctor_kwargs)
+            kwargs = {"objective_target": target, "include_baseline": include_baseline}
+            if strategies is not None:
+                kwargs["scenario_strategies"] = strategies
+            await scenario.initialize_async(**kwargs)
+        return scenario
+
+    async def test_default_run_atomic_count(self, mock_objective_target, mock_objective_scorer):
+        """DEFAULT = 3 encodings x (1 raw + 4 decode templates) + 1 baseline = 16 atomics."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer)
+        assert len(scenario._atomic_attacks) == 16
+
+    async def test_all_run_atomic_count(self, mock_objective_target, mock_objective_scorer):
+        """ALL = 21 converter variants x 5 prompt configs + 1 baseline = 106 atomics."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer, strategies=[EncodingStrategy.ALL])
+        assert len(scenario._atomic_attacks) == 106
+
+    async def test_all_atomic_names_unique(self, mock_objective_target, mock_objective_scorer):
+        """Every atomic-attack name is unique under ALL (no name collisions across variants)."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer, strategies=[EncodingStrategy.ALL])
+        names = [a.atomic_attack_name for a in scenario._atomic_attacks]
+        assert len(names) == len(set(names))
+
+    async def test_multi_variant_encoding_names_unique(self, mock_objective_target, mock_objective_scorer):
+        """base64 expands to 4 converter variants, each producing distinct atomic-attack names."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer, strategies=[EncodingStrategy.Base64])
+        names = [a.atomic_attack_name for a in scenario._atomic_attacks if a.atomic_attack_name != "baseline"]
+        # 4 variants x 5 configs = 20 attacks, all uniquely named
+        assert len(names) == 20
+        assert len(set(names)) == 20
+
+    async def test_display_group_aggregates_by_encoding(self, mock_objective_target, mock_objective_scorer):
+        """All base64 variants share a single ``base64`` display_group for reporting."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer, strategies=[EncodingStrategy.Base64])
+        non_baseline = [a for a in scenario._atomic_attacks if a.atomic_attack_name != "baseline"]
+        assert {a.display_group for a in non_baseline} == {"base64"}
+
+    async def test_fast_path_single_encoding_count(self, mock_objective_target, mock_objective_scorer):
+        """Fast path (single ROT13 strategy) = 1 raw + 4 decode + 1 baseline = 6 atomics."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer, strategies=[EncodingStrategy.ROT13])
+        assert len(scenario._atomic_attacks) == 6
+        rot13_names = sorted(a.atomic_attack_name for a in scenario._atomic_attacks)
+        assert rot13_names == [
+            "baseline",
+            "rot13_decode0",
+            "rot13_decode1",
+            "rot13_decode2",
+            "rot13_decode3",
+            "rot13_raw",
+        ]
+
+    async def test_default_run_count_without_baseline(self, mock_objective_target, mock_objective_scorer):
+        """With baseline disabled, DEFAULT = 3 encodings x 5 prompt configs = 15 atomics (no baseline)."""
+        scenario = await self._build(mock_objective_target, mock_objective_scorer, include_baseline=False)
+        assert len(scenario._atomic_attacks) == 15
+        assert all(a.atomic_attack_name != "baseline" for a in scenario._atomic_attacks)
+
+    async def test_custom_encoding_templates_scale_decode_configs(self, mock_objective_target, mock_objective_scorer):
+        """Each decode template adds one prompt config: N templates -> 1 raw + N decode configs per variant."""
+        scenario = await self._build(
+            mock_objective_target,
+            mock_objective_scorer,
+            strategies=[EncodingStrategy.ROT13],
+            include_baseline=False,
+            encoding_templates=["decode this: {encoded_text}", "now decode: {encoded_text}"],
+        )
+        # 1 variant x (1 raw + 2 decode) = 3 atomics
+        assert len(scenario._atomic_attacks) == 3
+        names = sorted(a.atomic_attack_name for a in scenario._atomic_attacks)
+        assert names == ["rot13_decode0", "rot13_decode1", "rot13_raw"]