microsoft · varunj-msft · Jun 19, 2026 · rlundeen2 · Jun 20, 2026 · rlundeen2
diff --git a/doc/scanner/garak.ipynb b/doc/scanner/garak.ipynb
@@ -19,7 +19,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "1",
-   "metadata": {},
+   "metadata": {
+    "lines_to_next_cell": 0
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -63,15 +65,20 @@
     "strategy encodes the prompt, asks the target to decode it, and scores whether the decoded output\n",
     "matches the harmful content. Default datasets include slur terms and web/HTML/JS content.\n",
     "\n",
-    "**CLI example:**\n",
+    "**Default run** uses the curated `DEFAULT` strategy aggregate (Base16, ROT13, MorseCode — one\n",
+    "base-N, one substitution cipher, and one symbolic alphabet) for a fast, representative scan. Use\n",
+    "the `ALL` aggregate for an exhaustive run across every encoding scheme.\n",
+    "\n",
+    "**Fast path** (sanity-check target wiring in well under a minute) — pick a single-variant encoding\n",
+    "and one prompt:\n",
     "\n",
     "```bash\n",
-    "pyrit_scan garak.encoding --target openai_chat --strategies base64 --max-dataset-size 1\n",
+    "pyrit_scan garak.encoding --target openai_chat --strategies rot13 --max-dataset-size 1\n",
     "```\n",
     "\n",
     "**Available strategies** (17 encodings): Base64, Base2048, Base16, Base32, ASCII85, Hex,\n",
     "QuotedPrintable, UUencode, ROT13, Braille, Atbash, MorseCode, NATO, Ecoji, Zalgo, LeetSpeak,\n",
-    "AsciiSmuggler\n",
+    "AsciiSmuggler. Aggregates: `DEFAULT` (curated subset, the default) and `ALL` (every encoding).\n",
     "\n",
     "> **Note:** Strategy composition is NOT supported for Encoding — each encoding is tested\n",
     "> independently."
@@ -141,7 +148,7 @@
       "\u001b[36m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n",
       "\u001b[1m  📋 Scenario Details\u001b[0m\n",
       "\u001b[36m    • Name: Encoding\u001b[0m\n",
-      "\u001b[36m    • Scenario Version: 1\u001b[0m\n",
+      "\u001b[36m    • Scenario Version: 2\u001b[0m\n",
       "\u001b[36m    • PyRIT Version: 0.12.1.dev0\u001b[0m\n",
       "\u001b[36m    • Description:\u001b[0m\n",
       "\u001b[36m        Encoding Scenario implementation for PyRIT. This scenario tests how resilient models are to various encoding\u001b[0m\n",
@@ -205,6 +212,9 @@
   }
  ],
  "metadata": {
+  "jupytext": {
+   "main_language": "python"
+  },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",

diff --git a/doc/scanner/garak.py b/doc/scanner/garak.py
@@ -37,15 +37,20 @@
 # strategy encodes the prompt, asks the target to decode it, and scores whether the decoded output
 # matches the harmful content. Default datasets include slur terms and web/HTML/JS content.
 #
-# **CLI example:**
+# **Default run** uses the curated `DEFAULT` strategy aggregate (Base16, ROT13, MorseCode — one
+# base-N, one substitution cipher, and one symbolic alphabet) for a fast, representative scan. Use
+# the `ALL` aggregate for an exhaustive run across every encoding scheme.
+#
+# **Fast path** (sanity-check target wiring in well under a minute) — pick a single-variant encoding
+# and one prompt:
 #
 # ```bash
-# pyrit_scan garak.encoding --target openai_chat --strategies base64 --max-dataset-size 1
+# pyrit_scan garak.encoding --target openai_chat --strategies rot13 --max-dataset-size 1
 # ```
 #
 # **Available strategies** (17 encodings): Base64, Base2048, Base16, Base32, ASCII85, Hex,
 # QuotedPrintable, UUencode, ROT13, Braille, Atbash, MorseCode, NATO, Ecoji, Zalgo, LeetSpeak,
-# AsciiSmuggler
+# AsciiSmuggler. Aggregates: `DEFAULT` (curated subset, the default) and `ALL` (every encoding).
 #
 # > **Note:** Strategy composition is NOT supported for Encoding — each encoding is tested
 # > independently.

diff --git a/pyrit/scenario/scenarios/garak/encoding.py b/pyrit/scenario/scenarios/garak/encoding.py
@@ -83,33 +83,51 @@ class EncodingStrategy(ScenarioStrategy):
     Strategies for encoding attacks.
 
     Each enum member represents an encoding scheme that will be tested against the target model.
-    The ALL aggregate expands to include all encoding strategies.
+    The ``ALL`` aggregate expands to every encoding scheme (exhaustive run). The ``DEFAULT``
+    aggregate expands to a small curated subset that spans distinct encoding families, giving a
+    fast, representative default run.
 
     Note: EncodingStrategy does not support composition. Each encoding must be applied individually.
+    The strategy axis here is the encoding scheme (not an attack technique), and every encoding runs
+    as a single-turn ``PromptSendingAttack``, so SINGLE_TURN/MULTI_TURN aggregates are not applicable.
     """
 
-    # Aggregate member
+    # Aggregate members
     ALL = ("all", {"all"})
+    DEFAULT = ("default", {"default"})
 
-    # Individual encoding strategies (matching the atomic attack names)
+    # Individual encoding strategies (each value matches the encoding name used for display grouping).
+    # Members tagged ``default`` form the curated DEFAULT aggregate:
+    # one base-N encoding (Base16), one substitution cipher (ROT13),
+    # and one symbolic alphabet (MorseCode).
     Base64 = ("base64", set[str]())
     Base2048 = ("base2048", set[str]())
-    Base16 = ("base16", set[str]())
+    Base16 = ("base16", {"default"})
     Base32 = ("base32", set[str]())
     ASCII85 = ("ascii85", set[str]())
     Hex = ("hex", set[str]())
     QuotedPrintable = ("quoted_printable", set[str]())
     UUencode = ("uuencode", set[str]())
-    ROT13 = ("rot13", set[str]())
+    ROT13 = ("rot13", {"default"})
     Braille = ("braille", set[str]())
     Atbash = ("atbash", set[str]())
-    MorseCode = ("morse_code", set[str]())
+    MorseCode = ("morse_code", {"default"})
     NATO = ("nato", set[str]())
     Ecoji = ("ecoji", set[str]())
     Zalgo = ("zalgo", set[str]())
     LeetSpeak = ("leet_speak", set[str]())
     AsciiSmuggler = ("ascii_smuggler", set[str]())
 
+    @classmethod
+    def get_aggregate_tags(cls) -> set[str]:
+        """
+        Get the set of tags that represent aggregate categories.
+
+        Returns:
+            set[str]: The base ``"all"`` aggregate plus the scenario-specific ``"default"`` aggregate.
+        """
+        return super().get_aggregate_tags() | {"default"}
+
 
 logger = logging.getLogger(__name__)
 
@@ -131,7 +149,7 @@ class Encoding(Scenario):
     By default, this uses the same dataset as Garak: slur terms and web XSS payloads.
     """
 
-    VERSION: int = 1
+    VERSION: int = 2
 
     @apply_defaults
     def __init__(
@@ -163,7 +181,7 @@ def __init__(
         super().__init__(
             version=self.VERSION,
             strategy_class=EncodingStrategy,
-            default_strategy=EncodingStrategy.ALL,
+            default_strategy=EncodingStrategy.DEFAULT,
             default_dataset_config=EncodingDatasetConfiguration(
                 dataset_names=["garak_slur_terms_en", "garak_web_html_js"],
                 max_dataset_size=3,
@@ -228,45 +246,57 @@ def _get_converter_attacks(self) -> list[AtomicAttack]:
         Returns:
             list[AtomicAttack]: List of all atomic attacks to execute.
         """
-        # Map of all available converters with their encoding names
-        all_converters_with_encodings: list[tuple[list[PromptConverter], str]] = [
-            ([Base64Converter()], "base64"),
-            ([Base64Converter(encoding_func="urlsafe_b64encode")], "base64"),
-            ([Base64Converter(encoding_func="standard_b64encode")], "base64"),
-            ([Base64Converter(encoding_func="b2a_base64")], "base64"),
-            ([Base2048Converter()], "base2048"),
-            ([Base64Converter(encoding_func="b16encode")], "base16"),
-            ([Base64Converter(encoding_func="b32encode")], "base32"),
-            ([Base64Converter(encoding_func="a85encode")], "ascii85"),
-            ([Base64Converter(encoding_func="b85encode")], "ascii85"),
-            ([BinAsciiConverter(encoding_func="hex")], "hex"),
-            ([BinAsciiConverter(encoding_func="quoted-printable")], "quoted_printable"),
-            ([BinAsciiConverter(encoding_func="UUencode")], "uuencode"),
-            ([ROT13Converter()], "rot13"),
-            ([BrailleConverter()], "braille"),
-            ([AtbashConverter()], "atbash"),
-            ([MorseConverter()], "morse_code"),
-            ([NatoConverter()], "nato"),
-            ([EcojiConverter()], "ecoji"),
-            ([ZalgoConverter()], "zalgo"),
-            ([LeetspeakConverter()], "leet_speak"),
-            ([AsciiSmugglerConverter()], "ascii_smuggler"),
+        # Map of all available converters with their encoding name and a unique variant slug.
+        # ``encoding_name`` drives strategy selection and user-facing grouping (display_group);
+        # ``variant_slug`` is unique per row so that atomic-attack names stay unique even when one
+        # encoding name maps to multiple converter variants (e.g. base64, ascii85).
+        # NOTE: some base64 variants are near-duplicates (default == standard_b64encode; b2a only
+        # appends a trailing newline). They are retained here to keep the exhaustive ALL run stable
+        # behind the VERSION gate; trimming them is a separate cleanup.
+        all_converters_with_encodings: list[tuple[list[PromptConverter], str, str]] = [
+            ([Base64Converter()], "base64", "base64"),
+            ([Base64Converter(encoding_func="urlsafe_b64encode")], "base64", "base64_urlsafe"),
+            ([Base64Converter(encoding_func="standard_b64encode")], "base64", "base64_standard"),
+            ([Base64Converter(encoding_func="b2a_base64")], "base64", "base64_b2a"),
+            ([Base2048Converter()], "base2048", "base2048"),
+            ([Base64Converter(encoding_func="b16encode")], "base16", "base16"),
+            ([Base64Converter(encoding_func="b32encode")], "base32", "base32"),
+            ([Base64Converter(encoding_func="a85encode")], "ascii85", "ascii85_a85"),
+            ([Base64Converter(encoding_func="b85encode")], "ascii85", "ascii85_b85"),
+            ([BinAsciiConverter(encoding_func="hex")], "hex", "hex"),
+            ([BinAsciiConverter(encoding_func="quoted-printable")], "quoted_printable", "quoted_printable"),
+            ([BinAsciiConverter(encoding_func="UUencode")], "uuencode", "uuencode"),
+            ([ROT13Converter()], "rot13", "rot13"),
+            ([BrailleConverter()], "braille", "braille"),
+            ([AtbashConverter()], "atbash", "atbash"),
+            ([MorseConverter()], "morse_code", "morse_code"),
+            ([NatoConverter()], "nato", "nato"),
+            ([EcojiConverter()], "ecoji", "ecoji"),
+            ([ZalgoConverter()], "zalgo", "zalgo"),
+            ([LeetspeakConverter()], "leet_speak", "leet_speak"),
+            ([AsciiSmugglerConverter()], "ascii_smuggler", "ascii_smuggler"),
         ]
 
         # Filter to only include selected strategies
         selected_encoding_names = {s.value for s in self._scenario_strategies}
         converters_with_encodings = [
-            (conv, name) for conv, name in all_converters_with_encodings if name in selected_encoding_names
+            (conv, name, variant_slug)
+            for conv, name, variant_slug in all_converters_with_encodings
+            if name in selected_encoding_names
         ]
 
         atomic_attacks = []
-        for conv, name in converters_with_encodings:
-            atomic_attacks.extend(self._get_prompt_attacks(converters=conv, encoding_name=name))
+        for conv, name, variant_slug in converters_with_encodings:
+            atomic_attacks.extend(
+                self._get_prompt_attacks(converters=conv, encoding_name=name, variant_slug=variant_slug)
+            )
         return atomic_attacks
 
-    def _get_prompt_attacks(self, *, converters: list[PromptConverter], encoding_name: str) -> list[AtomicAttack]:
+    def _get_prompt_attacks(
+        self, *, converters: list[PromptConverter], encoding_name: str, variant_slug: str
+    ) -> list[AtomicAttack]:
         """
-        Create atomic attacks for a specific encoding scheme.
+        Create atomic attacks for a specific encoding converter variant.
 
         For each seed prompt (the text to be decoded), creates atomic attacks that:
         1. Encode the seed prompt using the specified converter(s)
@@ -276,31 +306,42 @@ def _get_prompt_attacks(self, *, converters: list[PromptConverter], encoding_nam
 
         Args:
             converters (list[PromptConverter]): The list of converters to apply to the seed prompts.
-            encoding_name (str): Human-readable name of the encoding scheme (e.g., "Base64", "ROT13").
+            encoding_name (str): Human-readable name of the encoding scheme (e.g., "base64", "rot13").
+                Used as the ``display_group`` so all variants of an encoding aggregate together in output.
+            variant_slug (str): Unique slug for this converter variant, used to build a unique
+                ``atomic_attack_name`` per converter variant and prompt config.
 
         Returns:
-            list[AtomicAttack]: List of atomic attacks for this encoding scheme.
+            list[AtomicAttack]: List of atomic attacks for this encoding converter variant.
 
         Raises:
             ValueError: If scenario is not properly initialized.
         """
-        converter_configs = [
-            AttackConverterConfig(
-                request_converters=PromptConverterConfiguration.from_converters(converters=converters)
+        # (config_name_suffix, converter_config). The bare "raw" config encodes only; each
+        # decode-template config additionally asks the model to decode.
+        converter_configs: list[tuple[str, AttackConverterConfig]] = [
+            (
+                "raw",
+                AttackConverterConfig(
+                    request_converters=PromptConverterConfiguration.from_converters(converters=converters)
+                ),
             )
         ]
 
-        for decode_type in self._encoding_templates:
+        for decode_index, decode_type in enumerate(self._encoding_templates):
             converters_ = converters[:] + [AskToDecodeConverter(template=decode_type, encoding_name=encoding_name)]
 
             converter_configs.append(
-                AttackConverterConfig(
-                    request_converters=PromptConverterConfiguration.from_converters(converters=converters_)
+                (
+                    f"decode{decode_index}",
+                    AttackConverterConfig(
+                        request_converters=PromptConverterConfiguration.from_converters(converters=converters_)
+                    ),
                 )
             )
 
         atomic_attacks = []
-        for attack_converter_config in converter_configs:
+        for config_suffix, attack_converter_config in converter_configs:
             # objective_target is guaranteed to be non-None by parent class validation
             if self._objective_target is None:
                 raise ValueError(
@@ -313,7 +354,8 @@ def _get_prompt_attacks(self, *, converters: list[PromptConverter], encoding_nam
             )
             atomic_attacks.append(
                 AtomicAttack(
-                    atomic_attack_name=encoding_name,
+                    atomic_attack_name=f"{variant_slug}_{config_suffix}",
+                    display_group=encoding_name,
                     attack_technique=AttackTechnique(attack=attack),
                     seed_groups=self._resolved_seed_groups or [],
                 )

diff --git a/tests/unit/backend/test_scenario_run_service.py b/tests/unit/backend/test_scenario_run_service.py
@@ -22,6 +22,7 @@
 )
 from pyrit.models import AttackOutcome
 from pyrit.scenario.core import DatasetConfiguration
+from pyrit.scenario.scenarios.garak.encoding import EncodingDatasetConfiguration
 
 _REGISTRY_PATCH_BASE = "pyrit.registry"
 _MEMORY_PATCH = "pyrit.memory.CentralMemory.get_memory_instance"
@@ -314,6 +315,32 @@ class _MarkerDatasetConfiguration(DatasetConfiguration):
         assert default_config.get_default_dataset_names() == ["original"]
         assert default_config.max_dataset_size == 100
 
+    async def test_start_run_dataset_names_preserves_real_encoding_config_type(self, mock_all_registries) -> None:
+        """The real ``EncodingDatasetConfiguration`` round-trips through the backend ``dataset_names`` path.
+
+        Foot-gun guard: on ``TypeError`` the backend's ``_build_init_kwargs`` silently falls back from a
+        ``DatasetConfiguration`` subclass to a plain base config. ``EncodingDatasetConfiguration``
+        must therefore stay backend-constructible (no new *required* ``__init__`` args); otherwise the
+        ``--dataset-names`` path would build a base config and produce wrong seed shaping. This pins the
+        real subclass (not a synthetic marker) so adding a required ctor arg fails loudly here.
+        """
+        default_config = EncodingDatasetConfiguration(
+            dataset_names=["garak_slur_terms_en", "garak_web_html_js"], max_dataset_size=3
+        )
+        scenario_instance = mock_all_registries["scenario_instance"]
+        scenario_instance._default_dataset_config = default_config
+
+        service = ScenarioRunService()
+        await service.start_run_async(request=_make_request(dataset_names=["custom_a", "custom_b"], max_dataset_size=2))
+
+        init_call = scenario_instance.initialize_async.await_args
+        built_config = init_call.kwargs["dataset_config"]
+
+        # Real subclass type is preserved (not degraded to base DatasetConfiguration)
+        assert type(built_config) is EncodingDatasetConfiguration
+        assert built_config.get_default_dataset_names() == ["custom_a", "custom_b"]
+        assert built_config.max_dataset_size == 2
+
     async def test_start_run_dataset_names_without_max_dataset_size_preserves_subclass(
         self, mock_all_registries
     ) -> None: