From 4cf7a5fe58bd5f1abbccf723feeb9fcf521ac905 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Tue, 31 Mar 2026 22:30:27 +0200
Subject: [PATCH 01/12] feat: Add machine-readable digest of comparison

---
 lexical-sprouting-scroll.md | 107 ++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 lexical-sprouting-scroll.md

diff --git a/lexical-sprouting-scroll.md b/lexical-sprouting-scroll.md
new file mode 100644
index 0000000..8007ec1
--- /dev/null
+++ b/lexical-sprouting-scroll.md
@@ -0,0 +1,107 @@
+# Add `Digest` dataclass for machine-readable comparison output
+
+## Context
+
+Currently, `DataFrameComparison.summary()` returns a `Summary` object that renders rich-formatted console output. There's no way to get structured, machine-readable data from a comparison (e.g., for LLM consumption, CI pipelines, or programmatic analysis). This adds a `digest()` method returning a plain dataclass hierarchy that can be serialized via `dataclasses.asdict()` and `json.dumps()`.
+
+## Dataclass Structure
+
+All dataclasses in a new file `diffly/digest.py`:
+
+```python
+@dataclass
+class Digest:
+    equal: bool
+    left_name: str
+    right_name: str
+    primary_key: list[str] | None
+    schemas: DigestSchemas | None       # None when equal, or slim + schemas match
+    rows: DigestRows | None             # None when equal, or slim + rows match
+    columns: list[DigestColumn] | None  # None when equal, no PK, no joined rows, or slim + all match
+    sample_rows_left_only: list[tuple[Any, ...]] | None   # None when no PK or sample_k==0
+    sample_rows_right_only: list[tuple[Any, ...]] | None
+
+@dataclass
+class DigestSchemas:
+    left_only: list[tuple[str, str]]                # (col_name, dtype_str)
+    in_common: list[tuple[str, str, str]]            # (col_name, left_dtype_str, right_dtype_str)
+    right_only: list[tuple[str, str]]
+
+@dataclass
+class DigestRows:
+    n_left: int
+    n_right: int
+    n_left_only: int | None       # None when no primary key
+    n_joined_equal: int | None
+    n_joined_unequal: int | None
+    n_right_only: int | None
+
+@dataclass
+class DigestColumn:
+    name: str
+    match_rate: float
+    changes: list[DigestColumnChange] | None  # None when top_k==0 or column is hidden
+
+@dataclass
+class DigestColumnChange:
+    old: Any
+    new: Any
+    count: int
+    sample_pk: Any | None   # None when show_sample_primary_key_per_change=False
+```
+
+**Design notes:**
+
+- `primary_key` is a top-level field so consumers know what the sample row tuples represent.
+- `sample_rows_left_only` / `sample_rows_right_only` use `list[tuple]` matching the primary key column order.
+- `in_common` uses 3-tuples `(name, left_dtype, right_dtype)` to capture dtype changes (when they match, `left_dtype == right_dtype`).
+- `schemas` is always populated (not `None`) when frames aren't equal and not slim-hidden, even if schemas match -- the caller might want to confirm schemas are identical. **Actually**: mirror `Summary` logic -- `None` when `slim=True` and schemas are equal.
+
+## Files to modify
+
+### 1. New: [diffly/digest.py](diffly/digest.py)
+
+- All dataclass definitions above
+- `_to_python(value)` helper to convert Polars values (date, datetime, timedelta, Decimal) to JSON-safe types
+- Builder function `_build_digest(comparison, **params) -> Digest` containing the logic to extract data from `DataFrameComparison`, mirroring the control flow of `Summary._print_to_console` / `_print_diff`
+- `to_dict()` method on `Digest` via `dataclasses.asdict()`
+- `to_json()` convenience method
+
+### 2. [diffly/comparison.py](diffly/comparison.py) (~line 976)
+
+- Add `digest()` method on `DataFrameComparison` with same signature as `summary()`
+- Lazy import `from .digest import Digest` (same pattern as summary)
+
+### 3. [diffly/cli.py](diffly/cli.py)
+
+- Add `--json` flag (bool, default False)
+- When True, call `comparison.digest(...).to_json()` instead of `comparison.summary(...).format()`
+
+### 4. [diffly/**init**.py](diffly/__init__.py)
+
+- No changes needed -- `Digest` is accessed via `comparison.digest()`, not imported directly. Can revisit later.
+
+### 5. **No changes to** [diffly/testing.py](diffly/testing.py)
+
+- `testing.py` uses `summary()` for human-readable assertion error messages. `digest()` is a data output format, not relevant to assertions.
+
+### 6. New: [tests/test_digest.py](tests/test_digest.py)
+
+- Equal frames -> `equal=True`, all sections `None`
+- Schema differences (left-only, right-only, dtype mismatches in in_common)
+- Row counts with and without primary key
+- Column match rates with `show_perfect_column_matches=True/False`
+- `top_k_column_changes` + `show_sample_primary_key_per_change`
+- `sample_k_rows_only` for `sample_rows_left_only` / `sample_rows_right_only`
+- `slim=True` suppresses matching sections
+- `hidden_columns` hides column changes
+- Validation errors (same as Summary: hidden PK columns, sample PK without top-k)
+- JSON serialization roundtrip: `json.loads(digest.to_json())` is valid
+
+## Verification
+
+```bash
+pixi run pytest tests/test_digest.py -v
+pixi run test
+pixi run pre-commit-run
+```

From 30f27b0f25dbf5f9d8e2b79953dffcd18a283df1 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 13:59:42 +0200
Subject: [PATCH 02/12] update plan

---
 lexical-sprouting-scroll.md | 123 ++++++++++++++++++++++--------------
 1 file changed, 76 insertions(+), 47 deletions(-)

diff --git a/lexical-sprouting-scroll.md b/lexical-sprouting-scroll.md
index 8007ec1..db97798 100644
--- a/lexical-sprouting-scroll.md
+++ b/lexical-sprouting-scroll.md
@@ -1,34 +1,58 @@
-# Add `Digest` dataclass for machine-readable comparison output
+# Add `SummaryData` dataclass as the data layer for comparison output
 
 ## Context
 
-Currently, `DataFrameComparison.summary()` returns a `Summary` object that renders rich-formatted console output. There's no way to get structured, machine-readable data from a comparison (e.g., for LLM consumption, CI pipelines, or programmatic analysis). This adds a `digest()` method returning a plain dataclass hierarchy that can be serialized via `dataclasses.asdict()` and `json.dumps()`.
+`Summary` currently both extracts data from `DataFrameComparison` and renders it with Rich — every `_print_*` method queries the comparison object directly. There is no structured, machine-readable output format. We introduce `SummaryData` as an intermediate data layer: a plain dataclass hierarchy computed once in `Summary.__init__`, then consumed for both Rich rendering (`print(summary)` / `summary.format()`) and JSON serialization (`summary.to_json()`).
 
-## Dataclass Structure
+## Architecture
 
-All dataclasses in a new file `diffly/digest.py`:
+```
+DataFrameComparison.summary()
+        │
+        ▼
+      Summary.__init__
+        │
+        ├── calls _compute_summary_data() once
+        │           │
+        │           ▼
+        │      SummaryData          ← plain dataclass, no dependencies beyond stdlib
+        │
+        ├── print(summary) / summary.format()   → Rich rendering from SummaryData
+        └── summary.to_json()                   → JSON serialization from SummaryData
+```
+
+- **`SummaryData`** is the single source of truth for what data to present given the parameters (`slim`, `show_perfect_column_matches`, `top_k_column_changes`, etc.).
+- **`Summary`** computes a `SummaryData` in its `__init__` via `_compute_summary_data()`, stores it as `self._data`. All `_print_*` methods render from `self._data` instead of querying `self._comparison`. `to_json()` serializes `self._data`.
+- **`comparison.summary()`** remains the only entry point. No new method on `DataFrameComparison`.
+
+## Dataclass Design
+
+All dataclasses live in `diffly/summary.py` alongside the existing `Summary` class:
 
 ```python
 @dataclass
-class Digest:
+class SummaryData:
     equal: bool
     left_name: str
     right_name: str
     primary_key: list[str] | None
-    schemas: DigestSchemas | None       # None when equal, or slim + schemas match
-    rows: DigestRows | None             # None when equal, or slim + rows match
-    columns: list[DigestColumn] | None  # None when equal, no PK, no joined rows, or slim + all match
+    schemas: SummaryDataSchemas | None       # None when equal, or slim + schemas match
+    rows: SummaryDataRows | None             # None when equal, or slim + rows match
+    columns: list[SummaryDataColumn] | None  # None when equal, no PK, no joined rows, or slim + all match
     sample_rows_left_only: list[tuple[Any, ...]] | None   # None when no PK or sample_k==0
     sample_rows_right_only: list[tuple[Any, ...]] | None
 
+    def to_dict(self) -> dict[str, Any]: ...
+    def to_json(self, **kwargs) -> str: ...
+
 @dataclass
-class DigestSchemas:
+class SummaryDataSchemas:
     left_only: list[tuple[str, str]]                # (col_name, dtype_str)
     in_common: list[tuple[str, str, str]]            # (col_name, left_dtype_str, right_dtype_str)
     right_only: list[tuple[str, str]]
 
 @dataclass
-class DigestRows:
+class SummaryDataRows:
     n_left: int
     n_right: int
     n_left_only: int | None       # None when no primary key
@@ -37,71 +61,76 @@ class DigestRows:
     n_right_only: int | None
 
 @dataclass
-class DigestColumn:
+class SummaryDataColumn:
     name: str
     match_rate: float
-    changes: list[DigestColumnChange] | None  # None when top_k==0 or column is hidden
+    n_total_changes: int          # total distinct changes (needed for "...and N others")
+    changes: list[SummaryDataColumnChange] | None  # None when top_k==0 or column is hidden
 
 @dataclass
-class DigestColumnChange:
+class SummaryDataColumnChange:
     old: Any
     new: Any
     count: int
-    sample_pk: Any | None   # None when show_sample_primary_key_per_change=False
+    sample_pk: tuple[Any, ...] | None   # None when show_sample_primary_key_per_change=False
 ```
 
-**Design notes:**
+### Design decisions
 
-- `primary_key` is a top-level field so consumers know what the sample row tuples represent.
-- `sample_rows_left_only` / `sample_rows_right_only` use `list[tuple]` matching the primary key column order.
-- `in_common` uses 3-tuples `(name, left_dtype, right_dtype)` to capture dtype changes (when they match, `left_dtype == right_dtype`).
-- `schemas` is always populated (not `None`) when frames aren't equal and not slim-hidden, even if schemas match -- the caller might want to confirm schemas are identical. **Actually**: mirror `Summary` logic -- `None` when `slim=True` and schemas are equal.
+- **Primary key consistency:** Both `sample_rows_{left,right}_only` entries and `sample_pk` in `SummaryDataColumnChange` use `tuple[Any, ...]` matching the `primary_key` column order.
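+- **None logic:** `schemas` is `None` when the frames are equal, or when `slim=True` and the schemas match. `rows` and `columns` follow the same pattern; `columns` is additionally `None` when there is no primary key or no joined rows.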
+- **`n_total_changes`** on `SummaryDataColumn`: needed to render `"(...and 5 others)"`. The `changes` list only holds the top-k.
+- **Equal + empty frames:** `Summary` distinguishes "empty but matching" from "match exactly" via the row count, but `rows` is `None` when `equal=True`, so that count is not available from `rows`. _Alternative:_ add a top-level `n_rows_left` field if this proves awkward during implementation.
 
 ## Files to modify
 
-### 1. New: [diffly/digest.py](diffly/digest.py)
+### 1. `diffly/summary.py`
 
-- All dataclass definitions above
-- `_to_python(value)` helper to convert Polars values (date, datetime, timedelta, Decimal) to JSON-safe types
-- Builder function `_build_digest(comparison, **params) -> Digest` containing the logic to extract data from `DataFrameComparison`, mirroring the control flow of `Summary._print_to_console` / `_print_diff`
-- `to_dict()` method on `Digest` via `dataclasses.asdict()`
-- `to_json()` convenience method
+**Add** (above the `Summary` class):
 
-### 2. [diffly/comparison.py](diffly/comparison.py) (~line 976)
+- `SummaryData` and child dataclass definitions
+- `_to_python(value)` helper for JSON-safe conversion (date → isoformat, timedelta → total_seconds, Decimal → float)
+- `_compute_summary_data(comparison, **params) -> SummaryData`: single place for data extraction, parameter validation, and "what to show" decisions. This moves the current validation logic out of `Summary.__init__` and the data-querying logic out of the `_print_*` methods.
 
-- Add `digest()` method on `DataFrameComparison` with same signature as `summary()`
-- Lazy import `from .digest import Digest` (same pattern as summary)
+**Modify** `Summary`:
 
-### 3. [diffly/cli.py](diffly/cli.py)
+- `__init__` calls `_compute_summary_data()`, stores result as `self._data`. Remove `self._comparison` and parameter fields that are now captured in `SummaryData`.
+- Keep `self.slim` (controls header panel rendering, not data content).
+- Add `to_json(**kwargs) -> str` method delegating to `self._data.to_json()`.
+- Refactor each `_print_*` method to render from `self._data`:
+  - `_print_to_console`: check `self._data.equal`
+  - `_print_equal`: derive "empty but matching" from `self._data`
+  - `_print_primary_key`: read `self._data.primary_key`
+  - `_print_schemas`: render from `self._data.schemas` (skip if `None`)
+  - `_print_rows`: render from `self._data.rows` (skip if `None`)
+  - `_print_columns`: render from `self._data.columns` (skip if `None`)
+  - `_print_sample_rows_only_one_side`: render from `self._data.sample_rows_{left,right}_only`
+- Remove runtime imports of `DataFrameComparison` and `Schemas` (no longer needed for rendering)
 
-- Add `--json` flag (bool, default False)
-- When True, call `comparison.digest(...).to_json()` instead of `comparison.summary(...).format()`
+### 2. `diffly/comparison.py`
 
-### 4. [diffly/**init**.py](diffly/__init__.py)
+- No changes. `summary()` continues to return `Summary` with the same signature.
 
-- No changes needed -- `Digest` is accessed via `comparison.digest()`, not imported directly. Can revisit later.
+### 3. `diffly/cli.py`
 
-### 5. **No changes to** [diffly/testing.py](diffly/testing.py)
+- Add `--json` flag (bool, default False).
+- When True, call `comparison.summary(...).to_json()` instead of `comparison.summary(...).format()`.
 
-- `testing.py` uses `summary()` for human-readable assertion error messages. `digest()` is a data output format, not relevant to assertions.
+### 4. New: `tests/test_summary_data.py`
 
-### 6. New: [tests/test_digest.py](tests/test_digest.py)
+- Parametrized test over `show_perfect_column_matches`, `top_k_column_changes`, `slim`, `sample_k_rows_only` (with derived `sample_pk`) using `itertools.product`.
+- Single rich test case where all `SummaryData` fields are populated; assert correct fields are `None` vs populated per parameter combination.
+- Additional tests: equal frames, no primary key, hidden columns, multiple PK, slim suppression, validation errors.
+- JSON roundtrip via `json.loads(summary.to_json())`.
 
-- Equal frames -> `equal=True`, all sections `None`
-- Schema differences (left-only, right-only, dtype mismatches in in_common)
-- Row counts with and without primary key
-- Column match rates with `show_perfect_column_matches=True/False`
-- `top_k_column_changes` + `show_sample_primary_key_per_change`
-- `sample_k_rows_only` for `sample_rows_left_only` / `sample_rows_right_only`
-- `slim=True` suppresses matching sections
-- `hidden_columns` hides column changes
-- Validation errors (same as Summary: hidden PK columns, sample PK without top-k)
-- JSON serialization roundtrip: `json.loads(digest.to_json())` is valid
+### 5. No changes to `diffly/__init__.py` or `diffly/testing.py`
 
 ## Verification
 
 ```bash
-pixi run pytest tests/test_digest.py -v
+pixi run pytest tests/test_summary_data.py -v
 pixi run test
 pixi run pre-commit-run
 ```
+
+Existing summary fixture tests must continue to pass unchanged — they validate that the Rich rendering is identical before and after the refactor.

From c4d62a94667a9439b07722e94879ec31a95dd77b Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 15:18:17 +0200
Subject: [PATCH 03/12] initial implementation

---
 diffly/cli.py                                 |  34 +-
 diffly/summary.py                             | 789 +++++++++++-------
 lexical-sprouting-scroll.md                   |  17 +-
 ...alse_sample_rows_False_sample_pk_False.txt |   8 +-
 ..._False_sample_rows_True_sample_pk_True.txt |   8 +-
 ...alse_sample_rows_False_sample_pk_False.txt |   8 +-
 ..._False_sample_rows_True_sample_pk_True.txt |   8 +-
 ...alse_sample_rows_False_sample_pk_False.txt |  10 +-
 ..._False_sample_rows_True_sample_pk_True.txt |  10 +-
 ...alse_sample_rows_False_sample_pk_False.txt |  10 +-
 ..._False_sample_rows_True_sample_pk_True.txt |  10 +-
 ...alse_sample_rows_False_sample_pk_False.txt |  10 +-
 ..._False_sample_rows_True_sample_pk_True.txt |  10 +-
 ...alse_sample_rows_False_sample_pk_False.txt |  10 +-
 ..._False_sample_rows_True_sample_pk_True.txt |  10 +-
 ...alse_sample_rows_False_sample_pk_False.txt | 124 +--
 ..._False_sample_rows_True_sample_pk_True.txt | 124 +--
 ...alse_sample_rows_False_sample_pk_False.txt | 124 +--
 ..._False_sample_rows_True_sample_pk_True.txt | 124 +--
 tests/test_summary_data.py                    | 249 ++++++
 20 files changed, 1082 insertions(+), 615 deletions(-)
 create mode 100644 tests/test_summary_data.py

diff --git a/diffly/cli.py b/diffly/cli.py
index 51c4658..002af8d 100644
--- a/diffly/cli.py
+++ b/diffly/cli.py
@@ -110,6 +110,16 @@ def main(
             )
         ),
     ] = False,
+    output_json: Annotated[
+        bool,
+        typer.Option(
+            "--json",
+            help=(
+                "Output a machine-readable JSON digest instead of a rich-formatted "
+                "summary."
+            ),
+        ),
+    ] = False,
     hidden_columns: Annotated[
         list[str],
         typer.Option(
@@ -130,18 +140,20 @@ def main(
         rel_tol=rel_tol,
         abs_tol_temporal=dt.timedelta(seconds=abs_tol_temporal),
     )
-    typer.echo(
-        comparison.summary(
-            show_perfect_column_matches=show_perfect_column_matches,
-            top_k_column_changes=top_k_column_changes,
-            sample_k_rows_only=sample_k_rows_only,
-            show_sample_primary_key_per_change=show_sample_primary_key_per_change,
-            left_name=left_name,
-            right_name=right_name,
-            slim=slim,
-            hidden_columns=hidden_columns,
-        ).format(pretty=True)
+    summary = comparison.summary(
+        show_perfect_column_matches=show_perfect_column_matches,
+        top_k_column_changes=top_k_column_changes,
+        sample_k_rows_only=sample_k_rows_only,
+        show_sample_primary_key_per_change=show_sample_primary_key_per_change,
+        left_name=left_name,
+        right_name=right_name,
+        slim=slim,
+        hidden_columns=hidden_columns,
     )
+    if output_json:
+        typer.echo(summary.to_json())
+    else:
+        typer.echo(summary.format(pretty=True))
 
 
 if __name__ == "__main__":  # pragma: no cover
diff --git a/diffly/summary.py b/diffly/summary.py
index 3c908ce..c7daf1c 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -1,12 +1,16 @@
 # Copyright (c) QuantCo 2025-2026
 # SPDX-License-Identifier: BSD-3-Clause
 
+from __future__ import annotations
+
+import dataclasses
 import io
+import json
 from dataclasses import dataclass
-from datetime import date, datetime
-from typing import Any, Literal, cast
+from datetime import date, datetime, timedelta
+from decimal import Decimal
+from typing import TYPE_CHECKING, Any, Literal, cast
 
-import polars as pl
 from rich import box
 from rich.columns import Columns as RichColumns
 from rich.console import Console, Group, RenderableType
@@ -16,10 +20,9 @@
 from rich.text import Text
 
 from ._utils import Side, capitalize_first
-from .comparison import (
-    DataFrameComparison,
-    Schemas,
-)
+
+if TYPE_CHECKING:  # pragma: no cover
+    from .comparison import DataFrameComparison
 
 WIDTH = 90
 SCHEMAS_COLUMN_WIDTH = 25
@@ -30,6 +33,282 @@
 MAX_STRING_LENGTH: int | None = 128
 
 
+# ---------------------------------------------------------------------------- #
+#                                 SUMMARY DATA                                 #
+# ---------------------------------------------------------------------------- #
+
+
+@dataclass
+class SummaryDataSchemas:
+    left_only: list[tuple[str, str]]
+    in_common: list[tuple[str, str, str]]
+    right_only: list[tuple[str, str]]
+
+
+@dataclass
+class SummaryDataRows:
+    n_left: int
+    n_right: int
+    n_left_only: int | None  # None when no primary key
+    n_joined_equal: int | None  # None when no primary key
+    n_joined_unequal: int | None  # None when no primary key
+    n_right_only: int | None  # None when no primary key
+
+
+@dataclass
+class SummaryDataColumnChange:
+    old: Any
+    new: Any
+    count: int
+    sample_pk: tuple[Any, ...] | None
+
+
+@dataclass
+class SummaryDataColumn:
+    name: str
+    match_rate: float
+    n_total_changes: int
+    changes: list[SummaryDataColumnChange] | None
+
+
+@dataclass
+class SummaryData:
+    equal: bool
+    n_rows_left: int
+    left_name: str
+    right_name: str
+    primary_key: list[str] | None
+    schemas: SummaryDataSchemas | None
+    rows: SummaryDataRows | None
+    columns: list[SummaryDataColumn] | None
+    sample_rows_left_only: list[tuple[Any, ...]] | None
+    sample_rows_right_only: list[tuple[Any, ...]] | None
+
+    def to_dict(self) -> dict[str, Any]:
+        def _convert(obj: Any) -> Any:
+            if isinstance(obj, dict):
+                return {k: _convert(v) for k, v in obj.items()}
+            if isinstance(obj, (list, tuple)):
+                return type(obj)(_convert(v) for v in obj)
+            return _to_python(obj)
+
+        return _convert(dataclasses.asdict(self))
+
+    def to_json(self, **kwargs: Any) -> str:
+        return json.dumps(self.to_dict(), **kwargs)
+
+
+def _to_python(value: Any) -> Any:
+    """Convert values to JSON-safe Python types."""
+    if isinstance(value, datetime):
+        return value.isoformat()
+    if isinstance(value, date):
+        return value.isoformat()
+    if isinstance(value, timedelta):
+        return value.total_seconds()
+    if isinstance(value, Decimal):
+        return float(value)
+    return value
+
+
+def _compute_summary_data(
+    comparison: DataFrameComparison,
+    show_perfect_column_matches: bool,
+    top_k_column_changes: int,
+    sample_k_rows_only: int,
+    show_sample_primary_key_per_change: bool,
+    left_name: str,
+    right_name: str,
+    slim: bool,
+    hidden_columns: list[str] | None,
+) -> SummaryData:
+    from .comparison import DataFrameComparison
+
+    hidden_columns = hidden_columns or []
+
+    # Validation (same as old Summary.__init__)
+    if comparison.primary_key is not None:
+        overlap = set(hidden_columns).intersection(set(comparison.primary_key))
+        if overlap and sample_k_rows_only > 0:
+            raise ValueError(
+                f"Cannot show sample rows only on the left or right side when primary"
+                f" key column(s) {', '.join(overlap)} should be hidden."
+            )
+        if overlap and show_sample_primary_key_per_change:
+            raise ValueError(
+                f"Cannot show sample primary key for changed columns when primary"
+                f" key column(s) {', '.join(overlap)} should be hidden."
+            )
+    if top_k_column_changes == 0 and show_sample_primary_key_per_change:
+        raise ValueError(
+            "Cannot show sample primary key per change when top_k_column_changes is 0."
+        )
+
+    top_k_changes_by_column = {
+        col: 0 if col in hidden_columns else top_k_column_changes
+        for col in comparison._other_common_columns
+    }
+
+    # Materialize frames (same pattern as old Summary.__init__)
+    comp = DataFrameComparison(
+        left=comparison.left.collect().lazy(),
+        right=comparison.right.collect().lazy(),
+        left_schema=comparison.left_schema,
+        right_schema=comparison.right_schema,
+        primary_key=comparison.primary_key,
+        _other_common_columns=comparison._other_common_columns,
+        abs_tol_by_column=comparison.abs_tol_by_column,
+        rel_tol_by_column=comparison.rel_tol_by_column,
+        abs_tol_temporal_by_column=comparison.abs_tol_temporal_by_column,
+    )
+
+    is_equal = comp.equal()
+    n_rows_left = comp.num_rows_left()
+
+    if is_equal:
+        return SummaryData(
+            equal=True,
+            n_rows_left=n_rows_left,
+            left_name=left_name,
+            right_name=right_name,
+            primary_key=comp.primary_key,
+            schemas=None,
+            rows=None,
+            columns=None,
+            sample_rows_left_only=None,
+            sample_rows_right_only=None,
+        )
+
+    # --- Schemas ---
+    schemas: SummaryDataSchemas | None = None
+    schemas_obj = comp.schemas
+    schemas_equal = schemas_obj.equal()
+    if not slim or not schemas_equal:
+        left_only_cols = sorted(schemas_obj.left_only().items())
+        right_only_cols = sorted(schemas_obj.right_only().items())
+        in_common = sorted(schemas_obj.in_common().items())
+        schemas = SummaryDataSchemas(
+            left_only=[(name, str(dtype)) for name, dtype in left_only_cols],
+            in_common=[
+                (name, str(left_dtype), str(right_dtype))
+                for name, (left_dtype, right_dtype) in in_common
+            ],
+            right_only=[(name, str(dtype)) for name, dtype in right_only_cols],
+        )
+
+    # --- Rows ---
+    rows: SummaryDataRows | None = None
+    has_pk = comp.primary_key is not None
+    if has_pk:
+        rows_equal = comp._equal_rows()
+    else:
+        rows_equal = comp.equal_num_rows()
+    if not slim or not rows_equal:
+        if has_pk:
+            rows = SummaryDataRows(
+                n_left=comp.num_rows_left(),
+                n_right=comp.num_rows_right(),
+                n_left_only=comp.num_rows_left_only(),
+                n_joined_equal=comp.num_rows_joined_equal(),
+                n_joined_unequal=comp.num_rows_joined_unequal(),
+                n_right_only=comp.num_rows_right_only(),
+            )
+        else:
+            rows = SummaryDataRows(
+                n_left=comp.num_rows_left(),
+                n_right=comp.num_rows_right(),
+                n_left_only=None,
+                n_joined_equal=None,
+                n_joined_unequal=None,
+                n_right_only=None,
+            )
+
+    # --- Columns ---
+    columns: list[SummaryDataColumn] | None = None
+    match_rates_can_be_computed = (
+        comp.primary_key is not None and comp.num_rows_joined() > 0
+    )
+    if match_rates_can_be_computed:
+        match_rates = comp.fraction_same()
+        all_match = not comp._other_common_columns or min(match_rates.values()) >= 1
+        if not slim or not all_match:
+            columns = []
+            for col_name in sorted(match_rates):
+                rate = match_rates[col_name]
+                top_k = top_k_changes_by_column[col_name]
+                changes: list[SummaryDataColumnChange] | None = None
+                n_total_changes = 0
+                if top_k > 0 and rate < 1:
+                    all_change_counts = comp.change_counts(
+                        col_name,
+                        include_sample_primary_key=show_sample_primary_key_per_change,
+                    )
+                    n_total_changes = len(all_change_counts)
+                    top_change_counts = all_change_counts.head(top_k)
+                    changes = []
+                    for row in top_change_counts.iter_rows(named=True):
+                        sample_pk: tuple[Any, ...] | None = None
+                        if show_sample_primary_key_per_change:
+                            pk_cols = comp.primary_key
+                            assert isinstance(pk_cols, list)
+                            sample_pk = tuple(row[f"sample_{c}"] for c in pk_cols)
+                        changes.append(
+                            SummaryDataColumnChange(
+                                old=row[Side.LEFT],
+                                new=row[Side.RIGHT],
+                                count=row["count"],
+                                sample_pk=sample_pk,
+                            )
+                        )
+                columns.append(
+                    SummaryDataColumn(
+                        name=col_name,
+                        match_rate=rate,
+                        n_total_changes=n_total_changes,
+                        changes=changes,
+                    )
+                )
+
+    # --- Sample rows left/right only ---
+    sample_rows_left_only: list[tuple[Any, ...]] | None = None
+    sample_rows_right_only: list[tuple[Any, ...]] | None = None
+    if has_pk and sample_k_rows_only > 0:
+        pk = comp.primary_key
+        assert isinstance(pk, list)
+
+        if comp.num_rows_left_only() > 0:
+            df = comp.left_only(lazy=True).select(pk).head(sample_k_rows_only).collect()
+            sample_rows_left_only = [tuple(row) for row in df.iter_rows()]
+        else:
+            sample_rows_left_only = []
+
+        if comp.num_rows_right_only() > 0:
+            df = (
+                comp.right_only(lazy=True).select(pk).head(sample_k_rows_only).collect()
+            )
+            sample_rows_right_only = [tuple(row) for row in df.iter_rows()]
+        else:
+            sample_rows_right_only = []
+
+    return SummaryData(
+        equal=False,
+        n_rows_left=n_rows_left,
+        left_name=left_name,
+        right_name=right_name,
+        primary_key=comp.primary_key,
+        schemas=schemas,
+        rows=rows,
+        columns=columns,
+        sample_rows_left_only=sample_rows_left_only,
+        sample_rows_right_only=sample_rows_right_only,
+    )
+
+
+# ---------------------------------------------------------------------------- #
+#                                    SUMMARY                                   #
+# ---------------------------------------------------------------------------- #
+
+
 @dataclass
 class Summary:
     """Container object for generating a summary of the comparison of two data frames.
@@ -56,48 +335,21 @@ def _truncate_name(name: str) -> str:
                 return f"{name[:CUSTOM_COLUMN_NAME_MAX_LENGTH]}..."
             return name
 
-        def _validate_primary_key_hidden_columns() -> None:
-            overlap = set(self.hidden_columns).intersection(
-                set(self._comparison.primary_key or [])
-            )
-            if overlap and self.sample_k_rows_only > 0:
-                raise ValueError(
-                    f"Cannot show sample rows only on the left or right side when primary"
-                    f" key column(s) {', '.join(overlap)} should be hidden."
-                )
-            if overlap and self.show_sample_primary_key_per_change:
-                raise ValueError(
-                    f"Cannot show sample primary key for changed columns when primary"
-                    f" key column(s) {', '.join(overlap)} should be hidden."
-                )
-
-        self._comparison = DataFrameComparison(
-            left=comparison.left.collect().lazy(),
-            right=comparison.right.collect().lazy(),
-            left_schema=comparison.left_schema,
-            right_schema=comparison.right_schema,
-            primary_key=comparison.primary_key,
-            _other_common_columns=comparison._other_common_columns,
-            abs_tol_by_column=comparison.abs_tol_by_column,
-            rel_tol_by_column=comparison.rel_tol_by_column,
-            abs_tol_temporal_by_column=comparison.abs_tol_temporal_by_column,
+        self._data = _compute_summary_data(
+            comparison,
+            show_perfect_column_matches=show_perfect_column_matches,
+            top_k_column_changes=top_k_column_changes,
+            sample_k_rows_only=sample_k_rows_only,
+            show_sample_primary_key_per_change=show_sample_primary_key_per_change,
+            left_name=left_name,
+            right_name=right_name,
+            slim=slim,
+            hidden_columns=hidden_columns,
         )
+        self.slim = slim
         self.show_perfect_column_matches = show_perfect_column_matches
         self.left_name = _truncate_name(left_name)
         self.right_name = _truncate_name(right_name)
-        self.slim = slim
-        self.sample_k_rows_only = sample_k_rows_only
-        self.show_sample_primary_key_per_change = show_sample_primary_key_per_change
-        self.hidden_columns = hidden_columns or []
-        self.top_k_changes_by_column = {
-            col: 0 if col in self.hidden_columns else top_k_column_changes
-            for col in comparison._other_common_columns
-        }
-        _validate_primary_key_hidden_columns()
-        if (top_k_column_changes == 0) and show_sample_primary_key_per_change:
-            raise ValueError(
-                "Cannot show sample primary key per change when top_k_column_changes is 0."
-            )
 
     def format(self, pretty: bool | None = None) -> str:
         """Format this summary for printing.
@@ -120,6 +372,14 @@ def format(self, pretty: bool | None = None) -> str:
 
         return _trim_whitespaces(summary)
 
+    def to_json(self, **kwargs: Any) -> str:
+        """Serialize this summary to JSON; ``kwargs`` go to ``self._data.to_json``.
+
+        Returns:
+            A JSON string representation of the summary data.
+        """
+        return self._data.to_json(**kwargs)
+
     # -------------------------------- DUNDER METHODS -------------------------------- #
 
     def __str__(self) -> str:
@@ -140,13 +400,13 @@ def _print_to_console(self, console: Console) -> None:
                     box=box.HEAVY,
                 )
             )
-        if self._comparison.equal():
+        if self._data.equal:
             self._print_equal(console)
         else:
             self._print_diff(console)
 
     def _print_equal(self, console: Console) -> None:
-        if self._comparison.num_rows_left() == 0:
+        if self._data.n_rows_left == 0:
             message = "--- Data frames are empty, but their schema matches exactly! ---"
         else:
             message = "--- Data frames match exactly! ---"
@@ -165,7 +425,8 @@ def _print_diff(self, console: Console) -> None:
     # --------------------------------- PRIMARY KEY ---------------------------------- #
 
     def _print_primary_key(self, console: Console) -> None:
-        if (primary_key := self._comparison.primary_key) is not None:
+        primary_key = self._data.primary_key
+        if primary_key is not None:
             content = self._section_primary_key(primary_key)
         else:
             content = Text(
@@ -188,30 +449,40 @@ def _section_primary_key(self, primary_key: list[str]) -> RenderableType:
     # ------------------------------------ SCHEMA ------------------------------------ #
 
     def _print_schemas(self, console: Console) -> None:
+        if self._data.schemas is None:
+            return
+
+        schemas = self._data.schemas
+        schemas_equal = (
+            not schemas.left_only
+            and not schemas.right_only
+            and all(left == right for _, left, right in schemas.in_common)
+        )
+
         content: RenderableType
-        if self._comparison.schemas.equal():
-            num_cols = len(self._comparison.schemas.left())
+        if schemas_equal:
+            num_cols = len(schemas.in_common)
             content = Text(
                 f"Schemas match exactly (column count: {num_cols:,}).", style="italic"
             )
         else:
-            content = self._section_schemas(self._comparison.schemas)
+            content = self._section_schemas(schemas)
 
-        # NOTE: In slim mode, we only print the section if there are differences.
-        if not self.slim or not self._comparison.schemas.equal():
-            _print_section(console, "Schemas", content)
+        _print_section(console, "Schemas", content)
 
-    def _section_schemas(self, columns: Schemas) -> RenderableType:
+    def _section_schemas(self, schemas: SummaryDataSchemas) -> RenderableType:
         def _print_num_columns(n: int) -> str:
             return f"{n:,} column{'s' if n != 1 else ''}"
 
         table = Table()
 
-        left_only = columns.left_only().column_names()
-        right_only = columns.right_only().column_names()
-        max_column_width = max(len(column) for column in left_only | right_only | {""})
+        left_only_names = {name for name, _ in schemas.left_only}
+        right_only_names = {name for name, _ in schemas.right_only}
+        max_column_width = max(
+            len(column) for column in left_only_names | right_only_names | {""}
+        )
 
-        if len(missing := left_only | right_only) > 0:
+        if len(missing := left_only_names | right_only_names) > 0:
             # NOTE: At least 10 as "in common" already has 9 chars
             min_width = max(10, *[len(col) for col in missing])
         else:
@@ -220,8 +491,8 @@ def _print_num_columns(n: int) -> str:
         table_data: dict[str, list[str]] = {}
 
         # Left only
-        if len(left_only) > 0:
-            left_only_header = f"{capitalize_first(self.left_name)} only \n{_print_num_columns(len(left_only))}"
+        if len(left_only_names) > 0:
+            left_only_header = f"{capitalize_first(self.left_name)} only \n{_print_num_columns(len(left_only_names))}"
             table.add_column(
                 left_only_header,
                 header_style="red",
@@ -231,11 +502,11 @@ def _print_num_columns(n: int) -> str:
                 overflow=OVERFLOW,
             )
             table_data[left_only_header] = [
-                _format_colname(col) for col in sorted(left_only)
+                _format_colname(col) for col in sorted(left_only_names)
             ]
 
         # In common
-        in_common_header = f"In common \n{_print_num_columns(len(columns.in_common()))}"
+        in_common_header = f"In common \n{_print_num_columns(len(schemas.in_common))}"
         table.add_column(
             in_common_header,
             justify="center",
@@ -243,25 +514,27 @@ def _print_num_columns(n: int) -> str:
             max_width=SCHEMAS_COLUMN_WIDTH,
             overflow=OVERFLOW,
         )
-        num_in_common = len(columns.in_common())
+        num_in_common = len(schemas.in_common)
         table_data[in_common_header] = []
-        common_but_mismatching = columns.in_common().mismatching_dtypes()
-        if len(common_but_mismatching) == 0:
+        mismatching = [
+            (name, left, right)
+            for name, left, right in schemas.in_common
+            if left != right
+        ]
+        if len(mismatching) == 0:
             table_data[in_common_header] = ["..."]
             max_column_width = max(
                 max_column_width, len(table_data[in_common_header][0])
             )
         else:
-            for col, (left_dtype, right_dtype) in sorted(
-                common_but_mismatching.items(), key=lambda x: x[0]
-            ):
+            for col, left_dtype, right_dtype in sorted(mismatching, key=lambda x: x[0]):
                 table_data[in_common_header].append(
                     f"{_format_colname(col)} [{left_dtype} -> {right_dtype}]"
                 )
                 max_column_width = max(
                     max_column_width, len(f"{col} [{left_dtype} -> {right_dtype}]")
                 )
-            num_remaining = num_in_common - len(common_but_mismatching)
+            num_remaining = num_in_common - len(mismatching)
             if num_remaining > 0:
                 table_data[in_common_header].append(
                     f"(+{_print_num_columns(num_remaining)} with matching "
@@ -272,8 +545,8 @@ def _print_num_columns(n: int) -> str:
                 )
 
         # Right only
-        if len(right_only) > 0:
-            right_only_header = f"{capitalize_first(self.right_name)} only\n{_print_num_columns(len(right_only))}"
+        if len(right_only_names) > 0:
+            right_only_header = f"{capitalize_first(self.right_name)} only\n{_print_num_columns(len(right_only_names))}"
             table.add_column(
                 right_only_header,
                 header_style="green",
@@ -283,7 +556,7 @@ def _print_num_columns(n: int) -> str:
                 overflow=OVERFLOW,
             )
             table_data[right_only_header] = [
-                _format_colname(col) for col in sorted(right_only)
+                _format_colname(col) for col in sorted(right_only_names)
             ]
 
         max_len = max(len(column_list) for column_list in table_data.values())
@@ -300,55 +573,58 @@ def _print_num_columns(n: int) -> str:
     # ------------------------------------- ROWS ------------------------------------- #
 
     def _print_rows(self, console: Console) -> None:
+        if self._data.rows is None:
+            return
+
+        rows = self._data.rows
         content: RenderableType
-        if self._comparison.primary_key is None:
-            content = self._print_rows_without_primary_key()
-            equal = self._comparison.equal_num_rows()
+        if self._data.primary_key is None:
+            content = self._render_rows_without_primary_key(rows)
         else:
-            content = self._print_rows_with_primary_key()
-            equal = self._comparison._equal_rows()
-        # NOTE: In slim mode, we only print the section if there are differences.
-        if not self.slim or not equal:
-            _print_section(console, "Rows", content)
+            content = self._render_rows_with_primary_key(rows)
+        _print_section(console, "Rows", content)
 
-    def _print_rows_without_primary_key(self) -> RenderableType:
+    def _render_rows_without_primary_key(self, rows: SummaryDataRows) -> RenderableType:
         content: RenderableType
-        if self._comparison.equal_num_rows():
+        if rows.n_left == rows.n_right:
             content = Text(
-                "The number of rows matches exactly (row count: "
-                f"{self._comparison.num_rows_left():,}).",
+                f"The number of rows matches exactly (row count: {rows.n_left:,}).",
                 style="italic",
             )
         else:
-            content = self._section_row_counts()
+            content = self._section_row_counts(rows)
         return content
 
-    def _print_rows_with_primary_key(self) -> RenderableType:
+    def _render_rows_with_primary_key(self, rows: SummaryDataRows) -> RenderableType:
+        assert rows.n_joined_equal is not None
+        assert rows.n_joined_unequal is not None
+        assert rows.n_left_only is not None
+        assert rows.n_right_only is not None
+
         content: RenderableType
-        if self._comparison._equal_rows():
+        equal_rows = rows.n_joined_equal == rows.n_left == rows.n_right
+        if equal_rows:
             content = Text(
-                f"All rows match exactly (row count: {self._comparison.num_rows_left():,}).",
+                f"All rows match exactly (row count: {rows.n_left:,}).",
                 style="italic",
             )
         else:
             # NOTE: In slim mode, we omit the row counts section and only show the
             # row matches section.
-            if self._comparison.equal_num_rows() and self.slim:
-                content = Group(self._section_row_matches())
+            if (rows.n_left == rows.n_right) and self.slim:
+                content = Group(self._section_row_matches(rows))
             else:
                 content = Group(
-                    self._section_row_counts(),
+                    self._section_row_counts(rows),
                     "",
-                    self._section_row_matches(),
+                    self._section_row_matches(rows),
                 )
         return content
 
-    def _section_row_counts(self) -> RenderableType:
+    def _section_row_counts(self, rows: SummaryDataRows) -> RenderableType:
         gain_loss = ""
-        if self._comparison.num_rows_left() > 0:
-            fraction_rows_right = (
-                self._comparison.num_rows_right() / self._comparison.num_rows_left()
-            )
+        if rows.n_left > 0:
+            fraction_rows_right = rows.n_right / rows.n_left
             if fraction_rows_right > 1:
                 gain_loss = f"(+{(fraction_rows_right - 1):.2%})"
             elif fraction_rows_right < 1:
@@ -366,86 +642,86 @@ def _section_row_counts(self) -> RenderableType:
         count_grid.add_column("", justify="center")
         count_grid.add_column(right_header, justify="center")
         count_grid.add_row(
-            f"{self._comparison.num_rows_left():,}",
+            f"{rows.n_left:,}",
             f" {gain_loss} ",
-            f"{self._comparison.num_rows_right():,}",
+            f"{rows.n_right:,}",
         )
         count_rows.append(count_grid)
 
         return Group(*count_rows)
 
-    def _section_row_matches(self) -> RenderableType:
+    def _section_row_matches(self, rows: SummaryDataRows) -> RenderableType:
+        assert rows.n_left_only is not None
+        assert rows.n_joined_equal is not None
+        assert rows.n_joined_unequal is not None
+        assert rows.n_right_only is not None
+        n_joined = rows.n_joined_equal + rows.n_joined_unequal
+
         columns: list[RenderableType] = []
         num_dummy_cols = 5
 
         # Left Table
-        if self._comparison.num_rows_left() > 0:
+        if rows.n_left > 0:
             left_table = Table(show_header=False, padding=0, box=box.HEAVY_EDGE)
             for _ in range(num_dummy_cols):
                 left_table.add_column()
-            if self._comparison.num_rows_left_only() > 0:
+            if rows.n_left_only > 0:
                 left_table.add_row(*([Text("-", style="red")] * num_dummy_cols))
                 left_table.add_section()
-            if self._comparison.num_rows_joined_equal() > 0:
+            if rows.n_joined_equal > 0:
                 left_table.add_row(*([" "] * num_dummy_cols))
                 left_table.add_section()
-            if self._comparison.num_rows_joined_unequal() > 0:
+            if rows.n_joined_unequal > 0:
                 left_table.add_row(*([" "] * num_dummy_cols))
                 left_table.add_section()
 
             columns.append(left_table)
 
         # Separator between tables
-        if self._comparison.num_rows_joined() > 0:
-            rows: list[RenderableType] = []
-            if self._comparison.num_rows_left_only() > 0:
-                rows.append("\n")
-            if self._comparison.num_rows_joined_equal() > 0:
-                rows.append("╌" * 3)
-                rows.append(Text(" = ", style="bold"))
-            if self._comparison.num_rows_joined_unequal() > 0:
-                rows.append("╌" * 3)
-                rows.append(Text(" ≠ ", style="bold"))
-            rows.append("╌" * 3)
-
-            columns.append(Group(*rows))
+        if n_joined > 0:
+            separator_rows: list[RenderableType] = []
+            if rows.n_left_only > 0:
+                separator_rows.append("\n")
+            if rows.n_joined_equal > 0:
+                separator_rows.append("╌" * 3)
+                separator_rows.append(Text(" = ", style="bold"))
+            if rows.n_joined_unequal > 0:
+                separator_rows.append("╌" * 3)
+                separator_rows.append(Text(" ≠ ", style="bold"))
+            separator_rows.append("╌" * 3)
+
+            columns.append(Group(*separator_rows))
         else:
             columns.append(" " * 3)
 
         # Right table
-        if self._comparison.num_rows_right() > 0:
+        if rows.n_right > 0:
             right_table = Table(show_header=False, padding=0, box=box.HEAVY_EDGE)
             for _ in range(num_dummy_cols):
                 right_table.add_column()
-            if self._comparison.num_rows_joined_equal() > 0:
+            if rows.n_joined_equal > 0:
                 right_table.add_row(*([" "] * num_dummy_cols))
                 right_table.add_section()
-            if self._comparison.num_rows_joined_unequal() > 0:
+            if rows.n_joined_unequal > 0:
                 right_table.add_row(*([" "] * num_dummy_cols))
                 right_table.add_section()
-            if self._comparison.num_rows_right_only() > 0:
+            if rows.n_right_only > 0:
                 right_table.add_row(*([Text("+", style="green")] * num_dummy_cols))
 
-            if self._comparison.num_rows_left_only() > 0:
+            if rows.n_left_only > 0:
                 columns.append(Group("\n", right_table))
             else:
                 columns.append(right_table)
 
         # Numbers for groups
-        if (
-            self._comparison.num_rows_left() > 0
-            or self._comparison.num_rows_right() > 0
-        ):
+        if rows.n_left > 0 or rows.n_right > 0:
             grid = Table(
                 show_header=False,
                 box=box.Box(
                     "\n".join(
                         (  # header row
                             ["╌" * 4]
-                            if (
-                                self._comparison.num_rows_left_only() == 0
-                                and self._comparison.num_rows_left() > 0
-                            )
+                            if (rows.n_left_only == 0 and rows.n_left > 0)
                             else [" " * 4]
                         )
                         + [" " * 4] * 3
@@ -453,10 +729,7 @@ def _section_row_matches(self) -> RenderableType:
                         + [" " * 4] * 2
                         + (  # bottom row
                             ["╌" * 4]
-                            if (
-                                self._comparison.num_rows_right_only() == 0
-                                and self._comparison.num_rows_right() > 0
-                            )
+                            if (rows.n_right_only == 0 and rows.n_right > 0)
                             else [" " * 4]
                         )
                     )
@@ -466,65 +739,49 @@ def _section_row_matches(self) -> RenderableType:
             grid.add_column("Count", justify="right")
             grid.add_column("Type", justify="left")
             grid.add_column("Percentage", justify="right")
-            if self._comparison.num_rows_left_only() > 0:
-                fraction_left_only = (
-                    self._comparison.num_rows_left_only()
-                    / self._comparison.num_rows_left()
-                )
+            if rows.n_left_only > 0:
+                fraction_left_only = rows.n_left_only / rows.n_left
                 grid.add_row(
-                    f"{self._comparison.num_rows_left_only():,}",
+                    f"{rows.n_left_only:,}",
                     f"{self.left_name} only",
                     f"({_format_fraction_as_percentage(fraction_left_only)})",
                 )
                 grid.add_section()
-            if self._comparison.num_rows_joined_equal() > 0:
-                fraction_equal = (
-                    self._comparison.num_rows_joined_equal()
-                    / self._comparison.num_rows_joined()
-                )
+            if rows.n_joined_equal > 0:
+                fraction_equal = rows.n_joined_equal / n_joined
                 grid.add_row(
-                    f"{self._comparison.num_rows_joined_equal():,}",
+                    f"{rows.n_joined_equal:,}",
                     "equal",
                     f"({_format_fraction_as_percentage(fraction_equal)})",
                 )
                 grid.add_section()
-            if self._comparison.num_rows_joined_unequal() > 0:
-                fraction_unequal = (
-                    self._comparison.num_rows_joined_unequal()
-                    / self._comparison.num_rows_joined()
-                )
+            if rows.n_joined_unequal > 0:
+                fraction_unequal = rows.n_joined_unequal / n_joined
                 grid.add_row(
-                    f"{self._comparison.num_rows_joined_unequal():,}",
+                    f"{rows.n_joined_unequal:,}",
                     "unequal",
                     f"({_format_fraction_as_percentage(fraction_unequal)})",
                 )
                 grid.add_section()
-            if self._comparison.num_rows_right_only() > 0:
-                fraction_right_only = (
-                    self._comparison.num_rows_right_only()
-                    / self._comparison.num_rows_right()
-                )
+            if rows.n_right_only > 0:
+                fraction_right_only = rows.n_right_only / rows.n_right
                 grid.add_row(
-                    f"{self._comparison.num_rows_right_only():,}",
+                    f"{rows.n_right_only:,}",
                     f"{self.right_name} only",
                     f"({_format_fraction_as_percentage(fraction_right_only)})",
                 )
             columns.append(grid)
 
         # Num joined
-        num_sections = (self._comparison.num_rows_joined_equal() > 0) + (
-            self._comparison.num_rows_joined_unequal() > 0
-        )
+        num_sections = (rows.n_joined_equal > 0) + (rows.n_joined_unequal > 0)
         if num_sections > 0:
             joined_rows: list[RenderableType] = []
-            if self._comparison.num_rows_left_only() > 0:
+            if rows.n_left_only > 0:
                 joined_rows.append("\n")
             joined_rows.append("╌╮")
             joined_rows.append(" │")
             if num_sections > 1:
-                joined_rows.append(
-                    f"╌├╴  {self._comparison.num_rows_joined():,}  joined"
-                )
+                joined_rows.append(f"╌├╴  {n_joined:,}  joined")
                 joined_rows.append(" │")
             joined_rows.append("╌╯")
             columns.append(Group(*joined_rows))
@@ -534,179 +791,129 @@ def _section_row_matches(self) -> RenderableType:
     # -------------------------------- COLUMN MATCHES -------------------------------- #
 
     def _print_columns(self, console: Console) -> None:
-        # NOTE: We can only compute column matches if there are primary key columns and
-        # at least one joined row.
-        match_rates_can_be_computed = (
-            self._comparison.primary_key is not None
-            and self._comparison.num_rows_joined() > 0
+        if self._data.columns is None:
+            return
+        _print_section(
+            console,
+            "Columns",
+            self._section_columns(),
         )
-        if match_rates_can_be_computed:
-            match_rates = self._comparison.fraction_same()
-            # NOTE: In slim mode, we only print the columns section if there are
-            # non-primary key columns and at least one column has a match rate < 1.
-            if not self.slim or (
-                self._comparison._other_common_columns and min(match_rates.values()) < 1
-            ):
-                _print_section(
-                    console,
-                    "Columns",
-                    self._section_columns(),
-                )
 
     def _section_columns(self) -> RenderableType:
         display_items: list[RenderableType] = []
+        columns = self._data.columns
+        assert columns is not None
 
-        if self._comparison._other_common_columns and (
-            self.show_perfect_column_matches
-            or (min(self._comparison.fraction_same().values()) < 1)
-        ):
-            matches = Table(show_header=False)
-            matches.add_column(
-                "Column", max_width=COLUMN_SECTION_COLUMN_WIDTH, overflow=OVERFLOW
-            )
-            matches.add_column("Match Rate", justify="right")
-            has_top_changes_column = any(
-                self.top_k_changes_by_column[col_name] > 0
-                for col_name in self._comparison._other_common_columns
-                if self._comparison.fraction_same()[col_name] < 1
+        if not columns:
+            display_items.append(
+                Text("No common non-primary key columns to compare.", style="italic")
             )
-            if has_top_changes_column:
-                matches.add_column("Top Changes", justify="right")
-            if self.show_perfect_column_matches:
-                max_col_len = max(
-                    len(col) for col in self._comparison.fraction_same().keys()
+        else:
+            visible = [
+                c
+                for c in columns
+                if self.show_perfect_column_matches or c.match_rate < 1
+            ]
+            if not visible:
+                display_items.append(
+                    Text("All columns match perfectly.", style="italic")
                 )
             else:
-                max_col_len = max(
-                    len(col)
-                    for col, frac in self._comparison.fraction_same().items()
-                    if frac < 1
+                matches = Table(show_header=False)
+                matches.add_column(
+                    "Column",
+                    max_width=COLUMN_SECTION_COLUMN_WIDTH,
+                    overflow=OVERFLOW,
+                )
+                matches.add_column("Match Rate", justify="right")
+                has_top_changes_column = any(
+                    c.changes is not None for c in columns if c.match_rate < 1
                 )
-            for column, match_rate in sorted(
-                self._comparison.fraction_same().items(), key=lambda x: x[0]
-            ):
-                if self.show_perfect_column_matches or match_rate < 1:
-                    columns: list[RenderableType] = [
-                        Text(column, style="cyan"),
-                        f"{_format_fraction_as_percentage(match_rate)}",
+                if has_top_changes_column:
+                    matches.add_column("Top Changes", justify="right")
+                max_col_len = max(len(c.name) for c in visible)
+                for col in visible:
+                    row_items: list[RenderableType] = [
+                        Text(col.name, style="cyan"),
+                        f"{_format_fraction_as_percentage(col.match_rate)}",
                     ]
-                    top_k_column_changes = self.top_k_changes_by_column[column]
-                    if top_k_column_changes > 0:
-                        all_change_counts = self._comparison.change_counts(
-                            column,
-                            include_sample_primary_key=self.show_sample_primary_key_per_change,
-                        )
-
-                        top_change_counts = all_change_counts.head(top_k_column_changes)
-
+                    if col.changes is not None:
                         change_lines = []
-                        for row in top_change_counts.iter_rows(named=True):
+                        for change in col.changes:
                             line = (
-                                f"{_format_value(row['left'])} -> "
-                                f"{_format_value(row['right'])} ({row['count']:,}x"
+                                f"{_format_value(change.old)} -> "
+                                f"{_format_value(change.new)} ({change.count:,}x"
                             )
-                            if self.show_sample_primary_key_per_change:
-                                primary_key = self._comparison.primary_key
-                                assert isinstance(primary_key, list)
+                            if change.sample_pk is not None:
                                 line += ", e.g. "
-                                if len(primary_key) == 1:
-                                    line += _format_value(
-                                        row[f"sample_{primary_key[0]}"]
-                                    )
+                                if len(change.sample_pk) == 1:
+                                    line += _format_value(change.sample_pk[0])
                                 else:
                                     line += "("
                                     line += ", ".join(
-                                        [
-                                            _format_value(row[f"sample_{col}"])
-                                            for col in primary_key
-                                        ]
+                                        [_format_value(v) for v in change.sample_pk]
                                     )
                                     line += ")"
                             line += ")"
                             change_lines.append(line)
 
-                        if (
-                            remaining_count := len(all_change_counts)
-                            - top_k_column_changes
-                        ) > 0:
+                        remaining_count = col.n_total_changes - len(col.changes)
+                        if remaining_count > 0:
                             change_lines.append(
                                 f"(...and {remaining_count:,} {('other' if remaining_count == 1 else 'others')})"
                             )
 
                         text = "\n".join(change_lines)
-                        columns.append(text)
+                        row_items.append(text)
 
-                    matches.add_row(*columns)
+                    matches.add_row(*row_items)
                     if (
                         has_top_changes_column
                         or max_col_len > COLUMN_SECTION_COLUMN_WIDTH
                     ):
                         matches.add_section()
 
-            display_items.append(matches)
-        elif not self._comparison._other_common_columns:
-            display_items.append(
-                Text("No common non-primary key columns to compare.", style="italic")
-            )
-        else:
-            display_items.append(Text("All columns match perfectly.", style="italic"))
+                display_items.append(matches)
 
         return Group(*display_items)
 
     # ------------------------------ ROWS ONLY ONE SIDE ------------------------------ #
 
     def _print_sample_rows_only_one_side(self, console: Console, side: Side) -> None:
-        if self._comparison.primary_key is None:
-            return
-        num_rows_only = (
-            self._comparison.num_rows_left_only()
-            if side == Side.LEFT
-            else self._comparison.num_rows_right_only()
-        )
-        name = self.left_name if side == Side.LEFT else self.right_name
-        if num_rows_only > 0 and self.sample_k_rows_only > 0:
+        if side == Side.LEFT:
+            sample_rows = self._data.sample_rows_left_only
+            name = self.left_name
+        else:
+            sample_rows = self._data.sample_rows_right_only
+            name = self.right_name
+
+        primary_key = self._data.primary_key
+        if primary_key is not None and sample_rows is not None and len(sample_rows) > 0:
             _print_section(
                 console,
                 f"Rows {name} only",
-                self._section_rows_only_one_side(
-                    side=side, sample_k_rows_only=self.sample_k_rows_only
-                ),
+                self._section_rows_only_one_side(sample_rows, primary_key),
             )
 
     def _section_rows_only_one_side(
-        self, side: Side, sample_k_rows_only: int
+        self,
+        sample_rows: list[tuple[Any, ...]],
+        primary_key: list[str],
     ) -> RenderableType:
-        def _polars_to_rich_table(df: pl.DataFrame) -> Table:
-            table = Table()
-            columns = df.columns
-
-            for col in columns[:MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES]:
-                table.add_column(col, overflow="ellipsis")
-
-            if len(columns) > MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES:
-                table.add_column("...", style="dim")
-
-            for row in df.iter_rows():
-                added_row = [
-                    str(v) for v in row[:MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES]
-                ]
-                if len(columns) > MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES:
-                    added_row.append("...")
-                table.add_row(*added_row)
-
-            return table
-
-        only_one_side = (
-            self._comparison.left_only(lazy=True)
-            if side == Side.LEFT
-            else self._comparison.right_only(lazy=True)
-        )
-        primary_key = self._comparison.primary_key
-        assert isinstance(primary_key, list)
+        table = Table()
+        for col in primary_key[:MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES]:
+            table.add_column(col, overflow="ellipsis")
 
-        return _polars_to_rich_table(
-            only_one_side.select(primary_key).head(sample_k_rows_only).collect()
-        )
+        if len(primary_key) > MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES:
+            table.add_column("...", style="dim")
+
+        for row in sample_rows:
+            added_row = [str(v) for v in row[:MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES]]
+            if len(primary_key) > MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES:
+                added_row.append("...")
+            table.add_row(*added_row)
+
+        return table
 
 
 # ------------------------------------------------------------------------------------ #
diff --git a/lexical-sprouting-scroll.md b/lexical-sprouting-scroll.md
index db97798..44dc9d3 100644
--- a/lexical-sprouting-scroll.md
+++ b/lexical-sprouting-scroll.md
@@ -36,11 +36,11 @@ class SummaryData:
     left_name: str
     right_name: str
     primary_key: list[str] | None
-    schemas: SummaryDataSchemas | None       # None when equal, or slim + schemas match
-    rows: SummaryDataRows | None             # None when equal, or slim + rows match
-    columns: list[SummaryDataColumn] | None  # None when equal, no PK, no joined rows, or slim + all match
+    schemas: SummaryDataSchemas | None
+    rows: SummaryDataRows | None
+    columns: list[SummaryDataColumn] | None
     sample_rows_left_only: list[tuple[Any, ...]] | None   # None when no PK or sample_k==0
-    sample_rows_right_only: list[tuple[Any, ...]] | None
+    sample_rows_right_only: list[tuple[Any, ...]] | None # None when no PK or sample_k==0
 
     def to_dict(self) -> dict[str, Any]: ...
     def to_json(self, **kwargs) -> str: ...
@@ -56,9 +56,9 @@ class SummaryDataRows:
     n_left: int
     n_right: int
     n_left_only: int | None       # None when no primary key
-    n_joined_equal: int | None
-    n_joined_unequal: int | None
-    n_right_only: int | None
+    n_joined_equal: int | None  # None when no primary key
+    n_joined_unequal: int | None  # None when no primary key
+    n_right_only: int | None  # None when no primary key
 
 @dataclass
 class SummaryDataColumn:
@@ -78,9 +78,8 @@ class SummaryDataColumnChange:
 ### Design decisions
 
 - **Primary key consistency:** Both `sample_rows_{left,right}_only` entries and `sample_pk` in `SummaryDataColumnChange` use `tuple[Any, ...]` matching the `primary_key` column order.
-- **None logic:** `schemas` is `None` when equal, or when `slim=True` and schemas match. Same pattern for `rows` and `columns`.
 - **`n_total_changes`** on `SummaryDataColumn`: needed to render `"(...and 5 others)"`. The `changes` list only holds the top-k.
-- **Equal + empty frames:** Summary distinguishes "empty but matching" from "match exactly" via row count. When `equal=True`, `rows` is `None`. _Alternative:_ add a top-level `n_rows_left` field if this proves awkward during implementation.
+- **Equal + empty frames:** Summary distinguishes "empty but matching" from "match exactly" via row count. _Alternative:_ add a top-level `n_rows_left` field if this proves awkward during implementation.
 
 ## Files to modify
 
diff --git a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index d7be9d3..85f4d09 100644
--- a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -18,7 +18,7 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌───────────┬─────────┬──┐
-   │ speed_kph │ 100.00% │  │
-   │ weight_kg │ 100.00% │  │
-   └───────────┴─────────┴──┘
+   ┌───────────┬─────────┐
+   │ speed_kph │ 100.00% │
+   │ weight_kg │ 100.00% │
+   └───────────┴─────────┘
diff --git a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index d7be9d3..85f4d09 100644
--- a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -18,7 +18,7 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌───────────┬─────────┬──┐
-   │ speed_kph │ 100.00% │  │
-   │ weight_kg │ 100.00% │  │
-   └───────────┴─────────┴──┘
+   ┌───────────┬─────────┐
+   │ speed_kph │ 100.00% │
+   │ weight_kg │ 100.00% │
+   └───────────┴─────────┘
diff --git a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index 084420d..f0f8834 100644
--- a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -18,7 +18,7 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌───────────┬─────────┬──┐
-   │ [36mspeed_kph[0m │ 100.00% │  │
-   │ [36mweight_kg[0m │ 100.00% │  │
-   └───────────┴─────────┴──┘
+   ┌───────────┬─────────┐
+   │ [36mspeed_kph[0m │ 100.00% │
+   │ [36mweight_kg[0m │ 100.00% │
+   └───────────┴─────────┘
diff --git a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index 084420d..f0f8834 100644
--- a/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/equal_non_empty_different_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -18,7 +18,7 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌───────────┬─────────┬──┐
-   │ [36mspeed_kph[0m │ 100.00% │  │
-   │ [36mweight_kg[0m │ 100.00% │  │
-   └───────────┴─────────┴──┘
+   ┌───────────┬─────────┐
+   │ [36mspeed_kph[0m │ 100.00% │
+   │ [36mweight_kg[0m │ 100.00% │
+   └───────────┴─────────┘
diff --git a/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index f94820c..e1880cc 100644
--- a/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -20,8 +20,8 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ life_expectancy │ 100.00% │  │
-   │ speed_kph       │ 100.00% │  │
-   │ weight_kg       │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ life_expectancy │ 100.00% │
+   │ speed_kph       │ 100.00% │
+   │ weight_kg       │ 100.00% │
+   └─────────────────┴─────────┘
diff --git a/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index bbba4af..fe23871 100644
--- a/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/gained_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -20,11 +20,11 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ life_expectancy │ 100.00% │  │
-   │ speed_kph       │ 100.00% │  │
-   │ weight_kg       │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ life_expectancy │ 100.00% │
+   │ speed_kph       │ 100.00% │
+   │ weight_kg       │ 100.00% │
+   └─────────────────┴─────────┘
 
  Rows right only
  ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔
diff --git a/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index 6c967b4..4876dde 100644
--- a/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -20,8 +20,8 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ [36mlife_expectancy[0m │ 100.00% │  │
-   │ [36mspeed_kph      [0m │ 100.00% │  │
-   │ [36mweight_kg      [0m │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ [36mlife_expectancy[0m │ 100.00% │
+   │ [36mspeed_kph      [0m │ 100.00% │
+   │ [36mweight_kg      [0m │ 100.00% │
+   └─────────────────┴─────────┘
diff --git a/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index 7e99164..c566908 100644
--- a/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/gained_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -20,11 +20,11 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ [36mlife_expectancy[0m │ 100.00% │  │
-   │ [36mspeed_kph      [0m │ 100.00% │  │
-   │ [36mweight_kg      [0m │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ [36mlife_expectancy[0m │ 100.00% │
+   │ [36mspeed_kph      [0m │ 100.00% │
+   │ [36mweight_kg      [0m │ 100.00% │
+   └─────────────────┴─────────┘
 
  [1mRows right only[0m
  ▔▔▔▔▔▔▔▔▔▔▔▔▔▔▔
diff --git a/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index 2d673a2..e119d64 100644
--- a/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -20,8 +20,8 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ life_expectancy │ 100.00% │  │
-   │ speed_kph       │ 100.00% │  │
-   │ weight_kg       │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ life_expectancy │ 100.00% │
+   │ speed_kph       │ 100.00% │
+   │ weight_kg       │ 100.00% │
+   └─────────────────┴─────────┘
diff --git a/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index 0e12a95..e2dce9e 100644
--- a/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/lost_rows_only/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -20,11 +20,11 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ life_expectancy │ 100.00% │  │
-   │ speed_kph       │ 100.00% │  │
-   │ weight_kg       │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ life_expectancy │ 100.00% │
+   │ speed_kph       │ 100.00% │
+   │ weight_kg       │ 100.00% │
+   └─────────────────┴─────────┘
 
  Rows left only
  ▔▔▔▔▔▔▔▔▔▔▔▔▔▔
diff --git a/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index bbdba0e..8d6b229 100644
--- a/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -20,8 +20,8 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ [36mlife_expectancy[0m │ 100.00% │  │
-   │ [36mspeed_kph      [0m │ 100.00% │  │
-   │ [36mweight_kg      [0m │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ [36mlife_expectancy[0m │ 100.00% │
+   │ [36mspeed_kph      [0m │ 100.00% │
+   │ [36mweight_kg      [0m │ 100.00% │
+   └─────────────────┴─────────┘
diff --git a/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index c569494..c4c7b55 100644
--- a/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/lost_rows_only/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -20,11 +20,11 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌─────────────────┬─────────┬──┐
-   │ [36mlife_expectancy[0m │ 100.00% │  │
-   │ [36mspeed_kph      [0m │ 100.00% │  │
-   │ [36mweight_kg      [0m │ 100.00% │  │
-   └─────────────────┴─────────┴──┘
+   ┌─────────────────┬─────────┐
+   │ [36mlife_expectancy[0m │ 100.00% │
+   │ [36mspeed_kph      [0m │ 100.00% │
+   │ [36mweight_kg      [0m │ 100.00% │
+   └─────────────────┴─────────┘
 
  [1mRows left only[0m
  ▔▔▔▔▔▔▔▔▔▔▔▔▔▔
diff --git a/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index 4a3530b..482fdf5 100644
--- a/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -22,65 +22,65 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌────────────────────┬─────────┬──┐
-   │ life_expectancy_0  │ 100.00% │  │
-   │ life_expectancy_1  │ 100.00% │  │
-   │ life_expectancy_10 │ 100.00% │  │
-   │ life_expectancy_11 │ 100.00% │  │
-   │ life_expectancy_12 │ 100.00% │  │
-   │ life_expectancy_13 │ 100.00% │  │
-   │ life_expectancy_14 │ 100.00% │  │
-   │ life_expectancy_15 │ 100.00% │  │
-   │ life_expectancy_16 │ 100.00% │  │
-   │ life_expectancy_17 │ 100.00% │  │
-   │ life_expectancy_18 │ 100.00% │  │
-   │ life_expectancy_19 │ 100.00% │  │
-   │ life_expectancy_2  │ 100.00% │  │
-   │ life_expectancy_3  │ 100.00% │  │
-   │ life_expectancy_4  │ 100.00% │  │
-   │ life_expectancy_5  │ 100.00% │  │
-   │ life_expectancy_6  │ 100.00% │  │
-   │ life_expectancy_7  │ 100.00% │  │
-   │ life_expectancy_8  │ 100.00% │  │
-   │ life_expectancy_9  │ 100.00% │  │
-   │ speed_kph_0        │ 100.00% │  │
-   │ speed_kph_1        │ 100.00% │  │
-   │ speed_kph_10       │ 100.00% │  │
-   │ speed_kph_11       │ 100.00% │  │
-   │ speed_kph_12       │ 100.00% │  │
-   │ speed_kph_13       │ 100.00% │  │
-   │ speed_kph_14       │ 100.00% │  │
-   │ speed_kph_15       │ 100.00% │  │
-   │ speed_kph_16       │ 100.00% │  │
-   │ speed_kph_17       │ 100.00% │  │
-   │ speed_kph_18       │ 100.00% │  │
-   │ speed_kph_19       │ 100.00% │  │
-   │ speed_kph_2        │ 100.00% │  │
-   │ speed_kph_3        │ 100.00% │  │
-   │ speed_kph_4        │ 100.00% │  │
-   │ speed_kph_5        │ 100.00% │  │
-   │ speed_kph_6        │ 100.00% │  │
-   │ speed_kph_7        │ 100.00% │  │
-   │ speed_kph_8        │ 100.00% │  │
-   │ speed_kph_9        │ 100.00% │  │
-   │ weight_kg_0        │ 100.00% │  │
-   │ weight_kg_1        │ 100.00% │  │
-   │ weight_kg_10       │ 100.00% │  │
-   │ weight_kg_11       │ 100.00% │  │
-   │ weight_kg_12       │ 100.00% │  │
-   │ weight_kg_13       │ 100.00% │  │
-   │ weight_kg_14       │ 100.00% │  │
-   │ weight_kg_15       │ 100.00% │  │
-   │ weight_kg_16       │ 100.00% │  │
-   │ weight_kg_17       │ 100.00% │  │
-   │ weight_kg_18       │ 100.00% │  │
-   │ weight_kg_19       │ 100.00% │  │
-   │ weight_kg_2        │ 100.00% │  │
-   │ weight_kg_3        │ 100.00% │  │
-   │ weight_kg_4        │ 100.00% │  │
-   │ weight_kg_5        │ 100.00% │  │
-   │ weight_kg_6        │ 100.00% │  │
-   │ weight_kg_7        │ 100.00% │  │
-   │ weight_kg_8        │ 100.00% │  │
-   │ weight_kg_9        │ 100.00% │  │
-   └────────────────────┴─────────┴──┘
+   ┌────────────────────┬─────────┐
+   │ life_expectancy_0  │ 100.00% │
+   │ life_expectancy_1  │ 100.00% │
+   │ life_expectancy_10 │ 100.00% │
+   │ life_expectancy_11 │ 100.00% │
+   │ life_expectancy_12 │ 100.00% │
+   │ life_expectancy_13 │ 100.00% │
+   │ life_expectancy_14 │ 100.00% │
+   │ life_expectancy_15 │ 100.00% │
+   │ life_expectancy_16 │ 100.00% │
+   │ life_expectancy_17 │ 100.00% │
+   │ life_expectancy_18 │ 100.00% │
+   │ life_expectancy_19 │ 100.00% │
+   │ life_expectancy_2  │ 100.00% │
+   │ life_expectancy_3  │ 100.00% │
+   │ life_expectancy_4  │ 100.00% │
+   │ life_expectancy_5  │ 100.00% │
+   │ life_expectancy_6  │ 100.00% │
+   │ life_expectancy_7  │ 100.00% │
+   │ life_expectancy_8  │ 100.00% │
+   │ life_expectancy_9  │ 100.00% │
+   │ speed_kph_0        │ 100.00% │
+   │ speed_kph_1        │ 100.00% │
+   │ speed_kph_10       │ 100.00% │
+   │ speed_kph_11       │ 100.00% │
+   │ speed_kph_12       │ 100.00% │
+   │ speed_kph_13       │ 100.00% │
+   │ speed_kph_14       │ 100.00% │
+   │ speed_kph_15       │ 100.00% │
+   │ speed_kph_16       │ 100.00% │
+   │ speed_kph_17       │ 100.00% │
+   │ speed_kph_18       │ 100.00% │
+   │ speed_kph_19       │ 100.00% │
+   │ speed_kph_2        │ 100.00% │
+   │ speed_kph_3        │ 100.00% │
+   │ speed_kph_4        │ 100.00% │
+   │ speed_kph_5        │ 100.00% │
+   │ speed_kph_6        │ 100.00% │
+   │ speed_kph_7        │ 100.00% │
+   │ speed_kph_8        │ 100.00% │
+   │ speed_kph_9        │ 100.00% │
+   │ weight_kg_0        │ 100.00% │
+   │ weight_kg_1        │ 100.00% │
+   │ weight_kg_10       │ 100.00% │
+   │ weight_kg_11       │ 100.00% │
+   │ weight_kg_12       │ 100.00% │
+   │ weight_kg_13       │ 100.00% │
+   │ weight_kg_14       │ 100.00% │
+   │ weight_kg_15       │ 100.00% │
+   │ weight_kg_16       │ 100.00% │
+   │ weight_kg_17       │ 100.00% │
+   │ weight_kg_18       │ 100.00% │
+   │ weight_kg_19       │ 100.00% │
+   │ weight_kg_2        │ 100.00% │
+   │ weight_kg_3        │ 100.00% │
+   │ weight_kg_4        │ 100.00% │
+   │ weight_kg_5        │ 100.00% │
+   │ weight_kg_6        │ 100.00% │
+   │ weight_kg_7        │ 100.00% │
+   │ weight_kg_8        │ 100.00% │
+   │ weight_kg_9        │ 100.00% │
+   └────────────────────┴─────────┘
diff --git a/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index 087cbe3..a30c9c9 100644
--- a/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/many_pk_columns/gen/pretty_False_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -22,68 +22,68 @@
 
  Columns
  ▔▔▔▔▔▔▔
-   ┌────────────────────┬─────────┬──┐
-   │ life_expectancy_0  │ 100.00% │  │
-   │ life_expectancy_1  │ 100.00% │  │
-   │ life_expectancy_10 │ 100.00% │  │
-   │ life_expectancy_11 │ 100.00% │  │
-   │ life_expectancy_12 │ 100.00% │  │
-   │ life_expectancy_13 │ 100.00% │  │
-   │ life_expectancy_14 │ 100.00% │  │
-   │ life_expectancy_15 │ 100.00% │  │
-   │ life_expectancy_16 │ 100.00% │  │
-   │ life_expectancy_17 │ 100.00% │  │
-   │ life_expectancy_18 │ 100.00% │  │
-   │ life_expectancy_19 │ 100.00% │  │
-   │ life_expectancy_2  │ 100.00% │  │
-   │ life_expectancy_3  │ 100.00% │  │
-   │ life_expectancy_4  │ 100.00% │  │
-   │ life_expectancy_5  │ 100.00% │  │
-   │ life_expectancy_6  │ 100.00% │  │
-   │ life_expectancy_7  │ 100.00% │  │
-   │ life_expectancy_8  │ 100.00% │  │
-   │ life_expectancy_9  │ 100.00% │  │
-   │ speed_kph_0        │ 100.00% │  │
-   │ speed_kph_1        │ 100.00% │  │
-   │ speed_kph_10       │ 100.00% │  │
-   │ speed_kph_11       │ 100.00% │  │
-   │ speed_kph_12       │ 100.00% │  │
-   │ speed_kph_13       │ 100.00% │  │
-   │ speed_kph_14       │ 100.00% │  │
-   │ speed_kph_15       │ 100.00% │  │
-   │ speed_kph_16       │ 100.00% │  │
-   │ speed_kph_17       │ 100.00% │  │
-   │ speed_kph_18       │ 100.00% │  │
-   │ speed_kph_19       │ 100.00% │  │
-   │ speed_kph_2        │ 100.00% │  │
-   │ speed_kph_3        │ 100.00% │  │
-   │ speed_kph_4        │ 100.00% │  │
-   │ speed_kph_5        │ 100.00% │  │
-   │ speed_kph_6        │ 100.00% │  │
-   │ speed_kph_7        │ 100.00% │  │
-   │ speed_kph_8        │ 100.00% │  │
-   │ speed_kph_9        │ 100.00% │  │
-   │ weight_kg_0        │ 100.00% │  │
-   │ weight_kg_1        │ 100.00% │  │
-   │ weight_kg_10       │ 100.00% │  │
-   │ weight_kg_11       │ 100.00% │  │
-   │ weight_kg_12       │ 100.00% │  │
-   │ weight_kg_13       │ 100.00% │  │
-   │ weight_kg_14       │ 100.00% │  │
-   │ weight_kg_15       │ 100.00% │  │
-   │ weight_kg_16       │ 100.00% │  │
-   │ weight_kg_17       │ 100.00% │  │
-   │ weight_kg_18       │ 100.00% │  │
-   │ weight_kg_19       │ 100.00% │  │
-   │ weight_kg_2        │ 100.00% │  │
-   │ weight_kg_3        │ 100.00% │  │
-   │ weight_kg_4        │ 100.00% │  │
-   │ weight_kg_5        │ 100.00% │  │
-   │ weight_kg_6        │ 100.00% │  │
-   │ weight_kg_7        │ 100.00% │  │
-   │ weight_kg_8        │ 100.00% │  │
-   │ weight_kg_9        │ 100.00% │  │
-   └────────────────────┴─────────┴──┘
+   ┌────────────────────┬─────────┐
+   │ life_expectancy_0  │ 100.00% │
+   │ life_expectancy_1  │ 100.00% │
+   │ life_expectancy_10 │ 100.00% │
+   │ life_expectancy_11 │ 100.00% │
+   │ life_expectancy_12 │ 100.00% │
+   │ life_expectancy_13 │ 100.00% │
+   │ life_expectancy_14 │ 100.00% │
+   │ life_expectancy_15 │ 100.00% │
+   │ life_expectancy_16 │ 100.00% │
+   │ life_expectancy_17 │ 100.00% │
+   │ life_expectancy_18 │ 100.00% │
+   │ life_expectancy_19 │ 100.00% │
+   │ life_expectancy_2  │ 100.00% │
+   │ life_expectancy_3  │ 100.00% │
+   │ life_expectancy_4  │ 100.00% │
+   │ life_expectancy_5  │ 100.00% │
+   │ life_expectancy_6  │ 100.00% │
+   │ life_expectancy_7  │ 100.00% │
+   │ life_expectancy_8  │ 100.00% │
+   │ life_expectancy_9  │ 100.00% │
+   │ speed_kph_0        │ 100.00% │
+   │ speed_kph_1        │ 100.00% │
+   │ speed_kph_10       │ 100.00% │
+   │ speed_kph_11       │ 100.00% │
+   │ speed_kph_12       │ 100.00% │
+   │ speed_kph_13       │ 100.00% │
+   │ speed_kph_14       │ 100.00% │
+   │ speed_kph_15       │ 100.00% │
+   │ speed_kph_16       │ 100.00% │
+   │ speed_kph_17       │ 100.00% │
+   │ speed_kph_18       │ 100.00% │
+   │ speed_kph_19       │ 100.00% │
+   │ speed_kph_2        │ 100.00% │
+   │ speed_kph_3        │ 100.00% │
+   │ speed_kph_4        │ 100.00% │
+   │ speed_kph_5        │ 100.00% │
+   │ speed_kph_6        │ 100.00% │
+   │ speed_kph_7        │ 100.00% │
+   │ speed_kph_8        │ 100.00% │
+   │ speed_kph_9        │ 100.00% │
+   │ weight_kg_0        │ 100.00% │
+   │ weight_kg_1        │ 100.00% │
+   │ weight_kg_10       │ 100.00% │
+   │ weight_kg_11       │ 100.00% │
+   │ weight_kg_12       │ 100.00% │
+   │ weight_kg_13       │ 100.00% │
+   │ weight_kg_14       │ 100.00% │
+   │ weight_kg_15       │ 100.00% │
+   │ weight_kg_16       │ 100.00% │
+   │ weight_kg_17       │ 100.00% │
+   │ weight_kg_18       │ 100.00% │
+   │ weight_kg_19       │ 100.00% │
+   │ weight_kg_2        │ 100.00% │
+   │ weight_kg_3        │ 100.00% │
+   │ weight_kg_4        │ 100.00% │
+   │ weight_kg_5        │ 100.00% │
+   │ weight_kg_6        │ 100.00% │
+   │ weight_kg_7        │ 100.00% │
+   │ weight_kg_8        │ 100.00% │
+   │ weight_kg_9        │ 100.00% │
+   └────────────────────┴─────────┘
 
  Rows left only
  ▔▔▔▔▔▔▔▔▔▔▔▔▔▔
diff --git a/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt b/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
index faafd15..016ac42 100644
--- a/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
+++ b/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_False_sample_pk_False.txt
@@ -22,65 +22,65 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌────────────────────┬─────────┬──┐
-   │ [36mlife_expectancy_0 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_1 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_10[0m │ 100.00% │  │
-   │ [36mlife_expectancy_11[0m │ 100.00% │  │
-   │ [36mlife_expectancy_12[0m │ 100.00% │  │
-   │ [36mlife_expectancy_13[0m │ 100.00% │  │
-   │ [36mlife_expectancy_14[0m │ 100.00% │  │
-   │ [36mlife_expectancy_15[0m │ 100.00% │  │
-   │ [36mlife_expectancy_16[0m │ 100.00% │  │
-   │ [36mlife_expectancy_17[0m │ 100.00% │  │
-   │ [36mlife_expectancy_18[0m │ 100.00% │  │
-   │ [36mlife_expectancy_19[0m │ 100.00% │  │
-   │ [36mlife_expectancy_2 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_3 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_4 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_5 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_6 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_7 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_8 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_9 [0m │ 100.00% │  │
-   │ [36mspeed_kph_0       [0m │ 100.00% │  │
-   │ [36mspeed_kph_1       [0m │ 100.00% │  │
-   │ [36mspeed_kph_10      [0m │ 100.00% │  │
-   │ [36mspeed_kph_11      [0m │ 100.00% │  │
-   │ [36mspeed_kph_12      [0m │ 100.00% │  │
-   │ [36mspeed_kph_13      [0m │ 100.00% │  │
-   │ [36mspeed_kph_14      [0m │ 100.00% │  │
-   │ [36mspeed_kph_15      [0m │ 100.00% │  │
-   │ [36mspeed_kph_16      [0m │ 100.00% │  │
-   │ [36mspeed_kph_17      [0m │ 100.00% │  │
-   │ [36mspeed_kph_18      [0m │ 100.00% │  │
-   │ [36mspeed_kph_19      [0m │ 100.00% │  │
-   │ [36mspeed_kph_2       [0m │ 100.00% │  │
-   │ [36mspeed_kph_3       [0m │ 100.00% │  │
-   │ [36mspeed_kph_4       [0m │ 100.00% │  │
-   │ [36mspeed_kph_5       [0m │ 100.00% │  │
-   │ [36mspeed_kph_6       [0m │ 100.00% │  │
-   │ [36mspeed_kph_7       [0m │ 100.00% │  │
-   │ [36mspeed_kph_8       [0m │ 100.00% │  │
-   │ [36mspeed_kph_9       [0m │ 100.00% │  │
-   │ [36mweight_kg_0       [0m │ 100.00% │  │
-   │ [36mweight_kg_1       [0m │ 100.00% │  │
-   │ [36mweight_kg_10      [0m │ 100.00% │  │
-   │ [36mweight_kg_11      [0m │ 100.00% │  │
-   │ [36mweight_kg_12      [0m │ 100.00% │  │
-   │ [36mweight_kg_13      [0m │ 100.00% │  │
-   │ [36mweight_kg_14      [0m │ 100.00% │  │
-   │ [36mweight_kg_15      [0m │ 100.00% │  │
-   │ [36mweight_kg_16      [0m │ 100.00% │  │
-   │ [36mweight_kg_17      [0m │ 100.00% │  │
-   │ [36mweight_kg_18      [0m │ 100.00% │  │
-   │ [36mweight_kg_19      [0m │ 100.00% │  │
-   │ [36mweight_kg_2       [0m │ 100.00% │  │
-   │ [36mweight_kg_3       [0m │ 100.00% │  │
-   │ [36mweight_kg_4       [0m │ 100.00% │  │
-   │ [36mweight_kg_5       [0m │ 100.00% │  │
-   │ [36mweight_kg_6       [0m │ 100.00% │  │
-   │ [36mweight_kg_7       [0m │ 100.00% │  │
-   │ [36mweight_kg_8       [0m │ 100.00% │  │
-   │ [36mweight_kg_9       [0m │ 100.00% │  │
-   └────────────────────┴─────────┴──┘
+   ┌────────────────────┬─────────┐
+   │ [36mlife_expectancy_0 [0m │ 100.00% │
+   │ [36mlife_expectancy_1 [0m │ 100.00% │
+   │ [36mlife_expectancy_10[0m │ 100.00% │
+   │ [36mlife_expectancy_11[0m │ 100.00% │
+   │ [36mlife_expectancy_12[0m │ 100.00% │
+   │ [36mlife_expectancy_13[0m │ 100.00% │
+   │ [36mlife_expectancy_14[0m │ 100.00% │
+   │ [36mlife_expectancy_15[0m │ 100.00% │
+   │ [36mlife_expectancy_16[0m │ 100.00% │
+   │ [36mlife_expectancy_17[0m │ 100.00% │
+   │ [36mlife_expectancy_18[0m │ 100.00% │
+   │ [36mlife_expectancy_19[0m │ 100.00% │
+   │ [36mlife_expectancy_2 [0m │ 100.00% │
+   │ [36mlife_expectancy_3 [0m │ 100.00% │
+   │ [36mlife_expectancy_4 [0m │ 100.00% │
+   │ [36mlife_expectancy_5 [0m │ 100.00% │
+   │ [36mlife_expectancy_6 [0m │ 100.00% │
+   │ [36mlife_expectancy_7 [0m │ 100.00% │
+   │ [36mlife_expectancy_8 [0m │ 100.00% │
+   │ [36mlife_expectancy_9 [0m │ 100.00% │
+   │ [36mspeed_kph_0       [0m │ 100.00% │
+   │ [36mspeed_kph_1       [0m │ 100.00% │
+   │ [36mspeed_kph_10      [0m │ 100.00% │
+   │ [36mspeed_kph_11      [0m │ 100.00% │
+   │ [36mspeed_kph_12      [0m │ 100.00% │
+   │ [36mspeed_kph_13      [0m │ 100.00% │
+   │ [36mspeed_kph_14      [0m │ 100.00% │
+   │ [36mspeed_kph_15      [0m │ 100.00% │
+   │ [36mspeed_kph_16      [0m │ 100.00% │
+   │ [36mspeed_kph_17      [0m │ 100.00% │
+   │ [36mspeed_kph_18      [0m │ 100.00% │
+   │ [36mspeed_kph_19      [0m │ 100.00% │
+   │ [36mspeed_kph_2       [0m │ 100.00% │
+   │ [36mspeed_kph_3       [0m │ 100.00% │
+   │ [36mspeed_kph_4       [0m │ 100.00% │
+   │ [36mspeed_kph_5       [0m │ 100.00% │
+   │ [36mspeed_kph_6       [0m │ 100.00% │
+   │ [36mspeed_kph_7       [0m │ 100.00% │
+   │ [36mspeed_kph_8       [0m │ 100.00% │
+   │ [36mspeed_kph_9       [0m │ 100.00% │
+   │ [36mweight_kg_0       [0m │ 100.00% │
+   │ [36mweight_kg_1       [0m │ 100.00% │
+   │ [36mweight_kg_10      [0m │ 100.00% │
+   │ [36mweight_kg_11      [0m │ 100.00% │
+   │ [36mweight_kg_12      [0m │ 100.00% │
+   │ [36mweight_kg_13      [0m │ 100.00% │
+   │ [36mweight_kg_14      [0m │ 100.00% │
+   │ [36mweight_kg_15      [0m │ 100.00% │
+   │ [36mweight_kg_16      [0m │ 100.00% │
+   │ [36mweight_kg_17      [0m │ 100.00% │
+   │ [36mweight_kg_18      [0m │ 100.00% │
+   │ [36mweight_kg_19      [0m │ 100.00% │
+   │ [36mweight_kg_2       [0m │ 100.00% │
+   │ [36mweight_kg_3       [0m │ 100.00% │
+   │ [36mweight_kg_4       [0m │ 100.00% │
+   │ [36mweight_kg_5       [0m │ 100.00% │
+   │ [36mweight_kg_6       [0m │ 100.00% │
+   │ [36mweight_kg_7       [0m │ 100.00% │
+   │ [36mweight_kg_8       [0m │ 100.00% │
+   │ [36mweight_kg_9       [0m │ 100.00% │
+   └────────────────────┴─────────┘
diff --git a/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt b/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
index ad33e1a..6b1046c 100644
--- a/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
+++ b/tests/summary/fixtures/many_pk_columns/gen/pretty_True_perfect_True_top_True_slim_False_sample_rows_True_sample_pk_True.txt
@@ -22,68 +22,68 @@
 
  [1mColumns[0m
  ▔▔▔▔▔▔▔
-   ┌────────────────────┬─────────┬──┐
-   │ [36mlife_expectancy_0 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_1 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_10[0m │ 100.00% │  │
-   │ [36mlife_expectancy_11[0m │ 100.00% │  │
-   │ [36mlife_expectancy_12[0m │ 100.00% │  │
-   │ [36mlife_expectancy_13[0m │ 100.00% │  │
-   │ [36mlife_expectancy_14[0m │ 100.00% │  │
-   │ [36mlife_expectancy_15[0m │ 100.00% │  │
-   │ [36mlife_expectancy_16[0m │ 100.00% │  │
-   │ [36mlife_expectancy_17[0m │ 100.00% │  │
-   │ [36mlife_expectancy_18[0m │ 100.00% │  │
-   │ [36mlife_expectancy_19[0m │ 100.00% │  │
-   │ [36mlife_expectancy_2 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_3 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_4 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_5 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_6 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_7 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_8 [0m │ 100.00% │  │
-   │ [36mlife_expectancy_9 [0m │ 100.00% │  │
-   │ [36mspeed_kph_0       [0m │ 100.00% │  │
-   │ [36mspeed_kph_1       [0m │ 100.00% │  │
-   │ [36mspeed_kph_10      [0m │ 100.00% │  │
-   │ [36mspeed_kph_11      [0m │ 100.00% │  │
-   │ [36mspeed_kph_12      [0m │ 100.00% │  │
-   │ [36mspeed_kph_13      [0m │ 100.00% │  │
-   │ [36mspeed_kph_14      [0m │ 100.00% │  │
-   │ [36mspeed_kph_15      [0m │ 100.00% │  │
-   │ [36mspeed_kph_16      [0m │ 100.00% │  │
-   │ [36mspeed_kph_17      [0m │ 100.00% │  │
-   │ [36mspeed_kph_18      [0m │ 100.00% │  │
-   │ [36mspeed_kph_19      [0m │ 100.00% │  │
-   │ [36mspeed_kph_2       [0m │ 100.00% │  │
-   │ [36mspeed_kph_3       [0m │ 100.00% │  │
-   │ [36mspeed_kph_4       [0m │ 100.00% │  │
-   │ [36mspeed_kph_5       [0m │ 100.00% │  │
-   │ [36mspeed_kph_6       [0m │ 100.00% │  │
-   │ [36mspeed_kph_7       [0m │ 100.00% │  │
-   │ [36mspeed_kph_8       [0m │ 100.00% │  │
-   │ [36mspeed_kph_9       [0m │ 100.00% │  │
-   │ [36mweight_kg_0       [0m │ 100.00% │  │
-   │ [36mweight_kg_1       [0m │ 100.00% │  │
-   │ [36mweight_kg_10      [0m │ 100.00% │  │
-   │ [36mweight_kg_11      [0m │ 100.00% │  │
-   │ [36mweight_kg_12      [0m │ 100.00% │  │
-   │ [36mweight_kg_13      [0m │ 100.00% │  │
-   │ [36mweight_kg_14      [0m │ 100.00% │  │
-   │ [36mweight_kg_15      [0m │ 100.00% │  │
-   │ [36mweight_kg_16      [0m │ 100.00% │  │
-   │ [36mweight_kg_17      [0m │ 100.00% │  │
-   │ [36mweight_kg_18      [0m │ 100.00% │  │
-   │ [36mweight_kg_19      [0m │ 100.00% │  │
-   │ [36mweight_kg_2       [0m │ 100.00% │  │
-   │ [36mweight_kg_3       [0m │ 100.00% │  │
-   │ [36mweight_kg_4       [0m │ 100.00% │  │
-   │ [36mweight_kg_5       [0m │ 100.00% │  │
-   │ [36mweight_kg_6       [0m │ 100.00% │  │
-   │ [36mweight_kg_7       [0m │ 100.00% │  │
-   │ [36mweight_kg_8       [0m │ 100.00% │  │
-   │ [36mweight_kg_9       [0m │ 100.00% │  │
-   └────────────────────┴─────────┴──┘
+   ┌────────────────────┬─────────┐
+   │ [36mlife_expectancy_0 [0m │ 100.00% │
+   │ [36mlife_expectancy_1 [0m │ 100.00% │
+   │ [36mlife_expectancy_10[0m │ 100.00% │
+   │ [36mlife_expectancy_11[0m │ 100.00% │
+   │ [36mlife_expectancy_12[0m │ 100.00% │
+   │ [36mlife_expectancy_13[0m │ 100.00% │
+   │ [36mlife_expectancy_14[0m │ 100.00% │
+   │ [36mlife_expectancy_15[0m │ 100.00% │
+   │ [36mlife_expectancy_16[0m │ 100.00% │
+   │ [36mlife_expectancy_17[0m │ 100.00% │
+   │ [36mlife_expectancy_18[0m │ 100.00% │
+   │ [36mlife_expectancy_19[0m │ 100.00% │
+   │ [36mlife_expectancy_2 [0m │ 100.00% │
+   │ [36mlife_expectancy_3 [0m │ 100.00% │
+   │ [36mlife_expectancy_4 [0m │ 100.00% │
+   │ [36mlife_expectancy_5 [0m │ 100.00% │
+   │ [36mlife_expectancy_6 [0m │ 100.00% │
+   │ [36mlife_expectancy_7 [0m │ 100.00% │
+   │ [36mlife_expectancy_8 [0m │ 100.00% │
+   │ [36mlife_expectancy_9 [0m │ 100.00% │
+   │ [36mspeed_kph_0       [0m │ 100.00% │
+   │ [36mspeed_kph_1       [0m │ 100.00% │
+   │ [36mspeed_kph_10      [0m │ 100.00% │
+   │ [36mspeed_kph_11      [0m │ 100.00% │
+   │ [36mspeed_kph_12      [0m │ 100.00% │
+   │ [36mspeed_kph_13      [0m │ 100.00% │
+   │ [36mspeed_kph_14      [0m │ 100.00% │
+   │ [36mspeed_kph_15      [0m │ 100.00% │
+   │ [36mspeed_kph_16      [0m │ 100.00% │
+   │ [36mspeed_kph_17      [0m │ 100.00% │
+   │ [36mspeed_kph_18      [0m │ 100.00% │
+   │ [36mspeed_kph_19      [0m │ 100.00% │
+   │ [36mspeed_kph_2       [0m │ 100.00% │
+   │ [36mspeed_kph_3       [0m │ 100.00% │
+   │ [36mspeed_kph_4       [0m │ 100.00% │
+   │ [36mspeed_kph_5       [0m │ 100.00% │
+   │ [36mspeed_kph_6       [0m │ 100.00% │
+   │ [36mspeed_kph_7       [0m │ 100.00% │
+   │ [36mspeed_kph_8       [0m │ 100.00% │
+   │ [36mspeed_kph_9       [0m │ 100.00% │
+   │ [36mweight_kg_0       [0m │ 100.00% │
+   │ [36mweight_kg_1       [0m │ 100.00% │
+   │ [36mweight_kg_10      [0m │ 100.00% │
+   │ [36mweight_kg_11      [0m │ 100.00% │
+   │ [36mweight_kg_12      [0m │ 100.00% │
+   │ [36mweight_kg_13      [0m │ 100.00% │
+   │ [36mweight_kg_14      [0m │ 100.00% │
+   │ [36mweight_kg_15      [0m │ 100.00% │
+   │ [36mweight_kg_16      [0m │ 100.00% │
+   │ [36mweight_kg_17      [0m │ 100.00% │
+   │ [36mweight_kg_18      [0m │ 100.00% │
+   │ [36mweight_kg_19      [0m │ 100.00% │
+   │ [36mweight_kg_2       [0m │ 100.00% │
+   │ [36mweight_kg_3       [0m │ 100.00% │
+   │ [36mweight_kg_4       [0m │ 100.00% │
+   │ [36mweight_kg_5       [0m │ 100.00% │
+   │ [36mweight_kg_6       [0m │ 100.00% │
+   │ [36mweight_kg_7       [0m │ 100.00% │
+   │ [36mweight_kg_8       [0m │ 100.00% │
+   │ [36mweight_kg_9       [0m │ 100.00% │
+   └────────────────────┴─────────┘
 
  [1mRows left only[0m
  ▔▔▔▔▔▔▔▔▔▔▔▔▔▔
diff --git a/tests/test_summary_data.py b/tests/test_summary_data.py
new file mode 100644
index 0000000..aab6b44
--- /dev/null
+++ b/tests/test_summary_data.py
@@ -0,0 +1,249 @@
+# Copyright (c) QuantCo 2025-2026
+# SPDX-License-Identifier: BSD-3-Clause
+
+import itertools
+import json
+
+import polars as pl
+import pytest
+
+from diffly import compare_frames
+from diffly.comparison import DataFrameComparison
+from diffly.summary import (
+    SummaryData,
+    SummaryDataColumn,
+    SummaryDataColumnChange,
+    SummaryDataRows,
+    SummaryDataSchemas,
+)
+
+
+def _make_comparison() -> DataFrameComparison:
+    """A rich comparison with schema diffs, row diffs, and column diffs."""
+    left = pl.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "status": ["a", "b", "c", "d"],
+            "value": [10.0, 20.0, 30.0, 40.0],
+            "left_col": ["x", "y", "z", "w"],
+        }
+    )
+    right = pl.DataFrame(
+        {
+            "id": [1, 2, 3, 5],
+            "status": ["a", "x", "x", "e"],
+            "value": [10.0, 25.0, 30.0, 50.0],
+            "right_col": ["p", "q", "r", "s"],
+        }
+    )
+    return compare_frames(left, right, primary_key="id")
+
+
+@pytest.mark.parametrize(
+    "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk",
+    [
+        (*combo[:2], combo[2], combo[3], combo[3] and combo[1])
+        for combo in itertools.product([True, False], repeat=4)
+    ],
+)
+def test_summary_data_parametrized(
+    show_perfect_column_matches: bool,
+    show_top_column_changes: bool,
+    slim: bool,
+    sample_rows: bool,
+    sample_pk: bool,
+) -> None:
+    comp = _make_comparison()
+    summary = comp.summary(
+        show_perfect_column_matches=show_perfect_column_matches,
+        top_k_column_changes=3 if show_top_column_changes else 0,
+        slim=slim,
+        sample_k_rows_only=3 if sample_rows else 0,
+        show_sample_primary_key_per_change=sample_pk,
+    )
+    data = summary._data
+
+    assert isinstance(data, SummaryData)
+    assert data.equal is False
+    assert data.primary_key == ["id"]
+
+    # --- Schemas ---
+    schemas_equal = comp.schemas.equal()
+    if slim and schemas_equal:
+        assert data.schemas is None
+    else:
+        assert isinstance(data.schemas, SummaryDataSchemas)
+        assert len(data.schemas.left_only) > 0  # left_col
+        assert len(data.schemas.right_only) > 0  # right_col
+
+    # --- Rows ---
+    rows_equal = comp._equal_rows()
+    if slim and rows_equal:
+        assert data.rows is None
+    else:
+        assert isinstance(data.rows, SummaryDataRows)
+        assert data.rows.n_left == 4
+        assert data.rows.n_right == 4
+        assert data.rows.n_left_only is not None
+        assert data.rows.n_right_only is not None
+
+    # --- Columns ---
+    assert data.columns is not None
+    match_rates = comp.fraction_same()
+    for col in data.columns:
+        assert isinstance(col, SummaryDataColumn)
+        rate = match_rates[col.name]
+        assert col.match_rate == rate
+        if show_top_column_changes and rate < 1:
+            assert col.changes is not None
+            for change in col.changes:
+                assert isinstance(change, SummaryDataColumnChange)
+                if sample_pk:
+                    assert isinstance(change.sample_pk, tuple)
+                    assert len(change.sample_pk) == 1
+                else:
+                    assert change.sample_pk is None
+        else:
+            assert col.changes is None
+
+    # --- Sample rows ---
+    if sample_rows:
+        assert data.sample_rows_left_only is not None
+        assert data.sample_rows_right_only is not None
+        assert len(data.sample_rows_left_only) > 0
+        assert len(data.sample_rows_right_only) > 0
+        for row in data.sample_rows_left_only:
+            assert isinstance(row, tuple)
+        for row in data.sample_rows_right_only:
+            assert isinstance(row, tuple)
+    else:
+        assert data.sample_rows_left_only is None
+        assert data.sample_rows_right_only is None
+
+    # JSON roundtrip
+    parsed = json.loads(summary.to_json())
+    assert isinstance(parsed, dict)
+    assert parsed["equal"] is False
+
+
+def test_summary_data_equal_frames() -> None:
+    df = pl.DataFrame({"id": [1, 2], "value": [10.0, 20.0]})
+    comp = compare_frames(df, df, primary_key="id")
+    data = comp.summary()._data
+    assert data.equal is True
+    assert data.schemas is None
+    assert data.rows is None
+    assert data.columns is None
+    assert data.sample_rows_left_only is None
+    assert data.sample_rows_right_only is None
+
+
+def test_summary_data_no_primary_key() -> None:
+    left = pl.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
+    right = pl.DataFrame({"a": [1, 2], "b": [3.0, 5.0]})
+    comp = compare_frames(left, right)
+    data = comp.summary()._data
+    assert data.equal is False
+    assert data.primary_key is None
+    assert data.rows is not None
+    assert data.rows.n_left_only is None
+    assert data.rows.n_joined_equal is None
+    assert data.columns is None
+    assert data.sample_rows_left_only is None
+    assert data.sample_rows_right_only is None
+
+
+def test_summary_data_hidden_columns() -> None:
+    left = pl.DataFrame({"id": [1, 2], "secret": ["a", "b"], "value": [10.0, 20.0]})
+    right = pl.DataFrame({"id": [1, 2], "secret": ["a", "x"], "value": [10.0, 25.0]})
+    comp = compare_frames(left, right, primary_key="id")
+    data = comp.summary(
+        top_k_column_changes=3,
+        hidden_columns=["secret"],
+    )._data
+    assert data.columns is not None
+    for col in data.columns:
+        if col.name == "secret":
+            assert col.changes is None
+        elif col.match_rate < 1:
+            assert col.changes is not None
+
+
+def test_summary_data_validate_hidden_pk_sample_rows() -> None:
+    df = pl.DataFrame({"id": ["a", "b", "c"]})
+    comp = compare_frames(df, df.filter(pl.col("id") == "a"), primary_key=["id"])
+    with pytest.raises(ValueError, match="Cannot show sample rows only"):
+        comp.summary(sample_k_rows_only=3, hidden_columns=["id"])
+
+
+def test_summary_data_validate_hidden_pk_sample_pk() -> None:
+    df = pl.DataFrame({"id": ["a", "b", "c"], "value": [1.0, 2.0, 3.0]})
+    comp = compare_frames(df, df.with_columns(pl.col("value") + 1), primary_key=["id"])
+    with pytest.raises(ValueError, match="Cannot show sample primary key"):
+        comp.summary(
+            top_k_column_changes=3,
+            show_sample_primary_key_per_change=True,
+            hidden_columns=["id"],
+        )
+
+
+def test_summary_data_validate_zero_top_k_with_sample_pk() -> None:
+    df = pl.DataFrame({"id": ["a", "b"], "value": [1.0, 2.0]})
+    comp = compare_frames(df, df.with_columns(pl.col("value") + 1), primary_key=["id"])
+    with pytest.raises(
+        ValueError,
+        match="Cannot show sample primary key per change when top_k_column_changes is 0",
+    ):
+        comp.summary(top_k_column_changes=0, show_sample_primary_key_per_change=True)
+
+
+def test_summary_data_multiple_pk_columns() -> None:
+    left = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"], "val": [10, 20, 30]})
+    right = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"], "val": [10, 99, 30]})
+    comp = compare_frames(left, right, primary_key=["a", "b"])
+    data = comp.summary(
+        top_k_column_changes=3,
+        show_sample_primary_key_per_change=True,
+        sample_k_rows_only=3,
+    )._data
+    assert data.primary_key == ["a", "b"]
+    assert data.columns is not None
+    for col in data.columns:
+        if col.changes:
+            for change in col.changes:
+                assert isinstance(change.sample_pk, tuple)
+                assert len(change.sample_pk) == 2
+
+
+def test_summary_data_to_dict() -> None:
+    df = pl.DataFrame({"id": [1, 2], "value": [10.0, 20.0]})
+    comp = compare_frames(df, df, primary_key="id")
+    d = comp.summary()._data.to_dict()
+    assert isinstance(d, dict)
+    assert d["equal"] is True
+
+
+def test_summary_data_slim_suppresses_matching_sections() -> None:
+    left = pl.DataFrame({"id": [1, 2, 3], "value": [10.0, 20.0, 30.0]})
+    right = pl.DataFrame({"id": [1, 2, 3], "value": [10.0, 25.0, 30.0]})
+    comp = compare_frames(left, right, primary_key="id")
+    data = comp.summary(slim=True)._data
+
+    # Schemas match -> None in slim mode
+    assert data.schemas is None
+    # Rows have differences (joined unequal) -> shown
+    assert data.rows is not None
+    # Columns have differences -> shown
+    assert data.columns is not None
+
+
+def test_summary_data_n_total_changes() -> None:
+    left = pl.DataFrame({"id": list(range(10)), "val": list(range(10))})
+    right = pl.DataFrame({"id": list(range(10)), "val": list(range(10, 20))})
+    comp = compare_frames(left, right, primary_key="id")
+    data = comp.summary(top_k_column_changes=3)._data
+    assert data.columns is not None
+    col = next(c for c in data.columns if c.name == "val")
+    assert col.changes is not None
+    assert len(col.changes) == 3
+    assert col.n_total_changes == 10

From 9633f1f76653863b0307eb438bcc48ee69517a44 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 15:55:01 +0200
Subject: [PATCH 04/12] refactor: Move render options into SummaryData, improve test coverage

---
 diffly/summary.py          | 64 ++++++++++++++++++++------------------
 tests/test_summary_data.py | 48 ++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/diffly/summary.py b/diffly/summary.py
index c7daf1c..c0113f0 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -49,10 +49,10 @@ class SummaryDataSchemas:
 class SummaryDataRows:
     n_left: int
     n_right: int
-    n_left_only: int | None  # None when no primary key
-    n_joined_equal: int | None  # None when no primary key
-    n_joined_unequal: int | None  # None when no primary key
-    n_right_only: int | None  # None when no primary key
+    n_left_only: int | None
+    n_joined_equal: int | None
+    n_joined_unequal: int | None
+    n_right_only: int | None
 
 
 @dataclass
@@ -75,6 +75,8 @@ class SummaryDataColumn:
 class SummaryData:
     equal: bool
     n_rows_left: int
+    slim: bool
+    show_perfect_column_matches: bool
     left_name: str
     right_name: str
     primary_key: list[str] | None
@@ -126,9 +128,8 @@ def _compute_summary_data(
 
     hidden_columns = hidden_columns or []
 
-    # Validation (same as old Summary.__init__)
-    if comparison.primary_key is not None:
-        overlap = set(hidden_columns).intersection(set(comparison.primary_key))
+    def _validate_primary_key_hidden_columns() -> None:
+        overlap = set(hidden_columns).intersection(set(comparison.primary_key or []))
         if overlap and sample_k_rows_only > 0:
             raise ValueError(
                 f"Cannot show sample rows only on the left or right side when primary"
@@ -139,6 +140,8 @@ def _compute_summary_data(
                 f"Cannot show sample primary key for changed columns when primary"
                 f" key column(s) {', '.join(overlap)} should be hidden."
             )
+
+    _validate_primary_key_hidden_columns()
     if top_k_column_changes == 0 and show_sample_primary_key_per_change:
         raise ValueError(
             "Cannot show sample primary key per change when top_k_column_changes is 0."
@@ -148,8 +151,6 @@ def _compute_summary_data(
         col: 0 if col in hidden_columns else top_k_column_changes
         for col in comparison._other_common_columns
     }
-
-    # Materialize frames (same pattern as old Summary.__init__)
     comp = DataFrameComparison(
         left=comparison.left.collect().lazy(),
         right=comparison.right.collect().lazy(),
@@ -169,6 +170,8 @@ def _compute_summary_data(
         return SummaryData(
             equal=True,
             n_rows_left=n_rows_left,
+            slim=slim,
+            show_perfect_column_matches=show_perfect_column_matches,
             left_name=left_name,
             right_name=right_name,
             primary_key=comp.primary_key,
@@ -293,6 +296,8 @@ def _compute_summary_data(
     return SummaryData(
         equal=False,
         n_rows_left=n_rows_left,
+        slim=slim,
+        show_perfect_column_matches=show_perfect_column_matches,
         left_name=left_name,
         right_name=right_name,
         primary_key=comp.primary_key,
@@ -330,11 +335,6 @@ def __init__(
         slim: bool,
         hidden_columns: list[str] | None,
     ):
-        def _truncate_name(name: str) -> str:
-            if len(name) > CUSTOM_COLUMN_NAME_MAX_LENGTH:
-                return f"{name[:CUSTOM_COLUMN_NAME_MAX_LENGTH]}..."
-            return name
-
         self._data = _compute_summary_data(
             comparison,
             show_perfect_column_matches=show_perfect_column_matches,
@@ -346,10 +346,6 @@ def _truncate_name(name: str) -> str:
             slim=slim,
             hidden_columns=hidden_columns,
         )
-        self.slim = slim
-        self.show_perfect_column_matches = show_perfect_column_matches
-        self.left_name = _truncate_name(left_name)
-        self.right_name = _truncate_name(right_name)
 
     def format(self, pretty: bool | None = None) -> str:
         """Format this summary for printing.
@@ -393,7 +389,7 @@ def __repr__(self) -> str:
     # -------------------------------------------------------------------------------- #
 
     def _print_to_console(self, console: Console) -> None:
-        if not self.slim:
+        if not self._data.slim:
             console.print(
                 Panel(
                     Text("Diffly Summary", style="bold", justify="center"),
@@ -437,7 +433,7 @@ def _print_primary_key(self, console: Console) -> None:
             )
         # NOTE: The primary key is only displayed in the default mode. If a primary
         # key was not supplied, the warning is displayed in both modes.
-        if not self.slim or primary_key is None:
+        if not self._data.slim or primary_key is None:
             console.print(Padding(content, pad=(0, 3)))
             console.print("")
 
@@ -492,7 +488,7 @@ def _print_num_columns(n: int) -> str:
 
         # Left only
         if len(left_only_names) > 0:
-            left_only_header = f"{capitalize_first(self.left_name)} only \n{_print_num_columns(len(left_only_names))}"
+            left_only_header = f"{capitalize_first(_truncate_name(self._data.left_name))} only \n{_print_num_columns(len(left_only_names))}"
             table.add_column(
                 left_only_header,
                 header_style="red",
@@ -546,7 +542,7 @@ def _print_num_columns(n: int) -> str:
 
         # Right only
         if len(right_only_names) > 0:
-            right_only_header = f"{capitalize_first(self.right_name)} only\n{_print_num_columns(len(right_only_names))}"
+            right_only_header = f"{capitalize_first(_truncate_name(self._data.right_name))} only\n{_print_num_columns(len(right_only_names))}"
             table.add_column(
                 right_only_header,
                 header_style="green",
@@ -611,7 +607,7 @@ def _render_rows_with_primary_key(self, rows: SummaryDataRows) -> RenderableType
         else:
             # NOTE: In slim mode, we omit the row counts section and only show the
             # row matches section.
-            if (rows.n_left == rows.n_right) and self.slim:
+            if (rows.n_left == rows.n_right) and self._data.slim:
                 content = Group(self._section_row_matches(rows))
             else:
                 content = Group(
@@ -636,8 +632,10 @@ def _section_row_counts(self, rows: SummaryDataRows) -> RenderableType:
         count_rows: list[RenderableType] = []
 
         count_grid = Table(padding=0, box=None)
-        left_header = f"{capitalize_first(self.left_name)} count"
-        right_header = f"{capitalize_first(self.right_name)} count"
+        left_header = f"{capitalize_first(_truncate_name(self._data.left_name))} count"
+        right_header = (
+            f"{capitalize_first(_truncate_name(self._data.right_name))} count"
+        )
         count_grid.add_column(left_header, justify="center")
         count_grid.add_column("", justify="center")
         count_grid.add_column(right_header, justify="center")
@@ -743,7 +741,7 @@ def _section_row_matches(self, rows: SummaryDataRows) -> RenderableType:
                 fraction_left_only = rows.n_left_only / rows.n_left
                 grid.add_row(
                     f"{rows.n_left_only:,}",
-                    f"{self.left_name} only",
+                    f"{_truncate_name(self._data.left_name)} only",
                     f"({_format_fraction_as_percentage(fraction_left_only)})",
                 )
                 grid.add_section()
@@ -767,7 +765,7 @@ def _section_row_matches(self, rows: SummaryDataRows) -> RenderableType:
                 fraction_right_only = rows.n_right_only / rows.n_right
                 grid.add_row(
                     f"{rows.n_right_only:,}",
-                    f"{self.right_name} only",
+                    f"{_truncate_name(self._data.right_name)} only",
                     f"({_format_fraction_as_percentage(fraction_right_only)})",
                 )
             columns.append(grid)
@@ -812,7 +810,7 @@ def _section_columns(self) -> RenderableType:
             visible = [
                 c
                 for c in columns
-                if self.show_perfect_column_matches or c.match_rate < 1
+                if self._data.show_perfect_column_matches or c.match_rate < 1
             ]
             if not visible:
                 display_items.append(
@@ -882,10 +880,10 @@ def _section_columns(self) -> RenderableType:
     def _print_sample_rows_only_one_side(self, console: Console, side: Side) -> None:
         if side == Side.LEFT:
             sample_rows = self._data.sample_rows_left_only
-            name = self.left_name
+            name = _truncate_name(self._data.left_name)
         else:
             sample_rows = self._data.sample_rows_right_only
-            name = self.right_name
+            name = _truncate_name(self._data.right_name)
 
         primary_key = self._data.primary_key
         if primary_key is not None and sample_rows is not None and len(sample_rows) > 0:
@@ -935,6 +933,12 @@ def _print_section(console: Console, heading: str, content: RenderableType) -> N
     )
 
 
+def _truncate_name(name: str) -> str:
+    if len(name) > CUSTOM_COLUMN_NAME_MAX_LENGTH:
+        return f"{name[:CUSTOM_COLUMN_NAME_MAX_LENGTH]}..."
+    return name
+
+
 def _format_colname(name: str) -> str:
     return f"[cyan]{name}[/cyan]"
 
diff --git a/tests/test_summary_data.py b/tests/test_summary_data.py
index aab6b44..76479f0 100644
--- a/tests/test_summary_data.py
+++ b/tests/test_summary_data.py
@@ -3,6 +3,8 @@
 
 import itertools
 import json
+from datetime import date, datetime, timedelta
+from decimal import Decimal
 
 import polars as pl
 import pytest
@@ -15,6 +17,7 @@
     SummaryDataColumnChange,
     SummaryDataRows,
     SummaryDataSchemas,
+    _to_python,
 )
 
 
@@ -237,6 +240,51 @@ def test_summary_data_slim_suppresses_matching_sections() -> None:
     assert data.columns is not None
 
 
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        (datetime(2024, 1, 15, 12, 30), "2024-01-15T12:30:00"),
+        (date(2024, 1, 15), "2024-01-15"),
+        (timedelta(seconds=5), 5.0),
+        (Decimal("1.5"), 1.5),
+        (42, 42),
+        ("hello", "hello"),
+        (None, None),
+    ],
+)
+def test_to_python(value: object, expected: object) -> None:
+    assert _to_python(value) == expected
+
+
+def test_to_dict_with_typed_values() -> None:
+    comp = _make_comparison()
+    summary = comp.summary(top_k_column_changes=3, sample_k_rows_only=3)
+    d = summary._data.to_dict()
+
+    assert isinstance(d, dict)
+    assert d["equal"] is False
+    assert isinstance(d["columns"], list)
+    assert isinstance(d["sample_rows_left_only"], list)
+    # Verify roundtrip through JSON works
+    json_str = json.dumps(d)
+    parsed = json.loads(json_str)
+    assert parsed["equal"] is False
+    assert len(parsed["columns"]) > 0
+
+
+def test_to_json_with_date_values() -> None:
+    left = pl.DataFrame({"id": [1, 2], "d": [date(2024, 1, 1), date(2024, 6, 1)]})
+    right = pl.DataFrame({"id": [1, 2], "d": [date(2024, 1, 1), date(2024, 12, 1)]})
+    comp = compare_frames(left, right, primary_key="id")
+    summary = comp.summary(top_k_column_changes=3)
+    parsed = json.loads(summary.to_json())
+    assert parsed["equal"] is False
+    col = next(c for c in parsed["columns"] if c["name"] == "d")
+    assert col["changes"] is not None
+    assert col["changes"][0]["old"] == "2024-06-01"
+    assert col["changes"][0]["new"] == "2024-12-01"
+
+
 def test_summary_data_n_total_changes() -> None:
     left = pl.DataFrame({"id": list(range(10)), "val": list(range(10))})
     right = pl.DataFrame({"id": list(range(10)), "val": list(range(10, 20))})

From 74332e87c41b3f92fe3a08efcdd16a3bc19906f7 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 16:25:49 +0200
Subject: [PATCH 05/12] test: Cover --json output in CLI smoke test

---
 tests/cli/test_cli.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py
index b22b5b7..63926d4 100644
--- a/tests/cli/test_cli.py
+++ b/tests/cli/test_cli.py
@@ -16,7 +16,8 @@
 runner = CliRunner()
 
 
-def test_cli_smoke(tmp_path: Path) -> None:
+@pytest.mark.parametrize("output_json", [False, True])
+def test_cli_smoke(tmp_path: Path, output_json: bool) -> None:
     left = pl.DataFrame(
         {
             "name": ["cat", "dog", "mouse"],
@@ -35,20 +36,23 @@ def test_cli_smoke(tmp_path: Path) -> None:
 
     left.write_parquet(tmp_path / "left.parquet")
     right.write_parquet(tmp_path / "right.parquet")
-    result = runner.invoke(
-        app,
-        [
-            str(tmp_path / "left.parquet"),
-            str(tmp_path / "right.parquet"),
-            "--primary-key",
-            "name",
-        ],
-        color=True,
-    )
+    args = [
+        str(tmp_path / "left.parquet"),
+        str(tmp_path / "right.parquet"),
+        "--primary-key",
+        "name",
+    ]
+    if output_json:
+        args.append("--json")
+    result = runner.invoke(app, args, color=True)
     comparison = compare_frames(
         pl.scan_parquet(tmp_path / "left.parquet"),
         pl.scan_parquet(tmp_path / "right.parquet"),
         primary_key="name",
     )
     assert result.exit_code == 0
-    assert result.output == comparison.summary().format(pretty=True) + "\n"
+
+    if output_json:
+        assert result.output == comparison.summary().to_json() + "\n"
+    else:
+        assert result.output == comparison.summary().format(pretty=True) + "\n"

From 23b293edaa23330644ae7abc0b368c7aea8ff138 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 17:03:12 +0200
Subject: [PATCH 06/12] refactor: Add private precomputed fields to summary dataclasses

---
 diffly/summary.py | 71 +++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/diffly/summary.py b/diffly/summary.py
index c0113f0..6be2f33 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -6,7 +6,7 @@
 import dataclasses
 import io
 import json
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import date, datetime, timedelta
 from decimal import Decimal
 from typing import TYPE_CHECKING, Any, Literal, cast
@@ -43,6 +43,10 @@ class SummaryDataSchemas:
     left_only: list[tuple[str, str]]
     in_common: list[tuple[str, str, str]]
     right_only: list[tuple[str, str]]
+    _equal: bool = field(default=False, repr=False)
+    _mismatching_dtypes: list[tuple[str, str, str]] = field(
+        default_factory=list, repr=False
+    )
 
 
 @dataclass
@@ -53,6 +57,8 @@ class SummaryDataRows:
     n_joined_equal: int | None
     n_joined_unequal: int | None
     n_right_only: int | None
+    _equal_rows: bool = field(default=False, repr=False)
+    _equal_num_rows: bool = field(default=False, repr=False)
 
 
 @dataclass
@@ -85,11 +91,13 @@ class SummaryData:
     columns: list[SummaryDataColumn] | None
     sample_rows_left_only: list[tuple[Any, ...]] | None
     sample_rows_right_only: list[tuple[Any, ...]] | None
+    _truncated_left_name: str = field(default="", repr=False)
+    _truncated_right_name: str = field(default="", repr=False)
 
     def to_dict(self) -> dict[str, Any]:
         def _convert(obj: Any) -> Any:
             if isinstance(obj, dict):
-                return {k: _convert(v) for k, v in obj.items()}
+                return {k: _convert(v) for k, v in obj.items() if not k.startswith("_")}
             if isinstance(obj, (list, tuple)):
                 return type(obj)(_convert(v) for v in obj)
             return _to_python(obj)
@@ -166,6 +174,9 @@ def _validate_primary_key_hidden_columns() -> None:
     is_equal = comp.equal()
     n_rows_left = comp.num_rows_left()
 
+    truncated_left = _truncate_name(left_name)
+    truncated_right = _truncate_name(right_name)
+
     if is_equal:
         return SummaryData(
             equal=True,
@@ -180,6 +191,8 @@ def _validate_primary_key_hidden_columns() -> None:
             columns=None,
             sample_rows_left_only=None,
             sample_rows_right_only=None,
+            _truncated_left_name=truncated_left,
+            _truncated_right_name=truncated_right,
         )
 
     # --- Schemas ---
@@ -190,6 +203,7 @@ def _validate_primary_key_hidden_columns() -> None:
         left_only_cols = sorted(schemas_obj.left_only().items())
         right_only_cols = sorted(schemas_obj.right_only().items())
         in_common = sorted(schemas_obj.in_common().items())
+        mismatching = sorted(schemas_obj.in_common().mismatching_dtypes().items())
         schemas = SummaryDataSchemas(
             left_only=[(name, str(dtype)) for name, dtype in left_only_cols],
             in_common=[
@@ -197,6 +211,11 @@ def _validate_primary_key_hidden_columns() -> None:
                 for name, (left_dtype, right_dtype) in in_common
             ],
             right_only=[(name, str(dtype)) for name, dtype in right_only_cols],
+            _equal=schemas_equal,
+            _mismatching_dtypes=[
+                (name, str(left_dtype), str(right_dtype))
+                for name, (left_dtype, right_dtype) in mismatching
+            ],
         )
 
     # --- Rows ---
@@ -215,6 +234,8 @@ def _validate_primary_key_hidden_columns() -> None:
                 n_joined_equal=comp.num_rows_joined_equal(),
                 n_joined_unequal=comp.num_rows_joined_unequal(),
                 n_right_only=comp.num_rows_right_only(),
+                _equal_rows=comp._equal_rows(),
+                _equal_num_rows=comp.equal_num_rows(),
             )
         else:
             rows = SummaryDataRows(
@@ -224,6 +245,8 @@ def _validate_primary_key_hidden_columns() -> None:
                 n_joined_equal=None,
                 n_joined_unequal=None,
                 n_right_only=None,
+                _equal_rows=False,
+                _equal_num_rows=comp.equal_num_rows(),
             )
 
     # --- Columns ---
@@ -306,6 +329,8 @@ def _validate_primary_key_hidden_columns() -> None:
         columns=columns,
         sample_rows_left_only=sample_rows_left_only,
         sample_rows_right_only=sample_rows_right_only,
+        _truncated_left_name=truncated_left,
+        _truncated_right_name=truncated_right,
     )
 
 
@@ -421,8 +446,7 @@ def _print_diff(self, console: Console) -> None:
     # --------------------------------- PRIMARY KEY ---------------------------------- #
 
     def _print_primary_key(self, console: Console) -> None:
-        primary_key = self._data.primary_key
-        if primary_key is not None:
+        if (primary_key := self._data.primary_key) is not None:
             content = self._section_primary_key(primary_key)
         else:
             content = Text(
@@ -449,14 +473,8 @@ def _print_schemas(self, console: Console) -> None:
             return
 
         schemas = self._data.schemas
-        schemas_equal = (
-            not schemas.left_only
-            and not schemas.right_only
-            and all(left == right for _, left, right in schemas.in_common)
-        )
-
         content: RenderableType
-        if schemas_equal:
+        if schemas._equal:
             num_cols = len(schemas.in_common)
             content = Text(
                 f"Schemas match exactly (column count: {num_cols:,}).", style="italic"
@@ -488,7 +506,7 @@ def _print_num_columns(n: int) -> str:
 
         # Left only
         if len(left_only_names) > 0:
-            left_only_header = f"{capitalize_first(_truncate_name(self._data.left_name))} only \n{_print_num_columns(len(left_only_names))}"
+            left_only_header = f"{capitalize_first(self._data._truncated_left_name)} only \n{_print_num_columns(len(left_only_names))}"
             table.add_column(
                 left_only_header,
                 header_style="red",
@@ -512,11 +530,7 @@ def _print_num_columns(n: int) -> str:
         )
         num_in_common = len(schemas.in_common)
         table_data[in_common_header] = []
-        mismatching = [
-            (name, left, right)
-            for name, left, right in schemas.in_common
-            if left != right
-        ]
+        mismatching = schemas._mismatching_dtypes
         if len(mismatching) == 0:
             table_data[in_common_header] = ["..."]
             max_column_width = max(
@@ -542,7 +556,7 @@ def _print_num_columns(n: int) -> str:
 
         # Right only
         if len(right_only_names) > 0:
-            right_only_header = f"{capitalize_first(_truncate_name(self._data.right_name))} only\n{_print_num_columns(len(right_only_names))}"
+            right_only_header = f"{capitalize_first(self._data._truncated_right_name)} only\n{_print_num_columns(len(right_only_names))}"
             table.add_column(
                 right_only_header,
                 header_style="green",
@@ -582,7 +596,7 @@ def _print_rows(self, console: Console) -> None:
 
     def _render_rows_without_primary_key(self, rows: SummaryDataRows) -> RenderableType:
         content: RenderableType
-        if rows.n_left == rows.n_right:
+        if rows._equal_num_rows:
             content = Text(
                 f"The number of rows matches exactly (row count: {rows.n_left:,}).",
                 style="italic",
@@ -598,8 +612,7 @@ def _render_rows_with_primary_key(self, rows: SummaryDataRows) -> RenderableType
         assert rows.n_right_only is not None
 
         content: RenderableType
-        equal_rows = rows.n_joined_equal == rows.n_left == rows.n_right
-        if equal_rows:
+        if rows._equal_rows:
             content = Text(
                 f"All rows match exactly (row count: {rows.n_left:,}).",
                 style="italic",
@@ -607,7 +620,7 @@ def _render_rows_with_primary_key(self, rows: SummaryDataRows) -> RenderableType
         else:
             # NOTE: In slim mode, we omit the row counts section and only show the
             # row matches section.
-            if (rows.n_left == rows.n_right) and self._data.slim:
+            if rows._equal_num_rows and self._data.slim:
                 content = Group(self._section_row_matches(rows))
             else:
                 content = Group(
@@ -632,10 +645,8 @@ def _section_row_counts(self, rows: SummaryDataRows) -> RenderableType:
         count_rows: list[RenderableType] = []
 
         count_grid = Table(padding=0, box=None)
-        left_header = f"{capitalize_first(_truncate_name(self._data.left_name))} count"
-        right_header = (
-            f"{capitalize_first(_truncate_name(self._data.right_name))} count"
-        )
+        left_header = f"{capitalize_first(self._data._truncated_left_name)} count"
+        right_header = f"{capitalize_first(self._data._truncated_right_name)} count"
         count_grid.add_column(left_header, justify="center")
         count_grid.add_column("", justify="center")
         count_grid.add_column(right_header, justify="center")
@@ -741,7 +752,7 @@ def _section_row_matches(self, rows: SummaryDataRows) -> RenderableType:
                 fraction_left_only = rows.n_left_only / rows.n_left
                 grid.add_row(
                     f"{rows.n_left_only:,}",
-                    f"{_truncate_name(self._data.left_name)} only",
+                    f"{self._data._truncated_left_name} only",
                     f"({_format_fraction_as_percentage(fraction_left_only)})",
                 )
                 grid.add_section()
@@ -765,7 +776,7 @@ def _section_row_matches(self, rows: SummaryDataRows) -> RenderableType:
                 fraction_right_only = rows.n_right_only / rows.n_right
                 grid.add_row(
                     f"{rows.n_right_only:,}",
-                    f"{_truncate_name(self._data.right_name)} only",
+                    f"{self._data._truncated_right_name} only",
                     f"({_format_fraction_as_percentage(fraction_right_only)})",
                 )
             columns.append(grid)
@@ -880,10 +891,10 @@ def _section_columns(self) -> RenderableType:
     def _print_sample_rows_only_one_side(self, console: Console, side: Side) -> None:
         if side == Side.LEFT:
             sample_rows = self._data.sample_rows_left_only
-            name = _truncate_name(self._data.left_name)
+            name = self._data._truncated_left_name
         else:
             sample_rows = self._data.sample_rows_right_only
-            name = _truncate_name(self._data.right_name)
+            name = self._data._truncated_right_name
 
         primary_key = self._data.primary_key
         if primary_key is not None and sample_rows is not None and len(sample_rows) > 0:

From fbaaba1eecfda9e93c5bae9c41e5ddfe02f0259d Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 17:18:40 +0200
Subject: [PATCH 07/12] refactor: Precompute rendering flags in SummaryData

---
 diffly/summary.py | 241 ++++++++++++++++++++++++----------------------
 1 file changed, 125 insertions(+), 116 deletions(-)

diff --git a/diffly/summary.py b/diffly/summary.py
index 6be2f33..9d596fd 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -40,9 +40,9 @@
 
 @dataclass
 class SummaryDataSchemas:
-    left_only: list[tuple[str, str]]
+    left_only: list[str]
     in_common: list[tuple[str, str, str]]
-    right_only: list[tuple[str, str]]
+    right_only: list[str]
     _equal: bool = field(default=False, repr=False)
     _mismatching_dtypes: list[tuple[str, str, str]] = field(
         default_factory=list, repr=False
@@ -59,6 +59,7 @@ class SummaryDataRows:
     n_right_only: int | None
     _equal_rows: bool = field(default=False, repr=False)
     _equal_num_rows: bool = field(default=False, repr=False)
+    _show_row_counts: bool = field(default=True, repr=False)
 
 
 @dataclass
@@ -80,9 +81,6 @@ class SummaryDataColumn:
 @dataclass
 class SummaryData:
     equal: bool
-    n_rows_left: int
-    slim: bool
-    show_perfect_column_matches: bool
     left_name: str
     right_name: str
     primary_key: list[str] | None
@@ -91,6 +89,10 @@ class SummaryData:
     columns: list[SummaryDataColumn] | None
     sample_rows_left_only: list[tuple[Any, ...]] | None
     sample_rows_right_only: list[tuple[Any, ...]] | None
+    _n_rows_left: int = field(default=0, repr=False)
+    _show_header: bool = field(default=True, repr=False)
+    _show_primary_key_section: bool = field(default=True, repr=False)
+    _has_common_columns: bool = field(default=False, repr=False)
     _truncated_left_name: str = field(default="", repr=False)
     _truncated_right_name: str = field(default="", repr=False)
 
@@ -180,9 +182,6 @@ def _validate_primary_key_hidden_columns() -> None:
     if is_equal:
         return SummaryData(
             equal=True,
-            n_rows_left=n_rows_left,
-            slim=slim,
-            show_perfect_column_matches=show_perfect_column_matches,
             left_name=left_name,
             right_name=right_name,
             primary_key=comp.primary_key,
@@ -191,6 +190,10 @@ def _validate_primary_key_hidden_columns() -> None:
             columns=None,
             sample_rows_left_only=None,
             sample_rows_right_only=None,
+            _n_rows_left=n_rows_left,
+            _show_header=not slim,
+            _show_primary_key_section=True,
+            _has_common_columns=bool(comp._other_common_columns),
             _truncated_left_name=truncated_left,
             _truncated_right_name=truncated_right,
         )
@@ -200,17 +203,15 @@ def _validate_primary_key_hidden_columns() -> None:
     schemas_obj = comp.schemas
     schemas_equal = schemas_obj.equal()
     if not slim or not schemas_equal:
-        left_only_cols = sorted(schemas_obj.left_only().items())
-        right_only_cols = sorted(schemas_obj.right_only().items())
         in_common = sorted(schemas_obj.in_common().items())
         mismatching = sorted(schemas_obj.in_common().mismatching_dtypes().items())
         schemas = SummaryDataSchemas(
-            left_only=[(name, str(dtype)) for name, dtype in left_only_cols],
+            left_only=sorted(schemas_obj.left_only().column_names()),
             in_common=[
                 (name, str(left_dtype), str(right_dtype))
                 for name, (left_dtype, right_dtype) in in_common
             ],
-            right_only=[(name, str(dtype)) for name, dtype in right_only_cols],
+            right_only=sorted(schemas_obj.right_only().column_names()),
             _equal=schemas_equal,
             _mismatching_dtypes=[
                 (name, str(left_dtype), str(right_dtype))
@@ -236,6 +237,7 @@ def _validate_primary_key_hidden_columns() -> None:
                 n_right_only=comp.num_rows_right_only(),
                 _equal_rows=comp._equal_rows(),
                 _equal_num_rows=comp.equal_num_rows(),
+                _show_row_counts=not (comp.equal_num_rows() and slim),
             )
         else:
             rows = SummaryDataRows(
@@ -247,6 +249,7 @@ def _validate_primary_key_hidden_columns() -> None:
                 n_right_only=None,
                 _equal_rows=False,
                 _equal_num_rows=comp.equal_num_rows(),
+                _show_row_counts=True,
             )
 
     # --- Columns ---
@@ -254,6 +257,7 @@ def _validate_primary_key_hidden_columns() -> None:
     match_rates_can_be_computed = (
         comp.primary_key is not None and comp.num_rows_joined() > 0
     )
+    has_common_columns = bool(comp._other_common_columns)
     if match_rates_can_be_computed:
         match_rates = comp.fraction_same()
         all_match = not comp._other_common_columns or min(match_rates.values()) >= 1
@@ -261,6 +265,8 @@ def _validate_primary_key_hidden_columns() -> None:
             columns = []
             for col_name in sorted(match_rates):
                 rate = match_rates[col_name]
+                if not show_perfect_column_matches and rate >= 1:
+                    continue
                 top_k = top_k_changes_by_column[col_name]
                 changes: list[SummaryDataColumnChange] | None = None
                 n_total_changes = 0
@@ -318,9 +324,6 @@ def _validate_primary_key_hidden_columns() -> None:
 
     return SummaryData(
         equal=False,
-        n_rows_left=n_rows_left,
-        slim=slim,
-        show_perfect_column_matches=show_perfect_column_matches,
         left_name=left_name,
         right_name=right_name,
         primary_key=comp.primary_key,
@@ -329,6 +332,10 @@ def _validate_primary_key_hidden_columns() -> None:
         columns=columns,
         sample_rows_left_only=sample_rows_left_only,
         sample_rows_right_only=sample_rows_right_only,
+        _n_rows_left=n_rows_left,
+        _show_header=not slim,
+        _show_primary_key_section=not slim or comp.primary_key is None,
+        _has_common_columns=has_common_columns,
         _truncated_left_name=truncated_left,
         _truncated_right_name=truncated_right,
     )
@@ -414,7 +421,7 @@ def __repr__(self) -> str:
     # -------------------------------------------------------------------------------- #
 
     def _print_to_console(self, console: Console) -> None:
-        if not self._data.slim:
+        if self._data._show_header:
             console.print(
                 Panel(
                     Text("Diffly Summary", style="bold", justify="center"),
@@ -427,7 +434,7 @@ def _print_to_console(self, console: Console) -> None:
             self._print_diff(console)
 
     def _print_equal(self, console: Console) -> None:
-        if self._data.n_rows_left == 0:
+        if self._data._n_rows_left == 0:
             message = "--- Data frames are empty, but their schema matches exactly! ---"
         else:
             message = "--- Data frames match exactly! ---"
@@ -446,8 +453,8 @@ def _print_diff(self, console: Console) -> None:
     # --------------------------------- PRIMARY KEY ---------------------------------- #
 
     def _print_primary_key(self, console: Console) -> None:
-        if (primary_key := self._data.primary_key) is not None:
-            content = self._section_primary_key(primary_key)
+        if self._data.primary_key is not None:
+            content = self._section_primary_key()
         else:
             content = Text(
                 "Attention: the data frames do not match exactly, but as no primary"
@@ -455,13 +462,13 @@ def _print_primary_key(self, console: Console) -> None:
                 " computed.",
                 style="italic",
             )
-        # NOTE: The primary key is only displayed in the default mode. If a primary
-        # key was not supplied, the warning is displayed in both modes.
-        if not self._data.slim or primary_key is None:
+        if self._data._show_primary_key_section:
             console.print(Padding(content, pad=(0, 3)))
             console.print("")
 
-    def _section_primary_key(self, primary_key: list[str]) -> RenderableType:
+    def _section_primary_key(self) -> RenderableType:
+        primary_key = self._data.primary_key
+        assert primary_key is not None
         return Group(
             f"Primary key: {', '.join(_format_colname(col) for col in primary_key)}"
         )
@@ -469,10 +476,10 @@ def _section_primary_key(self, primary_key: list[str]) -> RenderableType:
     # ------------------------------------ SCHEMA ------------------------------------ #
 
     def _print_schemas(self, console: Console) -> None:
-        if self._data.schemas is None:
+        schemas = self._data.schemas
+        if schemas is None:
             return
 
-        schemas = self._data.schemas
         content: RenderableType
         if schemas._equal:
             num_cols = len(schemas.in_common)
@@ -480,18 +487,21 @@ def _print_schemas(self, console: Console) -> None:
                 f"Schemas match exactly (column count: {num_cols:,}).", style="italic"
             )
         else:
-            content = self._section_schemas(schemas)
+            content = self._section_schemas()
 
         _print_section(console, "Schemas", content)
 
-    def _section_schemas(self, schemas: SummaryDataSchemas) -> RenderableType:
+    def _section_schemas(self) -> RenderableType:
+        schemas = self._data.schemas
+        assert schemas is not None
+
         def _print_num_columns(n: int) -> str:
             return f"{n:,} column{'s' if n != 1 else ''}"
 
         table = Table()
 
-        left_only_names = {name for name, _ in schemas.left_only}
-        right_only_names = {name for name, _ in schemas.right_only}
+        left_only_names = set(schemas.left_only)
+        right_only_names = set(schemas.right_only)
         max_column_width = max(
             len(column) for column in left_only_names | right_only_names | {""}
         )
@@ -530,21 +540,23 @@ def _print_num_columns(n: int) -> str:
         )
         num_in_common = len(schemas.in_common)
         table_data[in_common_header] = []
-        mismatching = schemas._mismatching_dtypes
-        if len(mismatching) == 0:
+        common_but_mismatching = schemas._mismatching_dtypes
+        if len(common_but_mismatching) == 0:
             table_data[in_common_header] = ["..."]
             max_column_width = max(
                 max_column_width, len(table_data[in_common_header][0])
             )
         else:
-            for col, left_dtype, right_dtype in sorted(mismatching, key=lambda x: x[0]):
+            for col, left_dtype, right_dtype in sorted(
+                common_but_mismatching, key=lambda x: x[0]
+            ):
                 table_data[in_common_header].append(
                     f"{_format_colname(col)} [{left_dtype} -> {right_dtype}]"
                 )
                 max_column_width = max(
                     max_column_width, len(f"{col} [{left_dtype} -> {right_dtype}]")
                 )
-            num_remaining = num_in_common - len(mismatching)
+            num_remaining = num_in_common - len(common_but_mismatching)
             if num_remaining > 0:
                 table_data[in_common_header].append(
                     f"(+{_print_num_columns(num_remaining)} with matching "
@@ -586,15 +598,16 @@ def _print_rows(self, console: Console) -> None:
         if self._data.rows is None:
             return
 
-        rows = self._data.rows
         content: RenderableType
         if self._data.primary_key is None:
-            content = self._render_rows_without_primary_key(rows)
+            content = self._render_rows_without_primary_key()
         else:
-            content = self._render_rows_with_primary_key(rows)
+            content = self._render_rows_with_primary_key()
         _print_section(console, "Rows", content)
 
-    def _render_rows_without_primary_key(self, rows: SummaryDataRows) -> RenderableType:
+    def _render_rows_without_primary_key(self) -> RenderableType:
+        rows = self._data.rows
+        assert rows is not None
         content: RenderableType
         if rows._equal_num_rows:
             content = Text(
@@ -602,10 +615,12 @@ def _render_rows_without_primary_key(self, rows: SummaryDataRows) -> RenderableT
                 style="italic",
             )
         else:
-            content = self._section_row_counts(rows)
+            content = self._section_row_counts()
         return content
 
-    def _render_rows_with_primary_key(self, rows: SummaryDataRows) -> RenderableType:
+    def _render_rows_with_primary_key(self) -> RenderableType:
+        rows = self._data.rows
+        assert rows is not None
         assert rows.n_joined_equal is not None
         assert rows.n_joined_unequal is not None
         assert rows.n_left_only is not None
@@ -618,19 +633,19 @@ def _render_rows_with_primary_key(self, rows: SummaryDataRows) -> RenderableType
                 style="italic",
             )
         else:
-            # NOTE: In slim mode, we omit the row counts section and only show the
-            # row matches section.
-            if rows._equal_num_rows and self._data.slim:
-                content = Group(self._section_row_matches(rows))
+            if not rows._show_row_counts:
+                content = Group(self._section_row_matches())
             else:
                 content = Group(
-                    self._section_row_counts(rows),
+                    self._section_row_counts(),
                     "",
-                    self._section_row_matches(rows),
+                    self._section_row_matches(),
                 )
         return content
 
-    def _section_row_counts(self, rows: SummaryDataRows) -> RenderableType:
+    def _section_row_counts(self) -> RenderableType:
+        rows = self._data.rows
+        assert rows is not None
         gain_loss = ""
         if rows.n_left > 0:
             fraction_rows_right = rows.n_right / rows.n_left
@@ -659,7 +674,9 @@ def _section_row_counts(self, rows: SummaryDataRows) -> RenderableType:
 
         return Group(*count_rows)
 
-    def _section_row_matches(self, rows: SummaryDataRows) -> RenderableType:
+    def _section_row_matches(self) -> RenderableType:
+        rows = self._data.rows
+        assert rows is not None
         assert rows.n_left_only is not None
         assert rows.n_joined_equal is not None
         assert rows.n_joined_unequal is not None
@@ -813,76 +830,65 @@ def _section_columns(self) -> RenderableType:
         columns = self._data.columns
         assert columns is not None
 
-        if not columns:
+        if not self._data._has_common_columns:
             display_items.append(
                 Text("No common non-primary key columns to compare.", style="italic")
             )
+        elif not columns:
+            display_items.append(Text("All columns match perfectly.", style="italic"))
         else:
-            visible = [
-                c
-                for c in columns
-                if self._data.show_perfect_column_matches or c.match_rate < 1
-            ]
-            if not visible:
-                display_items.append(
-                    Text("All columns match perfectly.", style="italic")
-                )
-            else:
-                matches = Table(show_header=False)
-                matches.add_column(
-                    "Column",
-                    max_width=COLUMN_SECTION_COLUMN_WIDTH,
-                    overflow=OVERFLOW,
-                )
-                matches.add_column("Match Rate", justify="right")
-                has_top_changes_column = any(
-                    c.changes is not None for c in columns if c.match_rate < 1
-                )
-                if has_top_changes_column:
-                    matches.add_column("Top Changes", justify="right")
-                max_col_len = max(len(c.name) for c in visible)
-                for col in visible:
-                    row_items: list[RenderableType] = [
-                        Text(col.name, style="cyan"),
-                        f"{_format_fraction_as_percentage(col.match_rate)}",
-                    ]
-                    if col.changes is not None:
-                        change_lines = []
-                        for change in col.changes:
-                            line = (
-                                f"{_format_value(change.old)} -> "
-                                f"{_format_value(change.new)} ({change.count:,}x"
-                            )
-                            if change.sample_pk is not None:
-                                line += ", e.g. "
-                                if len(change.sample_pk) == 1:
-                                    line += _format_value(change.sample_pk[0])
-                                else:
-                                    line += "("
-                                    line += ", ".join(
-                                        [_format_value(v) for v in change.sample_pk]
-                                    )
-                                    line += ")"
-                            line += ")"
-                            change_lines.append(line)
-
-                        remaining_count = col.n_total_changes - len(col.changes)
-                        if remaining_count > 0:
-                            change_lines.append(
-                                f"(...and {remaining_count:,} {('other' if remaining_count == 1 else 'others')})"
-                            )
+            matches = Table(show_header=False)
+            matches.add_column(
+                "Column",
+                max_width=COLUMN_SECTION_COLUMN_WIDTH,
+                overflow=OVERFLOW,
+            )
+            matches.add_column("Match Rate", justify="right")
+            has_top_changes_column = any(
+                c.changes is not None for c in columns if c.match_rate < 1
+            )
+            if has_top_changes_column:
+                matches.add_column("Top Changes", justify="right")
+            max_col_len = max(len(c.name) for c in columns)
+            for col in columns:
+                row_items: list[RenderableType] = [
+                    Text(col.name, style="cyan"),
+                    f"{_format_fraction_as_percentage(col.match_rate)}",
+                ]
+                if col.changes is not None:
+                    change_lines = []
+                    for change in col.changes:
+                        line = (
+                            f"{_format_value(change.old)} -> "
+                            f"{_format_value(change.new)} ({change.count:,}x"
+                        )
+                        if change.sample_pk is not None:
+                            line += ", e.g. "
+                            if len(change.sample_pk) == 1:
+                                line += _format_value(change.sample_pk[0])
+                            else:
+                                line += "("
+                                line += ", ".join(
+                                    [_format_value(v) for v in change.sample_pk]
+                                )
+                                line += ")"
+                        line += ")"
+                        change_lines.append(line)
+
+                    remaining_count = col.n_total_changes - len(col.changes)
+                    if remaining_count > 0:
+                        change_lines.append(
+                            f"(...and {remaining_count:,} {('other' if remaining_count == 1 else 'others')})"
+                        )
 
-                        text = "\n".join(change_lines)
-                        row_items.append(text)
+                    text = "\n".join(change_lines)
+                    row_items.append(text)
 
-                    matches.add_row(*row_items)
-                    if (
-                        has_top_changes_column
-                        or max_col_len > COLUMN_SECTION_COLUMN_WIDTH
-                    ):
-                        matches.add_section()
+                matches.add_row(*row_items)
+                if has_top_changes_column or max_col_len > COLUMN_SECTION_COLUMN_WIDTH:
+                    matches.add_section()
 
-                display_items.append(matches)
+            display_items.append(matches)
 
         return Group(*display_items)
 
@@ -901,14 +907,17 @@ def _print_sample_rows_only_one_side(self, console: Console, side: Side) -> None
             _print_section(
                 console,
                 f"Rows {name} only",
-                self._section_rows_only_one_side(sample_rows, primary_key),
+                self._section_rows_only_one_side(side),
             )
 
-    def _section_rows_only_one_side(
-        self,
-        sample_rows: list[tuple[Any, ...]],
-        primary_key: list[str],
-    ) -> RenderableType:
+    def _section_rows_only_one_side(self, side: Side) -> RenderableType:
+        if side == Side.LEFT:
+            sample_rows = self._data.sample_rows_left_only
+        else:
+            sample_rows = self._data.sample_rows_right_only
+        assert sample_rows is not None
+        primary_key = self._data.primary_key
+        assert primary_key is not None
         table = Table()
         for col in primary_key[:MAX_DISPLAYED_COLUMNS_IN_SAMPLE_TABLES]:
             table.add_column(col, overflow="ellipsis")

From a63e22940f174b12782e2a5ca33e279a4c40fae3 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 17:50:48 +0200
Subject: [PATCH 08/12] refactor: Simplify SummaryData fields and schema names

---
 diffly/summary.py          | 75 +++++++++++++++++---------------------
 tests/test_summary_data.py |  4 +-
 2 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/diffly/summary.py b/diffly/summary.py
index 9d596fd..eef03ac 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -40,9 +40,9 @@
 
 @dataclass
 class SummaryDataSchemas:
-    left_only: list[str]
+    left_only_names: list[str]
     in_common: list[tuple[str, str, str]]
-    right_only: list[str]
+    right_only_names: list[str]
     _equal: bool = field(default=False, repr=False)
     _mismatching_dtypes: list[tuple[str, str, str]] = field(
         default_factory=list, repr=False
@@ -81,18 +81,16 @@ class SummaryDataColumn:
 @dataclass
 class SummaryData:
     equal: bool
-    left_name: str
-    right_name: str
+    left_name: str | None
+    right_name: str | None
     primary_key: list[str] | None
     schemas: SummaryDataSchemas | None
     rows: SummaryDataRows | None
     columns: list[SummaryDataColumn] | None
     sample_rows_left_only: list[tuple[Any, ...]] | None
     sample_rows_right_only: list[tuple[Any, ...]] | None
-    _n_rows_left: int = field(default=0, repr=False)
-    _show_header: bool = field(default=True, repr=False)
-    _show_primary_key_section: bool = field(default=True, repr=False)
-    _has_common_columns: bool = field(default=False, repr=False)
+    _is_empty: bool = field(default=False, repr=False)
+    _other_common_columns: list[str] = field(default_factory=list, repr=False)
     _truncated_left_name: str = field(default="", repr=False)
     _truncated_right_name: str = field(default="", repr=False)
 
@@ -174,7 +172,7 @@ def _validate_primary_key_hidden_columns() -> None:
     )
 
     is_equal = comp.equal()
-    n_rows_left = comp.num_rows_left()
+    is_empty = comp.num_rows_left() == 0
 
     truncated_left = _truncate_name(left_name)
     truncated_right = _truncate_name(right_name)
@@ -182,37 +180,34 @@ def _validate_primary_key_hidden_columns() -> None:
     if is_equal:
         return SummaryData(
             equal=True,
-            left_name=left_name,
-            right_name=right_name,
-            primary_key=comp.primary_key,
+            left_name=None,
+            right_name=None,
+            primary_key=None,
             schemas=None,
             rows=None,
             columns=None,
             sample_rows_left_only=None,
             sample_rows_right_only=None,
-            _n_rows_left=n_rows_left,
-            _show_header=not slim,
-            _show_primary_key_section=True,
-            _has_common_columns=bool(comp._other_common_columns),
+            _is_empty=is_empty,
+            _other_common_columns=comp._other_common_columns,
             _truncated_left_name=truncated_left,
             _truncated_right_name=truncated_right,
         )
 
     # --- Schemas ---
     schemas: SummaryDataSchemas | None = None
-    schemas_obj = comp.schemas
-    schemas_equal = schemas_obj.equal()
-    if not slim or not schemas_equal:
-        in_common = sorted(schemas_obj.in_common().items())
-        mismatching = sorted(schemas_obj.in_common().mismatching_dtypes().items())
+    # NOTE: In slim mode, we only print the section if there are differences.
+    if not slim or not comp.schemas.equal():
+        in_common = sorted(comp.schemas.in_common().items())
+        mismatching = sorted(comp.schemas.in_common().mismatching_dtypes().items())
         schemas = SummaryDataSchemas(
-            left_only=sorted(schemas_obj.left_only().column_names()),
+            left_only_names=sorted(comp.schemas.left_only().column_names()),
             in_common=[
                 (name, str(left_dtype), str(right_dtype))
                 for name, (left_dtype, right_dtype) in in_common
             ],
-            right_only=sorted(schemas_obj.right_only().column_names()),
-            _equal=schemas_equal,
+            right_only_names=sorted(comp.schemas.right_only().column_names()),
+            _equal=comp.schemas.equal(),
             _mismatching_dtypes=[
                 (name, str(left_dtype), str(right_dtype))
                 for name, (left_dtype, right_dtype) in mismatching
@@ -221,13 +216,12 @@ def _validate_primary_key_hidden_columns() -> None:
 
     # --- Rows ---
     rows: SummaryDataRows | None = None
-    has_pk = comp.primary_key is not None
-    if has_pk:
+    if comp.primary_key is not None:
         rows_equal = comp._equal_rows()
     else:
         rows_equal = comp.equal_num_rows()
     if not slim or not rows_equal:
-        if has_pk:
+        if comp.primary_key is not None:
             rows = SummaryDataRows(
                 n_left=comp.num_rows_left(),
                 n_right=comp.num_rows_right(),
@@ -257,11 +251,9 @@ def _validate_primary_key_hidden_columns() -> None:
     match_rates_can_be_computed = (
         comp.primary_key is not None and comp.num_rows_joined() > 0
     )
-    has_common_columns = bool(comp._other_common_columns)
     if match_rates_can_be_computed:
         match_rates = comp.fraction_same()
-        all_match = not comp._other_common_columns or min(match_rates.values()) >= 1
-        if not slim or not all_match:
+        if not slim or (comp._other_common_columns and min(match_rates.values()) < 1):
             columns = []
             for col_name in sorted(match_rates):
                 rate = match_rates[col_name]
@@ -304,7 +296,7 @@ def _validate_primary_key_hidden_columns() -> None:
     # --- Sample rows left/right only ---
     sample_rows_left_only: list[tuple[Any, ...]] | None = None
     sample_rows_right_only: list[tuple[Any, ...]] | None = None
-    if has_pk and sample_k_rows_only > 0:
+    if comp.primary_key is not None and sample_k_rows_only > 0:
         pk = comp.primary_key
         assert isinstance(pk, list)
 
@@ -332,10 +324,8 @@ def _validate_primary_key_hidden_columns() -> None:
         columns=columns,
         sample_rows_left_only=sample_rows_left_only,
         sample_rows_right_only=sample_rows_right_only,
-        _n_rows_left=n_rows_left,
-        _show_header=not slim,
-        _show_primary_key_section=not slim or comp.primary_key is None,
-        _has_common_columns=has_common_columns,
+        _is_empty=is_empty,
+        _other_common_columns=comp._other_common_columns,
         _truncated_left_name=truncated_left,
         _truncated_right_name=truncated_right,
     )
@@ -367,6 +357,7 @@ def __init__(
         slim: bool,
         hidden_columns: list[str] | None,
     ):
+        self.slim = slim
         self._data = _compute_summary_data(
             comparison,
             show_perfect_column_matches=show_perfect_column_matches,
@@ -421,7 +412,7 @@ def __repr__(self) -> str:
     # -------------------------------------------------------------------------------- #
 
     def _print_to_console(self, console: Console) -> None:
-        if self._data._show_header:
+        if not self.slim:
             console.print(
                 Panel(
                     Text("Diffly Summary", style="bold", justify="center"),
@@ -434,7 +425,7 @@ def _print_to_console(self, console: Console) -> None:
             self._print_diff(console)
 
     def _print_equal(self, console: Console) -> None:
-        if self._data._n_rows_left == 0:
+        if self._data._is_empty:
             message = "--- Data frames are empty, but their schema matches exactly! ---"
         else:
             message = "--- Data frames match exactly! ---"
@@ -453,7 +444,7 @@ def _print_diff(self, console: Console) -> None:
     # --------------------------------- PRIMARY KEY ---------------------------------- #
 
     def _print_primary_key(self, console: Console) -> None:
-        if self._data.primary_key is not None:
+        if (primary_key := self._data.primary_key) is not None:
             content = self._section_primary_key()
         else:
             content = Text(
@@ -462,7 +453,7 @@ def _print_primary_key(self, console: Console) -> None:
                 " computed.",
                 style="italic",
             )
-        if self._data._show_primary_key_section:
+        if not self.slim or primary_key is None:
             console.print(Padding(content, pad=(0, 3)))
             console.print("")
 
@@ -500,8 +491,8 @@ def _print_num_columns(n: int) -> str:
 
         table = Table()
 
-        left_only_names = set(schemas.left_only)
-        right_only_names = set(schemas.right_only)
+        left_only_names = set(schemas.left_only_names)
+        right_only_names = set(schemas.right_only_names)
         max_column_width = max(
             len(column) for column in left_only_names | right_only_names | {""}
         )
@@ -830,7 +821,7 @@ def _section_columns(self) -> RenderableType:
         columns = self._data.columns
         assert columns is not None
 
-        if not self._data._has_common_columns:
+        if not self._data._other_common_columns:
             display_items.append(
                 Text("No common non-primary key columns to compare.", style="italic")
             )
diff --git a/tests/test_summary_data.py b/tests/test_summary_data.py
index 76479f0..0368ad9 100644
--- a/tests/test_summary_data.py
+++ b/tests/test_summary_data.py
@@ -76,8 +76,8 @@ def test_summary_data_parametrized(
         assert data.schemas is None
     else:
         assert isinstance(data.schemas, SummaryDataSchemas)
-        assert len(data.schemas.left_only) > 0  # left_col
-        assert len(data.schemas.right_only) > 0  # right_col
+        assert len(data.schemas.left_only_names) > 0  # left_col
+        assert len(data.schemas.right_only_names) > 0  # right_col
 
     # --- Rows ---
     rows_equal = comp._equal_rows()

From f7292ada94fddcbfcb34b156e731968df498eaf7 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 18:16:37 +0200
Subject: [PATCH 09/12] simplify test

---
 diffly/summary.py          |   9 ++
 tests/test_summary_data.py | 314 ++++++++-----------------------------
 2 files changed, 78 insertions(+), 245 deletions(-)

diff --git a/diffly/summary.py b/diffly/summary.py
index eef03ac..3ed2724 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -220,6 +220,7 @@ def _validate_primary_key_hidden_columns() -> None:
         rows_equal = comp._equal_rows()
     else:
         rows_equal = comp.equal_num_rows()
+    # NOTE: In slim mode, we only print the section if there are differences.
     if not slim or not rows_equal:
         if comp.primary_key is not None:
             rows = SummaryDataRows(
@@ -231,6 +232,8 @@ def _validate_primary_key_hidden_columns() -> None:
                 n_right_only=comp.num_rows_right_only(),
                 _equal_rows=comp._equal_rows(),
                 _equal_num_rows=comp.equal_num_rows(),
+                # NOTE: In slim mode, the row counts section is omitted when both
+                # frames have the same number of rows; only row matches are shown.
                 _show_row_counts=not (comp.equal_num_rows() and slim),
             )
         else:
@@ -248,11 +251,15 @@ def _validate_primary_key_hidden_columns() -> None:
 
     # --- Columns ---
     columns: list[SummaryDataColumn] | None = None
+    # NOTE: We can only compute column matches if there are primary key columns and at
+    # least one joined row.
     match_rates_can_be_computed = (
         comp.primary_key is not None and comp.num_rows_joined() > 0
     )
     if match_rates_can_be_computed:
         match_rates = comp.fraction_same()
+        # NOTE: In slim mode, we only print the columns section if there are
+        # non-primary key columns and at least one column has a match rate < 1.
         if not slim or (comp._other_common_columns and min(match_rates.values()) < 1):
             columns = []
             for col_name in sorted(match_rates):
@@ -453,6 +460,8 @@ def _print_primary_key(self, console: Console) -> None:
                 " computed.",
                 style="italic",
             )
+        # NOTE: The primary key is only displayed in the default mode. If a primary key
+        # was not supplied, the warning is displayed in both modes.
         if not self.slim or primary_key is None:
             console.print(Padding(content, pad=(0, 3)))
             console.print("")
diff --git a/tests/test_summary_data.py b/tests/test_summary_data.py
index 0368ad9..c64dd60 100644
--- a/tests/test_summary_data.py
+++ b/tests/test_summary_data.py
@@ -3,40 +3,32 @@
 
 import itertools
 import json
-from datetime import date, datetime, timedelta
-from decimal import Decimal
 
 import polars as pl
 import pytest
 
 from diffly import compare_frames
 from diffly.comparison import DataFrameComparison
-from diffly.summary import (
-    SummaryData,
-    SummaryDataColumn,
-    SummaryDataColumnChange,
-    SummaryDataRows,
-    SummaryDataSchemas,
-    _to_python,
-)
 
 
 def _make_comparison() -> DataFrameComparison:
-    """A rich comparison with schema diffs, row diffs, and column diffs."""
+    # Designed so every parametrized flag affects the expected JSON output:
+    # - Same columns in both frames → schemas equal → slim suppresses schemas section
+    # - status matches perfectly for joined rows → show_perfect_column_matches matters
+    # - value differs for id=2 → always has a non-perfect column
+    # - id=4 left-only, id=5 right-only → sample rows matter
     left = pl.DataFrame(
         {
             "id": [1, 2, 3, 4],
             "status": ["a", "b", "c", "d"],
             "value": [10.0, 20.0, 30.0, 40.0],
-            "left_col": ["x", "y", "z", "w"],
         }
     )
     right = pl.DataFrame(
         {
             "id": [1, 2, 3, 5],
-            "status": ["a", "x", "x", "e"],
+            "status": ["a", "b", "c", "e"],
             "value": [10.0, 25.0, 30.0, 50.0],
-            "right_col": ["p", "q", "r", "s"],
         }
     )
     return compare_frames(left, right, primary_key="id")
@@ -57,241 +49,73 @@ def test_summary_data_parametrized(
     sample_pk: bool,
 ) -> None:
     comp = _make_comparison()
+    top_k = 3 if show_top_column_changes else 0
     summary = comp.summary(
         show_perfect_column_matches=show_perfect_column_matches,
-        top_k_column_changes=3 if show_top_column_changes else 0,
-        slim=slim,
+        top_k_column_changes=top_k,
         sample_k_rows_only=3 if sample_rows else 0,
         show_sample_primary_key_per_change=sample_pk,
+        slim=slim,
     )
-    data = summary._data
-
-    assert isinstance(data, SummaryData)
-    assert data.equal is False
-    assert data.primary_key == ["id"]
-
-    # --- Schemas ---
-    schemas_equal = comp.schemas.equal()
-    if slim and schemas_equal:
-        assert data.schemas is None
-    else:
-        assert isinstance(data.schemas, SummaryDataSchemas)
-        assert len(data.schemas.left_only_names) > 0  # left_col
-        assert len(data.schemas.right_only_names) > 0  # right_col
-
-    # --- Rows ---
-    rows_equal = comp._equal_rows()
-    if slim and rows_equal:
-        assert data.rows is None
-    else:
-        assert isinstance(data.rows, SummaryDataRows)
-        assert data.rows.n_left == 4
-        assert data.rows.n_right == 4
-        assert data.rows.n_left_only is not None
-        assert data.rows.n_right_only is not None
-
-    # --- Columns ---
-    assert data.columns is not None
-    match_rates = comp.fraction_same()
-    for col in data.columns:
-        assert isinstance(col, SummaryDataColumn)
-        rate = match_rates[col.name]
-        assert col.match_rate == rate
-        if show_top_column_changes and rate < 1:
-            assert col.changes is not None
-            for change in col.changes:
-                assert isinstance(change, SummaryDataColumnChange)
-                if sample_pk:
-                    assert isinstance(change.sample_pk, tuple)
-                    assert len(change.sample_pk) == 1
-                else:
-                    assert change.sample_pk is None
-        else:
-            assert col.changes is None
-
-    # --- Sample rows ---
-    if sample_rows:
-        assert data.sample_rows_left_only is not None
-        assert data.sample_rows_right_only is not None
-        assert len(data.sample_rows_left_only) > 0
-        assert len(data.sample_rows_right_only) > 0
-        for row in data.sample_rows_left_only:
-            assert isinstance(row, tuple)
-        for row in data.sample_rows_right_only:
-            assert isinstance(row, tuple)
-    else:
-        assert data.sample_rows_left_only is None
-        assert data.sample_rows_right_only is None
-
-    # JSON roundtrip
-    parsed = json.loads(summary.to_json())
-    assert isinstance(parsed, dict)
-    assert parsed["equal"] is False
-
-
-def test_summary_data_equal_frames() -> None:
-    df = pl.DataFrame({"id": [1, 2], "value": [10.0, 20.0]})
-    comp = compare_frames(df, df, primary_key="id")
-    data = comp.summary()._data
-    assert data.equal is True
-    assert data.schemas is None
-    assert data.rows is None
-    assert data.columns is None
-    assert data.sample_rows_left_only is None
-    assert data.sample_rows_right_only is None
-
-
-def test_summary_data_no_primary_key() -> None:
-    left = pl.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
-    right = pl.DataFrame({"a": [1, 2], "b": [3.0, 5.0]})
-    comp = compare_frames(left, right)
-    data = comp.summary()._data
-    assert data.equal is False
-    assert data.primary_key is None
-    assert data.rows is not None
-    assert data.rows.n_left_only is None
-    assert data.rows.n_joined_equal is None
-    assert data.columns is None
-    assert data.sample_rows_left_only is None
-    assert data.sample_rows_right_only is None
-
-
-def test_summary_data_hidden_columns() -> None:
-    left = pl.DataFrame({"id": [1, 2], "secret": ["a", "b"], "value": [10.0, 20.0]})
-    right = pl.DataFrame({"id": [1, 2], "secret": ["a", "x"], "value": [10.0, 25.0]})
-    comp = compare_frames(left, right, primary_key="id")
-    data = comp.summary(
-        top_k_column_changes=3,
-        hidden_columns=["secret"],
-    )._data
-    assert data.columns is not None
-    for col in data.columns:
-        if col.name == "secret":
-            assert col.changes is None
-        elif col.match_rate < 1:
-            assert col.changes is not None
-
-
-def test_summary_data_validate_hidden_pk_sample_rows() -> None:
-    df = pl.DataFrame({"id": ["a", "b", "c"]})
-    comp = compare_frames(df, df.filter(pl.col("id") == "a"), primary_key=["id"])
-    with pytest.raises(ValueError, match="Cannot show sample rows only"):
-        comp.summary(sample_k_rows_only=3, hidden_columns=["id"])
-
+    result = json.loads(summary.to_json())
+
+    # --- Build expected dictionary ---
+    # Schemas: equal (same columns, same dtypes) → suppressed in slim mode
+    expected_schemas: dict | None = None
+    if not slim:
+        expected_schemas = {
+            "left_only_names": [],
+            "in_common": [
+                ["id", "Int64", "Int64"],
+                ["status", "String", "String"],
+                ["value", "Float64", "Float64"],
+            ],
+            "right_only_names": [],
+        }
 
-def test_summary_data_validate_hidden_pk_sample_pk() -> None:
-    df = pl.DataFrame({"id": ["a", "b", "c"], "value": [1.0, 2.0, 3.0]})
-    comp = compare_frames(df, df.with_columns(pl.col("value") + 1), primary_key=["id"])
-    with pytest.raises(ValueError, match="Cannot show sample primary key"):
-        comp.summary(
-            top_k_column_changes=3,
-            show_sample_primary_key_per_change=True,
-            hidden_columns=["id"],
+    # Columns: status has 100% match rate, value has 2/3
+    # show_perfect_column_matches controls whether the perfect status column appears
+    value_col = {
+        "name": "value",
+        "match_rate": pytest.approx(2 / 3),
+        "n_total_changes": 1 if show_top_column_changes else 0,
+        "changes": (
+            [
+                {
+                    "old": 20.0,
+                    "new": 25.0,
+                    "count": 1,
+                    "sample_pk": [2] if sample_pk else None,
+                }
+            ]
+            if show_top_column_changes
+            else None
+        ),
+    }
+    expected_columns = []
+    if show_perfect_column_matches:
+        expected_columns.append(
+            {"name": "status", "match_rate": 1.0, "n_total_changes": 0, "changes": None}
         )
-
-
-def test_summary_data_validate_zero_top_k_with_sample_pk() -> None:
-    df = pl.DataFrame({"id": ["a", "b"], "value": [1.0, 2.0]})
-    comp = compare_frames(df, df.with_columns(pl.col("value") + 1), primary_key=["id"])
-    with pytest.raises(
-        ValueError,
-        match="Cannot show sample primary key per change when top_k_column_changes is 0",
-    ):
-        comp.summary(top_k_column_changes=0, show_sample_primary_key_per_change=True)
-
-
-def test_summary_data_multiple_pk_columns() -> None:
-    left = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"], "val": [10, 20, 30]})
-    right = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"], "val": [10, 99, 30]})
-    comp = compare_frames(left, right, primary_key=["a", "b"])
-    data = comp.summary(
-        top_k_column_changes=3,
-        show_sample_primary_key_per_change=True,
-        sample_k_rows_only=3,
-    )._data
-    assert data.primary_key == ["a", "b"]
-    assert data.columns is not None
-    for col in data.columns:
-        if col.changes:
-            for change in col.changes:
-                assert isinstance(change.sample_pk, tuple)
-                assert len(change.sample_pk) == 2
-
-
-def test_summary_data_to_dict() -> None:
-    df = pl.DataFrame({"id": [1, 2], "value": [10.0, 20.0]})
-    comp = compare_frames(df, df, primary_key="id")
-    d = comp.summary()._data.to_dict()
-    assert isinstance(d, dict)
-    assert d["equal"] is True
-
-
-def test_summary_data_slim_suppresses_matching_sections() -> None:
-    left = pl.DataFrame({"id": [1, 2, 3], "value": [10.0, 20.0, 30.0]})
-    right = pl.DataFrame({"id": [1, 2, 3], "value": [10.0, 25.0, 30.0]})
-    comp = compare_frames(left, right, primary_key="id")
-    data = comp.summary(slim=True)._data
-
-    # Schemas match -> None in slim mode
-    assert data.schemas is None
-    # Rows have differences (joined unequal) -> shown
-    assert data.rows is not None
-    # Columns have differences -> shown
-    assert data.columns is not None
-
-
-@pytest.mark.parametrize(
-    "value, expected",
-    [
-        (datetime(2024, 1, 15, 12, 30), "2024-01-15T12:30:00"),
-        (date(2024, 1, 15), "2024-01-15"),
-        (timedelta(seconds=5), 5.0),
-        (Decimal("1.5"), 1.5),
-        (42, 42),
-        ("hello", "hello"),
-        (None, None),
-    ],
-)
-def test_to_python(value: object, expected: object) -> None:
-    assert _to_python(value) == expected
-
-
-def test_to_dict_with_typed_values() -> None:
-    comp = _make_comparison()
-    summary = comp.summary(top_k_column_changes=3, sample_k_rows_only=3)
-    d = summary._data.to_dict()
-
-    assert isinstance(d, dict)
-    assert d["equal"] is False
-    assert isinstance(d["columns"], list)
-    assert isinstance(d["sample_rows_left_only"], list)
-    # Verify roundtrip through JSON works
-    json_str = json.dumps(d)
-    parsed = json.loads(json_str)
-    assert parsed["equal"] is False
-    assert len(parsed["columns"]) > 0
-
-
-def test_to_json_with_date_values() -> None:
-    left = pl.DataFrame({"id": [1, 2], "d": [date(2024, 1, 1), date(2024, 6, 1)]})
-    right = pl.DataFrame({"id": [1, 2], "d": [date(2024, 1, 1), date(2024, 12, 1)]})
-    comp = compare_frames(left, right, primary_key="id")
-    summary = comp.summary(top_k_column_changes=3)
-    parsed = json.loads(summary.to_json())
-    assert parsed["equal"] is False
-    col = next(c for c in parsed["columns"] if c["name"] == "d")
-    assert col["changes"] is not None
-    assert col["changes"][0]["old"] == "2024-06-01"
-    assert col["changes"][0]["new"] == "2024-12-01"
-
-
-def test_summary_data_n_total_changes() -> None:
-    left = pl.DataFrame({"id": list(range(10)), "val": list(range(10))})
-    right = pl.DataFrame({"id": list(range(10)), "val": list(range(10, 20))})
-    comp = compare_frames(left, right, primary_key="id")
-    data = comp.summary(top_k_column_changes=3)._data
-    assert data.columns is not None
-    col = next(c for c in data.columns if c.name == "val")
-    assert col.changes is not None
-    assert len(col.changes) == 3
-    assert col.n_total_changes == 10
+    expected_columns.append(value_col)
+
+    expected = {
+        "equal": False,
+        "left_name": "left",
+        "right_name": "right",
+        "primary_key": ["id"],
+        "schemas": expected_schemas,
+        "rows": {
+            "n_left": 4,
+            "n_right": 4,
+            "n_left_only": 1,
+            "n_joined_equal": 2,
+            "n_joined_unequal": 1,
+            "n_right_only": 1,
+        },
+        "columns": expected_columns,
+        "sample_rows_left_only": [[4]] if sample_rows else None,
+        "sample_rows_right_only": [[5]] if sample_rows else None,
+    }
+
+    assert result == expected

From 1bf4e1a63482b7118b6b840b34af158d35158881 Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 18:28:25 +0200
Subject: [PATCH 10/12] improve test coverage

---
 tests/summary/test_summary.py | 136 +++++++++++++++++++++++++++++++++-
 tests/test_summary_data.py    | 121 ------------------------------
 2 files changed, 135 insertions(+), 122 deletions(-)
 delete mode 100644 tests/test_summary_data.py

diff --git a/tests/summary/test_summary.py b/tests/summary/test_summary.py
index 9fbfb5c..7581336 100644
--- a/tests/summary/test_summary.py
+++ b/tests/summary/test_summary.py
@@ -1,14 +1,19 @@
 # Copyright (c) QuantCo 2025-2026
 # SPDX-License-Identifier: BSD-3-Clause
 
+import itertools
+import json
 from collections.abc import Callable
+from datetime import date, datetime
+from decimal import Decimal
 from typing import Any
 
 import polars as pl
 import pytest
 
 from diffly import compare_frames
-from diffly.summary import _format_fraction_as_percentage
+from diffly.comparison import DataFrameComparison
+from diffly.summary import _format_fraction_as_percentage, _to_python
 
 
 @pytest.mark.parametrize("show_perfect_column_matches", [True, False])
@@ -124,3 +129,132 @@ def test_zero_top_k_column_changes_with_show_sample_primary_key() -> None:
             top_k_column_changes=0,
             show_sample_primary_key_per_change=True,
         )
+
+
+def _make_comparison() -> DataFrameComparison:
+    # Designed so every parametrized flag affects the expected JSON output:
+    # - Same columns in both frames → schemas equal → slim suppresses schemas section
+    # - status matches perfectly for joined rows → show_perfect_column_matches matters
+    # - value differs for id=2 → always has a non-perfect column
+    # - id=4 left-only, id=5 right-only → sample rows matter
+    left = pl.DataFrame(
+        {
+            "id": [1, 2, 3, 4],
+            "status": ["a", "b", "c", "d"],
+            "value": [10.0, 20.0, 30.0, 40.0],
+        }
+    )
+    right = pl.DataFrame(
+        {
+            "id": [1, 2, 3, 5],
+            "status": ["a", "b", "c", "e"],
+            "value": [10.0, 25.0, 30.0, 50.0],
+        }
+    )
+    return compare_frames(left, right, primary_key="id")
+
+
+@pytest.mark.parametrize(
+    "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk",
+    [
+        (*combo[:2], combo[2], combo[3], combo[3] and combo[1])
+        for combo in itertools.product([True, False], repeat=4)
+    ],
+)
+def test_summary_data_parametrized(
+    show_perfect_column_matches: bool,
+    show_top_column_changes: bool,
+    slim: bool,
+    sample_rows: bool,
+    sample_pk: bool,
+) -> None:
+    comp = _make_comparison()
+    top_k = 3 if show_top_column_changes else 0
+    summary = comp.summary(
+        show_perfect_column_matches=show_perfect_column_matches,
+        top_k_column_changes=top_k,
+        sample_k_rows_only=3 if sample_rows else 0,
+        show_sample_primary_key_per_change=sample_pk,
+        slim=slim,
+    )
+    result = json.loads(summary.to_json())
+
+    # --- Build expected dictionary ---
+    # Schemas: equal (same columns, same dtypes) → suppressed in slim mode
+    expected_schemas: dict | None = None
+    if not slim:
+        expected_schemas = {
+            "left_only_names": [],
+            "in_common": [
+                ["id", "Int64", "Int64"],
+                ["status", "String", "String"],
+                ["value", "Float64", "Float64"],
+            ],
+            "right_only_names": [],
+        }
+
+    # Columns: status has 100% match rate, value has 2/3
+    # show_perfect_column_matches controls whether the perfect status column appears
+    value_col = {
+        "name": "value",
+        "match_rate": pytest.approx(2 / 3),
+        "n_total_changes": 1 if show_top_column_changes else 0,
+        "changes": (
+            [
+                {
+                    "old": 20.0,
+                    "new": 25.0,
+                    "count": 1,
+                    "sample_pk": [2] if sample_pk else None,
+                }
+            ]
+            if show_top_column_changes
+            else None
+        ),
+    }
+    expected_columns = []
+    if show_perfect_column_matches:
+        expected_columns.append(
+            {"name": "status", "match_rate": 1.0, "n_total_changes": 0, "changes": None}
+        )
+    expected_columns.append(value_col)
+
+    expected = {
+        "equal": False,
+        "left_name": "left",
+        "right_name": "right",
+        "primary_key": ["id"],
+        "schemas": expected_schemas,
+        "rows": {
+            "n_left": 4,
+            "n_right": 4,
+            "n_left_only": 1,
+            "n_joined_equal": 2,
+            "n_joined_unequal": 1,
+            "n_right_only": 1,
+        },
+        "columns": expected_columns,
+        "sample_rows_left_only": [[4]] if sample_rows else None,
+        "sample_rows_right_only": [[5]] if sample_rows else None,
+    }
+
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        ([1, 2, 3], [1, 2, 3]),
+        ({"a": 1, "b": 2}, {"a": 1, "b": 2}),
+        ("string", "string"),
+        (123, 123),
+        (12.34, 12.34),
+        (True, True),
+        (None, None),
+        (date(2024, 1, 1), "2024-01-01"),
+        (datetime(2024, 1, 1, 12, 0, 0), "2024-01-01T12:00:00"),
+        (Decimal("12.34"), 12.34),
+    ],
+)
+def test__to_python(input: Any, expected: Any) -> None:
+    assert _to_python(input) == expected
diff --git a/tests/test_summary_data.py b/tests/test_summary_data.py
deleted file mode 100644
index c64dd60..0000000
--- a/tests/test_summary_data.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (c) QuantCo 2025-2026
-# SPDX-License-Identifier: BSD-3-Clause
-
-import itertools
-import json
-
-import polars as pl
-import pytest
-
-from diffly import compare_frames
-from diffly.comparison import DataFrameComparison
-
-
-def _make_comparison() -> DataFrameComparison:
-    # Designed so every parametrized flag affects the expected JSON output:
-    # - Same columns in both frames → schemas equal → slim suppresses schemas section
-    # - status matches perfectly for joined rows → show_perfect_column_matches matters
-    # - value differs for id=2 → always has a non-perfect column
-    # - id=4 left-only, id=5 right-only → sample rows matter
-    left = pl.DataFrame(
-        {
-            "id": [1, 2, 3, 4],
-            "status": ["a", "b", "c", "d"],
-            "value": [10.0, 20.0, 30.0, 40.0],
-        }
-    )
-    right = pl.DataFrame(
-        {
-            "id": [1, 2, 3, 5],
-            "status": ["a", "b", "c", "e"],
-            "value": [10.0, 25.0, 30.0, 50.0],
-        }
-    )
-    return compare_frames(left, right, primary_key="id")
-
-
-@pytest.mark.parametrize(
-    "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk",
-    [
-        (*combo[:2], combo[2], combo[3], combo[3] and combo[1])
-        for combo in itertools.product([True, False], repeat=4)
-    ],
-)
-def test_summary_data_parametrized(
-    show_perfect_column_matches: bool,
-    show_top_column_changes: bool,
-    slim: bool,
-    sample_rows: bool,
-    sample_pk: bool,
-) -> None:
-    comp = _make_comparison()
-    top_k = 3 if show_top_column_changes else 0
-    summary = comp.summary(
-        show_perfect_column_matches=show_perfect_column_matches,
-        top_k_column_changes=top_k,
-        sample_k_rows_only=3 if sample_rows else 0,
-        show_sample_primary_key_per_change=sample_pk,
-        slim=slim,
-    )
-    result = json.loads(summary.to_json())
-
-    # --- Build expected dictionary ---
-    # Schemas: equal (same columns, same dtypes) → suppressed in slim mode
-    expected_schemas: dict | None = None
-    if not slim:
-        expected_schemas = {
-            "left_only_names": [],
-            "in_common": [
-                ["id", "Int64", "Int64"],
-                ["status", "String", "String"],
-                ["value", "Float64", "Float64"],
-            ],
-            "right_only_names": [],
-        }
-
-    # Columns: status has 100% match rate, value has 2/3
-    # show_perfect_column_matches controls whether the perfect status column appears
-    value_col = {
-        "name": "value",
-        "match_rate": pytest.approx(2 / 3),
-        "n_total_changes": 1 if show_top_column_changes else 0,
-        "changes": (
-            [
-                {
-                    "old": 20.0,
-                    "new": 25.0,
-                    "count": 1,
-                    "sample_pk": [2] if sample_pk else None,
-                }
-            ]
-            if show_top_column_changes
-            else None
-        ),
-    }
-    expected_columns = []
-    if show_perfect_column_matches:
-        expected_columns.append(
-            {"name": "status", "match_rate": 1.0, "n_total_changes": 0, "changes": None}
-        )
-    expected_columns.append(value_col)
-
-    expected = {
-        "equal": False,
-        "left_name": "left",
-        "right_name": "right",
-        "primary_key": ["id"],
-        "schemas": expected_schemas,
-        "rows": {
-            "n_left": 4,
-            "n_right": 4,
-            "n_left_only": 1,
-            "n_joined_equal": 2,
-            "n_joined_unequal": 1,
-            "n_right_only": 1,
-        },
-        "columns": expected_columns,
-        "sample_rows_left_only": [[4]] if sample_rows else None,
-        "sample_rows_right_only": [[5]] if sample_rows else None,
-    }
-
-    assert result == expected

From 42a9781b2d3bf6cfbe6ad9418885c7750e00db2b Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 18:34:10 +0200
Subject: [PATCH 11/12] fix timedelta

---
 tests/summary/test_summary.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tests/summary/test_summary.py b/tests/summary/test_summary.py
index 7581336..febd6ea 100644
--- a/tests/summary/test_summary.py
+++ b/tests/summary/test_summary.py
@@ -4,7 +4,7 @@
 import itertools
 import json
 from collections.abc import Callable
-from datetime import date, datetime
+from datetime import date, datetime, timedelta
 from decimal import Decimal
 from typing import Any
 
@@ -155,10 +155,10 @@ def _make_comparison() -> DataFrameComparison:
 
 
 @pytest.mark.parametrize(
-    "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk",
+    "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk, hide_value",
     [
-        (*combo[:2], combo[2], combo[3], combo[3] and combo[1])
-        for combo in itertools.product([True, False], repeat=4)
+        (*combo[:2], combo[2], combo[3], combo[3] and combo[1], combo[4])
+        for combo in itertools.product([True, False], repeat=5)
     ],
 )
 def test_summary_data_parametrized(
@@ -167,15 +167,18 @@ def test_summary_data_parametrized(
     slim: bool,
     sample_rows: bool,
     sample_pk: bool,
+    hide_value: bool,
 ) -> None:
     comp = _make_comparison()
     top_k = 3 if show_top_column_changes else 0
+    hidden_columns = ["value"] if hide_value else None
     summary = comp.summary(
         show_perfect_column_matches=show_perfect_column_matches,
         top_k_column_changes=top_k,
         sample_k_rows_only=3 if sample_rows else 0,
         show_sample_primary_key_per_change=sample_pk,
         slim=slim,
+        hidden_columns=hidden_columns,
     )
     result = json.loads(summary.to_json())
 
@@ -194,11 +197,13 @@ def test_summary_data_parametrized(
         }
 
     # Columns: status has 100% match rate, value has 2/3
-    # show_perfect_column_matches controls whether the perfect status column appears
+    # - show_perfect_column_matches controls whether the perfect status column appears
+    # - hide_value suppresses changes for value (top_k forced to 0 for hidden columns)
+    show_value_changes = show_top_column_changes and not hide_value
     value_col = {
         "name": "value",
         "match_rate": pytest.approx(2 / 3),
-        "n_total_changes": 1 if show_top_column_changes else 0,
+        "n_total_changes": 1 if show_value_changes else 0,
         "changes": (
             [
                 {
@@ -208,7 +213,7 @@ def test_summary_data_parametrized(
                     "sample_pk": [2] if sample_pk else None,
                 }
             ]
-            if show_top_column_changes
+            if show_value_changes
             else None
         ),
     }
@@ -254,6 +259,7 @@ def test_summary_data_parametrized(
         (date(2024, 1, 1), "2024-01-01"),
         (datetime(2024, 1, 1, 12, 0, 0), "2024-01-01T12:00:00"),
         (Decimal("12.34"), 12.34),
+        (timedelta(hours=1, minutes=30), 5400),
     ],
 )
 def test__to_python(input: Any, expected: Any) -> None:

From bd9aa41d52ebb5d4db36bc463b042b8ccf9bffdf Mon Sep 17 00:00:00 2001
From: Marius Merkle <marius.merkle@quantco.com>
Date: Wed, 1 Apr 2026 18:54:15 +0200
Subject: [PATCH 12/12] refactor: Address Copilot review feedback (sort overlap, drop @dataclass on Summary)

---
 diffly/summary.py             | 5 +++--
 tests/summary/test_summary.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/diffly/summary.py b/diffly/summary.py
index 3ed2724..b5ddb4f 100644
--- a/diffly/summary.py
+++ b/diffly/summary.py
@@ -137,7 +137,9 @@ def _compute_summary_data(
     hidden_columns = hidden_columns or []
 
     def _validate_primary_key_hidden_columns() -> None:
-        overlap = set(hidden_columns).intersection(set(comparison.primary_key or []))
+        overlap = sorted(
+            set(hidden_columns).intersection(set(comparison.primary_key or []))
+        )
         if overlap and sample_k_rows_only > 0:
             raise ValueError(
                 f"Cannot show sample rows only on the left or right side when primary"
@@ -343,7 +345,6 @@ def _validate_primary_key_hidden_columns() -> None:
 # ---------------------------------------------------------------------------- #
 
 
-@dataclass
 class Summary:
     """Container object for generating a summary of the comparison of two data frames.
 
diff --git a/tests/summary/test_summary.py b/tests/summary/test_summary.py
index febd6ea..79ebb3e 100644
--- a/tests/summary/test_summary.py
+++ b/tests/summary/test_summary.py
@@ -157,7 +157,7 @@ def _make_comparison() -> DataFrameComparison:
 @pytest.mark.parametrize(
     "show_perfect_column_matches, show_top_column_changes, slim, sample_rows, sample_pk, hide_value",
     [
-        (*combo[:2], combo[2], combo[3], combo[3] and combo[1], combo[4])
+        (combo[0], combo[1], combo[2], combo[3], combo[3] and combo[1], combo[4])
         for combo in itertools.product([True, False], repeat=5)
     ],
 )