Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Deferred items from PR reviews that were not addressed before merge.
|-------|----------|----|----------|
| CallawaySantAnna: consider materializing NaN entries for non-estimable (g,t) cells in group_time_effects dict (currently omitted with consolidated warning); would require updating downstream consumers (event study, balance_e, aggregation) | `staggered.py` | #256 | Low |
| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails) |
| ImputationDiD survey pretrends: subpopulation approach implemented (full design with zero-padded scores). Resolved in #260. | `imputation.py` | #260 | Resolved |
| Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
| Replicate-weight survey df — **Resolved**. `df_survey = rank(replicate_weights) - 1` matching R's `survey::degf()`. For IF paths, `n_valid - 1` when dropped replicates reduce effective count. | `survey.py` | #238 | Resolved |
| CallawaySantAnna survey: strata/PSU/FPC — **Resolved**. Aggregated SEs (overall, event study, group) use `compute_survey_if_variance()`. Bootstrap uses PSU-level multiplier weights. | `staggered.py` | #237 | Resolved |
Expand Down
2 changes: 2 additions & 0 deletions diff_diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
make_treatment_indicator,
rank_control_units,
summarize_did_data,
trim_weights,
validate_did_data,
wide_to_long,
)
Expand Down Expand Up @@ -307,6 +308,7 @@
"make_post_indicator",
"wide_to_long",
"balance_panel",
"trim_weights",
"validate_did_data",
"summarize_did_data",
"generate_did_data",
Expand Down
98 changes: 84 additions & 14 deletions diff_diff/bootstrap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,10 @@ def generate_survey_multiplier_weights_batch(
is present, weights are scaled by ``sqrt(1 - f_h)`` per stratum so
the bootstrap variance matches the TSL variance.

For ``lonely_psu="adjust"``, singleton PSUs from different strata are
pooled into a combined pseudo-stratum and weights are generated for
the pooled group (no FPC scaling on pooled singletons).

Parameters
----------
n_bootstrap : int
Expand All @@ -454,11 +458,7 @@ def generate_survey_multiplier_weights_batch(
psu = resolved_survey.psu
strata = resolved_survey.strata

if resolved_survey.lonely_psu == "adjust":
raise NotImplementedError(
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
)
_lonely_psu = resolved_survey.lonely_psu

if psu is None:
# Each observation is its own PSU
Expand Down Expand Up @@ -499,6 +499,7 @@ def generate_survey_multiplier_weights_batch(
psu_to_col = {int(p): i for i, p in enumerate(psu_ids)}

unique_strata = np.unique(strata)
_singleton_cols = [] # For lonely_psu="adjust" pooling
for h in unique_strata:
mask_h = strata == h

Expand All @@ -511,8 +512,12 @@ def generate_survey_multiplier_weights_batch(
cols = np.array([psu_to_col[int(p)] for p in psus_in_h])

if n_h < 2:
# Lonely PSU — zero weight (matches remove/certainty behavior)
weights[:, cols] = 0.0
if _lonely_psu == "adjust":
# Collect for pooled pseudo-stratum processing
_singleton_cols.extend(cols.tolist())
else:
# remove / certainty — zero weight
weights[:, cols] = 0.0
continue

# Generate weights for this stratum
Expand All @@ -536,6 +541,31 @@ def generate_survey_multiplier_weights_batch(

weights[:, cols] = stratum_weights

# Pool singleton PSUs into a pseudo-stratum for "adjust"
if _singleton_cols:
n_pooled = len(_singleton_cols)
if n_pooled >= 2:
pooled_weights = generate_bootstrap_weights_batch_numpy(
n_bootstrap, n_pooled, weight_type, rng
)
# No FPC scaling for pooled singletons (conservative)
pooled_cols = np.array(_singleton_cols)
weights[:, pooled_cols] = pooled_weights
else:
# Single singleton — cannot pool, zero weight (library-specific
# fallback; bootstrap adjust with one singleton = remove).
import warnings

warnings.warn(
"lonely_psu='adjust' with only 1 singleton stratum in "
"bootstrap: singleton PSU contributes zero variance "
"(same as 'remove'). At least 2 singleton strata are "
"needed for pooled pseudo-stratum bootstrap.",
UserWarning,
stacklevel=3,
)
weights[:, _singleton_cols[0]] = 0.0

return weights, psu_ids


Expand All @@ -553,6 +583,9 @@ def generate_rao_wu_weights(
With FPC: ``m_h = max(1, round((1 - f_h) * (n_h - 1)))``
(Rao, Wu & Yue 1992, Section 3).

For ``lonely_psu="adjust"``, singleton PSUs are pooled into a combined
pseudo-stratum and resampled together (no FPC scaling on pooled group).

Parameters
----------
resolved_survey : ResolvedSurveyDesign
Expand All @@ -570,11 +603,7 @@ def generate_rao_wu_weights(
psu = resolved_survey.psu
strata = resolved_survey.strata

if resolved_survey.lonely_psu == "adjust":
raise NotImplementedError(
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
)
_lonely_psu_rw = resolved_survey.lonely_psu

rescaled = np.zeros(n_obs, dtype=np.float64)

Expand All @@ -589,14 +618,20 @@ def generate_rao_wu_weights(
unique_strata = np.unique(strata)
strata_masks = [strata == h for h in unique_strata]

# Collect singleton PSUs for "adjust" pooling
_singleton_info = [] # list of (mask_h, unique_psu_h) tuples

for mask_h in strata_masks:
psu_h = obs_psu[mask_h]
unique_psu_h = np.unique(psu_h)
n_h = len(unique_psu_h)

if n_h < 2:
# Census / lonely PSU — keep original weights (zero variance)
rescaled[mask_h] = base_weights[mask_h]
if _lonely_psu_rw == "adjust":
_singleton_info.append((mask_h, unique_psu_h))
else:
# remove / certainty — keep original weights (zero variance)
rescaled[mask_h] = base_weights[mask_h]
continue

# Compute resample size
Expand Down Expand Up @@ -629,6 +664,41 @@ def generate_rao_wu_weights(
local_indices = np.array([psu_to_local[int(obs_psu[idx])] for idx in obs_in_h])
rescaled[obs_in_h] = base_weights[obs_in_h] * scale_per_psu[local_indices]

# Pool singleton PSUs into a pseudo-stratum for "adjust"
if _singleton_info:
# Combine all singleton PSUs into one group
pooled_psus = np.concatenate([p for _, p in _singleton_info])
n_pooled = len(pooled_psus)

if n_pooled >= 2:
m_pooled = n_pooled - 1 # No FPC for pooled singletons
drawn = rng.choice(n_pooled, size=m_pooled, replace=True)
counts = np.bincount(drawn, minlength=n_pooled)
scale_per_psu = (n_pooled / m_pooled) * counts.astype(np.float64)

# Build PSU → scale mapping and apply
psu_scale_map = {int(pooled_psus[i]): scale_per_psu[i] for i in range(n_pooled)}
for mask_h, _ in _singleton_info:
obs_in_h = np.where(mask_h)[0]
for idx in obs_in_h:
p = int(obs_psu[idx])
rescaled[idx] = base_weights[idx] * psu_scale_map.get(p, 1.0)
else:
# Single singleton — cannot pool, keep base weights (library-specific
# fallback; bootstrap adjust with one singleton = remove).
import warnings

warnings.warn(
"lonely_psu='adjust' with only 1 singleton stratum in "
"bootstrap: singleton PSU contributes zero variance "
"(same as 'remove'). At least 2 singleton strata are "
"needed for pooled pseudo-stratum bootstrap.",
UserWarning,
stacklevel=2,
)
for mask_h, _ in _singleton_info:
rescaled[mask_h] = base_weights[mask_h]

return rescaled


Expand Down
16 changes: 15 additions & 1 deletion diff_diff/continuous_did_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ def __repr__(self) -> str:
f"n_periods={len(self.time_periods)})"
)

@property
def coef_var(self) -> float:
    """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
    se = self.overall_att_se
    att = self.overall_att
    # A valid CV needs a finite, non-negative SE and a finite, nonzero ATT;
    # anything else yields NaN rather than raising or dividing by zero.
    if np.isfinite(se) and se >= 0 and np.isfinite(att) and att != 0:
        return se / abs(att)
    return np.nan

def summary(self, alpha: Optional[float] = None) -> str:
"""Generate formatted summary."""
alpha = alpha or self.alpha
Expand Down Expand Up @@ -223,10 +232,15 @@ def summary(self, alpha: Optional[float] = None) -> str:
f"[{self.overall_att_conf_int[0]:.4f}, {self.overall_att_conf_int[1]:.4f}]",
f"{conf_level}% CI for ACRT_glob: "
f"[{self.overall_acrt_conf_int[0]:.4f}, {self.overall_acrt_conf_int[1]:.4f}]",
"",
]
)

cv = self.coef_var
if np.isfinite(cv):
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

lines.append("")

# Dose-response curve summary (first/mid/last points)
if len(self.dose_grid) > 0:
lines.extend(
Expand Down
16 changes: 15 additions & 1 deletion diff_diff/efficient_did_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,15 @@ def __repr__(self) -> str:
f"n_periods={len(self.time_periods)})"
)

@property
def coef_var(self) -> float:
    """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
    se = self.overall_se
    att = self.overall_att
    # Guard clauses: a meaningful CV requires a finite non-negative SE
    # and a finite nonzero ATT; otherwise report NaN.
    if not np.isfinite(se) or se < 0:
        return np.nan
    if not np.isfinite(att) or att == 0:
        return np.nan
    return se / abs(att)

def summary(self, alpha: Optional[float] = None) -> str:
"""Generate formatted summary of estimation results."""
alpha = alpha or self.alpha
Expand Down Expand Up @@ -219,10 +228,15 @@ def summary(self, alpha: Optional[float] = None) -> str:
"",
f"{conf_level}% Confidence Interval: "
f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
"",
]
)

cv = self.coef_var
if np.isfinite(cv):
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

lines.append("")

# Event study effects
if self.event_study_effects:
lines.extend(
Expand Down
Loading
Loading