Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Deferred items from PR reviews that were not addressed before merge.
|-------|----------|----|----------|
| CallawaySantAnna: consider materializing NaN entries for non-estimable (g,t) cells in group_time_effects dict (currently omitted with consolidated warning); would require updating downstream consumers (event study, balance_e, aggregation) | `staggered.py` | #256 | Low |
| ImputationDiD dense `(A0'A0).toarray()` scales O((U+T+K)^2), OOM risk on large panels | `imputation.py` | #141 | Medium (deferred — only triggers when sparse solver fails) |
| ImputationDiD survey pretrends: subpopulation approach implemented (full design with zero-padded scores). Resolved in #260. | `imputation.py` | #260 | Resolved |
| Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
| Replicate-weight survey df — **Resolved**. `df_survey = rank(replicate_weights) - 1` matching R's `survey::degf()`. For IF paths, `n_valid - 1` when dropped replicates reduce effective count. | `survey.py` | #238 | Resolved |
| CallawaySantAnna survey: strata/PSU/FPC — **Resolved**. Aggregated SEs (overall, event study, group) use `compute_survey_if_variance()`. Bootstrap uses PSU-level multiplier weights. | `staggered.py` | #237 | Resolved |
Expand Down
2 changes: 2 additions & 0 deletions diff_diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
make_treatment_indicator,
rank_control_units,
summarize_did_data,
trim_weights,
validate_did_data,
wide_to_long,
)
Expand Down Expand Up @@ -307,6 +308,7 @@
"make_post_indicator",
"wide_to_long",
"balance_panel",
"trim_weights",
"validate_did_data",
"summarize_did_data",
"generate_did_data",
Expand Down
98 changes: 84 additions & 14 deletions diff_diff/bootstrap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,10 @@ def generate_survey_multiplier_weights_batch(
is present, weights are scaled by ``sqrt(1 - f_h)`` per stratum so
the bootstrap variance matches the TSL variance.

For ``lonely_psu="adjust"``, singleton PSUs from different strata are
pooled into a combined pseudo-stratum and weights are generated for
the pooled group (no FPC scaling on pooled singletons).

Parameters
----------
n_bootstrap : int
Expand All @@ -454,11 +458,7 @@ def generate_survey_multiplier_weights_batch(
psu = resolved_survey.psu
strata = resolved_survey.strata

if resolved_survey.lonely_psu == "adjust":
raise NotImplementedError(
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
)
_lonely_psu = resolved_survey.lonely_psu

if psu is None:
# Each observation is its own PSU
Expand Down Expand Up @@ -499,6 +499,7 @@ def generate_survey_multiplier_weights_batch(
psu_to_col = {int(p): i for i, p in enumerate(psu_ids)}

unique_strata = np.unique(strata)
_singleton_cols = [] # For lonely_psu="adjust" pooling
for h in unique_strata:
mask_h = strata == h

Expand All @@ -511,8 +512,12 @@ def generate_survey_multiplier_weights_batch(
cols = np.array([psu_to_col[int(p)] for p in psus_in_h])

if n_h < 2:
# Lonely PSU — zero weight (matches remove/certainty behavior)
weights[:, cols] = 0.0
if _lonely_psu == "adjust":
# Collect for pooled pseudo-stratum processing
_singleton_cols.extend(cols.tolist())
else:
# remove / certainty — zero weight
weights[:, cols] = 0.0
continue

# Generate weights for this stratum
Expand All @@ -536,6 +541,31 @@ def generate_survey_multiplier_weights_batch(

weights[:, cols] = stratum_weights

# Pool singleton PSUs into a pseudo-stratum for "adjust"
if _singleton_cols:
n_pooled = len(_singleton_cols)
if n_pooled >= 2:
pooled_weights = generate_bootstrap_weights_batch_numpy(
n_bootstrap, n_pooled, weight_type, rng
)
# No FPC scaling for pooled singletons (conservative)
pooled_cols = np.array(_singleton_cols)
weights[:, pooled_cols] = pooled_weights
else:
# Single singleton — cannot pool, zero weight (library-specific
# fallback; bootstrap adjust with one singleton = remove).
import warnings

warnings.warn(
"lonely_psu='adjust' with only 1 singleton stratum in "
"bootstrap: singleton PSU contributes zero variance "
"(same as 'remove'). At least 2 singleton strata are "
"needed for pooled pseudo-stratum bootstrap.",
UserWarning,
stacklevel=3,
)
weights[:, _singleton_cols[0]] = 0.0

return weights, psu_ids


Expand All @@ -553,6 +583,9 @@ def generate_rao_wu_weights(
With FPC: ``m_h = max(1, round((1 - f_h) * (n_h - 1)))``
(Rao, Wu & Yue 1992, Section 3).

For ``lonely_psu="adjust"``, singleton PSUs are pooled into a combined
pseudo-stratum and resampled together (no FPC scaling on pooled group).

Parameters
----------
resolved_survey : ResolvedSurveyDesign
Expand All @@ -570,11 +603,7 @@ def generate_rao_wu_weights(
psu = resolved_survey.psu
strata = resolved_survey.strata

if resolved_survey.lonely_psu == "adjust":
raise NotImplementedError(
"lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
"Use lonely_psu='remove' or 'certainty', or use analytical inference."
)
_lonely_psu_rw = resolved_survey.lonely_psu

rescaled = np.zeros(n_obs, dtype=np.float64)

Expand All @@ -589,14 +618,20 @@ def generate_rao_wu_weights(
unique_strata = np.unique(strata)
strata_masks = [strata == h for h in unique_strata]

# Collect singleton PSUs for "adjust" pooling
_singleton_info = [] # list of (mask_h, unique_psu_h) tuples

for mask_h in strata_masks:
psu_h = obs_psu[mask_h]
unique_psu_h = np.unique(psu_h)
n_h = len(unique_psu_h)

if n_h < 2:
# Census / lonely PSU — keep original weights (zero variance)
rescaled[mask_h] = base_weights[mask_h]
if _lonely_psu_rw == "adjust":
_singleton_info.append((mask_h, unique_psu_h))
else:
# remove / certainty — keep original weights (zero variance)
rescaled[mask_h] = base_weights[mask_h]
continue

# Compute resample size
Expand Down Expand Up @@ -629,6 +664,41 @@ def generate_rao_wu_weights(
local_indices = np.array([psu_to_local[int(obs_psu[idx])] for idx in obs_in_h])
rescaled[obs_in_h] = base_weights[obs_in_h] * scale_per_psu[local_indices]

# Pool singleton PSUs into a pseudo-stratum for "adjust"
if _singleton_info:
# Combine all singleton PSUs into one group
pooled_psus = np.concatenate([p for _, p in _singleton_info])
n_pooled = len(pooled_psus)

if n_pooled >= 2:
m_pooled = n_pooled - 1 # No FPC for pooled singletons
drawn = rng.choice(n_pooled, size=m_pooled, replace=True)
counts = np.bincount(drawn, minlength=n_pooled)
scale_per_psu = (n_pooled / m_pooled) * counts.astype(np.float64)

# Build PSU → scale mapping and apply
psu_scale_map = {int(pooled_psus[i]): scale_per_psu[i] for i in range(n_pooled)}
for mask_h, _ in _singleton_info:
obs_in_h = np.where(mask_h)[0]
for idx in obs_in_h:
p = int(obs_psu[idx])
rescaled[idx] = base_weights[idx] * psu_scale_map.get(p, 1.0)
else:
# Single singleton — cannot pool, keep base weights (library-specific
# fallback; bootstrap adjust with one singleton = remove).
import warnings

warnings.warn(
"lonely_psu='adjust' with only 1 singleton stratum in "
"bootstrap: singleton PSU contributes zero variance "
"(same as 'remove'). At least 2 singleton strata are "
"needed for pooled pseudo-stratum bootstrap.",
UserWarning,
stacklevel=2,
)
for mask_h, _ in _singleton_info:
rescaled[mask_h] = base_weights[mask_h]

return rescaled


Expand Down
16 changes: 15 additions & 1 deletion diff_diff/continuous_did_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ def __repr__(self) -> str:
f"n_periods={len(self.time_periods)})"
)

@property
def coef_var(self) -> float:
    """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
    se = self.overall_att_se
    att = self.overall_att
    # A valid CV needs a finite, non-negative SE and a finite, nonzero ATT;
    # anything else yields NaN rather than raising or dividing by zero.
    if np.isfinite(se) and se >= 0 and np.isfinite(att) and att != 0:
        return se / abs(att)
    return np.nan

def summary(self, alpha: Optional[float] = None) -> str:
"""Generate formatted summary."""
alpha = alpha or self.alpha
Expand Down Expand Up @@ -223,10 +232,15 @@ def summary(self, alpha: Optional[float] = None) -> str:
f"[{self.overall_att_conf_int[0]:.4f}, {self.overall_att_conf_int[1]:.4f}]",
f"{conf_level}% CI for ACRT_glob: "
f"[{self.overall_acrt_conf_int[0]:.4f}, {self.overall_acrt_conf_int[1]:.4f}]",
"",
]
)

cv = self.coef_var
if np.isfinite(cv):
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

lines.append("")

# Dose-response curve summary (first/mid/last points)
if len(self.dose_grid) > 0:
lines.extend(
Expand Down
16 changes: 15 additions & 1 deletion diff_diff/efficient_did_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,15 @@ def __repr__(self) -> str:
f"n_periods={len(self.time_periods)})"
)

@property
def coef_var(self) -> float:
    """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
    se = self.overall_se
    att = self.overall_att
    # Guard clauses: a meaningful CV requires a finite non-negative SE
    # and a finite nonzero ATT; otherwise report NaN.
    if not np.isfinite(se) or se < 0:
        return np.nan
    if not np.isfinite(att) or att == 0:
        return np.nan
    return se / abs(att)

def summary(self, alpha: Optional[float] = None) -> str:
"""Generate formatted summary of estimation results."""
alpha = alpha or self.alpha
Expand Down Expand Up @@ -219,10 +228,15 @@ def summary(self, alpha: Optional[float] = None) -> str:
"",
f"{conf_level}% Confidence Interval: "
f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
"",
]
)

cv = self.coef_var
if np.isfinite(cv):
lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

lines.append("")

# Event study effects
if self.event_study_effects:
lines.extend(
Expand Down
Loading
Loading