diff --git a/dpsynth/data_generation_v3.py b/dpsynth/data_generation_v3.py index 2fdce0d..0daebf2 100644 --- a/dpsynth/data_generation_v3.py +++ b/dpsynth/data_generation_v3.py @@ -108,6 +108,7 @@ class TabularSynthesizer(primitives.DPMechanism): default_factory=discrete_mechanisms.MSTMechanism ) initializers: dict[str, primitives.DPMechanism] | None = None + total_count_mechanism: primitives.DPGaussianCount | None = None cross_attribute_constraints: Sequence[constraints.Constraint] = () def calibrate( @@ -190,11 +191,16 @@ def _calibrate_zcdp( self.domains, numerical_bins, init_delta ) init_rho = init_budget_fraction * zcdp_rho - per_col_rho = init_rho / len(inits) + # +1 for the DPGaussianCount that always measures the total. + per_col_rho = init_rho / (len(inits) + 1) discrete_rho = zcdp_rho - init_rho + calibrated_inits = { col: init.calibrate(zcdp_rho=per_col_rho) for col, init in inits.items() } + calibrated_total = primitives.DPGaussianCount().calibrate( + zcdp_rho=per_col_rho + ) calibrated_discrete = self.discrete_mechanism.calibrate( zcdp_rho=discrete_rho ) @@ -202,6 +208,7 @@ def _calibrate_zcdp( self, initializers=calibrated_inits, discrete_mechanism=calibrated_discrete, + total_count_mechanism=calibrated_total, ) def _calibrate_approx_dp( @@ -237,7 +244,8 @@ def _calibrate_approx_dp( inits = self.initializers or _create_initializers( self.domains, numerical_bins, init_delta ) - num_columns = len(inits) + # +1 for the DPGaussianCount that always measures the total. + num_shares = len(inits) + 1 # Stage 1: Convert (epsilon, remaining_delta) to zCDP and calibrate # initializers with init_budget_fraction of that budget. @@ -248,15 +256,18 @@ def _calibrate_approx_dp( make_fresh_accountant=dp_accounting.rdp.RdpAccountant, ) init_rho = init_budget_fraction * total_rho - per_col_rho = init_rho / num_columns + per_col_rho = init_rho / num_shares calibrated_inits = { col: init.calibrate(zcdp_rho=per_col_rho) for col, init in inits.items() } - + calibrated_total = primitives.DPGaussianCount().calibrate( + zcdp_rho=per_col_rho + ) # Stage 2: With init dp_events fixed, find the tightest discrete budget. # The accountant handles ApproximateDpEvent deltas from open-set # initializers automatically. init_events = [init.dp_event for init in calibrated_inits.values()] + init_events.append(calibrated_total.dp_event) # Determine accountant type based on discrete mechanism's dp_event. probe_event = self.discrete_mechanism.calibrate(zcdp_rho=1.0).dp_event @@ -285,6 +296,7 @@ def make_event_from_param(discrete_rho): self, initializers=calibrated_inits, discrete_mechanism=calibrated_discrete, + total_count_mechanism=calibrated_total, ) @property @@ -297,9 +309,10 @@ def dp_event(self) -> dp_accounting.DpEvent: Raises: ValueError: If calibrate() has not been called. """ - if self.initializers is None: + if self.initializers is None or self.total_count_mechanism is None: raise ValueError('Must call calibrate() before accessing dp_event.') events = [init.dp_event for init in self.initializers.values()] + events.append(self.total_count_mechanism.dp_event) events.append(self.discrete_mechanism.dp_event) return dp_accounting.ComposedDpEvent(events) @@ -320,7 +333,7 @@ def __call__( ValueError: If calibrate() has not been called or if required columns are missing from the input data. """ - if self.initializers is None: + if self.initializers is None or self.total_count_mechanism is None: raise ValueError('Must call calibrate() before running the mechanism.') for col in self.domains: if col not in data.columns: @@ -329,15 +342,22 @@ def __call__( ) # Phase 1: Per-column initialization. - col_results: dict[str, initialization.ColumnMeasurement] = {} + # Measure total count first, then run per-column initializers. + any_col = next(iter(self.domains)) + total = max(1.0, self.total_count_mechanism(rng, data[any_col].values)) + + results: dict[str, initialization.ColumnMeasurement] = {} for col, init in self.initializers.items(): - col_results[col] = init(rng, data[col].values) + if isinstance(init, initialization.NumericalInitializer): + results[col] = init(rng, data[col].values, estimated_total=total) + else: + results[col] = init(rng, data[col].values) # Phase 2: Encode data to discrete domain. discrete_domains = {} discrete_data = {} one_way_measurements = [] - for col, result in col_results.items(): + for col, result in results.items(): discrete_domains[col] = result.categorical_attribute.size if result.bin_edges is not None: discrete_data[col] = vtx.discretize( @@ -368,7 +388,7 @@ def __call__( # Phase 4: Decode synthetic data back to original domain. synthetic_columns = {} - for col, result in col_results.items(): + for col, result in results.items(): col_data = synthetic_data.df[col].values if result.bin_edges is not None: synthetic_columns[col] = vtx.undiscretize( diff --git a/dpsynth/local_mode/initialization.py b/dpsynth/local_mode/initialization.py index 08864c9..ab3685a 100644 --- a/dpsynth/local_mode/initialization.py +++ b/dpsynth/local_mode/initialization.py @@ -147,10 +147,18 @@ def __call__( measurement = None if estimated_total is not None: rho = self._zcdp_rho - uniform_counts = bin_weights * (estimated_total / self.num_partitions) - stddev = 1.0 / np.sqrt(rho) + if not self.attribute.clip_to_range: + # Prepend zero weight for the OUT_OF_DOMAIN slot at index 0. + bin_weights = np.r_[0, bin_weights] + # Query is the normalized histogram (probabilities); the noise scale + # absorbs the 1/estimated_total factor from dividing counts by n. + normalized = bin_weights / bin_weights.sum() + stddev = 1.0 / (np.sqrt(rho) * estimated_total) measurement = mbi.LinearMeasurement( - uniform_counts, (self.name,), stddev=stddev + normalized, + (self.name,), + stddev=stddev, + query=lambda f: f.normalize(1.0).datavector(), ) return ColumnMeasurement(cat_attr, bin_edges, measurement=measurement) diff --git a/dpsynth/local_mode/primitives.py b/dpsynth/local_mode/primitives.py index c21b6ce..71e0321 100644 --- a/dpsynth/local_mode/primitives.py +++ b/dpsynth/local_mode/primitives.py @@ -489,32 +489,6 @@ def _select_partitions_sips( return selected_partitions, selected_counts, max_sigma -def _gaussian_histogram( - rng: np.random.Generator, - data: np.ndarray, - domain_size: int, - sigma: float, -) -> np.ndarray: - """Computes a noisy histogram over a closed domain using the Gaussian mechanism. - - The histogram query has L2 sensitivity 1 under item-level DP (each record - contributes +1 to exactly one bin). Gaussian noise with the given standard - deviation is added independently to each bin count. - - Args: - rng: A numpy random number generator. - data: 1D array of integer-encoded categorical values in [0, domain_size). - domain_size: Number of categories in the closed domain. - sigma: Standard deviation of the Gaussian noise added to each bin. - - Returns: - A length-`domain_size` array of noisy counts. - """ - return np.bincount(data, minlength=domain_size) + rng.normal( - scale=sigma, size=domain_size - ) - - # --------------------------------------------------------------------------- # DPMechanism subclasses # --------------------------------------------------------------------------- @@ -636,9 +610,33 @@ def __call__( """Computes a differentially private histogram.""" if self.sigma is None: raise ValueError(_UNCALIBRATED_MSG.format(param='sigma')) - return HistogramResult( - counts=_gaussian_histogram(rng, data, self.domain_size, self.sigma) - ) + true_counts = np.bincount(data, minlength=self.domain_size) + noise = rng.normal(scale=self.sigma, size=self.domain_size) + return HistogramResult(counts=true_counts + noise) + + +@dataclasses.dataclass +class DPGaussianCount(DPMechanism): + """Differentially private count via the Gaussian mechanism.""" + + sigma: float | None = None + + def calibrate(self, *, zcdp_rho: float) -> DPGaussianCount: + """Returns a copy with sigma derived from the zCDP budget.""" + return dataclasses.replace(self, sigma=math.sqrt(0.5 / zcdp_rho)) + + @property + def dp_event(self) -> dp_accounting.DpEvent: + """Returns the Gaussian privacy event for this mechanism.""" + if self.sigma is None: + raise ValueError(_UNCALIBRATED_MSG.format(param='sigma')) + return dp_accounting.GaussianDpEvent(noise_multiplier=self.sigma) + + def __call__(self, rng: np.random.Generator, data: np.ndarray) -> float: + """Returns a noisy count of len(data) + Gaussian noise.""" + if self.sigma is None: + raise ValueError(_UNCALIBRATED_MSG.format(param='sigma')) + return float(len(data) + rng.normal(scale=self.sigma)) @dataclasses.dataclass diff --git a/tests/data_generation_v3_test.py b/tests/data_generation_v3_test.py index b8186be..d82579b 100644 --- a/tests/data_generation_v3_test.py +++ b/tests/data_generation_v3_test.py @@ -149,6 +149,38 @@ def test_calibrate_small_epsilon(self): self.assertIsInstance(synthetic_df, pd.DataFrame) self.assertListEqual(synthetic_df.columns.tolist(), ['A', 'B']) + def test_numerical_only_uses_dp_count(self): + """Numerical-only domains should allocate a DPGaussianCount for total.""" + domains = { + 'A': domain.NumericalAttribute(min_value=0, max_value=10), + 'B': domain.NumericalAttribute(min_value=-10, max_value=10), + } + df = pd.DataFrame({'A': [5, 5, 0], 'B': [5, -10, -5]}, dtype=float) + rng = np.random.default_rng(0) + calibrated = TabularSynthesizer(domains=domains).calibrate(zcdp_rho=100.0) + + # total_count_mechanism should be set for numerical-only domains. + self.assertIsNotNone(calibrated.total_count_mechanism) + synthetic_df = calibrated(rng, df).synthetic_data + self.assertListEqual(synthetic_df.columns.tolist(), ['A', 'B']) + + def test_mixed_domain_always_has_dp_count(self): + """Mixed domains also allocate a DPGaussianCount for total.""" + domains = { + 'A': domain.CategoricalAttribute( + possible_values=['a', 'b', 'c'], out_of_domain_index=0 + ), + 'B': domain.NumericalAttribute(min_value=0, max_value=10), + } + df = pd.DataFrame({'A': ['a', 'b', 'c'], 'B': [1.0, 5.0, 10.0]}) + rng = np.random.default_rng(0) + calibrated = TabularSynthesizer(domains=domains).calibrate(zcdp_rho=100.0) + + # DPGaussianCount is always allocated. + self.assertIsNotNone(calibrated.total_count_mechanism) + synthetic_df = calibrated(rng, df).synthetic_data + self.assertListEqual(synthetic_df.columns.tolist(), ['A', 'B']) + if __name__ == '__main__': absltest.main() diff --git a/tests/local_mode/initialization_test.py b/tests/local_mode/initialization_test.py index aa13b65..6b15562 100644 --- a/tests/local_mode/initialization_test.py +++ b/tests/local_mode/initialization_test.py @@ -125,9 +125,9 @@ def test_numerical_initializer_measurement_with_merged_bins(self): rng, data, estimated_total=100.0 ) self.assertIsNotNone(result.measurement) - # Measurement counts should sum to estimated_total. + # Measurement probabilities should sum to 1.0. np.testing.assert_allclose( - result.measurement.noisy_measurement.sum(), 100.0 + result.measurement.noisy_measurement.sum(), 1.0, atol=1e-10 ) def test_numerical_initializer_measurement_with_estimated_total(self): @@ -142,16 +142,16 @@ def test_numerical_initializer_measurement_with_estimated_total(self): ) self.assertIsNotNone(result.measurement) - # Measurement should be uniform: 100.0 / num_bins for each bin. + # Measurement should be uniform probabilities: 1.0 / num_bins each. num_bins = result.categorical_attribute.size - expected_count = 100.0 / num_bins + expected_prob = 1.0 / num_bins np.testing.assert_allclose( result.measurement.noisy_measurement, - np.full(num_bins, expected_count), + np.full(num_bins, expected_prob), ) self.assertEqual(result.measurement.clique, ('num_col',)) - # stddev should be 1/sqrt(rho) = 1/sqrt(1.0) = 1.0 - self.assertAlmostEqual(result.measurement.stddev, 1.0) + # stddev should be 1/(sqrt(rho) * estimated_total) = 1/(1.0 * 100) = 0.01 + self.assertAlmostEqual(result.measurement.stddev, 0.01) def test_numerical_initializer_no_measurement_without_estimated_total(self): attr = domain.NumericalAttribute(min_value=0, max_value=10) @@ -178,9 +178,9 @@ def test_integer_edges_at_max_value_absorbed_into_last_bin(self): # No edge should equal max_value (they get absorbed). if len(result.bin_edges) > 0: self.assertLess(result.bin_edges[-1], 10) - # Measurement counts must still sum to estimated_total. + # Measurement probabilities must still sum to 1.0. np.testing.assert_allclose( - result.measurement.noisy_measurement.sum(), 100.0 + result.measurement.noisy_measurement.sum(), 1.0, atol=1e-10 ) # The last bin (containing max_value=10) should get the most mass. counts = result.measurement.noisy_measurement @@ -198,11 +198,12 @@ def test_bin_weights_sum_to_num_partitions(self): result = initializer.calibrate(zcdp_rho=1.0)( rng, data, estimated_total=100.0 ) - # Sum of measurement counts = estimated_total = num_partitions * per_bin. + # Sum of measurement probabilities = 1.0. np.testing.assert_allclose( result.measurement.noisy_measurement.sum(), - 100.0, - err_msg=f'seed={seed}: counts do not sum to estimated_total', + 1.0, + atol=1e-10, + err_msg=f'seed={seed}: probabilities do not sum to 1', ) def test_integer_jitter_prevents_spurious_splits(self): @@ -363,19 +364,21 @@ def test_measurement_approximates_true_histogram( self.assertGreaterEqual(num_bins, 2) measurement = result.measurement self.assertIsNotNone(measurement) - # Measurement counts must sum to estimated_total. - np.testing.assert_allclose(measurement.noisy_measurement.sum(), len(data)) - # All measurement counts should be positive. + # Measurement probabilities must sum to 1.0. + np.testing.assert_allclose( + measurement.noisy_measurement.sum(), 1.0, atol=1e-10 + ) + # All measurement probabilities should be positive. self.assertTrue( np.all(measurement.noisy_measurement > 0), - f'non-positive measurement counts: {measurement.noisy_measurement}', + 'non-positive measurement probabilities:' + f' {measurement.noisy_measurement}', ) # -- Statistical approximation check -- encoded = vtx.discretize(data, result.bin_edges, attr) true_counts = np.bincount(encoded, minlength=num_bins).astype(float) - meas_counts = measurement.noisy_measurement true_prob = true_counts / true_counts.sum() - meas_prob = meas_counts / meas_counts.sum() + meas_prob = measurement.noisy_measurement l1_dist = np.abs(true_prob - meas_prob).sum() # 3/sqrt(rho) covers quantile noise; 2-1/K covers uniform-vs-delta. max_l1 = max(3.0 / np.sqrt(rho), 2.0 - 1.0 / num_bins) @@ -384,8 +387,8 @@ def test_measurement_approximates_true_histogram( max_l1, f'Measurement too far from true histogram (L1={l1_dist:.3f},' f' bound={max_l1:.3f}, rho={rho}):\n' - f' true_counts = {true_counts}\n' - f' meas_counts = {meas_counts}', + f' true_prob = {true_prob}\n' + f' meas_prob = {meas_prob}', ) def test_measurement_property_random_configs(self): @@ -432,12 +435,13 @@ def test_measurement_property_random_configs(self): self.assertIsNotNone(measurement) np.testing.assert_allclose( measurement.noisy_measurement.sum(), - len(data), - err_msg=f'trial={trial}: counts do not sum to N', + 1.0, + atol=1e-10, + err_msg=f'trial={trial}: probabilities do not sum to 1', ) self.assertTrue( np.all(measurement.noisy_measurement > 0), - f'trial={trial}: non-positive counts ' + f'trial={trial}: non-positive probabilities ' f'{measurement.noisy_measurement}', ) @@ -445,9 +449,8 @@ def test_measurement_property_random_configs(self): num_bins = result.categorical_attribute.size encoded = vtx.discretize(data, result.bin_edges, attr) true_counts = np.bincount(encoded, minlength=num_bins).astype(float) - meas_counts = measurement.noisy_measurement true_prob = true_counts / true_counts.sum() - meas_prob = meas_counts / meas_counts.sum() + meas_prob = measurement.noisy_measurement l1_dist = np.abs(true_prob - meas_prob).sum() max_l1 = max(3.0 / np.sqrt(rho), 2.0 - 1.0 / num_bins) self.assertLess( @@ -456,8 +459,8 @@ def test_measurement_property_random_configs(self): f'trial={trial} (rho={rho:.2f}, K={num_partitions},' f' modes={modes}, is_int={is_int}):\n' f' L1={l1_dist:.3f}, bound={max_l1:.3f}\n' - f' true_counts={true_counts}\n' - f' meas_counts={meas_counts}', + f' true_prob={true_prob}\n' + f' meas_prob={meas_prob}', ) diff --git a/tests/local_mode/primitives_test.py b/tests/local_mode/primitives_test.py index 98a57a6..3f30d69 100644 --- a/tests/local_mode/primitives_test.py +++ b/tests/local_mode/primitives_test.py @@ -217,60 +217,44 @@ def setUp(self): def test_basic_operation(self): data = np.array([1] * 50 + [2] * 5) - selected, counts, sigma = ( - primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=10.0, delta=1e-5 - ) + mech = primitives.DPPartitionSelection( + delta=1e-5, sigma=1.0 / np.sqrt(10.0) + ) + result = mech(self.rng, data) + self.assertIn(1, result.selected_partitions) + self.assertEqual( + result.selected_partitions.size, result.estimated_counts.size ) - self.assertIn(1, selected) - self.assertEqual(sigma, 1.0 / np.sqrt(10.0)) - self.assertEqual(selected.size, counts.size) def test_empty_data(self): data = np.array([], dtype=int) - selected, counts, sigma = ( - primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=1.0, delta=1e-5 - ) - ) - self.assertEmpty(selected) - self.assertEmpty(counts) - self.assertEqual(sigma, 1.0) + mech = primitives.DPPartitionSelection(delta=1e-5, sigma=1.0) + result = mech(self.rng, data) + self.assertEmpty(result.selected_partitions) + self.assertEmpty(result.estimated_counts) def test_high_budget_selects_all(self): data = np.array([1, 2, 3, 4, 5]) - selected, _, _ = primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=np.inf, delta=0.1 - ) - self.assertCountEqual(selected, [1, 2, 3, 4, 5]) - - def test_zero_budget_raises(self): - data = np.array([1, 2, 3]) - with self.assertRaises(ValueError): - primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=-0.1, delta=1e-5 - ) - with self.assertRaises(ValueError): - primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=1.0, delta=-0.001 - ) + mech = primitives.DPPartitionSelection(delta=0.1, sigma=0.0) + result = mech(self.rng, data) + self.assertCountEqual(result.selected_partitions, [1, 2, 3, 4, 5]) def test_rare_items_not_selected(self): # One item with many occurrences, another with just 1. # With moderate budget and tight delta, the rare item should be dropped. data = np.array([1] * 100 + [2]) - selected, _, _ = primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=0.5, delta=1e-6 - ) - self.assertIn(1, selected) - self.assertNotIn(2, selected) + mech = primitives.DPPartitionSelection(delta=1e-6, sigma=1.0 / np.sqrt(0.5)) + result = mech(self.rng, data) + self.assertIn(1, result.selected_partitions) + self.assertNotIn(2, result.selected_partitions) def test_string_data_type(self): data = np.array(["a", "b", "a", "a", "c", "a", "c"]) - selected, _, _ = primitives.select_partitions_gaussian_thresholding( - self.rng, data, gdp_budget=10.0, delta=1e-5 + mech = primitives.DPPartitionSelection( + delta=1e-5, sigma=1.0 / np.sqrt(10.0) ) - self.assertTrue(all(isinstance(p, str) for p in selected)) + result = mech(self.rng, data) + self.assertTrue(all(isinstance(p, str) for p in result.selected_partitions)) def test_min_count_filters_low_count_partitions(self): # Partition 1 has count 50, partition 2 has count 3. @@ -327,20 +311,23 @@ def setUp(self): def test_basic_operation(self): data = np.array([0, 0, 1, 1, 1, 2]) - result = primitives._gaussian_histogram(self.rng, data, 4, sigma=1.0) - self.assertLen(result, 4) + mech = primitives.DPGaussianHistogram(domain_size=4, sigma=1.0) + result = mech(self.rng, data) + self.assertLen(result.counts, 4) # Noisy counts should be close to true counts [2, 3, 1, 0]. - np.testing.assert_allclose(result, [2, 3, 1, 0], atol=5.0) + np.testing.assert_allclose(result.counts, [2, 3, 1, 0], atol=5.0) def test_zero_sigma(self): data = np.array([0, 0, 1, 2, 2, 2]) - result = primitives._gaussian_histogram(self.rng, data, 3, sigma=0.0) - np.testing.assert_array_equal(result, [2, 1, 3]) + mech = primitives.DPGaussianHistogram(domain_size=3, sigma=0.0) + result = mech(self.rng, data) + np.testing.assert_array_equal(result.counts, [2, 1, 3]) def test_empty_data(self): data = np.array([], dtype=int) - result = primitives._gaussian_histogram(self.rng, data, 3, sigma=1.0) - self.assertLen(result, 3) + mech = primitives.DPGaussianHistogram(domain_size=3, sigma=1.0) + result = mech(self.rng, data) + self.assertLen(result.counts, 3) # --------------------------------------------------------------------------- @@ -460,5 +447,36 @@ def test_dp_event_type(self): self.assertAlmostEqual(event.noise_multiplier, 1.0) +class DPGaussianCountTest(absltest.TestCase): + + def setUp(self): + super().setUp() + self.rng = np.random.default_rng(42) + + def test_calibrate_and_call(self): + mech = primitives.DPGaussianCount() + calibrated = mech.calibrate(zcdp_rho=0.5) + data = np.array([1, 2, 3, 4, 5]) + result = calibrated(self.rng, data) + self.assertIsInstance(result, float) + np.testing.assert_allclose(result, 5.0, atol=5.0) + + def test_zero_sigma_returns_exact_count(self): + mech = primitives.DPGaussianCount(sigma=0.0) + data = np.array([10, 20, 30]) + self.assertEqual(mech(self.rng, data), 3.0) + + def test_dp_event_raises_before_calibration(self): + mech = primitives.DPGaussianCount() + with self.assertRaises(ValueError): + _ = mech.dp_event + + def test_dp_event_type(self): + mech = primitives.DPGaussianCount().calibrate(zcdp_rho=0.5) + event = mech.dp_event + self.assertIsInstance(event, dp_accounting.GaussianDpEvent) + self.assertAlmostEqual(event.noise_multiplier, 1.0) + + if __name__ == "__main__": absltest.main() diff --git a/tests/pydantic_api_test.py b/tests/pydantic_api_test.py index 5624f8e..eeb7aa9 100644 --- a/tests/pydantic_api_test.py +++ b/tests/pydantic_api_test.py @@ -259,7 +259,7 @@ def test_dp_synthetic_data_generation_with_supported_model(self): synth = data_generation_v3.TabularSynthesizer( domains=domains, ).calibrate(epsilon=epsilon, delta=delta) - synthetic_df = synth(np.random.default_rng(), df) + synthetic_df = synth(np.random.default_rng(), df).synthetic_data synthetic_records = pydantic_api.dataframe_to_models( synthetic_df, SupportedModel, domains ) @@ -297,7 +297,7 @@ def test_dp_synthetic_data_generation_with_numerical_model(self): synth = data_generation_v3.TabularSynthesizer( domains=domains, ).calibrate(epsilon=epsilon, delta=delta) - synthetic_df = synth(np.random.default_rng(), df) + synthetic_df = synth(np.random.default_rng(), df).synthetic_data synthetic_records = pydantic_api.dataframe_to_models( synthetic_df, ModelForNumericalDefaults, domains ) @@ -337,7 +337,7 @@ def test_dp_synthetic_data_generation_with_categorical_model(self): synth = data_generation_v3.TabularSynthesizer( domains=domains, ).calibrate(epsilon=epsilon, delta=delta) - synthetic_df = synth(np.random.default_rng(), df) + synthetic_df = synth(np.random.default_rng(), df).synthetic_data synthetic_records = pydantic_api.dataframe_to_models( synthetic_df, ModelForCategorical, domains )