From 82882b67982eab525ca5c80a1a2742ca814c8dba Mon Sep 17 00:00:00 2001 From: Anshul Singh Date: Thu, 14 May 2026 14:28:50 +0530 Subject: [PATCH 1/5] Preserve user new-format JsonNode in AbstractIndexType#convertToNewFormat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migration step unconditionally overwrote every FieldConfig.indexes JsonNode for which the typed POJO was non-default — even on columns the user had already supplied in new-format slim shape. After the typed-POJO round-trip (Jackson fills missing primitive @JsonCreator params with 0/false; the bean serializer emits every key), the user's slim {"forward":{"compressionCodec":"SNAPPY"}} got fattened into a verbose 7-key blob on every migration pass. This change makes the write gap-filling: when a column already carries a JsonNode at FieldConfig.indexes[prettyName], the migration preserves it verbatim. Pure-legacy inputs still translate to new format unchanged. Same-column / same-index-type declared in both legacy and new format continues to raise ConfigDeclaredTwiceException via the existing merged deserializer (no semantic change there). The new test class pins the full contract: pure-new-format preservation (including explicit-default and explicit-false), legacy-only translation, both-formats coexistence across different index types, idempotency on repeated migration, multi-column independence, multiple index types on one column, legacy-field cleanup, default-config skip path, cross-type coverage (range/text/json), and an end-to-end Jackson round-trip. --- .../ConvertToNewFormatPreservesSlimTest.java | 441 ++++++++++++++++++ .../segment/spi/index/AbstractIndexType.java | 36 +- 2 files changed, 472 insertions(+), 5 deletions(-) create mode 100644 pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java new file mode 100644 index 000000000000..22f4c046b8a6 --- /dev/null +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java @@ -0,0 +1,441 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.utils; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.pinot.segment.spi.index.MergedColumnConfigDeserializer.ConfigDeclaredTwiceException; +import org.apache.pinot.spi.config.table.FieldConfig; +import org.apache.pinot.spi.config.table.FieldConfig.EncodingType; +import org.apache.pinot.spi.config.table.IndexingConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.expectThrows; + + +/** + * Pins the gap-filling contract for {@link TableConfigUtils#createTableConfigFromOldFormat} / + * {@code AbstractIndexType#convertToNewFormat}: migration only writes + * {@code FieldConfig.indexes[prettyName]} when the column does not already carry a JsonNode for + * that index type. Columns already in new format keep their slim shape verbatim; legacy-only + * inputs are translated as before; both-format coexistence resolves "new format wins". + * + *

This contract is load-bearing for downstream APIs (e.g. StarTree Preview / InferIndex) that + * round-trip user-supplied {@code FieldConfig.indexes} JsonNodes through the migration step and + * back into a response: without the gap-filling rule the user's slim shape gets fattened on + * every round-trip because the typed-POJO bean serializer always emits all keys. + */ +public class ConvertToNewFormatPreservesSlimTest { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + // ---- Pure new-format input survives byte-for-byte ---- + + @Test + public void newFormatForwardSlimPreservedVerbatim() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("compressionCodec", "SNAPPY"); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode forward = indexesOf(migrated, "c1").get("forward"); + assertNotNull(forward); + assertEquals(forward.size(), 1, "Single user key survives verbatim. Got: " + forward); + assertEquals(forward.get("compressionCodec").asText(), "SNAPPY"); + } + + @Test + public void newFormatBloomSlimPreservedVerbatim() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("bloom").put("fpp", 0.1); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode bloom = indexesOf(migrated, "c1").get("bloom"); + assertNotNull(bloom); + assertEquals(bloom.size(), 1); + assertEquals(bloom.get("fpp").asDouble(), 0.1); + } + + @Test + public void newFormatExplicitDefaultValuePreserved() { + // User explicitly sets a value that happens to equal today's default. Migration must keep + // the explicit key so a future default flip cannot silently swallow user intent. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("rawIndexWriterVersion", 4); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode forward = indexesOf(migrated, "c1").get("forward"); + assertEquals(forward.size(), 1); + assertEquals(forward.get("rawIndexWriterVersion").asInt(), 4); + } + + @Test + public void newFormatExplicitFalsePreserved() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("deriveNumDocsPerChunk", false); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode forward = indexesOf(migrated, "c1").get("forward"); + assertEquals(forward.size(), 1); + assertFalse(forward.get("deriveNumDocsPerChunk").asBoolean()); + } + + @Test + public void newFormatEmptyObjectPreserved() { + // {"inverted": {}} means "enabled with no settings" — must remain an empty object, not be + // replaced with the typed POJO default. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("inverted"); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode inverted = indexesOf(migrated, "c1").get("inverted"); + assertNotNull(inverted); + assertTrue(inverted.isObject()); + assertEquals(inverted.size(), 0); + } + + // ---- Pure legacy input still translates to new format (back-compat) ---- + + @Test + public void legacyBloomFilterColumnsTranslatedToNewFormat() { + TableConfig tc = baseTc(); + tc.getIndexingConfig().setBloomFilterColumns(new ArrayList<>(Arrays.asList("c1"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode indexes = indexesOf(migrated, "c1"); + assertNotNull(indexes.get("bloom"), "Legacy bloomFilterColumns translates to new format"); + } + + @Test + public void legacyNoDictionaryColumnsTranslatedToRawEncoding() { + TableConfig tc = baseTc(); + tc.getIndexingConfig().setNoDictionaryColumns(new ArrayList<>(Arrays.asList("c1"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + FieldConfig fc = findFieldConfig(migrated, "c1"); + assertEquals(fc.getEncodingType(), EncodingType.RAW); + } + + // ---- Both-formats coexistence ---- + + @Test + public void bothFormatsDifferentIndexTypesCoexist() { + // User: slim new-format bloom on c1. Legacy: noDictionaryColumns=[c1] (i.e. RAW encoding). + // Different index types in different surfaces — both translate independently, each obeying the + // gap-fill rule for its own type. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("bloom").put("fpp", 0.1); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + tc.getIndexingConfig().setNoDictionaryColumns(new ArrayList<>(Arrays.asList("c1"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + // bloom: user slim shape preserved verbatim. + JsonNode bloom = indexesOf(migrated, "c1").get("bloom"); + assertEquals(bloom.size(), 1, "User slim shape preserved. Got: " + bloom); + assertEquals(bloom.get("fpp").asDouble(), 0.1); + // encoding flipped to RAW by legacy translation. + assertEquals(findFieldConfig(migrated, "c1").getEncodingType(), EncodingType.RAW); + } + + @Test + public void bothFormatsSameIndexTypeThrowsConfigDeclaredTwice() { + // User: new-format bloom on c1. Legacy: bloomFilterColumns=[c1]. Same column + same index type + // declared in both legacy and new format → existing deserializer raises ConfigDeclaredTwice. + // Pin this behavior — gap-fill must not paper over the conflict. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("bloom").put("fpp", 0.1); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + tc.getIndexingConfig().setBloomFilterColumns(new ArrayList<>(Arrays.asList("c1"))); + + expectThrows(ConfigDeclaredTwiceException.class, + () -> TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1"))); + } + + // ---- Idempotency: running migration twice == running it once ---- + + @Test + public void migrationIsIdempotentOnNewFormatInput() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("compressionCodec", "SNAPPY"); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig once = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + TableConfig twice = TableConfigUtils.createTableConfigFromOldFormat(once, schemaWith("c1")); + + assertEquals( + indexesOf(twice, "c1").toString(), + indexesOf(once, "c1").toString(), + "Running migration twice produces the same FieldConfig.indexes as once"); + } + + @Test + public void migrationIsIdempotentOnLegacyInput() { + TableConfig tc = baseTc(); + tc.getIndexingConfig().setBloomFilterColumns(new ArrayList<>(Arrays.asList("c1"))); + + TableConfig once = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + TableConfig twice = TableConfigUtils.createTableConfigFromOldFormat(once, schemaWith("c1")); + + assertEquals( + indexesOf(twice, "c1").toString(), + indexesOf(once, "c1").toString()); + } + + // ---- Multi-column independence ---- + + @Test + public void mixedColumnsEachShapeIndependent() { + // c1: slim new format. c2: legacy only. c3: column not in field config (added by legacy). + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("compressionCodec", "SNAPPY"); + TableConfig tc = baseTc(); + List fcl = new ArrayList<>(); + fcl.add(new FieldConfig.Builder("c1").withEncodingType(EncodingType.RAW).withIndexes(slim).build()); + tc.setFieldConfigList(fcl); + tc.getIndexingConfig().setBloomFilterColumns(new ArrayList<>(Arrays.asList("c2", "c3"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1", "c2", "c3")); + + // c1: slim survives. + JsonNode c1Forward = indexesOf(migrated, "c1").get("forward"); + assertEquals(c1Forward.size(), 1); + assertEquals(c1Forward.get("compressionCodec").asText(), "SNAPPY"); + + // c2 + c3: legacy translated to new format. + assertNotNull(indexesOf(migrated, "c2").get("bloom")); + assertNotNull(indexesOf(migrated, "c3").get("bloom")); + } + + // ---- Multiple index types on one column ---- + + @Test + public void multipleIndexTypesOneColumnEachSlimShapePreserved() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("compressionCodec", "LZ4"); + slim.putObject("bloom").put("fpp", 0.05); + slim.putObject("inverted"); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode indexes = indexesOf(migrated, "c1"); + assertEquals(indexes.get("forward").size(), 1); + assertEquals(indexes.get("forward").get("compressionCodec").asText(), "LZ4"); + assertEquals(indexes.get("bloom").size(), 1); + assertEquals(indexes.get("bloom").get("fpp").asDouble(), 0.05); + assertEquals(indexes.get("inverted").size(), 0); + } + + // ---- Cross-type inference still fires (dictionary auto-create for inverted, etc.) ---- + + @Test + public void invertedIndexLegacyOnlyTranslatedWithoutFattening() { + // Pure legacy invertedIndexColumns. No FieldConfig.indexes yet → migration writes the inverted + // entry. Verbose form is acceptable here because the user did not supply a slim shape. + TableConfig tc = baseTc(); + tc.getIndexingConfig().setInvertedIndexColumns(new ArrayList<>(Arrays.asList("c1"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode inverted = indexesOf(migrated, "c1").get("inverted"); + assertNotNull(inverted, "Legacy invertedIndexColumns translates to new format"); + } + + // ---- Legacy cleanup ---- + + @Test + public void legacyFieldsClearedAfterMigration() { + TableConfig tc = baseTc(); + tc.getIndexingConfig().setBloomFilterColumns(new ArrayList<>(Arrays.asList("c1"))); + tc.getIndexingConfig().setNoDictionaryColumns(new ArrayList<>(Arrays.asList("c2"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1", "c2")); + IndexingConfig ic = migrated.getIndexingConfig(); + + assertTrue(ic.getBloomFilterColumns() == null || ic.getBloomFilterColumns().isEmpty(), + "Legacy bloomFilterColumns cleared post-migration"); + assertTrue(ic.getNoDictionaryColumns() == null || ic.getNoDictionaryColumns().isEmpty(), + "Legacy noDictionaryColumns cleared post-migration"); + } + + @Test + public void legacyCleanupRunsEvenWhenUserSlimShapePreserved() { + // User has slim new-format bloom on c1 (new format) AND legacy invertedIndexColumns=[c1] + // (different index type, so no ConfigDeclaredTwice). Migration preserves user slim bloom and + // also translates legacy inverted → new format. Legacy cleanup must still drop + // invertedIndexColumns from indexingConfig so ZK doesn't carry both representations. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("bloom").put("fpp", 0.1); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + tc.getIndexingConfig().setInvertedIndexColumns(new ArrayList<>(Arrays.asList("c1"))); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + assertTrue( + migrated.getIndexingConfig().getInvertedIndexColumns() == null + || migrated.getIndexingConfig().getInvertedIndexColumns().isEmpty(), + "Legacy invertedIndexColumns dropped post-migration"); + assertEquals(indexesOf(migrated, "c1").get("bloom").size(), 1, + "User slim bloom shape preserved untouched"); + assertNotNull(indexesOf(migrated, "c1").get("inverted"), + "Legacy inverted translated into new format"); + } + + // ---- Default-config skip path ---- + + @Test + public void defaultConfigInNewFormatKeptVerbatim() { + // User supplies a config that equals the type's default. Today's code: skipped (continue). + // Behavior preserved with the gap-fill change. + ObjectNode slim = MAPPER.createObjectNode(); + // disabled=false matches IndexConfig default → typed value equals default → skipped. + slim.putObject("inverted").put("disabled", false); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + // The original JsonNode is preserved verbatim because we never wrote (skipped). + JsonNode inverted = indexesOf(migrated, "c1").get("inverted"); + assertNotNull(inverted); + assertTrue(inverted.has("disabled")); + assertFalse(inverted.get("disabled").asBoolean(), "User's explicit disabled=false preserved"); + } + + // ---- Cross-type coverage: gap-fill applies uniformly to every AbstractIndexType subclass ---- + + @Test + public void newFormatRangeSlimPreservedVerbatim() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("range").put("version", 1); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode range = indexesOf(migrated, "c1").get("range"); + assertEquals(range.size(), 1, "Range slim preserved. Got: " + range); + assertEquals(range.get("version").asInt(), 1); + } + + @Test + public void newFormatTextSlimPreservedVerbatim() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("text").put("luceneAnalyzerClass", "org.example.MyAnalyzer"); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode text = indexesOf(migrated, "c1").get("text"); + assertEquals(text.size(), 1, "Text slim preserved. Got: " + text); + assertEquals(text.get("luceneAnalyzerClass").asText(), "org.example.MyAnalyzer"); + } + + @Test + public void newFormatJsonSlimPreservedVerbatim() { + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("json").put("maxLevels", 5); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode json = indexesOf(migrated, "c1").get("json"); + assertEquals(json.size(), 1, "Json slim preserved. Got: " + json); + assertEquals(json.get("maxLevels").asInt(), 5); + } + + // ---- End-to-end Jackson round-trip: slim shape survives serialize → deserialize ---- + + @Test + public void slimShapeSurvivesJacksonRoundTrip() throws Exception { + // Production failure mode: user POSTs slim JSON → service deserializes → migration → service + // re-serializes the result → response gets fattened with bean-serializer defaults. Pin the + // full round-trip so a regression in FieldConfig serialization or IndexConfig.toJsonNode + // would break this test, not just the in-memory checks above. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("compressionCodec", "SNAPPY"); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + String serialized = MAPPER.writeValueAsString(migrated); + TableConfig deserialized = MAPPER.readValue(serialized, TableConfig.class); + + JsonNode forward = indexesOf(deserialized, "c1").get("forward"); + assertNotNull(forward, "forward survives Jackson round-trip"); + assertEquals(forward.size(), 1, + "Slim shape survives serialize → deserialize. Got: " + forward); + assertEquals(forward.get("compressionCodec").asText(), "SNAPPY"); + } + + // ---- Helpers ---- + + private static TableConfig baseTc() { + return new TableConfigBuilder(TableType.OFFLINE).setTableName("t").build(); + } + + private static TableConfig withFieldConfig(String name, EncodingType encoding, JsonNode indexes) { + TableConfig tc = baseTc(); + List fcl = new ArrayList<>(); + fcl.add(new FieldConfig.Builder(name).withEncodingType(encoding).withIndexes(indexes).build()); + tc.setFieldConfigList(fcl); + return tc; + } + + private static Schema schemaWith(String... cols) { + Schema.SchemaBuilder b = new Schema.SchemaBuilder().setSchemaName("t"); + for (String c : cols) { + b.addSingleValueDimension(c, DataType.STRING); + } + return b.build(); + } + + private static FieldConfig findFieldConfig(TableConfig tc, String col) { + return tc.getFieldConfigList().stream() + .filter(fc -> fc.getName().equals(col)) + .findFirst() + .orElseThrow(); + } + + private static JsonNode indexesOf(TableConfig tc, String col) { + return findFieldConfig(tc, col).getIndexes(); + } +} diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java index ced61841acf4..8abb21d6fddd 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java @@ -38,6 +38,8 @@ public abstract class AbstractIndexType implements IndexType { + private static final ObjectMapper MAPPER = new ObjectMapper(); + private final String _id; private ColumnConfigDeserializer _deserializer; private IndexReaderFactory _readerFactory; @@ -88,6 +90,23 @@ public IndexReaderFactory getReaderFactory() { return _readerFactory; } + /** + * Migrates legacy {@link org.apache.pinot.spi.config.table.IndexingConfig} index settings into + * the new {@code FieldConfig.indexes} JsonNode format. + * + *

The migration is gap-filling: for each column whose typed config is non-default, this + * method writes the typed config's verbose JsonNode into {@code FieldConfig.indexes} only when + * the column does not already carry a new-format JsonNode for this index type. Columns + * already supplied in new format keep their original (possibly slim) JsonNode shape — this + * preserves user-supplied keys verbatim through the round-trip and avoids fattening pure + * new-format inputs with the typed-POJO bean-serializer defaults. + * + *

Both-formats coexistence resolves "new format wins": if the user supplied both a legacy + * {@code indexingConfig.*} entry and a new-format {@code FieldConfig.indexes[prettyName]} + * JsonNode for the same column + type, the new-format JsonNode is the source of truth. The + * legacy entry is dropped by {@link #handleIndexSpecificCleanup} below, matching the documented + * migration intent. + */ public void convertToNewFormat(TableConfig tableConfig, Schema schema) { Map deserialize = getConfig(tableConfig, schema); List fieldConfigList = tableConfig.getFieldConfigList() == null @@ -103,15 +122,22 @@ public void convertToNewFormat(TableConfig tableConfig, Schema schema) { FieldConfig fieldConfig = fieldConfigMap.get(entry.getKey()); if (fieldConfig != null) { ObjectNode currentIndexes = fieldConfig.getIndexes().isNull() - ? new ObjectMapper().createObjectNode() - : new ObjectMapper().valueToTree(fieldConfig.getIndexes()); - JsonNode indexes = currentIndexes.set(getPrettyName(), configValue.toJsonNode()); + ? MAPPER.createObjectNode() + : MAPPER.valueToTree(fieldConfig.getIndexes()); + JsonNode existing = currentIndexes.get(getPrettyName()); + if (existing != null && !existing.isNull()) { + // Column already carries a new-format JsonNode for this index type — preserve the + // user's shape verbatim. Legacy-only inputs fall through to the set() branch below; + // both-formats coexistence resolves to "new format wins". + continue; + } + currentIndexes.set(getPrettyName(), configValue.toJsonNode()); FieldConfig.Builder builder = new FieldConfig.Builder(fieldConfig); - builder.withIndexes(indexes); + builder.withIndexes(currentIndexes); fieldConfigList.remove(fieldConfig); fieldConfigList.add(builder.build()); } else { - JsonNode indexes = new ObjectMapper().createObjectNode().set(getPrettyName(), configValue.toJsonNode()); + JsonNode indexes = MAPPER.createObjectNode().set(getPrettyName(), configValue.toJsonNode()); FieldConfig.Builder builder = new FieldConfig.Builder(entry.getKey()); builder.withIndexes(indexes); builder.withEncodingType(FieldConfig.EncodingType.DICTIONARY); From 78cd18407dcf78bae88599f8281b2f99901af98f Mon Sep 17 00:00:00 2001 From: Anshul Singh Date: Thu, 14 May 2026 15:31:52 +0530 Subject: [PATCH 2/5] [pinot-segment-spi] Add disabled=true preservation test Pins the gap-fill rule against silent re-enable: user sets {"forward":{"disabled":true}}, migration must preserve that exact key, not overwrite with the typed-POJO default (disabled=false). --- .../ConvertToNewFormatPreservesSlimTest.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java index 22f4c046b8a6..92471843bb33 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java @@ -116,6 +116,22 @@ public void newFormatExplicitFalsePreserved() { assertFalse(forward.get("deriveNumDocsPerChunk").asBoolean()); } + @Test + public void newFormatExplicitDisabledTruePreserved() { + // User opts out of forward by setting disabled=true. Migration must keep that exact shape, + // not overwrite with the typed POJO's disabled=false default — silent re-enable would be a + // correctness bug. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putObject("forward").put("disabled", true); + TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); + + TableConfig migrated = TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + + JsonNode forward = indexesOf(migrated, "c1").get("forward"); + assertEquals(forward.size(), 1); + assertTrue(forward.get("disabled").asBoolean(), "Explicit disabled=true preserved"); + } + @Test public void newFormatEmptyObjectPreserved() { // {"inverted": {}} means "enabled with no settings" — must remain an empty object, not be From 113bfae639a63ec56427ee79699070f26ed5ae28 Mon Sep 17 00:00:00 2001 From: Anshul Singh Date: Thu, 14 May 2026 16:34:50 +0530 Subject: [PATCH 3/5] [pinot-segment-spi] Address review: javadoc clarity + NullNode rejection test + MAPPER comment Clarify convertToNewFormat javadoc to accurately describe the merged deserializer's role: same-column-same-type conflicts raise ConfigDeclaredTwiceException BEFORE the gap-fill loop runs, so the loop is only entered for non-conflicting inputs. Drop the misleading "new format wins" prose. Add newFormatExplicitNullValueRejectedByIndexTypeValidator test pinning that user-supplied {"forward": null} is rejected by per-index-type validators (ForwardIndexType) before reaching the gap-fill guard. Users must use {} (empty object) for "enabled with defaults", not null. Add inline comment on MAPPER documenting thread-safe sharing. --- .../ConvertToNewFormatPreservesSlimTest.java | 21 +++++++++- .../segment/spi/index/AbstractIndexType.java | 39 +++++++++++++------ 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java index 92471843bb33..a859eea9e214 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java @@ -120,7 +120,7 @@ public void newFormatExplicitFalsePreserved() { public void newFormatExplicitDisabledTruePreserved() { // User opts out of forward by setting disabled=true. Migration must keep that exact shape, // not overwrite with the typed POJO's disabled=false default — silent re-enable would be a - // correctness bug. + // correctness bug. Non-default typed POJO → gap-fill branch fires. ObjectNode slim = MAPPER.createObjectNode(); slim.putObject("forward").put("disabled", true); TableConfig tc = withFieldConfig("c1", EncodingType.RAW, slim); @@ -132,6 +132,25 @@ public void newFormatExplicitDisabledTruePreserved() { assertTrue(forward.get("disabled").asBoolean(), "Explicit disabled=true preserved"); } + @Test + public void newFormatExplicitNullValueRejectedByIndexTypeValidator() { + // Pin the actual handling of {"forward": null}: per-index-type validators (here + // ForwardIndexType.createDeserializer's lambda) reject NullNode as an invalid forward index + // config and raise IllegalStateException. The gap-fill loop is never reached. Users wanting + // "enabled with defaults" must use {} (empty object), not null. + ObjectNode slim = MAPPER.createObjectNode(); + slim.set("forward", com.fasterxml.jackson.databind.node.NullNode.getInstance()); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + try { + TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + org.testng.Assert.fail("Expected IllegalStateException from ForwardIndexType validator"); + } catch (IllegalStateException expected) { + assertTrue(expected.getMessage() != null && expected.getMessage().contains("forward index"), + "Expected forward-index validation error; got: " + expected.getMessage()); + } + } + @Test public void newFormatEmptyObjectPreserved() { // {"inverted": {}} means "enabled with no settings" — must remain an empty object, not be diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java index 8abb21d6fddd..3c7b59066324 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java @@ -38,6 +38,7 @@ public abstract class AbstractIndexType implements IndexType { + // ObjectMapper is thread-safe after construction; share across invocations. private static final ObjectMapper MAPPER = new ObjectMapper(); private final String _id; @@ -96,16 +97,31 @@ public IndexReaderFactory getReaderFactory() { * *

The migration is gap-filling: for each column whose typed config is non-default, this * method writes the typed config's verbose JsonNode into {@code FieldConfig.indexes} only when - * the column does not already carry a new-format JsonNode for this index type. Columns - * already supplied in new format keep their original (possibly slim) JsonNode shape — this - * preserves user-supplied keys verbatim through the round-trip and avoids fattening pure + * the column does not already carry a JsonNode at {@code prettyName} for this index type. + * Columns already supplied in new format keep their original (possibly slim) JsonNode shape — + * this preserves user-supplied keys verbatim through the round-trip and avoids fattening pure * new-format inputs with the typed-POJO bean-serializer defaults. * - *

Both-formats coexistence resolves "new format wins": if the user supplied both a legacy - * {@code indexingConfig.*} entry and a new-format {@code FieldConfig.indexes[prettyName]} - * JsonNode for the same column + type, the new-format JsonNode is the source of truth. The - * legacy entry is dropped by {@link #handleIndexSpecificCleanup} below, matching the documented - * migration intent. + *

Same-column + same-type conflict resolution is not handled here. If a column declares + * the same index type in both legacy {@code indexingConfig.*} and new-format + * {@code FieldConfig.indexes[prettyName]}, {@link #getConfig(TableConfig, Schema)} raises + * {@link MergedColumnConfigDeserializer.ConfigDeclaredTwiceException} before the gap-fill + * loop is reached. This method is only entered for non-conflicting inputs: + * + *

+ * + *

An explicit Jackson {@code NullNode} at {@code prettyName} (i.e. {@code "forward": null}) + * is not a valid input shape — per-index-type validators (called from {@code getConfig}) reject + * it before the gap-fill loop runs. Use an empty object {@code {}} to mean "enabled with + * defaults", not {@code null}. */ public void convertToNewFormat(TableConfig tableConfig, Schema schema) { Map deserialize = getConfig(tableConfig, schema); @@ -126,9 +142,10 @@ public void convertToNewFormat(TableConfig tableConfig, Schema schema) { : MAPPER.valueToTree(fieldConfig.getIndexes()); JsonNode existing = currentIndexes.get(getPrettyName()); if (existing != null && !existing.isNull()) { - // Column already carries a new-format JsonNode for this index type — preserve the - // user's shape verbatim. Legacy-only inputs fall through to the set() branch below; - // both-formats coexistence resolves to "new format wins". + // Column already carries a JsonNode at prettyName for this index type — preserve the + // user's shape verbatim. Legacy-only inputs (existing == null) fall through to the + // set() branch below. Same-column + same-type conflicts are surfaced as + // ConfigDeclaredTwiceException by getConfig() before this loop is reached. continue; } currentIndexes.set(getPrettyName(), configValue.toJsonNode()); From 5e5ca6378826e6fdfabb58634e637969750fc2b5 Mon Sep 17 00:00:00 2001 From: Anshul Singh Date: Thu, 14 May 2026 16:41:36 +0530 Subject: [PATCH 4/5] [pinot-segment-spi] Replace inline FQCNs with imports in NullNode test Pinot convention prefers imports over fully-qualified class names. Add imports for NullNode and static Assert.fail; use unqualified references in newFormatExplicitNullValueRejectedByIndexTypeValidator. --- .../local/utils/ConvertToNewFormatPreservesSlimTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java index a859eea9e214..6fcac82eb66d 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java @@ -20,6 +20,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.NullNode; import com.fasterxml.jackson.databind.node.ObjectNode; import java.util.ArrayList; import java.util.Arrays; @@ -40,6 +41,7 @@ import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; import static org.testng.Assert.expectThrows; +import static org.testng.Assert.fail; /** @@ -139,12 +141,12 @@ public void newFormatExplicitNullValueRejectedByIndexTypeValidator() { // config and raise IllegalStateException. The gap-fill loop is never reached. Users wanting // "enabled with defaults" must use {} (empty object), not null. ObjectNode slim = MAPPER.createObjectNode(); - slim.set("forward", com.fasterxml.jackson.databind.node.NullNode.getInstance()); + slim.set("forward", NullNode.getInstance()); TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); try { TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); - org.testng.Assert.fail("Expected IllegalStateException from ForwardIndexType validator"); + fail("Expected IllegalStateException from ForwardIndexType validator"); } catch (IllegalStateException expected) { assertTrue(expected.getMessage() != null && expected.getMessage().contains("forward index"), "Expected forward-index validation error; got: " + expected.getMessage()); From 0ee367ad228db0f84614c0182ce67a2db3fe92b0 Mon Sep 17 00:00:00 2001 From: Anshul Singh Date: Thu, 14 May 2026 16:49:58 +0530 Subject: [PATCH 5/5] [pinot-segment-spi] Address review nits: softer NullNode prose + adversarial-input tests - Production javadoc: drop ForwardIndexType-specific claim; describe both layers (per-type Preconditions check AND gap-fill !isNull() fallback). - Test javadoc: drop misleading "new format wins" prose; replace with ConfigDeclaredTwiceException reference matching production. - NullNode test assertion: match on "column: c1" instead of the index-type prose to decouple from the exact validator message. - Add newFormatPrimitiveAtPrettyNameRejected and newFormatArrayAtPrettyNameRejected for adversarial non-object JsonNode shapes; pin the loud-failure path. --- .../ConvertToNewFormatPreservesSlimTest.java | 41 +++++++++++++++++-- .../segment/spi/index/AbstractIndexType.java | 11 +++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java index 6fcac82eb66d..eb197f16e07b 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/ConvertToNewFormatPreservesSlimTest.java @@ -49,7 +49,9 @@ * {@code AbstractIndexType#convertToNewFormat}: migration only writes * {@code FieldConfig.indexes[prettyName]} when the column does not already carry a JsonNode for * that index type. Columns already in new format keep their slim shape verbatim; legacy-only - * inputs are translated as before; both-format coexistence resolves "new format wins". + * inputs are translated as before. Same-column same-type coexistence is rejected upstream with + * {@code ConfigDeclaredTwiceException}; different index types on the same column translate + * independently. * *

This contract is load-bearing for downstream APIs (e.g. StarTree Preview / InferIndex) that * round-trip user-supplied {@code FieldConfig.indexes} JsonNodes through the migration step and @@ -148,8 +150,41 @@ public void newFormatExplicitNullValueRejectedByIndexTypeValidator() { TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); fail("Expected IllegalStateException from ForwardIndexType validator"); } catch (IllegalStateException expected) { - assertTrue(expected.getMessage() != null && expected.getMessage().contains("forward index"), - "Expected forward-index validation error; got: " + expected.getMessage()); + assertTrue(expected.getMessage() != null && expected.getMessage().contains("column: c1"), + "Expected validation error naming column c1; got: " + expected.getMessage()); + } + } + + @Test + public void newFormatPrimitiveAtPrettyNameRejected() { + // Adversarial input: {"forward": 42}. ForwardIndexType's createDeserializer rejects + // non-object values; pin the loud failure path. + ObjectNode slim = MAPPER.createObjectNode(); + slim.put("forward", 42); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + try { + TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + fail("Expected IllegalStateException for primitive at prettyName"); + } catch (IllegalStateException expected) { + assertTrue(expected.getMessage() != null && expected.getMessage().contains("column: c1"), + "Expected validation error naming column c1; got: " + expected.getMessage()); + } + } + + @Test + public void newFormatArrayAtPrettyNameRejected() { + // Adversarial input: {"forward": [1,2,3]}. Same validator path as above. + ObjectNode slim = MAPPER.createObjectNode(); + slim.putArray("forward").add(1).add(2).add(3); + TableConfig tc = withFieldConfig("c1", EncodingType.DICTIONARY, slim); + + try { + TableConfigUtils.createTableConfigFromOldFormat(tc, schemaWith("c1")); + fail("Expected IllegalStateException for array at prettyName"); + } catch (IllegalStateException expected) { + assertTrue(expected.getMessage() != null && expected.getMessage().contains("column: c1"), + "Expected validation error naming column c1; got: " + expected.getMessage()); } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java index 3c7b59066324..446c5a536123 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/AbstractIndexType.java @@ -118,10 +118,13 @@ public IndexReaderFactory getReaderFactory() { * follows one of the rules above. * * - *

An explicit Jackson {@code NullNode} at {@code prettyName} (i.e. {@code "forward": null}) - * is not a valid input shape — per-index-type validators (called from {@code getConfig}) reject - * it before the gap-fill loop runs. Use an empty object {@code {}} to mean "enabled with - * defaults", not {@code null}. + *

An explicit Jackson {@code NullNode} at {@code prettyName} (e.g. {@code "forward": null}) + * is handled by two layers: (1) some index types (e.g. {@code ForwardIndexType}) reject + * non-object values via a {@code Preconditions.checkState(...)} in their + * {@code createDeserializer} lambda before the gap-fill loop runs; (2) for types without that + * upstream check, the gap-fill predicate itself treats {@code NullNode} as absent + * ({@code !existing.isNull()}) and falls through to {@code set()}. Use an empty object + * {@code {}} to mean "enabled with defaults", not {@code null}. */ public void convertToNewFormat(TableConfig tableConfig, Schema schema) { Map deserialize = getConfig(tableConfig, schema);