From 7aad8286593ff7e907a6ccbe0688d939e76d645d Mon Sep 17 00:00:00 2001 From: silverweed Date: Tue, 28 Apr 2026 11:44:23 +0200 Subject: [PATCH 1/6] [ntuple] Fix RFieldFundamental::GenerateColumns using the wrong repr idx (cherry picked from commit b73cca9c6584592648ca68aa08e229cc204ab6c5) --- .../inc/ROOT/RField/RFieldFundamental.hxx | 4 +-- tree/ntuple/test/ntuple_multi_column.cxx | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx index c403fbfc175ea..a1e15a2157b68 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldFundamental.hxx @@ -400,11 +400,11 @@ protected: fAvailableColumns.emplace_back(ROOT::Internal::RColumn::Create(onDiskTypes[0], 0, representationIndex)); if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Trunc) { const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId()); - const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]); column->SetBitsOnStorage(coldesc.GetBitsOnStorage()); } else if (onDiskTypes[0] == ROOT::ENTupleColumnType::kReal32Quant) { const auto &fdesc = desc.GetFieldDescriptor(Base::GetOnDiskId()); - const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[0]); + const auto &coldesc = desc.GetColumnDescriptor(fdesc.GetLogicalColumnIds()[representationIndex]); assert(coldesc.GetValueRange().has_value()); const auto [valMin, valMax] = *coldesc.GetValueRange(); column->SetBitsOnStorage(coldesc.GetBitsOnStorage()); diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx index b62a16e27e958..6999193f7514f 100644 --- a/tree/ntuple/test/ntuple_multi_column.cxx +++ b/tree/ntuple/test/ntuple_multi_column.cxx @@ -416,6 +416,36 @@ TEST(RNTuple, MultiColumnRepresentationFriends) } } +TEST(RNTuple, MultiColumnRepresentationVariableBitWidth) +{ + FileRaii fileGuard("test_ntuple_multi_column_representation_varbitwidth.root"); + + { + auto model = RNTupleModel::Create(); + auto fldPx = std::make_unique>("px"); + fldPx->SetTruncated(26); + fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal32}, {ROOT::ENTupleColumnType::kReal32Trunc}}); + model->AddField(std::move(fldPx)); + auto ptrPx = model->GetDefaultEntry().GetPtr("px"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + *ptrPx = 1.0; + writer->Fill(); + writer->CommitCluster(); + ROOT::Internal::RFieldRepresentationModifier::SetPrimaryColumnRepresentation( + const_cast(writer->GetModel().GetConstField("px")), 1); + *ptrPx = 2.0; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + auto fldPx = reader->GetModel().GetDefaultEntry().GetPtr("px"); + + reader->LoadEntry(0); + EXPECT_FLOAT_EQ(1.0, *fldPx); + reader->LoadEntry(1); + EXPECT_FLOAT_EQ(2.0, *fldPx); +} + TEST(RNTuple, MultiColumnRepresentationDedup) { FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root"); From 49823379f98d5fb22ba2220eaf70ed33d0af9de6 Mon Sep 17 00:00:00 2001 From: silverweed Date: Fri, 17 Apr 2026 11:08:30 +0200 Subject: [PATCH 2/6] [ntuple] update merger test to make sure we test nRepetitions (cherry picked from commit 3ad754799b8cdcef52b887fd218b9a928f9950af) --- tree/ntuple/test/ntuple_merger.cxx | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tree/ntuple/test/ntuple_merger.cxx b/tree/ntuple/test/ntuple_merger.cxx index 962709173bd8c..e5f31384a3762 100644 --- a/tree/ntuple/test/ntuple_merger.cxx +++ b/tree/ntuple/test/ntuple_merger.cxx @@ -1003,12 +1003,13 @@ TEST(RNTupleMerger, MergeLateModelExtension) { auto model = RNTupleModel::Create(); auto fieldFoo = model->MakeField>("foo"); - auto fieldVfoo = model->MakeField>("vfoo"); + auto fieldVfoo = model->MakeField[3]>("vfoo"); auto fieldBar = model->MakeField("bar"); auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard1.GetPath(), RNTupleWriteOptions()); for (size_t i = 0; i < 10; ++i) { fieldFoo->insert(std::make_pair(std::to_string(i), i * 123)); - *fieldVfoo = {(int)i * 123}; + fieldVfoo[0] = {(int)i * 123}; + fieldVfoo[2] = {(int)i * 345}; *fieldBar = i * 321; ntuple->Fill(); } @@ -1019,14 +1020,15 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto model = RNTupleModel::Create(); auto fieldBaz = model->MakeField("baz"); auto fieldFoo = model->MakeField>("foo"); - auto fieldVfoo = model->MakeField>("vfoo"); + auto fieldVfoo = model->MakeField[3]>("vfoo"); auto wopts = RNTupleWriteOptions(); wopts.SetCompression(0); auto ntuple = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard2.GetPath(), wopts); for (size_t i = 0; i < 10; ++i) { *fieldBaz = i * 567; fieldFoo->insert(std::make_pair(std::to_string(i), i * 765)); - *fieldVfoo = {(int)i * 765}; + fieldVfoo[0] = {(int)i * 765}; + fieldVfoo[2] = {(int)i * 987}; ntuple->Fill(); } } @@ -1060,21 +1062,25 @@ TEST(RNTupleMerger, MergeLateModelExtension) auto ntuple = RNTupleReader::Open("ntuple", fileGuard3.GetPath()); EXPECT_EQ(ntuple->GetNEntries(), 20); auto foo = ntuple->GetModel().GetDefaultEntry().GetPtr>("foo"); - auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr>("vfoo"); + auto vfoo = ntuple->GetModel().GetDefaultEntry().GetPtr[3]>("vfoo"); auto bar = ntuple->GetModel().GetDefaultEntry().GetPtr("bar"); auto baz = ntuple->GetModel().GetDefaultEntry().GetPtr("baz"); for (int i = 0; i < 10; ++i) { ntuple->LoadEntry(i); ASSERT_EQ((*foo)[std::to_string(i)], i * 123); - ASSERT_EQ((*vfoo)[0], i * 123); + ASSERT_EQ(vfoo[0][0], i * 123); + ASSERT_EQ(vfoo[2][0], i * 345); + ASSERT_TRUE(vfoo[1].empty()); ASSERT_EQ(*bar, i * 321); ASSERT_EQ(*baz, 0); } for (int i = 10; i < 20; ++i) { ntuple->LoadEntry(i); ASSERT_EQ((*foo)[std::to_string(i - 10)], (i - 10) * 765); - ASSERT_EQ((*vfoo)[0], (i - 10) * 765); + ASSERT_EQ(vfoo[0][0], (i - 10) * 765); + ASSERT_EQ(vfoo[2][0], (i - 10) * 987); + ASSERT_TRUE(vfoo[1].empty()); ASSERT_EQ(*bar, 0); ASSERT_EQ(*baz, (i - 10) * 567); } From 4b5d3fa0eabced6c9d6feda90f3e0892c10d9fba Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 22 Apr 2026 14:47:11 +0200 Subject: [PATCH 3/6] [ntuple] Clarify a bit RFieldBase::EntryToColumnElementIndex Also fix the type of result (cherry picked from commit 38883b359b58eb7b3ba7c4733bbe1efa53e4f1b6) --- tree/ntuple/inc/ROOT/RFieldBase.hxx | 7 ++++--- tree/ntuple/src/RFieldBase.cxx | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RFieldBase.hxx b/tree/ntuple/inc/ROOT/RFieldBase.hxx index 6dd57a2eafd03..2323a04a849c5 100644 --- a/tree/ntuple/inc/ROOT/RFieldBase.hxx +++ b/tree/ntuple/inc/ROOT/RFieldBase.hxx @@ -238,14 +238,15 @@ private: func(target); } - /// Translate an entry index to a column element index of the principal column and vice versa. These functions - /// take into account the role and number of repetitions on each level of the field hierarchy as follows: + /// Translate an entry index to a column element index of the principal column. This function + /// takes into account the role and number of repetitions on each level of the field hierarchy as follows: /// - Top level fields: element index == entry index /// - Record fields propagate their principal column index to the principal columns of direct descendant fields /// - Collection and variant fields set the principal column index of their children to 0 /// /// The column element index also depends on the number of repetitions of each field in the hierarchy, e.g., given a - /// field with type `std::array, 2>`, this function returns 8 for the innermost field. + /// field with type `std::array, 2>`, this function called with `globalIndex == 1` + /// returns 8 for the innermost field. ROOT::NTupleSize_t EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const; /// Flushes data from active columns diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx index bf096642bb844..e6a46b5577bdd 100644 --- a/tree/ntuple/src/RFieldBase.cxx +++ b/tree/ntuple/src/RFieldBase.cxx @@ -700,14 +700,14 @@ void ROOT::RFieldBase::Attach(std::unique_ptr child) ROOT::NTupleSize_t ROOT::RFieldBase::EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const { - std::size_t result = globalIndex; + ROOT::NTupleSize_t result = globalIndex; for (auto f = this; f != nullptr; f = f->GetParent()) { auto parent = f->GetParent(); if (parent && (parent->GetStructure() == ROOT::ENTupleStructure::kCollection || parent->GetStructure() == ROOT::ENTupleStructure::kVariant)) { return 0U; } - result *= std::max(f->GetNRepetitions(), std::size_t{1U}); + result *= std::max(f->GetNRepetitions(), ROOT::NTupleSize_t{1U}); } return result; } From 68be090788ae35ce407a52580577b84b198bc370 Mon Sep 17 00:00:00 2001 From: silverweed Date: Mon, 4 May 2026 14:45:20 +0200 Subject: [PATCH 4/6] [ntuple] Introduce feature flag 0 (nested deferred columns) In code, this feature is represented by an update in the logic of RClusterDescriptorBuilder::AddExtendedColumnRanges(), which now handles properly the case where a column is added in a later cluster and therefore cannot rely on the (NEntries * NRepetitions) calculation, so it instead copies its FirstElementIndex and NElements from the 0th representation (which is guaranteed to have valid numbers for them). (cherry picked from commit 6c8f6ce07f36b504c4360c348fb34e3525d412d3) --- tree/ntuple/doc/BinaryFormatSpecification.md | 9 +++++++-- tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx | 19 ++++++++++++++++++- tree/ntuple/src/RNTupleDescriptor.cxx | 17 +++++++++++++++-- tree/ntuple/test/ntuple_compat.cxx | 4 ++-- 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/tree/ntuple/doc/BinaryFormatSpecification.md b/tree/ntuple/doc/BinaryFormatSpecification.md index 404477d0461fe..08159e9a56bc0 100644 --- a/tree/ntuple/doc/BinaryFormatSpecification.md +++ b/tree/ntuple/doc/BinaryFormatSpecification.md @@ -1,4 +1,4 @@ -# RNTuple Binary Format Specification 1.0.0.2 +# RNTuple Binary Format Specification 1.0.0.3 ## Versioning Notes @@ -167,7 +167,12 @@ That means that readers need to continue reading feature flags as long as their Readers should gracefully abort reading when they encounter unknown bits set. -At the moment, there are no feature flag bits defined. +Here is the list of all currently-defined feature flags. Note that the flag name is only for informational purposes +and is not normative. + +| Flag Bit | Introduced in | Name | Meaning | +|----------|---------------|-------------------------|----------------------------------------------| +| 0 | 1.0.0.3 | Nested Deferred Columns | Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended
(i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field
backed by columns with different encoding, e.g. a `vector` whose elements are represented by SplitReal32
in the first ntuple and by Real32 in the second. | ## Frames diff --git a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx index 9952fb6ffac93..f50bc76f1eb05 100644 --- a/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleDescriptor.hxx @@ -695,7 +695,24 @@ private: RNTupleDescriptor CloneSchema() const; public: - static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing + /// All known feature flags. + /// Note that the flag values represent the bit _index_, not the already-bitshifted integer. + enum EFeatureFlags { + /// Signals that the RNTuple contains at least one deferred column that is part of a collection and was extended + /// (i.e. it appears in the footer). This can happen when merging two RNTuples that have the same collection field + /// backed by columns with different encoding, e.g. a vector whose elements are represented by SplitReal32 + /// in the first ntuple and by Real32 in the second. + /// Added in version 1.1.0.0 of the binary format. + kFeatureFlag_NestedDeferredColumns = 0, + // Insert new feature flags here, with contiguous values. If at any point a "hole" appears in the valid feature + // flags values, the check in RNTupleSerialize must be updated. + + // End of regular feature flags + kFeatureFlag_COUNT, + + /// Reserved for forward-compatibility testing + kFeatureFlag_Test = 137 + }; class RColumnDescriptorIterable; class RFieldDescriptorIterable; diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index 85207ed0b52de..f68b7e72f9fda 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -911,8 +911,21 @@ ROOT::Internal::RClusterDescriptorBuilder::AddExtendedColumnRanges(const RNTuple // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the // field zero excluding subfields of collection and variant fields. if (c.IsDeferredColumn()) { - columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions); - columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions); + if (c.GetRepresentationIndex() == 0) { + columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions); + columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions); + } else { + // Deferred representations which are not the first cannot count on the number of elements being + // equal to Entries * nRepetitions because they might have been added in a later cluster. But they + // can rely on the first representation having the correct FirstElement/NElements (by definition + // the first representation cannot be an "extended" one), therefore they can just copy the value + // from it. + const auto &field = desc.GetFieldDescriptor(fieldId); + const auto firstReprColumnId = field.GetLogicalColumnIds()[c.GetIndex()]; + const auto &firstReprColumnRange = fCluster.fColumnRanges[firstReprColumnId]; + columnRange.SetFirstElementIndex(firstReprColumnRange.GetFirstElementIndex()); + columnRange.SetNElements(firstReprColumnRange.GetNElements()); + } if (!columnRange.IsSuppressed()) { auto &pageRange = fCluster.fPageRanges[physicalId]; pageRange.fPhysicalColumnId = physicalId; diff --git a/tree/ntuple/test/ntuple_compat.cxx b/tree/ntuple/test/ntuple_compat.cxx index 5b8bc294aa4ac..3bede6cb2f351 100644 --- a/tree/ntuple/test/ntuple_compat.cxx +++ b/tree/ntuple/test/ntuple_compat.cxx @@ -36,7 +36,7 @@ TEST(RNTupleCompat, FeatureFlag) RNTupleDescriptorBuilder descBuilder; descBuilder.SetVersionForWriting(); descBuilder.SetNTuple("ntpl", ""); - descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlagTest); + descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlag_Test); descBuilder.AddField(RFieldDescriptorBuilder::FromField(ROOT::RFieldZero()).FieldId(0).MakeDescriptor().Unwrap()); ASSERT_TRUE(static_cast(descBuilder.EnsureValidDescriptor())); @@ -87,7 +87,7 @@ TEST(RNTupleCompat, FeatureFlagInFooter) writer->WriteNTupleHeader(buffer.get(), ctx.GetHeaderSize(), ctx.GetHeaderSize()); // Write the feature flags in the footer - descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlagTest); + descBuilder.SetFeature(RNTupleDescriptor::kFeatureFlag_Test); auto szFooter = serializer.SerializeFooter(nullptr, descBuilder.GetDescriptor(), ctx).Unwrap(); buffer = std::make_unique(szFooter); From 4198df7b73ab828748bbf7fdbc33780a5a44921e Mon Sep 17 00:00:00 2001 From: silverweed Date: Thu, 18 Jun 2026 10:25:33 +0200 Subject: [PATCH 5/6] [ntuple] patch up column ids in the header extension when shifting (cherry picked from commit d32b64cf4a198dd4802e1856cb34b74d1d04028f) --- tree/ntuple/src/RNTupleDescriptor.cxx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tree/ntuple/src/RNTupleDescriptor.cxx b/tree/ntuple/src/RNTupleDescriptor.cxx index f68b7e72f9fda..7f12307c94790 100644 --- a/tree/ntuple/src/RNTupleDescriptor.cxx +++ b/tree/ntuple/src/RNTupleDescriptor.cxx @@ -1338,6 +1338,14 @@ void ROOT::Internal::RNTupleDescriptorBuilder::ShiftAliasColumns(std::uint32_t o R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0); fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c)); } + + // Patch up column ids in the header extension + if (auto &xHeader = fDescriptor.fHeaderExtension) { + for (auto &columnId : xHeader->fExtendedColumnRepresentations) { + if (columnId >= fDescriptor.GetNPhysicalColumns()) + columnId += offset; + } + } } ROOT::RResult ROOT::Internal::RNTupleDescriptorBuilder::AddCluster(RClusterDescriptor &&clusterDesc) From c42356c8cf224759bcb009dc55d6069e827bf72e Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 17 Jun 2026 17:02:01 +0200 Subject: [PATCH 6/6] [ntuple] Allow adding duplicate column representations This is necessary to support multiple representations that use the same column type but different metadata (e.g. different bit width on Real32Trunc columns) (cherry picked from commit 6bf5355cc45d8566ae4558e535637088b5ef4d9b) --- tree/ntuple/src/RFieldBase.cxx | 5 +---- tree/ntuple/test/ntuple_multi_column.cxx | 9 --------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/tree/ntuple/src/RFieldBase.cxx b/tree/ntuple/src/RFieldBase.cxx index e6a46b5577bdd..c97d94673d007 100644 --- a/tree/ntuple/src/RFieldBase.cxx +++ b/tree/ntuple/src/RFieldBase.cxx @@ -867,10 +867,7 @@ void ROOT::RFieldBase::SetColumnRepresentatives(const RColumnRepresentations::Se if (itRepresentative == std::end(validTypes)) throw RException(R__FAIL("invalid column representative")); - // don't add a duplicate representation - if (std::find_if(fColumnRepresentatives.begin(), fColumnRepresentatives.end(), - [&r](const auto &rep) { return r == rep.get(); }) == fColumnRepresentatives.end()) - fColumnRepresentatives.emplace_back(*itRepresentative); + fColumnRepresentatives.emplace_back(*itRepresentative); } } diff --git a/tree/ntuple/test/ntuple_multi_column.cxx b/tree/ntuple/test/ntuple_multi_column.cxx index 6999193f7514f..9abf5dbe4d132 100644 --- a/tree/ntuple/test/ntuple_multi_column.cxx +++ b/tree/ntuple/test/ntuple_multi_column.cxx @@ -445,12 +445,3 @@ TEST(RNTuple, MultiColumnRepresentationVariableBitWidth) reader->LoadEntry(1); EXPECT_FLOAT_EQ(2.0, *fldPx); } - -TEST(RNTuple, MultiColumnRepresentationDedup) -{ - FileRaii fileGuard("test_ntuple_multi_column_representation_dedup.root"); - - auto fldPx = RFieldBase::Create("px", "float").Unwrap(); - fldPx->SetColumnRepresentatives({{ROOT::ENTupleColumnType::kReal16}, {ROOT::ENTupleColumnType::kReal16}}); - EXPECT_EQ(fldPx->GetColumnRepresentatives().size(), 1); -}