From adb3fbdd010af66ecfc2a6a5367c849045d0be40 Mon Sep 17 00:00:00 2001 From: albfsg Date: Fri, 19 Jun 2026 17:28:11 +0200 Subject: [PATCH] [ntuple] Add PrintSchemaProfile method in RNTupleInspector Produces an output readable by performance profile viewers, as of this commit just Speedscope, to visualize the fields of an ntuple and the columns below them. Comes with a unit test in ntuple_inspector. --- tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx | 10 ++ tree/ntupleutil/src/RNTupleInspector.cxx | 127 ++++++++++++++++++ tree/ntupleutil/test/ntuple_inspector.cxx | 56 ++++++++ 3 files changed, 193 insertions(+) diff --git a/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx b/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx index 4f679532c1c87..9ba5b9816fdbd 100644 --- a/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx +++ b/tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx @@ -50,6 +50,11 @@ enum class ENTupleInspectorHist { kUncompressedSize }; +enum class ESchemaProfileFormat { + /// https://www.speedscope.app/file-format-schema.json + kSpeedscopeJSON +}; + // clang-format off /** \class ROOT::Experimental::RNTupleInspector @@ -493,6 +498,11 @@ public: { PrintFieldTreeAsDot(GetDescriptor().GetFieldZero(), output); } + + ///////////////////////////////////////////////////////////////////////////// + /// \brief Print a string that represents the tree of the (sub)fields and columns of an RNTuple in a format which a + /// performance profile visualizer can render + void PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output = std::cout) const; }; } // namespace Experimental } // namespace ROOT diff --git a/tree/ntupleutil/src/RNTupleInspector.cxx b/tree/ntupleutil/src/RNTupleInspector.cxx index 1a5b192f6ad53..24bab6fd53328 100644 --- a/tree/ntupleutil/src/RNTupleInspector.cxx +++ b/tree/ntupleutil/src/RNTupleInspector.cxx @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -565,3 +566,129 @@ void 
ROOT::Experimental::RNTupleInspector::PrintFieldTreeAsDot(const ROOT::RFiel
    if (isZeroField)
       output << "}";
 }
+
+namespace {
+/// A field or a column of the schema: display name, type name and the range of the size axis it spans.
+struct SpeedscopeFrame {
+   std::string fPrimaryString;
+   std::string fSecondaryString;
+   std::uint64_t fOpeningPosition = 0;
+   std::uint64_t fClosingPosition = 0;
+};
+/// Writes `frames` (stored parent-first, nested by position) to `output` as a Speedscope "evented" JSON profile.
+static void PrintSpeedscopeFrames(const std::vector<SpeedscopeFrame> &frames, std::ostream &output)
+{
+   output << "{\n";
+   output << " \"$schema\":\"https://www.speedscope.app/file-format-schema.json\",\n";
+   output << " \"shared\":{\n";
+   output << " \"frames\":[\n";
+
+   for (std::size_t i = 0; i < frames.size(); ++i) {
+      output << " { \"name\":\"" << frames[i].fPrimaryString
+             << "\", \"file\":\"Type: " << frames[i].fSecondaryString
+             << ", Size: " << frames[i].fClosingPosition - frames[i].fOpeningPosition << "B\" }"
+             << (i + 1 < frames.size() ? ",\n" : "\n");
+   }
+
+   output << " ]\n";
+   output << " },\n";
+   output << " \"profiles\":[\n";
+   output << " {\n";
+   output << " \"type\":\"evented\",\n";
+   output << " \"name\":\"Flattened Timeline\",\n";
+   output << " \"unit\":\"bytes\",\n";
+   output << " \"startValue\":0,\n";
+   output << " \"endValue\":" << frames.back().fClosingPosition << ",\n";
+   output << " \"events\":[\n";
+
+   bool first = true;
+
+   // Emits the open ("O") and close ("C") events of the frames starting at index nextIdxToProcess that open
+   // before limit, recursing into the frames nested inside each of them between its two events.
+   // Parameter limit: closing position of the enclosing frame (for the top-level call, of the last frame). It is
+   // a 64-bit value like the frame positions themselves; a 32-bit limit would wrap for profiles of 4 GiB or more.
+   // Returns the index of the first frame that was not processed, i.e. the first one opening at or after limit.
+   std::function<std::size_t(std::size_t, std::uint64_t)> processRecursive = [&](std::size_t nextIdxToProcess,
+                                                                                std::uint64_t limit) -> std::size_t {
+      while (nextIdxToProcess < frames.size() && frames[nextIdxToProcess].fOpeningPosition < limit) {
+         const std::size_t currentIdx = nextIdxToProcess;
+
+         if (!first)
+            output << ",\n";
+
+         output << " {\"type\":\"O\",\"frame\":" << currentIdx
+                << ",\"at\":" << frames[currentIdx].fOpeningPosition << "}";
+         first = false;
+
+         nextIdxToProcess = processRecursive(nextIdxToProcess + 1, frames[currentIdx].fClosingPosition);
+
+         output << ",\n {\"type\":\"C\",\"frame\":" << currentIdx
+                << ",\"at\":" << frames[currentIdx].fClosingPosition << "}";
+      }
+      return nextIdxToProcess;
+   };
+
+   processRecursive(0, frames.back().fClosingPosition);
+
+   output << "\n ]\n";
+   output << " }\n";
+   output << " ]\n";
+   output << "}\n";
+}
+} // namespace
+
+void ROOT::Experimental::RNTupleInspector::PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output) const
+{
+   // There is only one format at the moment
+   assert(format == ESchemaProfileFormat::kSpeedscopeJSON);
+
+   const auto &tupleDescriptor = GetDescriptor();
+   ROOT::DescriptorId_t rootId = tupleDescriptor.GetFieldZeroId();
+   const auto &rootFieldDescriptor = tupleDescriptor.GetFieldDescriptor(rootId);
+
+   std::vector<SpeedscopeFrame> frames;
+   std::uint64_t positionCursor = 0;
+
+   // Appends the frame of the field, then those of its subfields and of its columns; returns the subtree's compressed size
+   auto visitFieldsRecursive = [&](auto &self, const ROOT::RFieldDescriptor &fieldDescriptor) -> std::size_t {
+      SpeedscopeFrame fieldSpeedscopeFrame;
+      fieldSpeedscopeFrame.fPrimaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId());
+      fieldSpeedscopeFrame.fSecondaryString = fieldDescriptor.GetTypeName();
+      fieldSpeedscopeFrame.fOpeningPosition = positionCursor;
+      frames.push_back(fieldSpeedscopeFrame);
+
+      const std::size_t fieldSpeedscopeFrameIndex = frames.size() - 1;
+
+      std::size_t subTreeSize = 0;
+      const auto &childIds = fieldDescriptor.GetLinkIds();
+
+      for (const auto &childFieldId : childIds) {
+         const auto &childFieldDescriptor = tupleDescriptor.GetFieldDescriptor(childFieldId);
+         subTreeSize += self(self, childFieldDescriptor);
+      }
+
+      for (const auto &columnDescriptor : tupleDescriptor.GetColumnIterable(fieldDescriptor.GetId())) {
+         const auto &columnInfo = GetColumnInspector(columnDescriptor.GetPhysicalId());
+         std::size_t columnSize = columnInfo.GetCompressedSize();
+
+         SpeedscopeFrame columnSpeedscopeFrame;
+         columnSpeedscopeFrame.fPrimaryString = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId()) +
+                                                " [col#" + std::to_string(columnDescriptor.GetPhysicalId()) + "]";
+         columnSpeedscopeFrame.fSecondaryString =
+            ROOT::Internal::RColumnElementBase::GetColumnTypeName(columnDescriptor.GetType());
+         columnSpeedscopeFrame.fOpeningPosition = positionCursor;
+         positionCursor += columnSize;
+         columnSpeedscopeFrame.fClosingPosition = positionCursor;
+         frames.push_back(columnSpeedscopeFrame);
+         subTreeSize += columnSize;
+      }
+
+      frames[fieldSpeedscopeFrameIndex].fClosingPosition = positionCursor;
+
+      return subTreeSize;
+   };
+
+   visitFieldsRecursive(visitFieldsRecursive, rootFieldDescriptor);
+
+   PrintSpeedscopeFrames(frames, output);
+}
diff --git a/tree/ntupleutil/test/ntuple_inspector.cxx b/tree/ntupleutil/test/ntuple_inspector.cxx
index 5812a926eb9ee..7d5f439e9281b 100644
--- a/tree/ntupleutil/test/ntuple_inspector.cxx
+++ b/tree/ntupleutil/test/ntuple_inspector.cxx
@@ -862,3 +862,59 @@ TEST(RNTupleInspector, FieldTreeAsDot)
 "int

Type: std::int32_t

ID: 1

>]\n}"; EXPECT_EQ(dot, expected); } + +TEST(RNTupleInspector, SchemaProfile) +{ + FileRaii fileGuard("test_schema_profile.root"); + { + auto model = RNTupleModel::Create(); + auto fieldFloat1 = model->MakeField("float1"); + auto fieldInt = model->MakeField("int"); + auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath()); + + for (int i = 0; i < 10; ++i) { + *fieldFloat1 = 3.14f * i; + *fieldInt = 42 * i; + writer->Fill(); + } + } + auto inspector = RNTupleInspector::Create("ntuple", fileGuard.GetPath()); + std::ostringstream schemaProfileStream; + inspector->PrintSchemaProfile(ROOT::Experimental::ESchemaProfileFormat::kSpeedscopeJSON, schemaProfileStream); + const std::string schemaProfile = schemaProfileStream.str(); + const std::string expected = R"({ + "$schema":"https://www.speedscope.app/file-format-schema.json", + "shared":{ + "frames":[ + { "name":"", "file":"Type: , Size: 80B" }, + { "name":"float1", "file":"Type: float, Size: 40B" }, + { "name":"float1 [col#0]", "file":"Type: SplitReal32, Size: 40B" }, + { "name":"int", "file":"Type: std::int32_t, Size: 40B" }, + { "name":"int [col#1]", "file":"Type: SplitInt32, Size: 40B" } + ] + }, + "profiles":[ + { + "type":"evented", + "name":"Flattened Timeline", + "unit":"bytes", + "startValue":0, + "endValue":80, + "events":[ + {"type":"O","frame":0,"at":0}, + {"type":"O","frame":1,"at":0}, + {"type":"O","frame":2,"at":0}, + {"type":"C","frame":2,"at":40}, + {"type":"C","frame":1,"at":40}, + {"type":"O","frame":3,"at":40}, + {"type":"O","frame":4,"at":40}, + {"type":"C","frame":4,"at":80}, + {"type":"C","frame":3,"at":80}, + {"type":"C","frame":0,"at":80} + ] + } + ] +} +)"; + EXPECT_EQ(schemaProfile, expected); +}