Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions tree/ntupleutil/inc/ROOT/RNTupleInspector.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ enum class ENTupleInspectorHist {
kUncompressedSize
};

/// \brief Output formats accepted by RNTupleInspector::PrintSchemaProfile().
enum class ESchemaProfileFormat {
   /// https://www.speedscope.app/file-format-schema.json
   kSpeedscopeJSON
};

// clang-format off
/**
\class ROOT::Experimental::RNTupleInspector
Expand Down Expand Up @@ -493,6 +498,11 @@ public:
{
PrintFieldTreeAsDot(GetDescriptor().GetFieldZero(), output);
}

/////////////////////////////////////////////////////////////////////////////
/// \brief Print a string that represents the tree of the (sub)fields and columns of an RNTuple in a format which a
/// performance profile visualizer can render
///
/// \param[in] format The output format to produce; see ESchemaProfileFormat for the available values.
/// \param[in] output The stream the profile is written to; defaults to std::cout.
void PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output = std::cout) const;
};
} // namespace Experimental
} // namespace ROOT
Expand Down
127 changes: 127 additions & 0 deletions tree/ntupleutil/src/RNTupleInspector.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cstring>
#include <deque>
#include <exception>
#include <functional>
#include <iomanip>
#include <iostream>

Expand Down Expand Up @@ -565,3 +566,129 @@ void ROOT::Experimental::RNTupleInspector::PrintFieldTreeAsDot(const ROOT::RFiel
if (isZeroField)
output << "}";
}

namespace {

/// One entry of the flattened schema profile. Each frame is emitted once in the "frames" table and once as
/// an open/close event pair in the profile's event list.
struct SpeedscopeFrame {
   std::string fPrimaryString;         ///< Emitted as the frame's "name"
   std::string fSecondaryString;       ///< Emitted after "Type: " in the frame's "file" entry
   std::uint64_t fOpeningPosition = 0; ///< Value of "at" in the frame's open ("O") event
   std::uint64_t fClosingPosition = 0; ///< Value of "at" in the frame's close ("C") event
};

/// Escape `str` so that it can be embedded between double quotes in a JSON document.
/// Quotes, backslashes and control characters are escaped; everything else is passed through unchanged.
std::string EscapeJSONString(const std::string &str)
{
   static constexpr char kHexDigits[] = "0123456789abcdef";

   std::string escaped;
   escaped.reserve(str.size());
   for (const char c : str) {
      const auto uc = static_cast<unsigned char>(c);
      switch (c) {
      case '"': escaped += "\\\""; break;
      case '\\': escaped += "\\\\"; break;
      case '\n': escaped += "\\n"; break;
      case '\r': escaped += "\\r"; break;
      case '\t': escaped += "\\t"; break;
      default:
         if (uc < 0x20) {
            // Remaining control characters must be written as \u00XX
            escaped += "\\u00";
            escaped += kHexDigits[(uc >> 4) & 0xF];
            escaped += kHexDigits[uc & 0xF];
         } else {
            escaped += c;
         }
      }
   }
   return escaped;
}

/// Write `frames` to `output` as a speedscope "evented" profile.
///
/// \param[in] frames Frames in pre-order: a frame is immediately followed by the frames nested in it. A frame is
/// treated as nested in the current one as long as its opening position lies before the current frame's closing
/// position.
/// \param[in] output Stream receiving the JSON document.
///
/// An empty `frames` vector yields a valid document with no frames, no events and an end value of 0.
void PrintSpeedscopeFrames(const std::vector<SpeedscopeFrame> &frames, std::ostream &output)
{
   // Guard against an empty vector: calling back() on it would be undefined behaviour.
   const std::uint64_t endPosition = frames.empty() ? 0 : frames.back().fClosingPosition;

   output << "{\n";
   output << " \"$schema\":\"https://www.speedscope.app/file-format-schema.json\",\n";
   output << " \"shared\":{\n";
   output << " \"frames\":[\n";

   for (std::size_t i = 0; i < frames.size(); ++i) {
      const auto &frame = frames[i];
      output << " { \"name\":\"" << EscapeJSONString(frame.fPrimaryString)
             << "\", \"file\":\"Type: " << EscapeJSONString(frame.fSecondaryString)
             << ", Size: " << frame.fClosingPosition - frame.fOpeningPosition << "B\" }"
             << (i + 1 < frames.size() ? ",\n" : "\n");
   }

   output << " ]\n";
   output << " },\n";
   output << " \"profiles\":[\n";
   output << " {\n";
   output << " \"type\":\"evented\",\n";
   output << " \"name\":\"Flattened Timeline\",\n";
   output << " \"unit\":\"bytes\",\n";
   output << " \"startValue\":0,\n";
   output << " \"endValue\":" << endPosition << ",\n";
   output << " \"events\":[\n";

   bool first = true;

   // Emits the open/close events of all frames starting at `nextIdxToProcess` whose opening position lies
   // before `limit`, recursing into nested frames.
   //   - nextIdxToProcess: index of the first frame to process
   //   - limit: closing position of the enclosing frame; for the top-level call, the closing position of the
   //     last element of `frames`
   // Returns the index of the first frame that was not processed.
   // The limit must be 64 bit wide like the positions themselves; a narrower type silently truncates
   // closing positions of 4 GiB and beyond and drops events.
   std::function<std::size_t(std::size_t, std::uint64_t)> processRecursive = [&](std::size_t nextIdxToProcess,
                                                                                 std::uint64_t limit) -> std::size_t {
      while (nextIdxToProcess < frames.size() && frames[nextIdxToProcess].fOpeningPosition < limit) {
         const std::size_t currentIdx = nextIdxToProcess;

         if (!first)
            output << ",\n";

         output << " {\"type\":\"O\",\"frame\":" << currentIdx
                << ",\"at\":" << frames[currentIdx].fOpeningPosition << "}";
         first = false;

         nextIdxToProcess = processRecursive(nextIdxToProcess + 1, frames[currentIdx].fClosingPosition);

         output << ",\n {\"type\":\"C\",\"frame\":" << currentIdx
                << ",\"at\":" << frames[currentIdx].fClosingPosition << "}";
      }
      return nextIdxToProcess;
   };

   processRecursive(0, endPosition);

   output << "\n ]\n";
   output << " }\n";
   output << " ]\n";
   output << "}\n";
}
} // namespace

////////////////////////////////////////////////////////////////////////////////
/// \brief Write the tree of (sub)fields and columns of the inspected RNTuple as a profile document.
///
/// Every field and every column becomes one frame. Columns are laid out one after the other along a single
/// axis, each one as wide as its compressed size; a field spans everything visited below it (first its
/// subfields, then its own columns).
///
/// \param[in] format The output format; only ESchemaProfileFormat::kSpeedscopeJSON exists at the moment.
/// \param[in] output The stream the profile is written to.
void ROOT::Experimental::RNTupleInspector::PrintSchemaProfile(ESchemaProfileFormat format, std::ostream &output) const
{
   // There is only one format at the moment
   assert(format == ESchemaProfileFormat::kSpeedscopeJSON);
   (void)format; // the assert compiles away under NDEBUG, which would leave the parameter unused

   const auto &tupleDescriptor = GetDescriptor();
   const auto &rootFieldDescriptor = tupleDescriptor.GetFieldDescriptor(tupleDescriptor.GetFieldZeroId());

   std::vector<SpeedscopeFrame> frames;
   std::uint64_t positionCursor = 0; // running sum of the compressed sizes of all columns visited so far

   // Appends the frame of the given field, followed by the frames of its subfields and of its columns.
   // The explicit return type is required because the lambda calls itself through `self`.
   auto visitFieldsRecursive = [&](auto &self, const ROOT::RFieldDescriptor &fieldDescriptor) -> void {
      const std::string qualifiedFieldName = tupleDescriptor.GetQualifiedFieldName(fieldDescriptor.GetId());

      // Remember the index rather than a reference: the recursion below grows the vector and may reallocate it.
      const std::size_t fieldFrameIndex = frames.size();
      frames.emplace_back();
      frames[fieldFrameIndex].fPrimaryString = qualifiedFieldName;
      frames[fieldFrameIndex].fSecondaryString = fieldDescriptor.GetTypeName();
      frames[fieldFrameIndex].fOpeningPosition = positionCursor;

      for (const auto &childFieldId : fieldDescriptor.GetLinkIds()) {
         self(self, tupleDescriptor.GetFieldDescriptor(childFieldId));
      }

      for (const auto &columnDescriptor : tupleDescriptor.GetColumnIterable(fieldDescriptor.GetId())) {
         const auto &columnInfo = GetColumnInspector(columnDescriptor.GetPhysicalId());
         const std::uint64_t columnSize = columnInfo.GetCompressedSize();

         frames.emplace_back();
         auto &columnFrame = frames.back();
         columnFrame.fPrimaryString =
            qualifiedFieldName + " [col#" + std::to_string(columnDescriptor.GetPhysicalId()) + "]";
         columnFrame.fSecondaryString =
            ROOT::Internal::RColumnElementBase::GetColumnTypeName(columnDescriptor.GetType());
         columnFrame.fOpeningPosition = positionCursor;
         positionCursor += columnSize;
         columnFrame.fClosingPosition = positionCursor;
      }

      // The field closes where its last descendant closed.
      frames[fieldFrameIndex].fClosingPosition = positionCursor;
   };

   visitFieldsRecursive(visitFieldsRecursive, rootFieldDescriptor);

   PrintSpeedscopeFrames(frames, output);
}
56 changes: 56 additions & 0 deletions tree/ntupleutil/test/ntuple_inspector.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -862,3 +862,59 @@ TEST(RNTupleInspector, FieldTreeAsDot)
"</b>int<br></br><b>Type: </b>std::int32_t<br></br><b>ID: </b>1<br></br>>]\n}";
EXPECT_EQ(dot, expected);
}

// Writes a small RNTuple with one float and one int32 field and checks the speedscope profile printed for it
// against a fixed reference document.
TEST(RNTupleInspector, SchemaProfile)
{
   FileRaii fileGuard("test_schema_profile.root");
   {
      // The writer is scoped so that it goes out of scope before the file is inspected.
      auto model = RNTupleModel::Create();
      auto floatValue = model->MakeField<float>("float1");
      auto intValue = model->MakeField<std::int32_t>("int");
      auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath());

      int entry = 0;
      while (entry < 10) {
         *floatValue = 3.14f * entry;
         *intValue = 42 * entry;
         writer->Fill();
         ++entry;
      }
   }

   // Reference output: one frame for the zero field, one per field and one per column.
   const std::string expected = R"({
"$schema":"https://www.speedscope.app/file-format-schema.json",
"shared":{
"frames":[
{ "name":"", "file":"Type: , Size: 80B" },
{ "name":"float1", "file":"Type: float, Size: 40B" },
{ "name":"float1 [col#0]", "file":"Type: SplitReal32, Size: 40B" },
{ "name":"int", "file":"Type: std::int32_t, Size: 40B" },
{ "name":"int [col#1]", "file":"Type: SplitInt32, Size: 40B" }
]
},
"profiles":[
{
"type":"evented",
"name":"Flattened Timeline",
"unit":"bytes",
"startValue":0,
"endValue":80,
"events":[
{"type":"O","frame":0,"at":0},
{"type":"O","frame":1,"at":0},
{"type":"O","frame":2,"at":0},
{"type":"C","frame":2,"at":40},
{"type":"C","frame":1,"at":40},
{"type":"O","frame":3,"at":40},
{"type":"O","frame":4,"at":40},
{"type":"C","frame":4,"at":80},
{"type":"C","frame":3,"at":80},
{"type":"C","frame":0,"at":80}
]
}
]
}
)";

   auto inspector = RNTupleInspector::Create("ntuple", fileGuard.GetPath());
   std::ostringstream profileStream;
   inspector->PrintSchemaProfile(ROOT::Experimental::ESchemaProfileFormat::kSpeedscopeJSON, profileStream);
   const std::string actual = profileStream.str();

   EXPECT_EQ(actual, expected);
}
Loading