Skip to content

Commit 6ed2aee

Browse files
xiangfu0 and claude committed
Add sorted index path to BenchmarkInvertedIndexDistinct
Add sortedIndexPath() benchmark method that simulates the sorted column execution path: merge-iterates filter bitmap against contiguous doc ranges using PeekableIntIterator.advanceIfNeeded().

This benchmarks all three DISTINCT operator paths side-by-side:
- sortedIndexPath: O(cardinality + filteredDocs) merge iteration
- invertedIndexPath: O(cardinality) bitmap intersects() checks
- scanPath: O(filteredDocs) forward index lookups with dedup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e1c3b35 commit 6ed2aee

3 files changed

Lines changed: 93 additions & 220 deletions

File tree

pinot-core/src/main/java/org/apache/pinot/core/operator/query/InvertedIndexDistinctOperator.java

Lines changed: 21 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -70,11 +70,13 @@
7070
/**
7171
* Inverted-index-based operator for single-column distinct queries on a single segment.
7272
*
73-
* <p>Supports two execution paths, chosen at runtime based on a cost heuristic:
73+
* <p>Supports three execution paths, chosen at runtime:
7474
* <ul>
75-
* <li><b>Inverted index path</b>: Iterates dictionary entries and uses inverted index bitmap intersections
76-
* to check filter membership. Avoids the projection pipeline entirely. Preferred when dictionary cardinality
77-
* is much smaller than the filtered doc count.</li>
75+
* <li><b>Sorted index path</b>: For sorted columns, merge-iterates filter bitmap against contiguous doc ranges.
76+
* Cost ~ O(cardinality + filteredDocs). Always chosen when the column has a sorted forward index.</li>
77+
* <li><b>Bitmap inverted index path</b>: Iterates dictionary entries and uses inverted index bitmap intersections
78+
* to check filter membership. Avoids the projection pipeline entirely. Chosen by cost heuristic when dictionary
79+
* cardinality is much smaller than the filtered doc count.</li>
7880
* <li><b>Scan path (fallback)</b>: Uses ProjectOperator + DistinctExecutor to scan filtered docs.
7981
* Used when the cost heuristic determines scanning is cheaper.</li>
8082
* </ul>
@@ -311,11 +313,9 @@ private DistinctResultsBlock executeInvertedIndexPath() {
311313
for (dictId = 0; dictId < dictLength; dictId++) {
312314
QueryThreadContext.checkTerminationAndSampleUsagePeriodically(dictId, EXPLAIN_NAME);
313315

314-
Object docIdsObj = _invertedIndexReader.getDocIds(dictId);
315-
if (!(docIdsObj instanceof ImmutableRoaringBitmap)) {
316-
continue;
317-
}
318-
ImmutableRoaringBitmap docIds = (ImmutableRoaringBitmap) docIdsObj;
316+
// SortedIndexReader is handled separately in getNextBlock(), so this path only sees bitmap inverted indexes
317+
// whose getDocIds() returns ImmutableRoaringBitmap.
318+
ImmutableRoaringBitmap docIds = (ImmutableRoaringBitmap) _invertedIndexReader.getDocIds(dictId);
319319
if (docIds.isEmpty()) {
320320
continue;
321321
}
@@ -534,10 +534,15 @@ private static boolean addToTable(BytesDistinctTable table, ByteArray value,
534534

535535
@Override
536536
public List<? extends Operator> getChildOperators() {
537-
if (_usedInvertedIndexPath || _projectOperator == null) {
538-
return Collections.emptyList();
537+
if (_usedInvertedIndexPath) {
538+
// For inverted/sorted index paths, the filter operator is the logical child
539+
// (it provides the filtered docId set that we intersect with dictionary bitmaps).
540+
return Collections.singletonList(_filterOperator);
541+
}
542+
if (_projectOperator != null) {
543+
return Collections.singletonList(_projectOperator);
539544
}
540-
return Collections.singletonList(_projectOperator);
545+
return Collections.emptyList();
541546
}
542547

543548
@Override
@@ -549,9 +554,10 @@ public IndexSegment getIndexSegment() {
549554
public ExecutionStatistics getExecutionStatistics() {
550555
int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
551556
if (_usedInvertedIndexPath || _projectOperator == null) {
552-
// For the inverted index path, report dictionary entries examined as numDocsScanned.
553-
// This isn't strictly "docs scanned" but represents the work done by this operator.
554-
return new ExecutionStatistics(_numDocsScanned > 0 ? _numDocsScanned : _numEntriesExamined, 0, 0, numTotalDocs);
557+
// For inverted/sorted index paths: numDocsScanned=0 (no forward index lookups),
558+
// numEntriesScannedInFilter=0, numEntriesScannedPostFilter=numEntriesExamined
559+
// (dictionary entries examined via bitmap intersection or sorted range checks).
560+
return new ExecutionStatistics(0, 0, _numEntriesExamined, numTotalDocs);
555561
}
556562
long numEntriesScannedInFilter = _projectOperator.getExecutionStatistics().getNumEntriesScannedInFilter();
557563
long numEntriesScannedPostFilter = (long) _numDocsScanned * _projectOperator.getNumColumnsProjected();

pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkBitmapIntersectionVsAnd.java

Lines changed: 0 additions & 192 deletions
This file was deleted.

pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexDistinct.java

Lines changed: 72 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -35,20 +35,24 @@
3535
import org.openjdk.jmh.infra.Blackhole;
3636
import org.openjdk.jmh.runner.Runner;
3737
import org.openjdk.jmh.runner.options.OptionsBuilder;
38+
import org.roaringbitmap.PeekableIntIterator;
3839
import org.roaringbitmap.RoaringBitmap;
3940

4041

4142
/**
42-
* Benchmark to compare inverted-index-based distinct vs scan-based distinct execution paths.
43+
* Benchmark comparing three execution paths for single-column DISTINCT queries:
4344
*
44-
* <p>Inverted index path: iterates all dictionary entries, does bitmap intersection with filter for each.
45-
* Cost ~ O(dictionaryCardinality * bitmapIntersectionCost).
45+
* <ol>
46+
* <li><b>Sorted index path</b>: merge-iterates filter bitmap against contiguous doc ranges per dictId.
47+
* Cost ~ O(dictionaryCardinality + filterCardinality). Only applicable when the column is sorted.</li>
48+
* <li><b>Bitmap inverted index path</b>: iterates all dictionary entries, uses {@code intersects()} to check
49+
* filter membership per entry. Cost ~ O(dictionaryCardinality * bitmapIntersectionCost).</li>
50+
* <li><b>Scan path</b>: iterates all filtered docIds, looks up dictId from forward index, deduplicates.
51+
* Cost ~ O(filterCardinality * forwardIndexLookupCost).</li>
52+
* </ol>
4653
*
47-
* <p>Scan path: iterates all filtered docIds, looks up dictId from forward index (int[]), deduplicates.
48-
* Cost ~ O(filterCardinality * forwardIndexLookupCost).
49-
*
50-
* <p>The goal is to determine the crossover ratio (dictionaryCardinality / filterCardinality) at which
51-
* the inverted index path becomes faster than the scan path.
54+
* <p>The sorted index path is always the fastest when applicable (column is sorted), as it avoids both
55+
* bitmap intersection and per-doc forward index lookups.
5256
*
5357
* <p>Usage: {@code java -jar pinot-perf/target/benchmarks.jar BenchmarkInvertedIndexDistinct}
5458
*/
@@ -70,12 +74,17 @@ public class BenchmarkInvertedIndexDistinct {
7074
@Param({"0.001", "0.01", "0.1", "0.5", "1.0"})
7175
double _filterSelectivity;
7276

73-
// -- Inverted index: dictId -> docIds bitmap
77+
// -- Bitmap inverted index: dictId -> docIds bitmap (non-sorted, random distribution)
7478
private RoaringBitmap[] _invertedIndex;
7579

76-
// -- Forward index: docId -> dictId (simulates column forward index)
80+
// -- Forward index: docId -> dictId (simulates column forward index for scan path)
7781
private int[] _forwardIndex;
7882

83+
// -- Sorted index ranges: dictId -> [startDocId, endDocId] (inclusive)
84+
// Simulates SortedIndexReader.getDocIds(dictId) which returns contiguous doc ranges
85+
private int[] _sortedRangeStarts;
86+
private int[] _sortedRangeEnds;
87+
7988
// -- Filter bitmap: which docIds pass the filter
8089
private RoaringBitmap _filterBitmap;
8190

@@ -85,6 +94,8 @@ public class BenchmarkInvertedIndexDistinct {
8594
public void setup() {
8695
Random random = new Random(42);
8796

97+
// ---- Non-sorted data (for inverted index and scan paths) ----
98+
8899
// Build forward index: assign each doc a random dictId
89100
_forwardIndex = new int[_numDocs];
90101
for (int docId = 0; docId < _numDocs; docId++) {
@@ -100,7 +111,22 @@ public void setup() {
100111
_invertedIndex[_forwardIndex[docId]].add(docId);
101112
}
102113

103-
// Build filter bitmap: randomly select filterSelectivity fraction of docs
114+
// ---- Sorted data (for sorted index path) ----
115+
116+
// Each dictId maps to a contiguous doc range, simulating a sorted column.
117+
// dictId=0: docs [0, docsPerValue-1], dictId=1: docs [docsPerValue, 2*docsPerValue-1], etc.
118+
int docsPerValue = _numDocs / _dictionaryCardinality;
119+
_sortedRangeStarts = new int[_dictionaryCardinality];
120+
_sortedRangeEnds = new int[_dictionaryCardinality];
121+
for (int dictId = 0; dictId < _dictionaryCardinality; dictId++) {
122+
_sortedRangeStarts[dictId] = dictId * docsPerValue;
123+
_sortedRangeEnds[dictId] = (dictId + 1) * docsPerValue - 1;
124+
}
125+
// Last range absorbs any remainder
126+
_sortedRangeEnds[_dictionaryCardinality - 1] = _numDocs - 1;
127+
128+
// ---- Filter bitmap (shared across all paths) ----
129+
104130
_filterCardinality = Math.max(1, (int) (_numDocs * _filterSelectivity));
105131
_filterBitmap = new RoaringBitmap();
106132
if (_filterSelectivity >= 1.0) {
@@ -131,8 +157,40 @@ public void setup() {
131157
}
132158

133159
/**
134-
* Inverted index path: iterate all dictionary entries, intersect each with filter bitmap.
135-
* For each matching value, add to a dedup set (simulated by a RoaringBitmap of dictIds).
160+
* Sorted index path: merge-iterate filter bitmap against contiguous doc ranges.
161+
* Uses PeekableIntIterator.advanceIfNeeded() to skip filter docs between ranges.
162+
* Cost ~ O(dictionaryCardinality + filterCardinality).
163+
*/
164+
@Benchmark
165+
public int sortedIndexPath(Blackhole bh) {
166+
RoaringBitmap seenDictIds = new RoaringBitmap();
167+
int valuesProcessed = 0;
168+
169+
PeekableIntIterator filterIter = _filterBitmap.getIntIterator();
170+
for (int dictId = 0; dictId < _dictionaryCardinality && filterIter.hasNext(); dictId++) {
171+
int startDocId = _sortedRangeStarts[dictId];
172+
int endDocId = _sortedRangeEnds[dictId];
173+
174+
// Skip filter docs before this range
175+
filterIter.advanceIfNeeded(startDocId);
176+
177+
// Check if any filter doc falls within this range
178+
if (filterIter.hasNext() && filterIter.peekNext() <= endDocId) {
179+
seenDictIds.add(dictId);
180+
valuesProcessed++;
181+
// Advance past the current range for next dictId
182+
filterIter.advanceIfNeeded(endDocId + 1);
183+
}
184+
}
185+
186+
bh.consume(seenDictIds);
187+
return valuesProcessed;
188+
}
189+
190+
/**
191+
* Bitmap inverted index path: iterate all dictionary entries, intersect each with filter bitmap.
192+
* Uses intersects() for early termination instead of computing full intersection.
193+
* Cost ~ O(dictionaryCardinality * bitmapIntersectionCost).
136194
*/
137195
@Benchmark
138196
public int invertedIndexPath(Blackhole bh) {
@@ -157,6 +215,7 @@ public int invertedIndexPath(Blackhole bh) {
157215

158216
/**
159217
* Scan path: iterate all filtered docIds, look up dictId from forward index, dedup.
218+
* Cost ~ O(filterCardinality * forwardIndexLookupCost).
160219
*/
161220
@Benchmark
162221
public int scanPath(Blackhole bh) {

0 commit comments

Comments
 (0)