From 8b839e0551ae7797771ea7c1f6535bb93cb9f67b Mon Sep 17 00:00:00 2001 From: Mohit Godwani Date: Mon, 18 May 2026 12:04:52 +0530 Subject: [PATCH] Add stats for parquet/lucene plugins Signed-off-by: Mohit Godwani --- .../opensearch/be/lucene/LucenePlugin.java | 13 +- .../be/lucene/LuceneReaderManager.java | 17 +- .../be/lucene/LuceneSearchBackEnd.java | 14 +- .../be/lucene/index/LuceneCommitter.java | 32 +- .../lucene/index/LuceneCommitterFactory.java | 11 +- .../index/LuceneDeleteExecutionEngine.java | 28 +- .../index/LuceneIndexingExecutionEngine.java | 159 ++++--- .../lucene/index/LuceneReplicaCommitter.java | 19 +- .../be/lucene/index/LuceneWriter.java | 96 +++-- .../be/lucene/merge/LuceneMerger.java | 123 +++--- .../be/lucene/stats/LuceneShardStats.java | 317 ++++++++++++++ .../LuceneDataFormatAwareEngineTests.java | 3 +- .../be/lucene/LuceneMergerTests.java | 16 +- .../be/lucene/LuceneReaderManagerTests.java | 60 ++- .../be/lucene/index/DeleterImplTests.java | 5 +- ...eneCommitterCSManagerIntegrationTests.java | 50 ++- .../index/LuceneCommitterFactoryTests.java | 8 +- .../be/lucene/index/LuceneCommitterTests.java | 29 +- .../LuceneDeleteExecutionEngineTests.java | 14 +- .../LuceneIndexingExecutionEngineTests.java | 26 +- .../be/lucene/index/LuceneWriterTests.java | 25 +- .../composite/DataFormatStatsIT.java | 235 ++++++++++ .../composite/CompositeDataFormatPlugin.java | 29 +- .../CompositeIndexingExecutionEngine.java | 67 ++- .../CompositeRegistryInitializer.java | 29 ++ .../opensearch/composite/CompositeWriter.java | 151 ++++--- .../action/CatalogSnapshotAction.java | 219 ++++++++++ .../action/DataFormatStatsAction.java | 106 +++++ .../merge/CompositeMergeExecutor.java | 31 +- .../composite/merge/CompositeMerger.java | 5 +- .../composite/stats/CompositeShardStats.java | 382 +++++++++++++++++ .../stats/CompositeStatsRegistry.java | 59 +++ ...CompositeIndexingExecutionEngineTests.java | 14 +- .../composite/CompositeWriterTests.java | 27 +- 
.../composite/merge/CompositeMergerTests.java | 11 +- .../parquet/ParquetDataFormatPlugin.java | 30 +- .../parquet/action/ParquetAnalyzeAction.java | 323 ++++++++++++++ .../action/ParquetRegistryInitializer.java | 34 ++ .../parquet/bridge/NativeParquetWriter.java | 52 ++- .../opensearch/parquet/bridge/RustBridge.java | 34 ++ .../parquet/engine/ParquetIndexingEngine.java | 20 +- .../merge/NativeParquetMergeStrategy.java | 17 +- .../parquet/stats/ParquetShardStats.java | 401 ++++++++++++++++++ .../opensearch/parquet/vsr/VSRManager.java | 49 ++- .../parquet/writer/ParquetWriter.java | 50 ++- .../src/main/rust/src/ffm.rs | 307 ++++++++++++++ .../index/engine/DataFormatAwareEngine.java | 9 +- .../DataFormatAwareNRTReplicationEngine.java | 55 ++- .../exec/CatalogSnapshotDeletionPolicy.java | 1 - .../index/engine/exec/commit/Committer.java | 9 +- .../exec/coord/CatalogSnapshotManager.java | 24 +- .../coord/DataformatAwareCatalogSnapshot.java | 4 +- .../engine/exec/coord/IndexFileDeleter.java | 12 +- .../engine/CombinedDeletionPolicyTests.java | 3 +- .../DataFormatAwareEngineRecoveryTests.java | 89 +++- ...aFormatAwareNRTReplicationEngineTests.java | 8 +- .../exec/coord/IndexFileDeleterTests.java | 3 +- .../dataformat/stub/InMemoryCommitter.java | 41 +- 58 files changed, 3546 insertions(+), 459 deletions(-) create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/stats/LuceneShardStats.java create mode 100644 sandbox/plugins/composite-engine/src/internalClusterTest/java/org/opensearch/composite/DataFormatStatsIT.java create mode 100644 sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeRegistryInitializer.java create mode 100644 sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/CatalogSnapshotAction.java create mode 100644 sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/DataFormatStatsAction.java create mode 100644 
sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeShardStats.java create mode 100644 sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeStatsRegistry.java create mode 100644 sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetAnalyzeAction.java create mode 100644 sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetRegistryInitializer.java create mode 100644 sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/stats/ParquetShardStats.java diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LucenePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LucenePlugin.java index 78e6a3d6bc914..c0159b275ad9e 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LucenePlugin.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LucenePlugin.java @@ -13,13 +13,14 @@ import org.opensearch.be.lucene.index.LuceneCommitterFactory; import org.opensearch.be.lucene.index.LuceneDeleteExecutionEngine; import org.opensearch.be.lucene.index.LuceneIndexingExecutionEngine; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.DataFormatDescriptor; import org.opensearch.index.engine.dataformat.DataFormatPlugin; -import org.opensearch.index.engine.dataformat.DeleteExecutionEngine; import org.opensearch.index.engine.dataformat.DataFormatRegistry; +import org.opensearch.index.engine.dataformat.DeleteExecutionEngine; import org.opensearch.index.engine.dataformat.IndexingEngineConfig; import org.opensearch.index.engine.dataformat.IndexingExecutionEngine; import 
org.opensearch.index.engine.dataformat.ReaderManagerConfig; @@ -55,6 +56,7 @@ public class LucenePlugin extends Plugin implements DataFormatPlugin, SearchBackEndPlugin, EnginePlugin { private static final LuceneDataFormat DATA_FORMAT = new LuceneDataFormat(); + private final LuceneShardStats stats = new LuceneShardStats(); /** Creates a new LucenePlugin. */ public LucenePlugin() {} @@ -93,7 +95,10 @@ public DataFormat getDataFormat() { } @Override - public Map> getFormatDescriptors(IndexSettings indexSettings, DataFormatRegistry dataFormatRegistry) { + public Map> getFormatDescriptors( + IndexSettings indexSettings, + DataFormatRegistry dataFormatRegistry + ) { return Map.of(DATA_FORMAT.name(), () -> new DataFormatDescriptor(DATA_FORMAT.name(), new LuceneChecksumHandler())); } @@ -134,11 +139,11 @@ public EngineReaderManager createReaderManager(ReaderManagerCon */ @Override public Optional getCommitterFactory(IndexSettings indexSettings) { - return Optional.of(new LuceneCommitterFactory()); + return Optional.of(new LuceneCommitterFactory(stats)); } @Override public DeleteExecutionEngine getDeleteExecutionEngine(Committer committer) { - return new LuceneDeleteExecutionEngine(DATA_FORMAT, committer); + return new LuceneDeleteExecutionEngine(DATA_FORMAT, committer, stats); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java index 60093244bf3ba..e3837e29d378f 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java @@ -15,8 +15,6 @@ import org.opensearch.be.lucene.index.LuceneReplicaCommitter; import org.opensearch.common.CheckedBiFunction; import org.opensearch.common.annotation.ExperimentalApi; -import 
org.opensearch.common.util.concurrent.AbstractRefCounted; -import org.opensearch.common.util.concurrent.RefCounted; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.Segment; @@ -51,12 +49,19 @@ public class LuceneReaderManager implements EngineReaderManager /** * Creates a new LuceneReaderManager. * - * @param dataFormat the data format this reader manager serves - * @param initialReader the initial DirectoryReader, must not be null + * @param dataFormat the data format this reader manager serves + * @param initialReader the initial DirectoryReader, must not be null + * @param readers shared map of generation to DirectoryReader for segment-level reader reuse + * @param readerRefresher function that opens a refreshed reader given the current reader and new + * {@link SegmentInfos}; returns {@code null} if no refresh is needed * @throws NullPointerException if initialReader is null */ - public LuceneReaderManager(DataFormat dataFormat, DirectoryReader initialReader, Map readers, - CheckedBiFunction readerRefresher) { + public LuceneReaderManager( + DataFormat dataFormat, + DirectoryReader initialReader, + Map readers, + CheckedBiFunction readerRefresher + ) { this.dataFormat = dataFormat; Objects.requireNonNull(initialReader, "initialReader must not be null"); this.currentReader = initialReader; diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchBackEnd.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchBackEnd.java index 259e8d5be30a4..301abc57d2a03 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchBackEnd.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchBackEnd.java @@ -17,7 +17,6 @@ import org.apache.lucene.index.StandardDirectoryReader; import 
org.opensearch.be.lucene.index.LuceneIndexingExecutionEngine; import org.opensearch.common.CheckedBiFunction; -import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.ReaderManagerConfig; import org.opensearch.index.engine.exec.EngineReaderManager; @@ -25,13 +24,10 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import static org.opensearch.index.engine.NRTReplicationReaderManager.unwrapStandardReader; - /** * Static helpers for creating Lucene-based {@link EngineReaderManager} instances. *

@@ -78,7 +74,7 @@ static EngineReaderManager createReaderManager(ReaderManagerCon } private static DirectoryReader buildReader(DirectoryReader oldReader, SegmentInfos newSis) throws IOException { - if (newSis == null || ((StandardDirectoryReader)oldReader).getSegmentInfos().version == newSis.version) { + if (newSis == null || ((StandardDirectoryReader) oldReader).getSegmentInfos().version == newSis.version) { return null; } final List subs = new ArrayList<>(); @@ -87,12 +83,6 @@ private static DirectoryReader buildReader(DirectoryReader oldReader, SegmentInf } // Segment_n here is ignored because it is either already committed on disk as part of previous commit point or // does not yet exist on store (not yet committed) - return StandardDirectoryReader.open( - oldReader.directory(), - newSis, - subs, - null, - null - ); + return StandardDirectoryReader.open(oldReader.directory(), newSis, subs, null, null); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitter.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitter.java index f38f92f444318..99997acbe401f 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitter.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitter.java @@ -27,6 +27,7 @@ import org.apache.lucene.store.ByteBuffersIndexOutput; import org.apache.lucene.util.Version; import org.opensearch.be.lucene.LuceneDataFormat; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.EngineConfig; @@ -51,6 +52,7 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; 
import java.util.function.Function; import java.util.stream.Collectors; @@ -96,6 +98,7 @@ public class LuceneCommitter extends SafeBootstrapCommitter { private final Store store; private final MergeIndexWriter indexWriter; private final LuceneCommitDeletionPolicy deletionPolicy; + private final LuceneShardStats stats; private final AtomicBoolean isClosed = new AtomicBoolean(); // Keyed by catalog snapshot generation — survives snapshot cloning at the upload boundary. private final Map readers = new ConcurrentHashMap<>(); @@ -105,10 +108,12 @@ public class LuceneCommitter extends SafeBootstrapCommitter { * then opens the IndexWriter. * * @param committerConfig the committer committerConfig (shard path, index committerConfig, engine config, store) + * @param stats the shard-level stats collector * @throws IOException if opening the IndexWriter fails */ - public LuceneCommitter(CommitterConfig committerConfig) throws IOException { + public LuceneCommitter(CommitterConfig committerConfig, LuceneShardStats stats) throws IOException { super(committerConfig); + this.stats = stats; this.store = Objects.requireNonNull(committerConfig.engineConfig().getStore()); this.store.incRef(); try { @@ -134,13 +139,19 @@ public LuceneCommitter(CommitterConfig committerConfig) throws IOException { @Override public synchronized CommitResult commit(CommitInput commitData) throws IOException { ensureOpen(); - indexWriter.setLiveCommitData(commitData.userData()); - indexWriter.commit(); - SegmentInfos committed = SegmentInfos.readLatestCommit(indexWriter.getDirectory()); + long start = System.nanoTime(); + try { + indexWriter.setLiveCommitData(commitData.userData()); + indexWriter.commit(); + SegmentInfos committed = SegmentInfos.readLatestCommit(indexWriter.getDirectory()); - // Encode writer's Lucene version as a long — keeps CatalogSnapshot Lucene-type-agnostic. 
- long version = LuceneVersionConverter.encode(committed.getCommitLuceneVersion()); - return new CommitResult(committed.getSegmentsFileName(), committed.getGeneration(), version); + // Encode writer's Lucene version as a long — keeps CatalogSnapshot Lucene-type-agnostic. + long version = LuceneVersionConverter.encode(committed.getCommitLuceneVersion()); + return new CommitResult(committed.getSegmentsFileName(), committed.getGeneration(), version); + } finally { + stats.incCommitTotal(); + stats.addCommitTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); + } } /** @@ -396,7 +407,12 @@ static Map loadCommittedSnapshots(Store store) thr // the first real flush — preventing the catalog generation fallback from // leaking into ReplicationCheckpoint.segmentsGen. dfa = (DataformatAwareCatalogSnapshot) CatalogSnapshotManager.createInitialSnapshot( - 0L, 0L, 0L, List.of(), -1L, ic.getUserData() + 0L, + 0L, + 0L, + List.of(), + -1L, + ic.getUserData() ); } SegmentInfos committed = SegmentInfos.readCommit(store.directory(), ic.getSegmentsFileName()); diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitterFactory.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitterFactory.java index 996edf70b4799..4d5dcb2f8b4ce 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitterFactory.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneCommitterFactory.java @@ -8,6 +8,7 @@ package org.opensearch.be.lucene.index; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.commit.Committer; import org.opensearch.index.engine.exec.commit.CommitterConfig; @@ -28,8 +29,12 @@ @ExperimentalApi public final class LuceneCommitterFactory implements CommitterFactory { 
- /** Creates a new factory instance. */ - public LuceneCommitterFactory() {} + private final LuceneShardStats stats; + + /** Creates a new factory instance with the given stats collector. */ + public LuceneCommitterFactory(LuceneShardStats stats) { + this.stats = stats; + } /** * Creates a new {@link LuceneCommitter} for the given settings. @@ -42,6 +47,6 @@ public Committer getCommitter(CommitterConfig committerConfig) throws IOExceptio if (committerConfig.isReplica()) { return new LuceneReplicaCommitter(committerConfig); } - return new LuceneCommitter(committerConfig); + return new LuceneCommitter(committerConfig, stats); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngine.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngine.java index fe31c56212566..4d4fd3ddaeeea 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngine.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngine.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.Term; import org.opensearch.be.lucene.LuceneDataFormat; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.DeleteExecutionEngine; import org.opensearch.index.engine.dataformat.DeleteInput; @@ -26,6 +27,7 @@ import java.io.IOException; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; /** * Lucene-based implementation of {@link DeleteExecutionEngine} that tracks per-generation @@ -41,11 +43,13 @@ public class LuceneDeleteExecutionEngine implements DeleteExecutionEngine generationToDeleterMap; private final DataFormat dataFormat; private final 
LuceneCommitter committer; + private final LuceneShardStats stats; - public LuceneDeleteExecutionEngine(DataFormat dataFormat, Committer committer) { + public LuceneDeleteExecutionEngine(DataFormat dataFormat, Committer committer, LuceneShardStats stats) { this.generationToDeleterMap = new ConcurrentHashMap<>(); this.dataFormat = dataFormat; this.committer = (LuceneCommitter) committer; + this.stats = stats; } @Override @@ -67,13 +71,21 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { @Override public DeleteResult deleteDocument(DeleteInput deleteInput) throws IOException { - Deleter deleter = generationToDeleterMap.get(deleteInput.generation()); - if (deleter != null) { - return deleter.deleteDoc(deleteInput); - } else { - Term uid = new Term(deleteInput.fieldName(), deleteInput.value()); - this.committer.getIndexWriter().deleteDocuments(uid); - return new DeleteResult.Success(1L, 1L, 1L); + long start = System.nanoTime(); + try { + Deleter deleter = generationToDeleterMap.get(deleteInput.generation()); + if (deleter != null) { + stats.incDeleteByGenerationTotal(); + return deleter.deleteDoc(deleteInput); + } else { + stats.incDeleteSharedWriterFallbackTotal(); + Term uid = new Term(deleteInput.fieldName(), deleteInput.value()); + this.committer.getIndexWriter().deleteDocuments(uid); + return new DeleteResult.Success(1L, 1L, 1L); + } + } finally { + stats.incDeleteTotal(); + stats.addDeleteTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngine.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngine.java index 888f94d037dd8..c56c036757565 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngine.java +++ 
b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngine.java @@ -25,6 +25,7 @@ import org.opensearch.be.lucene.LuceneDataFormat; import org.opensearch.be.lucene.LuceneFieldFactoryRegistry; import org.opensearch.be.lucene.merge.LuceneMerger; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.IndexingExecutionEngine; @@ -49,6 +50,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; /** * Lucene-specific {@link IndexingExecutionEngine} that manages per-writer Lucene segments @@ -75,6 +77,7 @@ public class LuceneIndexingExecutionEngine implements IndexingExecutionEngine createWriter(WriterConfig config) { baseDirectory, analyzer, codec, - sharedWriter.getConfig().getIndexSort() + sharedWriter.getConfig().getIndexSort(), + stats ); } catch (IOException e) { throw new RuntimeException("Failed to create LuceneWriter for generation " + config.writerGeneration(), e); @@ -213,85 +224,97 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { return new RefreshResult(List.of()); } - List resultSegments = new ArrayList<>(refreshInput.existingSegments()); - - // Collect all source directories and their paths for a single batched addIndexes call - List sourceDirectories = new ArrayList<>(); - Set writerGenerations = new HashSet<>(); - - for (Segment segment : refreshInput.writerFiles()) { - WriterFileSet wfs = segment.dfGroupedSearchableFiles().get(LuceneDataFormat.LUCENE_FORMAT_NAME); - if (wfs == null) { - continue; - } + long refreshStart = System.nanoTime(); + try { + List resultSegments = new ArrayList<>(refreshInput.existingSegments()); - Path dirPath = Path.of(wfs.directory()); - if (Files.isDirectory(dirPath) == false) { - logger.warn("Lucene writer directory does 
not exist: {}", dirPath); - continue; - } + // Collect all source directories and their paths for a single batched addIndexes call + List sourceDirectories = new ArrayList<>(); + Set writerGenerations = new HashSet<>(); - sourceDirectories.add(new HardlinkCopyDirectoryWrapper(new MMapDirectory(dirPath))); - writerGenerations.add(wfs.writerGeneration()); - } + for (Segment segment : refreshInput.writerFiles()) { + WriterFileSet wfs = segment.dfGroupedSearchableFiles().get(LuceneDataFormat.LUCENE_FORMAT_NAME); + if (wfs == null) { + continue; + } - // Single batched addIndexes call for all source directories - if (sourceDirectories.isEmpty() == false) { - try { - sharedWriter.addIndexes(sourceDirectories.toArray(new Directory[0])); - logger.debug("Incorporated {} Lucene segments into shared writer in a single addIndexes call", sourceDirectories.size()); - } finally { - // Close all source directories - for (Directory dir : sourceDirectories) { - try { - dir.close(); - } catch (IOException e) { - logger.warn("Failed to close source directory after addIndexes", e); - } + Path dirPath = Path.of(wfs.directory()); + if (Files.isDirectory(dirPath) == false) { + logger.warn("Lucene writer directory does not exist: {}", dirPath); + continue; } - } - // After addIndexes, open an NRT reader to discover the actual file names - // for the newly added segments. Lucene renames files during addIndexes, - // so the original temp directory file names are no longer valid. 
- Path sharedDir = store.shardPath().resolveIndex(); - - try (DirectoryReader reader = DirectoryReader.open(sharedWriter)) { - List leaves = reader.leaves(); - - for (int i = 0; i < leaves.size(); i++) { - LeafReaderContext ctx = leaves.get(i); - if (ctx.reader() instanceof SegmentReader segReader) { - SegmentCommitInfo segInfo = segReader.getSegmentInfo(); - String genAttr = segInfo.info.getAttribute(LuceneWriter.WRITER_GENERATION_ATTRIBUTE); - if (genAttr == null) { - continue; - } + sourceDirectories.add(new HardlinkCopyDirectoryWrapper(new MMapDirectory(dirPath))); + writerGenerations.add(wfs.writerGeneration()); + } - long writerGen = Long.parseLong(genAttr); - if (!writerGenerations.contains(writerGen)) { - continue; + // Single batched addIndexes call for all source directories + if (sourceDirectories.isEmpty() == false) { + long addIndexesStart = System.nanoTime(); + try { + sharedWriter.addIndexes(sourceDirectories.toArray(new Directory[0])); + logger.debug( + "Incorporated {} Lucene segments into shared writer in a single addIndexes call", + sourceDirectories.size() + ); + } finally { + stats.addRefreshAddIndexesTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - addIndexesStart)); + // Close all source directories + for (Directory dir : sourceDirectories) { + try { + dir.close(); + } catch (IOException e) { + logger.warn("Failed to close source directory after addIndexes", e); } - long numDocs = segInfo.info.maxDoc(); - - WriterFileSet.Builder wfsBuilder = WriterFileSet.builder() - .directory(sharedDir) - .writerGeneration(writerGen) - .addNumRows(numDocs); + } + } - for (String file : segInfo.files()) { - wfsBuilder.addFile(file); + // After addIndexes, open an NRT reader to discover the actual file names + // for the newly added segments. Lucene renames files during addIndexes, + // so the original temp directory file names are no longer valid. 
+ Path sharedDir = store.shardPath().resolveIndex(); + + try (DirectoryReader reader = DirectoryReader.open(sharedWriter)) { + List leaves = reader.leaves(); + + for (int i = 0; i < leaves.size(); i++) { + LeafReaderContext ctx = leaves.get(i); + if (ctx.reader() instanceof SegmentReader segReader) { + SegmentCommitInfo segInfo = segReader.getSegmentInfo(); + String genAttr = segInfo.info.getAttribute(LuceneWriter.WRITER_GENERATION_ATTRIBUTE); + if (genAttr == null) { + continue; + } + + long writerGen = Long.parseLong(genAttr); + if (!writerGenerations.contains(writerGen)) { + continue; + } + long numDocs = segInfo.info.maxDoc(); + + WriterFileSet.Builder wfsBuilder = WriterFileSet.builder() + .directory(sharedDir) + .writerGeneration(writerGen) + .addNumRows(numDocs); + + for (String file : segInfo.files()) { + wfsBuilder.addFile(file); + } + + resultSegments.add(Segment.builder(writerGen).addSearchableFiles(dataFormat, wfsBuilder.build()).build()); + writerGenerations.remove(writerGen); + stats.incRefreshSegmentsIncorporatedTotal(); } - - resultSegments.add(Segment.builder(writerGen).addSearchableFiles(dataFormat, wfsBuilder.build()).build()); - writerGenerations.remove(writerGen); } } + assert writerGenerations.isEmpty() : "Could not get segments from all writers"; } - assert writerGenerations.isEmpty() : "Could not get segments from all writers"; - } - return new RefreshResult(List.copyOf(resultSegments)); + return new RefreshResult(List.copyOf(resultSegments)); + } finally { + stats.incRefreshTotal(); + stats.addRefreshTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - refreshStart)); + } } /** Returns {@code null} — merge scheduling is not yet implemented for the Lucene format. 
*/ diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneReplicaCommitter.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneReplicaCommitter.java index 8d2a42064c0cc..cdee1d53d9003 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneReplicaCommitter.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneReplicaCommitter.java @@ -8,7 +8,6 @@ package org.opensearch.be.lucene.index; -import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.SegmentInfos; @@ -33,6 +32,21 @@ import static org.opensearch.be.lucene.index.LuceneCommitter.loadCommittedSnapshots; +/** + * Replica-side {@link Committer} implementation backed by Lucene's {@link SegmentInfos} commit mechanism. + *

+ * Unlike the primary's {@link LuceneCommitter} which owns an {@link IndexWriter}, this committer + * operates without a writer — it directly writes {@code segments_N} files to the store directory + * using {@link Store#commitSegmentInfos(SegmentInfos, long, long)}. This is appropriate for replicas + * that receive segments via segment replication and only need to persist the commit point. + *

+ * The commit path clones the primary's {@link SegmentInfos} (received during replication), injects + * the user data (containing the serialized {@link CatalogSnapshot}), and writes it as a new commit. + * The {@link #listCommittedSnapshots()} method discovers existing commits by reading all + * {@code segments_N} files from the store directory. + * + * @opensearch.experimental + */ public class LuceneReplicaCommitter implements Committer { private final Store store; @@ -61,7 +75,7 @@ public synchronized CommitResult commit(CommitInput commitInput) throws IOExcept sis.updateGeneration(lastCommittedSegmentInfos); } Map userData = new HashMap<>(sis.userData); - for (Map.Entry entries: commitInput.userData()) { + for (Map.Entry entries : commitInput.userData()) { userData.put(entries.getKey(), entries.getValue()); } sis.setUserData(userData, false); @@ -146,4 +160,3 @@ public static SegmentInfos getSegmentInfos(CatalogSnapshot catalogSnapshot) { return null; } } - diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneWriter.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneWriter.java index e63746fed9f77..0f895df86f8c4 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneWriter.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneWriter.java @@ -22,6 +22,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MMapDirectory; import org.opensearch.be.lucene.LuceneDataFormat; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.engine.dataformat.DeleteInput; @@ -36,6 +37,7 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.Optional; +import java.util.concurrent.TimeUnit; /** * Per-generation 
Lucene writer that creates segments in an isolated temporary directory. @@ -73,6 +75,7 @@ public class LuceneWriter implements Writer { private final long writerGeneration; private final LuceneDataFormat dataFormat; + private final LuceneShardStats stats; private final Path tempDirectory; private final Directory directory; private final IndexWriter indexWriter; @@ -89,6 +92,7 @@ public class LuceneWriter implements Writer { * @param analyzer the analyzer to use for tokenized fields, or null for default * @param codec the codec to use, or null for default * @param indexSort the index sort to apply to segments, or null for no sort + * @param stats the shard-level stats collector * @throws IOException if directory creation or IndexWriter opening fails */ public LuceneWriter( @@ -98,11 +102,13 @@ public LuceneWriter( Path baseDirectory, Analyzer analyzer, Codec codec, - Sort indexSort + Sort indexSort, + LuceneShardStats stats ) throws IOException { this.writerGeneration = writerGeneration; this.mappingVersion = mappingVersion; this.dataFormat = dataFormat; + this.stats = stats; this.docCount = 0; // Create an isolated temp directory for this writer's segment @@ -132,19 +138,28 @@ public LuceneWriter( */ @Override public WriteResult addDoc(LuceneDocumentInput input) throws IOException { - Document doc = input.getFinalInput(); - assert doc.getField(LuceneDocumentInput.ROW_ID_FIELD) != null : "Document missing required " - + LuceneDocumentInput.ROW_ID_FIELD - + " field at doc position " - + docCount; - assert doc.getField(LuceneDocumentInput.ROW_ID_FIELD).numericValue().longValue() == docCount : "Row ID mismatch: expected " - + docCount - + " but got " - + doc.getField(LuceneDocumentInput.ROW_ID_FIELD).numericValue().longValue(); - indexWriter.addDocument(doc); - long currentDocId = docCount; - docCount++; - return new WriteResult.Success(1L, 1L, currentDocId); + long start = System.nanoTime(); + try { + Document doc = input.getFinalInput(); + assert
doc.getField(LuceneDocumentInput.ROW_ID_FIELD) != null : "Document missing required " + + LuceneDocumentInput.ROW_ID_FIELD + + " field at doc position " + + docCount; + assert doc.getField(LuceneDocumentInput.ROW_ID_FIELD).numericValue().longValue() == docCount : "Row ID mismatch: expected " + + docCount + + " but got " + + doc.getField(LuceneDocumentInput.ROW_ID_FIELD).numericValue().longValue(); + indexWriter.addDocument(doc); + long currentDocId = docCount; + docCount++; + stats.addDocsIndexed(1); + return new WriteResult.Success(1L, 1L, currentDocId); + } catch (IOException | RuntimeException e) { // unchecked failures must reach the failure counter as well + stats.incDocsIndexedFailures(); + throw e; + } finally { + stats.addIndexTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); // NOTE(review): toMillis truncates, so a call shorter than 1 ms adds 0 + } } /** @@ -165,34 +180,45 @@ public FileInfos flush() throws IOException { return FileInfos.empty(); } - // Force merge to exactly 1 segment to maintain 1:1 mapping with other formats. - indexWriter.forceMerge(1, true); - indexWriter.commit(); + long flushStart = System.nanoTime(); + try { + // Force merge to exactly 1 segment to maintain 1:1 mapping with other formats.
+ long forceMergeStart = System.nanoTime(); + try { + indexWriter.forceMerge(1, true); + } finally { + stats.addFlushForceMergeTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - forceMergeStart)); + } + indexWriter.commit(); - // Verify the invariant: exactly 1 segment with docCount documents - SegmentInfos segmentInfos = SegmentInfos.readLatestCommit(directory); - assert segmentInfos.size() == 1 : "Expected exactly 1 segment after force merge, got " + segmentInfos.size(); + // Verify the invariant: exactly 1 segment with docCount documents + SegmentInfos segmentInfos = SegmentInfos.readLatestCommit(directory); + assert segmentInfos.size() == 1 : "Expected exactly 1 segment after force merge, got " + segmentInfos.size(); - SegmentCommitInfo segmentInfo = segmentInfos.info(0); - assert segmentInfo.info.maxDoc() == docCount : "Expected " + docCount + " docs in segment, got " + segmentInfo.info.maxDoc(); + SegmentCommitInfo segmentInfo = segmentInfos.info(0); + assert segmentInfo.info.maxDoc() == docCount : "Expected " + docCount + " docs in segment, got " + segmentInfo.info.maxDoc(); - // Build the WriterFileSet pointing to the temp directory - WriterFileSet.Builder wfsBuilder = WriterFileSet.builder() - .directory(tempDirectory) - .writerGeneration(writerGeneration) - .addNumRows(docCount); + // Build the WriterFileSet pointing to the temp directory + WriterFileSet.Builder wfsBuilder = WriterFileSet.builder() + .directory(tempDirectory) + .writerGeneration(writerGeneration) + .addNumRows(docCount); - // Add all files in the segment - for (String file : directory.listAll()) { - if (file.startsWith("segments") == false && file.equals("write.lock") == false) { - wfsBuilder.addFile(file); + // Add all files in the segment + for (String file : directory.listAll()) { + if (file.startsWith("segments") == false && file.equals("write.lock") == false) { + wfsBuilder.addFile(file); + } } - } - // Since flush is once only, close the IndexWriter but keep directory open 
for close() - indexWriter.close(); + // Since flush is once only, close the IndexWriter but keep directory open for close() + indexWriter.close(); - return FileInfos.builder().putWriterFileSet(dataFormat, wfsBuilder.build()).build(); + return FileInfos.builder().putWriterFileSet(dataFormat, wfsBuilder.build()).build(); + } finally { + stats.incFlushTotal(); + stats.addFlushTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - flushStart)); + } } /** diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/merge/LuceneMerger.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/merge/LuceneMerger.java index 4b17852920e05..8883d398fd0e8 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/merge/LuceneMerger.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/merge/LuceneMerger.java @@ -15,6 +15,7 @@ import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; @@ -33,6 +34,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import static org.opensearch.be.lucene.index.LuceneWriter.WRITER_GENERATION_ATTRIBUTE; @@ -79,81 +81,92 @@ private static Field initSegmentInfosField() { private final DataFormat dataFormat; private final Path storeDirectory; private final LuceneMergeStrategy strategy; + private final LuceneShardStats stats; - public LuceneMerger(MergeIndexWriter indexWriter, DataFormat dataFormat, Path storeDirectory) { + public LuceneMerger(MergeIndexWriter indexWriter, DataFormat dataFormat, Path storeDirectory, LuceneShardStats stats) { if (indexWriter == 
null) { throw new IllegalArgumentException("IndexWriter must not be null"); } this.indexWriter = indexWriter; this.dataFormat = dataFormat; this.storeDirectory = storeDirectory; + this.stats = stats; // TODO implement primary and integrate the same here this.strategy = new SecondaryLuceneMergeStrategy(); } @Override public MergeResult merge(MergeInput mergeInput) throws IOException { - RowIdMapping rowIdMapping = mergeInput.rowIdMapping(); - List segments = mergeInput.segments(); + long start = System.nanoTime(); + try { + RowIdMapping rowIdMapping = mergeInput.rowIdMapping(); + List segments = mergeInput.segments(); - if (segments.isEmpty()) { - return new MergeResult(Map.of()); - } + if (segments.isEmpty()) { + return new MergeResult(Map.of()); + } - Set generationsToMerge = new HashSet<>(); - for (Segment segment : segments) { - generationsToMerge.add(segment.generation()); - } + Set generationsToMerge = new HashSet<>(); + for (Segment segment : segments) { + generationsToMerge.add(segment.generation()); + } - SegmentInfos segmentInfos; - try { - segmentInfos = (SegmentInfos) SEGMENT_INFOS_FIELD.get(indexWriter); - } catch (IllegalAccessException e) { - throw new IOException("Failed to access IndexWriter segmentInfos via reflection", e); - } + SegmentInfos segmentInfos; + try { + segmentInfos = (SegmentInfos) SEGMENT_INFOS_FIELD.get(indexWriter); + } catch (IllegalAccessException e) { + throw new IOException("Failed to access IndexWriter segmentInfos via reflection", e); + } - if (segmentInfos.size() == 0) { - logger.warn("No segments in IndexWriter — skipping merge"); - return new MergeResult(Map.of()); - } + if (segmentInfos.size() == 0) { + logger.warn("No segments in IndexWriter — skipping merge"); + return new MergeResult(Map.of()); + } - List matchingSegments = findMatchingSegments(segmentInfos, generationsToMerge); + List matchingSegments = findMatchingSegments(segmentInfos, generationsToMerge); - if (matchingSegments.isEmpty()) { - logger.warn("No
segments found matching writer generations {} — skipping merge", generationsToMerge); - return new MergeResult(Map.of()); - } + if (matchingSegments.isEmpty()) { + logger.warn("No segments found matching writer generations {} — skipping merge", generationsToMerge); + return new MergeResult(Map.of()); + } - logger.debug( - "LuceneMerger: merging {} segments (generations {}) using merge(OneMerge) + IndexSort", - matchingSegments.size(), - generationsToMerge - ); - - // Delegate OneMerge creation to the strategy (primary vs secondary behavior). - // For the secondary path, the returned RowIdRemappingOneMerge stamps the - // writer_generation attribute onto the merged SegmentInfo via setMergeInfo, which - // Lucene invokes immediately before codec.segmentInfoFormat().write(...) — so the - // attribute is persisted to the .si file and survives a writer reopen. - MergePolicy.OneMerge oneMerge = strategy.createOneMerge(matchingSegments, rowIdMapping, mergeInput.newWriterGeneration()); - indexWriter.executeMerge(oneMerge, mergeInput.newWriterGeneration()); - - // Build the merged WriterFileSet from the output segment info - SegmentCommitInfo mergedInfo = oneMerge.getMergeInfo(); - WriterFileSet mergedFileSet = buildMergedFileSet(mergedInfo, mergeInput.newWriterGeneration()); - - // Delegate RowIdMapping production to the strategy - RowIdMapping outputMapping = strategy.buildRowIdMapping(oneMerge, mergeInput); - - logger.debug( - "LuceneMerger: completed merge of {} segments at generation {} ({} docs, {} files)", - matchingSegments.size(), - mergeInput.newWriterGeneration(), - oneMerge.getMergeInfo().info.maxDoc(), - oneMerge.getMergeInfo().files().size() - ); - - return new MergeResult(Map.of(dataFormat, mergedFileSet), outputMapping); + logger.debug( + "LuceneMerger: merging {} segments (generations {}) using merge(OneMerge) + IndexSort", + matchingSegments.size(), + generationsToMerge + ); + + // Delegate OneMerge creation to the strategy (primary vs secondary behavior).
+ // For the secondary path, the returned RowIdRemappingOneMerge stamps the + // writer_generation attribute onto the merged SegmentInfo via setMergeInfo, which + // Lucene invokes immediately before codec.segmentInfoFormat().write(...) — so the + // attribute is persisted to the .si file and survives a writer reopen. + MergePolicy.OneMerge oneMerge = strategy.createOneMerge(matchingSegments, rowIdMapping, mergeInput.newWriterGeneration()); + indexWriter.executeMerge(oneMerge, mergeInput.newWriterGeneration()); + + // Build the merged WriterFileSet from the output segment info + SegmentCommitInfo mergedInfo = oneMerge.getMergeInfo(); + WriterFileSet mergedFileSet = buildMergedFileSet(mergedInfo, mergeInput.newWriterGeneration()); + + // Delegate RowIdMapping production to the strategy + RowIdMapping outputMapping = strategy.buildRowIdMapping(oneMerge, mergeInput); + + logger.debug( + "LuceneMerger: completed merge of {} segments at generation {} ({} docs, {} files)", + matchingSegments.size(), + mergeInput.newWriterGeneration(), + oneMerge.getMergeInfo().info.maxDoc(), + oneMerge.getMergeInfo().files().size() + ); + + stats.incMergeTotal(); // NOTE(review): merge_docs_total is not updated in this method; confirm it is recorded elsewhere + return new MergeResult(Map.of(dataFormat, mergedFileSet), outputMapping); + } catch (IOException | RuntimeException e) { // unchecked merge failures must reach the failure counter as well + stats.incMergeFailures(); + throw e; + } finally { + stats.addMergeTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); + } } /** diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/stats/LuceneShardStats.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/stats/LuceneShardStats.java new file mode 100644 index 0000000000000..98feabb37ee37 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/stats/LuceneShardStats.java @@ -0,0 +1,317 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the
Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene.stats; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; + +/** + * Shard-level statistics collector for the Lucene data format plugin. + * Uses LongAdder for high-throughput counters and AtomicLong for gauges. + * Serves as both the live collector and the serializable snapshot. + */ +@ExperimentalApi +public class LuceneShardStats implements ToXContentFragment, Writeable { + + // Indexing counters + private final LongAdder docsIndexedTotal = new LongAdder(); + private final LongAdder docsIndexedFailures = new LongAdder(); + private final LongAdder indexTimeMillis = new LongAdder(); + + // Flush counters + private final LongAdder flushTotal = new LongAdder(); + private final LongAdder flushTimeMillis = new LongAdder(); + private final LongAdder flushForceMergeTimeMillis = new LongAdder(); + + // Refresh counters + private final LongAdder refreshTotal = new LongAdder(); + private final LongAdder refreshTimeMillis = new LongAdder(); + private final LongAdder refreshAddIndexesTimeMillis = new LongAdder(); + private final LongAdder refreshSegmentsIncorporatedTotal = new LongAdder(); + + // Commit counters + private final LongAdder commitTotal = new LongAdder(); + private final LongAdder commitTimeMillis = new LongAdder(); + + // Merge counters + private final LongAdder mergeTotal = new LongAdder(); + private final LongAdder mergeTimeMillis = new LongAdder(); + private final LongAdder mergeDocsTotal = new LongAdder(); + private final LongAdder mergeFailures = 
new LongAdder(); + + // Delete counters + private final LongAdder deleteTotal = new LongAdder(); + private final LongAdder deleteTimeMillis = new LongAdder(); + private final LongAdder deleteByGenerationTotal = new LongAdder(); + private final LongAdder deleteSharedWriterFallbackTotal = new LongAdder(); + + // Memory gauges + private final AtomicLong ramBufferBytesUsed = new AtomicLong(); + private final AtomicLong activeWriters = new AtomicLong(); + private final AtomicLong activeReaders = new AtomicLong(); + + public LuceneShardStats() {} + + public LuceneShardStats(StreamInput in) throws IOException { + // Indexing + docsIndexedTotal.add(in.readVLong()); + docsIndexedFailures.add(in.readVLong()); + indexTimeMillis.add(in.readVLong()); + + // Flush + flushTotal.add(in.readVLong()); + flushTimeMillis.add(in.readVLong()); + flushForceMergeTimeMillis.add(in.readVLong()); + + // Refresh + refreshTotal.add(in.readVLong()); + refreshTimeMillis.add(in.readVLong()); + refreshAddIndexesTimeMillis.add(in.readVLong()); + refreshSegmentsIncorporatedTotal.add(in.readVLong()); + + // Commit + commitTotal.add(in.readVLong()); + commitTimeMillis.add(in.readVLong()); + + // Merge + mergeTotal.add(in.readVLong()); + mergeTimeMillis.add(in.readVLong()); + mergeDocsTotal.add(in.readVLong()); + mergeFailures.add(in.readVLong()); + + // Delete + deleteTotal.add(in.readVLong()); + deleteTimeMillis.add(in.readVLong()); + deleteByGenerationTotal.add(in.readVLong()); + deleteSharedWriterFallbackTotal.add(in.readVLong()); + + // Memory + ramBufferBytesUsed.set(in.readVLong()); + activeWriters.set(in.readVLong()); + activeReaders.set(in.readVLong()); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // Indexing + out.writeVLong(docsIndexedTotal.sum()); + out.writeVLong(docsIndexedFailures.sum()); + out.writeVLong(indexTimeMillis.sum()); + + // Flush + out.writeVLong(flushTotal.sum()); + out.writeVLong(flushTimeMillis.sum()); + 
out.writeVLong(flushForceMergeTimeMillis.sum()); + + // Refresh + out.writeVLong(refreshTotal.sum()); + out.writeVLong(refreshTimeMillis.sum()); + out.writeVLong(refreshAddIndexesTimeMillis.sum()); + out.writeVLong(refreshSegmentsIncorporatedTotal.sum()); + + // Commit + out.writeVLong(commitTotal.sum()); + out.writeVLong(commitTimeMillis.sum()); + + // Merge + out.writeVLong(mergeTotal.sum()); + out.writeVLong(mergeTimeMillis.sum()); + out.writeVLong(mergeDocsTotal.sum()); + out.writeVLong(mergeFailures.sum()); + + // Delete + out.writeVLong(deleteTotal.sum()); + out.writeVLong(deleteTimeMillis.sum()); + out.writeVLong(deleteByGenerationTotal.sum()); + out.writeVLong(deleteSharedWriterFallbackTotal.sum()); + + // Memory + out.writeVLong(ramBufferBytesUsed.get()); + out.writeVLong(activeWriters.get()); + out.writeVLong(activeReaders.get()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // Indexing + builder.startObject("indexing"); + builder.field("docs_indexed_total", docsIndexedTotal.sum()); + builder.field("docs_indexed_failures", docsIndexedFailures.sum()); + builder.field("index_time_millis", indexTimeMillis.sum()); + builder.endObject(); + + // Flush + builder.startObject("flush"); + builder.field("flush_total", flushTotal.sum()); + builder.field("flush_time_millis", flushTimeMillis.sum()); + builder.field("flush_force_merge_time_millis", flushForceMergeTimeMillis.sum()); + builder.endObject(); + + // Refresh + builder.startObject("refresh"); + builder.field("refresh_total", refreshTotal.sum()); + builder.field("refresh_time_millis", refreshTimeMillis.sum()); + builder.field("refresh_add_indexes_time_millis", refreshAddIndexesTimeMillis.sum()); + builder.field("refresh_segments_incorporated_total", refreshSegmentsIncorporatedTotal.sum()); + builder.endObject(); + + // Commit + builder.startObject("commit"); + builder.field("commit_total", commitTotal.sum()); + 
builder.field("commit_time_millis", commitTimeMillis.sum()); + builder.endObject(); + + // Merge + builder.startObject("merge"); + builder.field("merge_total", mergeTotal.sum()); + builder.field("merge_time_millis", mergeTimeMillis.sum()); + builder.field("merge_docs_total", mergeDocsTotal.sum()); + builder.field("merge_failures", mergeFailures.sum()); + builder.endObject(); + + // Delete + builder.startObject("delete"); + builder.field("delete_total", deleteTotal.sum()); + builder.field("delete_time_millis", deleteTimeMillis.sum()); + builder.field("delete_by_generation_total", deleteByGenerationTotal.sum()); + builder.field("delete_shared_writer_fallback_total", deleteSharedWriterFallbackTotal.sum()); + builder.endObject(); + + // Memory + builder.startObject("memory"); + builder.field("ram_buffer_bytes_used", ramBufferBytesUsed.get()); + builder.field("active_writers", activeWriters.get()); + builder.field("active_readers", activeReaders.get()); + builder.endObject(); + + return builder; + } + + /** + * Returns this live instance rather than a copy, so its values keep changing after the call. + * LongAdder.sum() is not an atomic snapshot, so serialized counters are only approximately consistent.
+ */ + public LuceneShardStats snapshot() { + return this; + } + + // --- Indexing methods --- + + public void addDocsIndexed(long n) { + docsIndexedTotal.add(n); + } + + public void incDocsIndexedFailures() { + docsIndexedFailures.increment(); + } + + public void addIndexTimeMillis(long ms) { + indexTimeMillis.add(ms); + } + + // --- Flush methods --- + + public void incFlushTotal() { + flushTotal.increment(); + } + + public void addFlushTimeMillis(long ms) { + flushTimeMillis.add(ms); + } + + public void addFlushForceMergeTimeMillis(long ms) { + flushForceMergeTimeMillis.add(ms); + } + + // --- Refresh methods --- + + public void incRefreshTotal() { + refreshTotal.increment(); + } + + public void addRefreshTimeMillis(long ms) { + refreshTimeMillis.add(ms); + } + + public void addRefreshAddIndexesTimeMillis(long ms) { + refreshAddIndexesTimeMillis.add(ms); + } + + public void incRefreshSegmentsIncorporatedTotal() { + refreshSegmentsIncorporatedTotal.increment(); + } + + // --- Commit methods --- + + public void incCommitTotal() { + commitTotal.increment(); + } + + public void addCommitTimeMillis(long ms) { + commitTimeMillis.add(ms); + } + + // --- Merge methods --- + + public void incMergeTotal() { + mergeTotal.increment(); + } + + public void addMergeTimeMillis(long ms) { + mergeTimeMillis.add(ms); + } + + public void addMergeDocsTotal(long n) { + mergeDocsTotal.add(n); + } + + public void incMergeFailures() { + mergeFailures.increment(); + } + + // --- Delete methods --- + + public void incDeleteTotal() { + deleteTotal.increment(); + } + + public void addDeleteTimeMillis(long ms) { + deleteTimeMillis.add(ms); + } + + public void incDeleteByGenerationTotal() { + deleteByGenerationTotal.increment(); + } + + public void incDeleteSharedWriterFallbackTotal() { + deleteSharedWriterFallbackTotal.increment(); + } + + // --- Memory gauge methods --- + + public void setRamBufferBytesUsed(long bytes) { + ramBufferBytesUsed.set(bytes); + } + + public void 
setActiveWriters(long count) { + activeWriters.set(count); + } + + public void setActiveReaders(long count) { + activeReaders.set(count); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneDataFormatAwareEngineTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneDataFormatAwareEngineTests.java index 70bf24b5a4190..36017e3e61bcc 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneDataFormatAwareEngineTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneDataFormatAwareEngineTests.java @@ -15,6 +15,7 @@ import org.opensearch.Version; import org.opensearch.be.lucene.index.LuceneCommitterFactory; import org.opensearch.be.lucene.index.LuceneDocumentInput; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -75,7 +76,7 @@ protected String dataFormatName() { @Override protected CommitterFactory createCommitterFactory(Store store) { - return new LuceneCommitterFactory(); + return new LuceneCommitterFactory(new LuceneShardStats()); } @Override diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneMergerTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneMergerTests.java index a7f7c84029e69..b8d8098252923 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneMergerTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneMergerTests.java @@ -30,6 +30,7 @@ import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.tests.analysis.MockAnalyzer; import org.opensearch.be.lucene.merge.LuceneMerger; +import 
org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.SuppressForbidden; import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.engine.dataformat.MergeInput; @@ -90,7 +91,7 @@ public void tearDown() throws Exception { * Merge with empty input returns empty result without error. */ public void testMergeWithEmptyInput() throws IOException { - LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath); + LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath, new LuceneShardStats()); MergeInput input = MergeInput.builder().segments(List.of()).newWriterGeneration(99L).build(); MergeResult result = merger.merge(input); @@ -105,7 +106,7 @@ public void testMergeWithNoMatchingSegments() throws IOException { writeSegment(writer, 1L, 0, 3); writer.commit(); - LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath); + LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath, new LuceneShardStats()); Segment segment = Segment.builder(99L).build(); MergeInput input = MergeInput.builder().addSegment(segment).newWriterGeneration(100L).build(); @@ -157,7 +158,7 @@ public void testMergeWithRowIdMappingRemapsRowIds() throws IOException { return oldId; }; - LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath); + LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath, new LuceneShardStats()); SegmentInfos infos = getSegmentInfos(writer); List segments = buildSegments(infos); @@ -213,7 +214,7 @@ public void testMergePreservesFieldDataIntegrity() throws IOException { writeSegmentWithRichFields(writer, 2L, 3, 2); writer.commit(); - LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath); + LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath, new LuceneShardStats()); SegmentInfos infos = getSegmentInfos(writer); List segments = 
buildSegments(infos); @@ -246,7 +247,10 @@ public void testMergePreservesFieldDataIntegrity() throws IOException { * Constructor with null IndexWriter throws IllegalArgumentException. */ public void testConstructorWithNullIndexWriterThrows() { - expectThrows(IllegalArgumentException.class, () -> new LuceneMerger(null, new LuceneDataFormat(), Path.of("."))); + expectThrows( + IllegalArgumentException.class, + () -> new LuceneMerger(null, new LuceneDataFormat(), Path.of("."), new LuceneShardStats()) + ); } /** @@ -269,7 +273,7 @@ public void testMergedSegmentWriterGenerationIsPersisted() throws IOException { writeSegment(writer, 2L, 3, 2); writer.commit(); - LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath); + LuceneMerger merger = new LuceneMerger(writer, new LuceneDataFormat(), dataPath, new LuceneShardStats()); SegmentInfos infos = getSegmentInfos(writer); List segments = buildSegments(infos); diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneReaderManagerTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneReaderManagerTests.java index 787f74d68c948..88ebba5dc080e 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneReaderManagerTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/LuceneReaderManagerTests.java @@ -25,6 +25,7 @@ import org.opensearch.be.lucene.index.LuceneCommitter; import org.opensearch.be.lucene.index.LuceneIndexingExecutionEngine; import org.opensearch.be.lucene.index.LuceneWriter; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; @@ -230,7 +231,12 @@ private void stampLatestSegmentGeneration(long generation) throws IOException { } public void testAfterRefreshCreatesReader() throws 
IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); CatalogSnapshot snap = stubSnapshot(1); expectThrows(IllegalStateException.class, () -> rm.getReader(snap)); @@ -239,7 +245,12 @@ public void testAfterRefreshCreatesReader() throws IOException { } public void testAfterRefreshNoOpWhenDidRefreshFalse() throws IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); CatalogSnapshot snap = stubSnapshot(1); rm.afterRefresh(false, snap); @@ -247,7 +258,12 @@ public void testAfterRefreshNoOpWhenDidRefreshFalse() throws IOException { } public void testMultipleRefreshesWithIndexing() throws IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); // Empty initial reader — no segments yet. 
CatalogSnapshot snap1 = stubSnapshot(1); @@ -280,7 +296,12 @@ public void testMultipleRefreshesWithIndexing() throws IOException { } public void testOnDeletedClosesReader() throws IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); CatalogSnapshot snap = stubSnapshot(1); rm.afterRefresh(true, snap); @@ -292,17 +313,32 @@ public void testOnDeletedClosesReader() throws IOException { } public void testOnDeletedUnknownSnapshotIsNoOp() throws IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); rm.onDeleted(stubSnapshot(99)); } public void testGetReaderThrowsForUnknownSnapshot() throws IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); expectThrows(IllegalStateException.class, () -> rm.getReader(stubSnapshot(42))); } public void testDuplicateAfterRefreshIsIdempotent() throws IOException { - LuceneReaderManager rm = new LuceneReaderManager(dataFormat, openReader(), new java.util.concurrent.ConcurrentHashMap<>()); + LuceneReaderManager rm = new LuceneReaderManager( + dataFormat, + openReader(), + new java.util.concurrent.ConcurrentHashMap<>(), + (dr, sis) -> DirectoryReader.openIfChanged(dr) + ); CatalogSnapshot snap = stubSnapshot(1); rm.afterRefresh(true, snap); 
@@ -325,9 +361,15 @@ public void testCreateReaderManagerWithLuceneIndexingEngine() throws IOException IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); ShardPath shardPath = new ShardPath(false, dataPath, dataPath, shardId); Store store = new Store(shardId, idxSettings, new NIOFSDirectory(dataPath), new DummyShardLock(shardId), (x) -> {}, shardPath); - store.createEmpty(org.apache.lucene.util.Version.LATEST); Path translogPath = dataPath.resolve("translog"); java.nio.file.Files.createDirectories(translogPath); + String translogUUID = org.opensearch.index.translog.Translog.createEmptyTranslog( + translogPath, + org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED, + shardId, + 1L + ); + store.createEmpty(org.apache.lucene.util.Version.LATEST, translogUUID); EngineConfig engineConfig = new EngineConfig.Builder().indexSettings(idxSettings) .store(store) .codecService(new CodecService(null, idxSettings, LogManager.getLogger(getClass()), java.util.List.of())) @@ -344,7 +386,7 @@ public void testCreateReaderManagerWithLuceneIndexingEngine() throws IOException .retentionLeasesSupplier(() -> new RetentionLeases(0, 0, java.util.Collections.emptyList())) .build(); CommitterConfig cs = new CommitterConfig(engineConfig, () -> {}); - LuceneCommitter committer = new LuceneCommitter(cs); + LuceneCommitter committer = new LuceneCommitter(cs, new LuceneShardStats()); try { LuceneIndexingExecutionEngine engine = new LuceneIndexingExecutionEngine( diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/DeleterImplTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/DeleterImplTests.java index 4ef22c9c9449d..006d88d7109b7 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/DeleterImplTests.java +++ 
b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/DeleterImplTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.util.BytesRef; import org.opensearch.be.lucene.LuceneDataFormat; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.index.engine.dataformat.DeleteInput; import org.opensearch.index.engine.dataformat.DeleteResult; import org.opensearch.index.engine.dataformat.DeleterImpl; @@ -37,7 +38,7 @@ public void setUp() throws Exception { } private LuceneWriter createWriter(Path baseDir, long generation) throws IOException { - return new LuceneWriter(generation, 0L, dataFormat, baseDir, null, Codec.getDefault(), null); + return new LuceneWriter(generation, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats()); } private void addDoc(LuceneWriter writer, String id, int rowId) throws IOException { @@ -60,7 +61,7 @@ private LuceneWriter createWriterWithDoc(Path baseDir, long generation, String i public void testGenerationMatchesWriter() throws IOException { Path baseDir = createTempDir(); long gen = randomLongBetween(1, 100); - try (LuceneWriter writer = new LuceneWriter(gen, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(gen, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { DeleterImpl deleter = new DeleterImpl<>(writer); assertEquals("Deleter generation should match writer generation", gen, deleter.generation()); } diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterCSManagerIntegrationTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterCSManagerIntegrationTests.java index 71fd81be7a5f6..0e798582b5459 100644 --- 
a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterCSManagerIntegrationTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterCSManagerIntegrationTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.NIOFSDirectory; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.concurrent.GatedConditionalCloseable; import org.opensearch.common.settings.Settings; @@ -26,7 +27,6 @@ import org.opensearch.index.engine.exec.FileDeleter; import org.opensearch.index.engine.exec.Segment; import org.opensearch.index.engine.exec.WriterFileSet; -import org.opensearch.index.engine.exec.commit.Committer; import org.opensearch.index.engine.exec.commit.Committer.CommitInput; import org.opensearch.index.engine.exec.commit.CommitterConfig; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; @@ -112,7 +112,14 @@ private TestEnv createTestEnv() throws IOException { Path translogDir = dataPath.resolve("translog"); Files.createDirectories(translogDir); // Create a real translog so readGlobalCheckpoint works during safe bootstrap - Translog.createEmptyTranslog(translogDir, shardId, SequenceNumbers.NO_OPS_PERFORMED, 1L, TRANSLOG_UUID, null); + String createdTranslogUUID = Translog.createEmptyTranslog( + translogDir, + shardId, + SequenceNumbers.NO_OPS_PERFORMED, + 1L, + TRANSLOG_UUID, + null + ); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); Store store = new Store( shardId, @@ -122,9 +129,10 @@ private TestEnv createTestEnv() throws IOException { Store.OnClose.EMPTY, shardPath ); - store.createEmpty(org.apache.lucene.util.Version.LATEST); + store.createEmpty(org.apache.lucene.util.Version.LATEST, createdTranslogUUID); LuceneCommitter committer 
= new LuceneCommitter( - new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}) + new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}), + new LuceneShardStats() ); Path parquetDir = dataPath.resolve(PARQUET_FORMAT); Files.createDirectories(parquetDir); @@ -488,7 +496,14 @@ public void testRecoveryAfterCrashTrimsUnsafeCommits() throws Exception { // Phase 1: Pre-crash — 3 commits { - Translog.createEmptyTranslog(translogDir, shardId, SequenceNumbers.NO_OPS_PERFORMED, 1L, TRANSLOG_UUID, null); + String phaseTranslogUUID = Translog.createEmptyTranslog( + translogDir, + shardId, + SequenceNumbers.NO_OPS_PERFORMED, + 1L, + TRANSLOG_UUID, + null + ); Store store = new Store( shardId, indexSettings, @@ -497,9 +512,10 @@ public void testRecoveryAfterCrashTrimsUnsafeCommits() throws Exception { Store.OnClose.EMPTY, shardPath ); - store.createEmpty(org.apache.lucene.util.Version.LATEST); + store.createEmpty(org.apache.lucene.util.Version.LATEST, phaseTranslogUUID); LuceneCommitter committer = new LuceneCommitter( - new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}) + new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}), + new LuceneShardStats() ); lucene0 = ingestLuceneDocs(committer, store); @@ -573,7 +589,8 @@ public void testRecoveryAfterCrashTrimsUnsafeCommits() throws Exception { shardPath ); LuceneCommitter committer = new LuceneCommitter( - new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}) + new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}), + new LuceneShardStats() ); assertEquals("Only safe commit remains", 1, DirectoryReader.listCommits(store.directory()).size()); @@ -623,7 +640,14 @@ public void testRecoveryThenNormalOperationWorks() throws Exception { // Phase 1: Pre-crash — 2 commits { - 
Translog.createEmptyTranslog(translogDir, shardId, SequenceNumbers.NO_OPS_PERFORMED, 1L, TRANSLOG_UUID, null); + String phaseTranslogUUID = Translog.createEmptyTranslog( + translogDir, + shardId, + SequenceNumbers.NO_OPS_PERFORMED, + 1L, + TRANSLOG_UUID, + null + ); Store store = new Store( shardId, indexSettings, @@ -632,9 +656,10 @@ public void testRecoveryThenNormalOperationWorks() throws Exception { Store.OnClose.EMPTY, shardPath ); - store.createEmpty(org.apache.lucene.util.Version.LATEST); + store.createEmpty(org.apache.lucene.util.Version.LATEST, phaseTranslogUUID); LuceneCommitter committer = new LuceneCommitter( - new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}) + new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}), + new LuceneShardStats() ); lucene0 = ingestLuceneDocs(committer, store); @@ -689,7 +714,8 @@ public void testRecoveryThenNormalOperationWorks() throws Exception { shardPath ); LuceneCommitter committer = new LuceneCommitter( - new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}) + new CommitterConfig(buildEngineConfig(indexSettings, store, shardId, translogDir), () -> {}), + new LuceneShardStats() ); assertEquals(1, DirectoryReader.listCommits(store.directory()).size()); diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterFactoryTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterFactoryTests.java index 022da0e14cd1b..7c28d07bbf905 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterFactoryTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterFactoryTests.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.lucene.store.NIOFSDirectory; import 
org.apache.lucene.util.Version; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.BigArrays; import org.opensearch.core.index.shard.ShardId; @@ -20,7 +21,9 @@ import org.opensearch.index.engine.exec.commit.Committer; import org.opensearch.index.engine.exec.commit.CommitterConfig; import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.store.Store; +import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogConfig; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; @@ -43,9 +46,10 @@ public void testGetCommitterReturnsLuceneCommitter() throws IOException { Files.createDirectories(dataPath); Path translogPath = dataPath.resolve("translog"); Files.createDirectories(translogPath); + String translogUUID = Translog.createEmptyTranslog(translogPath, SequenceNumbers.NO_OPS_PERFORMED, shardId, 1L); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); Store store = new Store(shardId, indexSettings, new NIOFSDirectory(dataPath), new DummyShardLock(shardId)); - store.createEmpty(Version.LATEST); + store.createEmpty(Version.LATEST, translogUUID); Committer committer = null; try { @@ -55,7 +59,7 @@ public void testGetCommitterReturnsLuceneCommitter() throws IOException { .translogConfig(new TranslogConfig(shardId, translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, "", false)) .retentionLeasesSupplier(() -> new RetentionLeases(0, 0, Collections.emptyList())) .build(); - LuceneCommitterFactory committerFactory = new LuceneCommitterFactory(); + LuceneCommitterFactory committerFactory = new LuceneCommitterFactory(new LuceneShardStats()); committer = committerFactory.getCommitter(new CommitterConfig(engineConfig, () -> {})); assertTrue("getCommitter() should return a LuceneCommitter instance", 
committer instanceof LuceneCommitter); diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterTests.java index 9ee9581f38315..8ab37aca355be 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneCommitterTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.util.Version; import org.opensearch.be.lucene.LucenePlugin; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; @@ -23,10 +24,13 @@ import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.EngineConfigFactory; +import org.opensearch.index.engine.exec.commit.Committer; import org.opensearch.index.engine.exec.commit.CommitterConfig; import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.store.Store; import org.opensearch.index.translog.InternalTranslogFactory; +import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogConfig; import org.opensearch.plugins.EnginePlugin; import org.opensearch.plugins.PluginsService; @@ -56,9 +60,10 @@ private CommitterConfig createCommitterConfig() throws IOException { Files.createDirectories(dataPath); Path translogPath = dataPath.resolve("translog"); Files.createDirectories(translogPath); + String translogUUID = Translog.createEmptyTranslog(translogPath, SequenceNumbers.NO_OPS_PERFORMED, shardId, 1L); IndexSettings indexSettings = 
IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); Store store = new Store(shardId, indexSettings, new NIOFSDirectory(dataPath), new DummyShardLock(shardId)); - store.createEmpty(Version.LATEST); + store.createEmpty(Version.LATEST, translogUUID); PluginsService mockPluginsService = mock(PluginsService.class); when(mockPluginsService.filterPlugins(EnginePlugin.class)).thenReturn(List.of(new LucenePlugin())); @@ -102,7 +107,7 @@ private CommitterConfig createCommitterConfig() throws IOException { public void testConstructorOpensIndexWriter() throws IOException { CommitterConfig settings = createCommitterConfig(); - LuceneCommitter committer = new LuceneCommitter(settings); + LuceneCommitter committer = new LuceneCommitter(settings, new LuceneShardStats()); try { IndexWriter writer = committer.getIndexWriter(); assertNotNull(writer); @@ -115,7 +120,7 @@ public void testConstructorOpensIndexWriter() throws IOException { public void testCloseReleasesIndexWriter() throws IOException { CommitterConfig settings = createCommitterConfig(); - LuceneCommitter committer = new LuceneCommitter(settings); + LuceneCommitter committer = new LuceneCommitter(settings, new LuceneShardStats()); assertNotNull(committer.getIndexWriter()); committer.close(); @@ -125,10 +130,11 @@ public void testCloseReleasesIndexWriter() throws IOException { public void testCommitRoundTrip() throws IOException { CommitterConfig settings = createCommitterConfig(); - LuceneCommitter committer = new LuceneCommitter(settings); + LuceneCommitter committer = new LuceneCommitter(settings, new LuceneShardStats()); try { + long genBeforeCommit = committer.getCommitStats().getGeneration(); Map commitData = Map.of("key1", "value1", "key2", "value2", "_snapshot_", "serialized-data"); - committer.commit(commitData); + committer.commit(new Committer.CommitInput(commitData.entrySet(), null)); Map readBack = committer.getLastCommittedData(); @@ -137,7 +143,7 @@ public void testCommitRoundTrip() throws 
IOException { assertEquals("serialized-data", readBack.get("_snapshot_")); CommitStats stats = committer.getCommitStats(); - assertEquals(2L, stats.getGeneration()); + assertEquals(genBeforeCommit + 1, stats.getGeneration()); assertEquals(readBack, stats.getUserData()); } finally { committer.close(); @@ -147,9 +153,9 @@ public void testCommitRoundTrip() throws IOException { public void testCommitWithEmptyData() throws IOException { CommitterConfig settings = createCommitterConfig(); - LuceneCommitter committer = new LuceneCommitter(settings); + LuceneCommitter committer = new LuceneCommitter(settings, new LuceneShardStats()); try { - committer.commit(Map.of()); + committer.commit(new Committer.CommitInput(Map.of().entrySet(), null)); assertTrue(committer.getLastCommittedData().isEmpty()); } finally { committer.close(); @@ -159,10 +165,13 @@ public void testCommitWithEmptyData() throws IOException { public void testCommitAfterCloseThrows() throws IOException { CommitterConfig config = createCommitterConfig(); - LuceneCommitter committer = new LuceneCommitter(config); + LuceneCommitter committer = new LuceneCommitter(config, new LuceneShardStats()); committer.close(); - expectThrows(IllegalStateException.class, () -> committer.commit(Map.of("key", "value"))); + expectThrows( + IllegalStateException.class, + () -> committer.commit(new Committer.CommitInput(Map.of("key", "value").entrySet(), null)) + ); config.engineConfig().getStore().close(); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngineTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngineTests.java index 5298974ac342b..5a1af68553daa 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngineTests.java +++ 
b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneDeleteExecutionEngineTests.java @@ -14,6 +14,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; import org.opensearch.be.lucene.LuceneDataFormat; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.BigArrays; import org.opensearch.core.index.shard.ShardId; @@ -27,7 +28,9 @@ import org.opensearch.index.engine.dataformat.Writer; import org.opensearch.index.engine.exec.commit.CommitterConfig; import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.store.Store; +import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogConfig; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; @@ -63,9 +66,10 @@ public void setUp() throws Exception { Files.createDirectories(dataPath); Path translogPath = dataPath.resolve("translog"); Files.createDirectories(translogPath); + String translogUUID = Translog.createEmptyTranslog(translogPath, SequenceNumbers.NO_OPS_PERFORMED, shardId, 1L); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); store = new Store(shardId, indexSettings, new NIOFSDirectory(dataPath), new DummyShardLock(shardId)); - store.createEmpty(Version.LATEST); + store.createEmpty(Version.LATEST, translogUUID); EngineConfig engineConfig = new EngineConfig.Builder().indexSettings(indexSettings) .store(store) @@ -74,8 +78,10 @@ public void setUp() throws Exception { .retentionLeasesSupplier(() -> new RetentionLeases(0, 0, Collections.emptyList())) .build(); - committer = (LuceneCommitter) new LuceneCommitterFactory().getCommitter(new CommitterConfig(engineConfig, () -> {})); - deleteEngine = new LuceneDeleteExecutionEngine(new LuceneDataFormat(), committer); + committer = 
(LuceneCommitter) new LuceneCommitterFactory(new LuceneShardStats()).getCommitter( + new CommitterConfig(engineConfig, () -> {}) + ); + deleteEngine = new LuceneDeleteExecutionEngine(new LuceneDataFormat(), committer, new LuceneShardStats()); dataFormat = new LuceneDataFormat(); } @@ -96,7 +102,7 @@ public void tearDown() throws Exception { private LuceneWriter createLuceneWriter(long generation) throws IOException { Path writerDir = baseDir.resolve("writers"); Files.createDirectories(writerDir); - return new LuceneWriter(generation, 0L, dataFormat, baseDir, null, Codec.getDefault(), null); + return new LuceneWriter(generation, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats()); } private Writer createMockCompositeWriter(long generation, boolean hasLucene, boolean hasParquet) { diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngineTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngineTests.java index f22bb8155726c..636efed3dde2c 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngineTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngineTests.java @@ -17,6 +17,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer; import org.opensearch.be.lucene.LuceneDataFormat; import org.opensearch.be.lucene.LucenePlugin; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; @@ -38,9 +39,11 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.TextFieldMapper.TextFieldType; import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SequenceNumbers; import 
org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.Store; import org.opensearch.index.translog.InternalTranslogFactory; +import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogConfig; import org.opensearch.plugins.EnginePlugin; import org.opensearch.plugins.PluginsService; @@ -82,6 +85,9 @@ private LuceneCommitter createCommitter() throws IOException { Files.createDirectories(dataPath); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); shardPath = new ShardPath(false, dataPath, dataPath, shardId); + Path translogPath = dataPath.resolve("translog"); + java.nio.file.Files.createDirectories(translogPath); + String translogUUID = Translog.createEmptyTranslog(translogPath, SequenceNumbers.NO_OPS_PERFORMED, shardId, 1L); store = new Store( shardId, indexSettings, @@ -90,13 +96,10 @@ private LuceneCommitter createCommitter() throws IOException { Store.OnClose.EMPTY, shardPath ); - store.createEmpty(org.apache.lucene.util.Version.LATEST); + store.createEmpty(org.apache.lucene.util.Version.LATEST, translogUUID); PluginsService mockPluginsService = mock(PluginsService.class); when(mockPluginsService.filterPlugins(EnginePlugin.class)).thenReturn(List.of(new LucenePlugin())); - - Path translogPath = dataPath.resolve("translog"); - java.nio.file.Files.createDirectories(translogPath); EngineConfig engineConfig = new EngineConfigFactory(mockPluginsService, indexSettings).newEngineConfig( shardId, null, @@ -133,7 +136,7 @@ private LuceneCommitter createCommitter() throws IOException { null ); CommitterConfig settings = new CommitterConfig(engineConfig, () -> {}); - return new LuceneCommitter(settings); + return new LuceneCommitter(settings, new LuceneShardStats()); } @Override @@ -171,7 +174,18 @@ public void testRefreshIncorporatesLuceneSegments() throws IOException { MappedFieldType textField = new org.opensearch.index.mapper.TextFieldMapper.TextFieldType("content"); long 
generation = 1L; - try (LuceneWriter luceneWriter = new LuceneWriter(generation, 0L, luceneDataFormat, tempBase, null, Codec.getDefault(), null)) { + try ( + LuceneWriter luceneWriter = new LuceneWriter( + generation, + 0L, + luceneDataFormat, + tempBase, + null, + Codec.getDefault(), + null, + new LuceneShardStats() + ) + ) { for (int i = 0; i < numDocs; i++) { LuceneDocumentInput input = new LuceneDocumentInput(); input.addField(textField, "doc_" + i); diff --git a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneWriterTests.java b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneWriterTests.java index 2e166058b3d30..2b205a292254b 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneWriterTests.java +++ b/sandbox/plugins/analytics-backend-lucene/src/test/java/org/opensearch/be/lucene/index/LuceneWriterTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.util.BytesRef; import org.opensearch.be.lucene.LuceneDataFormat; +import org.opensearch.be.lucene.stats.LuceneShardStats; import org.opensearch.index.engine.dataformat.DeleteInput; import org.opensearch.index.engine.dataformat.FileInfos; import org.opensearch.index.engine.dataformat.WriteResult; @@ -71,7 +72,7 @@ private MappedFieldType mockKeywordField(String name) { public void testAddDocAndFlushProducesSingleSegment() throws IOException { Path baseDir = createTempDir(); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { int numDocs = randomIntBetween(5, 20); MappedFieldType textField = mockTextField("content"); for (int i = 0; i < numDocs; i++) { @@ -102,7 +103,7 @@ public void testRowIdMatchesLuceneDocId() throws IOException { Path 
baseDir = createTempDir(); int numDocs = randomIntBetween(10, 50); MappedFieldType textField = mockTextField("content"); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { for (int i = 0; i < numDocs; i++) { LuceneDocumentInput input = new LuceneDocumentInput(); input.addField(textField, "doc " + i); @@ -129,7 +130,7 @@ public void testRowIdMatchesLuceneDocId() throws IOException { public void testFlushWithNoDocsReturnsEmpty() throws IOException { Path baseDir = createTempDir(); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { FileInfos fileInfos = writer.flush(); assertTrue(fileInfos.writerFilesMap().isEmpty()); } @@ -139,7 +140,7 @@ public void testWriterGenerationIsPreserved() throws IOException { Path baseDir = createTempDir(); long gen = randomLongBetween(1, 100); MappedFieldType textField = mockTextField("content"); - try (LuceneWriter writer = new LuceneWriter(gen, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(gen, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { assertThat(writer.generation(), equalTo(gen)); LuceneDocumentInput input = new LuceneDocumentInput(); @@ -156,7 +157,7 @@ public void testWriterGenerationIsPreserved() throws IOException { public void testKeywordFieldsAreIndexed() throws IOException { Path baseDir = createTempDir(); MappedFieldType keywordField = mockKeywordField("status"); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, 
null, Codec.getDefault(), null, new LuceneShardStats())) { LuceneDocumentInput input = new LuceneDocumentInput(); input.addField(keywordField, "active"); input.setRowId(LuceneDocumentInput.ROW_ID_FIELD, 0); @@ -178,7 +179,7 @@ public void testUnsupportedFieldTypeIsSilentlySkipped() throws IOException { when(numericField.typeName()).thenReturn("integer"); when(numericField.name()).thenReturn("count"); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { LuceneDocumentInput input = new LuceneDocumentInput(); // Should not throw — unsupported types are silently skipped (handled by other formats) input.addField(numericField, 42); @@ -192,7 +193,7 @@ public void testMixedTextAndKeywordFields() throws IOException { MappedFieldType textField = mockTextField("title"); MappedFieldType keywordField = mockKeywordField("category"); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { int numDocs = randomIntBetween(5, 15); for (int i = 0; i < numDocs; i++) { LuceneDocumentInput input = new LuceneDocumentInput(); @@ -219,7 +220,7 @@ public void testWriteAndFlushEndToEndWithTextAndKeyword() throws IOException { MappedFieldType keywordField = mockKeywordField("status"); int numDocs = randomIntBetween(5, 20); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { for (int i = 0; i < numDocs; i++) { LuceneDocumentInput input = new LuceneDocumentInput(); input.addField(textField, "hello world " + i); @@ -270,8 +271,8 @@ 
public void testMultipleWriterGenerationsProduceIsolatedSegments() throws IOExce // Create both writers without closing them until after verification, // because close() deletes the temp directory. - LuceneWriter writer1 = new LuceneWriter(gen1, 0L, dataFormat, baseDir, null, Codec.getDefault(), null); - LuceneWriter writer2 = new LuceneWriter(gen2, 0L, dataFormat, baseDir, null, Codec.getDefault(), null); + LuceneWriter writer1 = new LuceneWriter(gen1, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats()); + LuceneWriter writer2 = new LuceneWriter(gen2, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats()); try { for (int i = 0; i < numDocs1; i++) { LuceneDocumentInput input = new LuceneDocumentInput(); @@ -322,7 +323,7 @@ public void testMultipleWriterGenerationsProduceIsolatedSegments() throws IOExce public void testGetWriterForFormatReturnsItselfForLucene() throws IOException { Path baseDir = createTempDir(); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { Optional> result = writer.getWriterForFormat("lucene"); assertTrue("Should return present for 'lucene'", result.isPresent()); @@ -332,7 +333,7 @@ public void testGetWriterForFormatReturnsItselfForLucene() throws IOException { public void testGetWriterForFormatReturnsEmptyForOtherFormats() throws IOException { Path baseDir = createTempDir(); - try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null)) { + try (LuceneWriter writer = new LuceneWriter(1L, 0L, dataFormat, baseDir, null, Codec.getDefault(), null, new LuceneShardStats())) { Optional> parquetResult = writer.getWriterForFormat("parquet"); Optional> nullResult = writer.getWriterForFormat(null); diff --git 
a/sandbox/plugins/composite-engine/src/internalClusterTest/java/org/opensearch/composite/DataFormatStatsIT.java b/sandbox/plugins/composite-engine/src/internalClusterTest/java/org/opensearch/composite/DataFormatStatsIT.java new file mode 100644 index 0000000000000..a8572009e355b --- /dev/null +++ b/sandbox/plugins/composite-engine/src/internalClusterTest/java/org/opensearch/composite/DataFormatStatsIT.java @@ -0,0 +1,235 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.composite; + +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.notNullValue; + +/** + * Integration tests for the composite-engine REST API endpoints: + *

<ul>
+ * <li>{@code GET /_plugins/dataformat_stats}</li>
+ * <li>{@code GET /_plugins/composite/{index}/_catalog_snapshot}</li>
+ * <li>{@code GET /_plugins/parquet/{index}/_analyze}</li>
+ * </ul>
+ * + * @opensearch.experimental + */ +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 1) +public class DataFormatStatsIT extends AbstractCompositeEngineIT { + + private static final String INDEX_NAME = "test-dataformat-stats"; + + @SuppressWarnings("unchecked") + public void testDataFormatStats() throws Exception { + createCompositeIndex(INDEX_NAME + "-stats"); + indexDocs(INDEX_NAME + "-stats", 10, 0); + flushIndex(INDEX_NAME + "-stats"); + + Response response = getRestClient().performRequest(new Request("GET", "/_plugins/dataformat_stats")); + assertThat(response.getStatusLine().getStatusCode(), org.hamcrest.Matchers.equalTo(200)); + + Map responseMap = XContentHelper.convertToMap( + JsonXContent.jsonXContent, + response.getEntity().getContent(), + true + ); + + Map indices = (Map) responseMap.get("indices"); + assertThat(indices, notNullValue()); + assertTrue("Response should contain the test index", indices.containsKey(INDEX_NAME + "-stats")); + + Map indexStats = (Map) indices.get(INDEX_NAME + "-stats"); + Map composite = (Map) indexStats.get("composite"); + assertThat(composite, notNullValue()); + + // Verify per_format breakdown exists with parquet stats + Map perFormat = (Map) composite.get("per_format"); + assertThat("per_format breakdown should exist", perFormat, notNullValue()); + assertTrue("per_format should contain parquet stats", perFormat.containsKey("parquet")); + } + + @SuppressWarnings("unchecked") + public void testDataFormatStatsWithShardLevel() throws Exception { + createCompositeIndex(INDEX_NAME + "-shard"); + indexDocs(INDEX_NAME + "-shard", 10, 0); + flushIndex(INDEX_NAME + "-shard"); + + Response response = getRestClient().performRequest(new Request("GET", "/_plugins/dataformat_stats?level=shards")); + assertThat(response.getStatusLine().getStatusCode(), org.hamcrest.Matchers.equalTo(200)); + + Map responseMap = XContentHelper.convertToMap( + JsonXContent.jsonXContent, + 
response.getEntity().getContent(), + true + ); + + Map indices = (Map) responseMap.get("indices"); + Map indexStats = (Map) indices.get(INDEX_NAME + "-shard"); + assertThat(indexStats, notNullValue()); + + // Verify shard-level detail is present + Map shards = (Map) indexStats.get("shards"); + assertThat("Shard-level detail should be present when level=shards", shards, notNullValue()); + assertFalse("Shards map should not be empty", shards.isEmpty()); + } + + @SuppressWarnings("unchecked") + public void testCatalogSnapshot() throws Exception { + createCompositeIndex(INDEX_NAME + "-catalog"); + indexDocs(INDEX_NAME + "-catalog", 10, 0); + flushIndex(INDEX_NAME + "-catalog"); + + Response response = getRestClient().performRequest( + new Request("GET", "/_plugins/composite/" + INDEX_NAME + "-catalog/_catalog_snapshot") + ); + assertThat(response.getStatusLine().getStatusCode(), org.hamcrest.Matchers.equalTo(200)); + + Map responseMap = XContentHelper.convertToMap( + JsonXContent.jsonXContent, + response.getEntity().getContent(), + true + ); + + assertEquals(INDEX_NAME + "-catalog", responseMap.get("index")); + assertThat(responseMap.get("generation"), notNullValue()); + + List segments = (List) responseMap.get("segments"); + assertThat("segments array should exist", segments, notNullValue()); + assertThat("segments should not be empty", segments.size(), greaterThan(0)); + + Map summary = (Map) responseMap.get("summary"); + assertThat("summary should exist", summary, notNullValue()); + assertThat("summary should have by_format", summary.get("by_format"), notNullValue()); + assertThat("summary should have by_extension", summary.get("by_extension"), notNullValue()); + } + + @SuppressWarnings("unchecked") + public void testParquetAnalyze() throws Exception { + createCompositeIndex(INDEX_NAME + "-analyze"); + indexDocs(INDEX_NAME + "-analyze", 10, 0); + flushIndex(INDEX_NAME + "-analyze"); + + Response response = getRestClient().performRequest( + new Request("GET", 
"/_plugins/parquet/" + INDEX_NAME + "-analyze/_analyze") + ); + assertThat(response.getStatusLine().getStatusCode(), org.hamcrest.Matchers.equalTo(200)); + + Map responseMap = XContentHelper.convertToMap( + JsonXContent.jsonXContent, + response.getEntity().getContent(), + true + ); + + assertEquals(INDEX_NAME + "-analyze", responseMap.get("index")); + assertThat("total_rows should be > 0", ((Number) responseMap.get("total_rows")).longValue(), greaterThan(0L)); + assertThat("total_size_bytes should be > 0", ((Number) responseMap.get("total_size_bytes")).longValue(), greaterThan(0L)); + + List> fields = (List>) responseMap.get("fields"); + assertThat("fields array should exist", fields, notNullValue()); + assertThat("fields should not be empty", fields.size(), greaterThan(0)); + + // Verify each field has required attributes + for (Map field : fields) { + assertThat("field should have name", field.get("name"), notNullValue()); + assertThat("field should have type", field.get("type"), notNullValue()); + assertThat("field should have compression", field.get("compression"), notNullValue()); + assertThat("field should have encodings", field.get("encodings"), notNullValue()); + assertThat("field should have total_compressed_bytes", field.get("total_compressed_bytes"), notNullValue()); + assertThat("field should have total_uncompressed_bytes", field.get("total_uncompressed_bytes"), notNullValue()); + } + + // Verify compression_ratio > 0 for at least one field + boolean hasPositiveRatio = fields.stream() + .anyMatch(f -> ((Number) f.get("compression_ratio")).doubleValue() > 0); + assertTrue("At least one field should have compression_ratio > 0", hasPositiveRatio); + + // Verify footer_size exists and is >= 0 + assertThat("footer_size should exist", responseMap.get("footer_size"), notNullValue()); + assertThat("footer_size should be >= 0", ((Number) responseMap.get("footer_size")).longValue(), greaterThanOrEqualTo(0L)); + + // Verify sorting_columns field exists (may be 
null if no sort configured) + assertTrue("sorting_columns field should exist", responseMap.containsKey("sorting_columns")); + + // Verify each field has new required attributes + for (Map field : fields) { + assertThat("field should have num_values > 0", ((Number) field.get("num_values")).longValue(), greaterThan(0L)); + assertThat("field has_bloom_filter should be boolean", field.get("has_bloom_filter"), instanceOf(Boolean.class)); + assertThat( + "field bloom_filter_size should be >= 0", + ((Number) field.get("bloom_filter_size")).longValue(), + greaterThanOrEqualTo(0L) + ); + assertThat("field should have total_num_pages >= 0", ((Number) field.get("total_num_pages")).longValue(), + greaterThanOrEqualTo(0L)); + + Map stats = (Map) field.get("stats"); + assertThat("field should have stats object", stats, notNullValue()); + assertTrue("stats should have min", stats.containsKey("min")); + assertTrue("stats should have max", stats.containsKey("max")); + assertTrue("stats should have null_count", stats.containsKey("null_count")); + } + } + + @SuppressWarnings("unchecked") + public void testParquetAnalyzeWithFileLevel() throws Exception { + createCompositeIndex(INDEX_NAME + "-file"); + indexDocs(INDEX_NAME + "-file", 10, 0); + flushIndex(INDEX_NAME + "-file"); + + Response response = getRestClient().performRequest( + new Request("GET", "/_plugins/parquet/" + INDEX_NAME + "-file/_analyze?file_level=true") + ); + assertThat(response.getStatusLine().getStatusCode(), org.hamcrest.Matchers.equalTo(200)); + + Map responseMap = XContentHelper.convertToMap( + JsonXContent.jsonXContent, + response.getEntity().getContent(), + true + ); + + List files = (List) responseMap.get("files"); + assertThat("files array should be present when file_level=true", files, notNullValue()); + assertThat("files should not be empty", files.size(), greaterThan(0)); + + // Verify each file entry has row_groups with columns and page_stats + boolean foundPageStats = false; + for (Object fileObj : 
files) { + Map file = (Map) fileObj; + List> rowGroups = (List>) file.get("row_groups"); + assertThat("file entry should have row_groups", rowGroups, notNullValue()); + + for (Map rowGroup : rowGroups) { + List> columns = (List>) rowGroup.get("columns"); + assertThat("row_group should have columns", columns, notNullValue()); + + for (Map column : columns) { + Map pageStats = (Map) column.get("page_stats"); + if (pageStats != null) { + foundPageStats = true; + assertThat("page_stats should have num_pages", pageStats.get("num_pages"), notNullValue()); + assertThat("page_stats should have boundary_order", pageStats.get("boundary_order"), notNullValue()); + } + } + } + } + assertTrue("At least one column should have page_stats with num_pages and boundary_order", foundPageStats); + } +} diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeDataFormatPlugin.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeDataFormatPlugin.java index 22d33ffcb31e9..4155afee57149 100644 --- a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeDataFormatPlugin.java +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeDataFormatPlugin.java @@ -11,12 +11,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.ValidationException; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.inject.Module; import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsFilter; +import 
org.opensearch.composite.action.CatalogSnapshotAction; +import org.opensearch.composite.action.DataFormatStatsAction; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; @@ -32,9 +38,12 @@ import org.opensearch.index.shard.IndexSettingProvider; import org.opensearch.indices.IndexCreationException; import org.opensearch.indices.IndicesService; +import org.opensearch.plugins.ActionPlugin; import org.opensearch.plugins.ExtensiblePlugin; import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; +import org.opensearch.rest.RestController; +import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.client.Client; @@ -77,7 +86,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class CompositeDataFormatPlugin extends Plugin implements DataFormatPlugin { +public class CompositeDataFormatPlugin extends Plugin implements DataFormatPlugin, ActionPlugin { private static final Logger logger = LogManager.getLogger(CompositeDataFormatPlugin.class); @@ -322,4 +331,22 @@ public Map getStoreStrategies(IndexSettings indexSett } return Map.copyOf(strategies); } + + @Override + public List getRestHandlers( + Settings settings, + RestController restController, + ClusterSettings clusterSettings, + IndexScopedSettings indexScopedSettings, + SettingsFilter settingsFilter, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier nodesInCluster + ) { + return List.of(new DataFormatStatsAction(), new CatalogSnapshotAction()); + } + + @Override + public Collection createGuiceModules() { + return List.of(b -> b.bind(CompositeRegistryInitializer.class).asEagerSingleton()); + } } diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeIndexingExecutionEngine.java 
b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeIndexingExecutionEngine.java index e932b7214dc6b..e2db47b8dd09b 100644 --- a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeIndexingExecutionEngine.java +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeIndexingExecutionEngine.java @@ -14,6 +14,9 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.io.IOUtils; import org.opensearch.composite.merge.CompositeMerger; +import org.opensearch.composite.stats.CompositeShardStats; +import org.opensearch.composite.stats.CompositeStatsRegistry; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.DataFormatPlugin; @@ -47,6 +50,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; /** @@ -71,6 +75,8 @@ public class CompositeIndexingExecutionEngine implements IndexingExecutionEngine private final Set> secondaryEngines; private final CompositeDataFormat compositeDataFormat; private final Committer committer; + private final ShardId shardId; + private final CompositeShardStats stats = new CompositeShardStats(); /** * Constructs a CompositeIndexingExecutionEngine by reading index settings to @@ -140,6 +146,10 @@ public CompositeIndexingExecutionEngine( this.compositeDataFormat = new CompositeDataFormat(primaryFormat, allFormats); this.committer = committer; + this.shardId = store != null ? 
store.shardId() : null; + if (this.shardId != null) { + CompositeStatsRegistry.getInstance().register(this.shardId, this); + } } /** @@ -184,13 +194,20 @@ private static void validateFormatIsRegistered(DataFormatRegistry registry, Stri */ @Override public Writer createWriter(WriterConfig config) { - return new CompositeWriter(this, config); + return new CompositeWriter(this, config, stats); + } + + /** + * Returns the shard-level stats collector for this engine. + */ + public CompositeShardStats getStats() { + return stats; } /** {@inheritDoc} Delegates to the primary engine's merger. */ @Override public Merger getMerger() { - return new CompositeMerger(this, compositeDataFormat); + return new CompositeMerger(this, compositeDataFormat, stats); } /** @@ -204,24 +221,37 @@ public Merger getMerger() { */ @Override public RefreshResult refresh(RefreshInput refreshInput) throws IOException { - RefreshResult primary = primaryEngine.refresh(refreshInput); - List secResults = new ArrayList<>(); - for (IndexingExecutionEngine engine : secondaryEngines) { - secResults.add(engine.refresh(refreshInput)); - } + long startNanos = System.nanoTime(); + try { + long primaryStart = System.nanoTime(); + RefreshResult primary = primaryEngine.refresh(refreshInput); + stats.getOrCreateFormatStats(primaryEngine.getDataFormat().name()) + .addRefreshTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - primaryStart)); + + List secResults = new ArrayList<>(); + for (IndexingExecutionEngine engine : secondaryEngines) { + long secStart = System.nanoTime(); + secResults.add(engine.refresh(refreshInput)); + stats.getOrCreateFormatStats(engine.getDataFormat().name()) + .addRefreshTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - secStart)); + } - Map mergedByGen = new LinkedHashMap<>(); - buildSegment(primary, mergedByGen); - for (RefreshResult secResult : secResults) { - buildSegment(secResult, mergedByGen); - } + Map mergedByGen = new LinkedHashMap<>(); + 
buildSegment(primary, mergedByGen); + for (RefreshResult secResult : secResults) { + buildSegment(secResult, mergedByGen); + } - List merged = new ArrayList<>(mergedByGen.size()); - for (Segment.Builder builder : mergedByGen.values()) { - merged.add(builder.build()); - } + List merged = new ArrayList<>(mergedByGen.size()); + for (Segment.Builder builder : mergedByGen.values()) { + merged.add(builder.build()); + } - return new RefreshResult(merged); + return new RefreshResult(merged); + } finally { + stats.incRefreshTotal(); + stats.addRefreshTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); + } } private void buildSegment(RefreshResult primary, Map mergedByGen) { @@ -324,6 +354,9 @@ public CompositeDocumentInput newDocumentInput() { */ @Override public void close() throws IOException { + if (this.shardId != null) { + CompositeStatsRegistry.getInstance().unregister(this.shardId); + } IOUtils.closeWhileHandlingException(primaryEngine); secondaryEngines.forEach(IOUtils::closeWhileHandlingException); IOUtils.closeWhileHandlingException(committer); diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeRegistryInitializer.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeRegistryInitializer.java new file mode 100644 index 0000000000000..aa83d41fb1431 --- /dev/null +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeRegistryInitializer.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.composite; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.inject.Inject; +import org.opensearch.composite.stats.CompositeStatsRegistry; +import org.opensearch.indices.IndicesService; + +/** + * Guice-managed eager singleton that wires {@link IndicesService} into the + * {@link CompositeStatsRegistry} after node injection completes. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class CompositeRegistryInitializer { + + @Inject + public CompositeRegistryInitializer(IndicesService indicesService) { + CompositeStatsRegistry.getInstance().setIndicesService(indicesService); + } +} diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeWriter.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeWriter.java index ba8145406d0df..0a9da541966be 100644 --- a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeWriter.java +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeWriter.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.composite.stats.CompositeShardStats; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.DocumentInput; import org.opensearch.index.engine.dataformat.FileInfos; @@ -25,6 +26,7 @@ import java.util.IdentityHashMap; import java.util.Map; import java.util.Optional; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; /** @@ -48,6 +50,7 @@ class CompositeWriter implements Writer { private final Map>> secondaryWritersByFormat; private final long writerGeneration; private final AtomicReference state; + private final CompositeShardStats stats; private long mappingVersion; /** @@ -78,11 +81,13 @@ enum WriterState { 
* * @param engine the composite indexing execution engine * @param config the writer configuration + * @param stats the shard-level stats collector */ @SuppressWarnings("unchecked") - CompositeWriter(CompositeIndexingExecutionEngine engine, WriterConfig config) { + CompositeWriter(CompositeIndexingExecutionEngine engine, WriterConfig config, CompositeShardStats stats) { this.state = new AtomicReference<>(WriterState.ACTIVE); this.writerGeneration = config.writerGeneration(); + this.stats = stats; IndexingExecutionEngine primaryDelegate = engine.getPrimaryDelegate(); this.primaryFormat = primaryDelegate.getDataFormat(); @@ -108,77 +113,117 @@ public WriteResult addDoc(CompositeDocumentInput doc) throws IOException { throw new IllegalStateException("Cannot add document to writer in state " + state.get()); } - // Write to primary first - WriteResult primaryResult = primaryWriter.addDoc(doc.getPrimaryInput()); - switch (primaryResult) { - case WriteResult.Success s -> logger.trace("Successfully added document in primary format [{}]", primaryFormat.name()); - case WriteResult.Failure f -> { - logger.debug("Failed to add document in primary format [{}]", primaryFormat.name()); - return primaryResult; + long startNanos = System.nanoTime(); + try { + // Write to primary first + long primaryStart = System.nanoTime(); + WriteResult primaryResult = primaryWriter.addDoc(doc.getPrimaryInput()); + long primaryElapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - primaryStart); + CompositeShardStats.FormatStats primaryFormatStats = stats.getOrCreateFormatStats(primaryFormat.name()); + switch (primaryResult) { + case WriteResult.Success s -> { + logger.trace("Successfully added document in primary format [{}]", primaryFormat.name()); + primaryFormatStats.addDocsIndexed(1); + primaryFormatStats.addIndexTimeMillis(primaryElapsed); + } + case WriteResult.Failure f -> { + logger.debug("Failed to add document in primary format [{}]", primaryFormat.name()); + 
primaryFormatStats.incIndexFailures(); + return primaryResult; + } } - } - // Then write to each secondary — keyed lookup by DataFormat (equals/hashCode based on name) - Map> secondaryInputs = doc.getSecondaryInputs(); - for (Map.Entry> inputEntry : secondaryInputs.entrySet()) { - DataFormat format = inputEntry.getKey(); - Writer> writer = secondaryWritersByFormat.get(format); - if (writer == null) { - logger.warn("No writer found for secondary format [{}], skipping", format.name()); - continue; - } - WriteResult result = writer.addDoc(inputEntry.getValue()); - switch (result) { - case WriteResult.Success s -> logger.trace("Successfully added document in secondary format [{}]", format.name()); - case WriteResult.Failure f -> { - logger.debug("Failed to add document in secondary format [{}]", format.name()); - return result; + // Then write to each secondary — keyed lookup by DataFormat (equals/hashCode based on name) + Map> secondaryInputs = doc.getSecondaryInputs(); + for (Map.Entry> inputEntry : secondaryInputs.entrySet()) { + DataFormat format = inputEntry.getKey(); + Writer> writer = secondaryWritersByFormat.get(format); + if (writer == null) { + logger.warn("No writer found for secondary format [{}], skipping", format.name()); + continue; + } + long secStart = System.nanoTime(); + WriteResult result = writer.addDoc(inputEntry.getValue()); + long secElapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - secStart); + CompositeShardStats.FormatStats formatStats = stats.getOrCreateFormatStats(format.name()); + switch (result) { + case WriteResult.Success s -> { + logger.trace("Successfully added document in secondary format [{}]", format.name()); + formatStats.addDocsIndexed(1); + formatStats.addIndexTimeMillis(secElapsed); + } + case WriteResult.Failure f -> { + logger.debug("Failed to add document in secondary format [{}]", format.name()); + formatStats.incIndexFailures(); + return result; + } } } - } - return primaryResult; + return primaryResult; + } 
finally { + stats.addDocsIndexed(1); + stats.addIndexTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); + } } @Override public FileInfos flush() throws IOException { setFlushPending(); - FileInfos.Builder builder = FileInfos.builder(); - // Flush primary - Optional primaryWfs = primaryWriter.flush().getWriterFileSet(primaryFormat); - primaryWfs.ifPresent(writerFileSet -> { - // Primary format's WriterFileSet must have the same generation as this composite writer - assert writerFileSet.writerGeneration() == writerGeneration : "primary WriterFileSet generation [" - + writerFileSet.writerGeneration() - + "] must match composite writer generation [" - + writerGeneration - + "]"; - builder.putWriterFileSet(primaryFormat, writerFileSet); - }); - // Flush secondaries - for (Writer> writer : secondaryWritersByFormat.values()) { - FileInfos fileInfos = writer.flush(); - // Iterate all format entries in the returned FileInfos - for (Map.Entry fileEntry : fileInfos.writerFilesMap().entrySet()) { - // Secondary format's WriterFileSet must also match this writer's generation - assert fileEntry.getValue().writerGeneration() == writerGeneration : "secondary WriterFileSet generation [" - + fileEntry.getValue().writerGeneration() - + "] for format [" - + fileEntry.getKey().name() + long startNanos = System.nanoTime(); + try { + FileInfos.Builder builder = FileInfos.builder(); + // Flush primary + long primaryStart = System.nanoTime(); + Optional primaryWfs = primaryWriter.flush().getWriterFileSet(primaryFormat); + stats.getOrCreateFormatStats(primaryFormat.name()) + .addFlushTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - primaryStart)); + primaryWfs.ifPresent(writerFileSet -> { + // Primary format's WriterFileSet must have the same generation as this composite writer + assert writerFileSet.writerGeneration() == writerGeneration : "primary WriterFileSet generation [" + + writerFileSet.writerGeneration() + "] must match composite writer 
generation [" + writerGeneration + "]"; - builder.putWriterFileSet(fileEntry.getKey(), fileEntry.getValue()); + builder.putWriterFileSet(primaryFormat, writerFileSet); + }); + // Flush secondaries + for (Map.Entry>> entry : secondaryWritersByFormat.entrySet()) { + long secStart = System.nanoTime(); + FileInfos fileInfos = entry.getValue().flush(); + stats.getOrCreateFormatStats(entry.getKey().name()) + .addFlushTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - secStart)); + // Iterate all format entries in the returned FileInfos + for (Map.Entry fileEntry : fileInfos.writerFilesMap().entrySet()) { + // Secondary format's WriterFileSet must also match this writer's generation + assert fileEntry.getValue().writerGeneration() == writerGeneration : "secondary WriterFileSet generation [" + + fileEntry.getValue().writerGeneration() + + "] for format [" + + fileEntry.getKey().name() + + "] must match composite writer generation [" + + writerGeneration + + "]"; + builder.putWriterFileSet(fileEntry.getKey(), fileEntry.getValue()); + } } + return builder.build(); + } finally { + stats.incFlushTotal(); + stats.addFlushTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); } - return builder.build(); } @Override public void sync() throws IOException { - primaryWriter.sync(); - for (Writer> writer : secondaryWritersByFormat.values()) { - writer.sync(); + long startNanos = System.nanoTime(); + try { + primaryWriter.sync(); + for (Writer> writer : secondaryWritersByFormat.values()) { + writer.sync(); + } + } finally { + stats.incSyncTotal(); + stats.addSyncTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); } } diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/CatalogSnapshotAction.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/CatalogSnapshotAction.java new file mode 100644 index 0000000000000..be539bb082dcc --- /dev/null +++ 
b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/CatalogSnapshotAction.java
@@ -0,0 +1,244 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.composite.action;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.concurrent.GatedCloseable;
+import org.opensearch.composite.stats.CompositeStatsRegistry;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.core.rest.RestStatus;
+import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.engine.exec.Segment;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.engine.exec.coord.CatalogSnapshot;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.indices.IndicesService;
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.BytesRestResponse;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.transport.client.node.NodeClient;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.TreeMap;
+
+/**
+ * REST handler for {@code GET /_plugins/composite/{index}/_catalog_snapshot}.
+ * <p>
+ * Returns a file-level breakdown of the current {@link CatalogSnapshot} of one shard, grouped by format
+ * and file extension. The optional {@code shard} query parameter selects the shard; without it the
+ * registered shard of the index with the lowest shard id is reported.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class CatalogSnapshotAction extends BaseRestHandler {
+
+    @Override
+    public String getName() {
+        return "catalog_snapshot_action";
+    }
+
+    @Override
+    public List<Route> routes() {
+        return List.of(new Route(RestRequest.Method.GET, "/_plugins/composite/{index}/_catalog_snapshot"));
+    }
+
+    /**
+     * Reads the {@code index} path parameter and the optional {@code shard} query parameter and returns a
+     * consumer that writes the catalog snapshot breakdown of the selected shard to the channel.
+     */
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) {
+        String indexName = request.param("index");
+        // paramAsInt names the offending parameter in its error when the value is not an integer
+        Integer shardFilter = request.hasParam("shard") ? request.paramAsInt("shard", 0) : null;
+
+        return channel -> {
+            try {
+                CompositeStatsRegistry registry = CompositeStatsRegistry.getInstance();
+                IndicesService indicesService = registry.getIndicesService();
+                if (indicesService == null) {
+                    channel.sendResponse(new BytesRestResponse(RestStatus.SERVICE_UNAVAILABLE, "IndicesService not available"));
+                    return;
+                }
+
+                Optional<ShardId> target = findTargetShard(registry, indexName, shardFilter);
+                if (target.isEmpty()) {
+                    String message = "No composite engine found for index [" + indexName + "]"
+                        + (shardFilter == null ? "" : " shard [" + shardFilter + "]");
+                    channel.sendResponse(new BytesRestResponse(RestStatus.NOT_FOUND, message));
+                    return;
+                }
+
+                ShardId targetShardId = target.get();
+                IndexService indexService = indicesService.indexServiceSafe(targetShardId.getIndex());
+                IndexShard indexShard = indexService.getShard(targetShardId.id());
+
+                // The snapshot reference is released only after the response has been built and sent
+                try (GatedCloseable<CatalogSnapshot> snapshotRef = indexShard.getCatalogSnapshot()) {
+                    XContentBuilder builder = channel.newBuilder();
+                    buildResponse(builder, indexName, targetShardId.id(), snapshotRef.get());
+                    channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
+                }
+            } catch (Exception e) {
+                channel.sendResponse(new BytesRestResponse(channel, e));
+            }
+        };
+    }
+
+    /**
+     * Picks the shard to report: among the registered engines of {@code indexName} that pass the optional
+     * {@code shardFilter}, the one with the lowest shard id, so repeated requests report the same shard.
+     */
+    private static Optional<ShardId> findTargetShard(CompositeStatsRegistry registry, String indexName, Integer shardFilter) {
+        return registry.getEngines()
+            .keySet()
+            .stream()
+            .filter(shardId -> shardId.getIndexName().equals(indexName))
+            .filter(shardId -> shardFilter == null || shardId.id() == shardFilter)
+            .min(Comparator.comparingInt(ShardId::id));
+    }
+
+    /**
+     * Writes the snapshot as one entry per segment listing the files of every format, followed by a summary
+     * aggregated by file extension and by format.
+     */
+    private void buildResponse(XContentBuilder builder, String indexName, int shardId, CatalogSnapshot snapshot) throws IOException {
+        List<Segment> segments = snapshot.getSegments();
+
+        // Summary accumulators; sorted maps keep the order of the summary keys stable across requests
+        Map<String, ExtensionSummary> byExtension = new TreeMap<>();
+        Map<String, FormatSummary> byFormat = new TreeMap<>();
+
+        builder.startObject();
+        builder.field("index", indexName);
+        builder.field("shard", shardId);
+        builder.field("generation", snapshot.getGeneration());
+        builder.field("version", snapshot.getVersion());
+        builder.field("num_docs", snapshot.getNumDocs());
+
+        builder.startArray("segments");
+        for (Segment segment : segments) {
+            builder.startObject();
+            builder.field("generation", segment.generation());
+            builder.startObject("formats");
+
+            for (Map.Entry<String, WriterFileSet> entry : segment.dfGroupedSearchableFiles().entrySet()) {
+                String format = entry.getKey();
+                WriterFileSet wfs = entry.getValue();
+
+                builder.startObject(format);
+                builder.field("num_rows", wfs.numRows());
+
+                long formatTotalSize = 0;
+                builder.startArray("files");
+                for (String fileName : wfs.files()) {
+                    long size = sizeOrZero(Path.of(wfs.directory(), fileName));
+
+                    builder.startObject();
+                    builder.field("name", fileName);
+                    builder.field("size_bytes", size);
+                    builder.endObject();
+
+                    formatTotalSize += size;
+
+                    // Track by extension
+                    String ext = getExtension(fileName);
+                    byExtension.computeIfAbsent(ext, k -> new ExtensionSummary()).add(size);
+                }
+                builder.endArray();
+                builder.field("total_size_bytes", formatTotalSize);
+                builder.endObject();
+
+                // Track by format
+                byFormat.computeIfAbsent(format, k -> new FormatSummary()).add(wfs.files().size(), formatTotalSize, wfs.numRows());
+            }
+
+            builder.endObject(); // formats
+            builder.endObject(); // segment
+        }
+        builder.endArray(); // segments
+
+        // Summary
+        builder.startObject("summary");
+        builder.field("total_segments", segments.size());
+
+        builder.startObject("by_extension");
+        for (Map.Entry<String, ExtensionSummary> entry : byExtension.entrySet()) {
+            builder.startObject(entry.getKey());
+            builder.field("file_count", entry.getValue().fileCount);
+            builder.field("total_size_bytes", entry.getValue().totalSize);
+            builder.endObject();
+        }
+        builder.endObject();
+
+        builder.startObject("by_format");
+        for (Map.Entry<String, FormatSummary> entry : byFormat.entrySet()) {
+            builder.startObject(entry.getKey());
+            builder.field("file_count", entry.getValue().fileCount);
+            builder.field("total_size_bytes", entry.getValue().totalSize);
+            builder.field("total_rows", entry.getValue().totalRows);
+            builder.endObject();
+        }
+        builder.endObject();
+
+        builder.endObject(); // summary
+        builder.endObject(); // root
+    }
+
+    /**
+     * Returns the size of {@code file} in bytes, or 0 when the size cannot be read. Sizes are best-effort so
+     * that one missing or unreadable file does not fail the whole response.
+     */
+    private static long sizeOrZero(Path file) {
+        try {
+            return Files.size(file);
+        } catch (IOException e) {
+            // Also covers a file that does not exist (NoSuchFileException), so no separate exists() check is needed
+            return 0;
+        }
+    }
+
+    /** Returns the extension of {@code fileName} including the leading dot, or an empty string when it has none. */
+    private static String getExtension(String fileName) {
+        int dot = fileName.lastIndexOf('.');
+        return dot >= 0 ? fileName.substring(dot) : "";
+    }
+
+    /** Running file count and total size in bytes for one file extension. */
+    private static class ExtensionSummary {
+        int fileCount;
+        long totalSize;
+
+        void add(long size) {
+            fileCount++;
+            totalSize += size;
+        }
+    }
+
+    /** Running file count, total size in bytes and total row count for one data format. */
+    private static class FormatSummary {
+        int fileCount;
+        long totalSize;
+        long totalRows;
+
+        void add(int files, long size, long rows) {
+            fileCount += files;
+            totalSize += size;
+            totalRows += rows;
+        }
+    }
+}
diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/DataFormatStatsAction.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/DataFormatStatsAction.java new file mode 100644 index 0000000000000..ad6b72df924e4 --- /dev/null +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/action/DataFormatStatsAction.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.composite.action; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.composite.CompositeIndexingExecutionEngine; +import org.opensearch.composite.stats.CompositeShardStats; +import org.opensearch.composite.stats.CompositeStatsRegistry; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.BytesRestResponse; +import org.opensearch.rest.RestRequest; +import org.opensearch.transport.client.node.NodeClient; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * REST handler for {@code GET /_plugins/dataformat_stats} and + * {@code GET /_plugins/dataformat_stats/{index}}. + *

+ * Collects the stats of every composite engine registered in {@link CompositeStatsRegistry} and returns
+ * them aggregated per index. The optional {@code level=shards} query parameter adds a per-shard
+ * breakdown under each index.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DataFormatStatsAction extends BaseRestHandler {
+
+    @Override
+    public String getName() {
+        return "dataformat_stats_action";
+    }
+
+    @Override
+    public List<Route> routes() {
+        return List.of(
+            new Route(RestRequest.Method.GET, "/_plugins/dataformat_stats"),
+            new Route(RestRequest.Method.GET, "/_plugins/dataformat_stats/{index}")
+        );
+    }
+
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) {
+        String indexFilter = request.param("index"); // null when the route without {index} was used
+        String level = request.param("level");
+        boolean shardLevel = "shards".equals(level);
+
+        return channel -> {
+            try {
+                Map<ShardId, CompositeIndexingExecutionEngine> engines = CompositeStatsRegistry.getInstance().getEngines();
+
+                // Group the stats of every registered shard by index name, honouring the optional index filter
+                Map<String, Map<ShardId, CompositeShardStats>> byIndex = new HashMap<>();
+                for (Map.Entry<ShardId, CompositeIndexingExecutionEngine> entry : engines.entrySet()) {
+                    String indexName = entry.getKey().getIndexName();
+                    if (indexFilter != null && !indexFilter.equals(indexName)) {
+                        continue;
+                    }
+                    byIndex.computeIfAbsent(indexName, k -> new HashMap<>()).put(entry.getKey(), entry.getValue().getStats());
+                }
+
+                XContentBuilder builder = channel.newBuilder();
+                builder.startObject();
+                builder.startObject("indices");
+                for (Map.Entry<String, Map<ShardId, CompositeShardStats>> indexEntry : byIndex.entrySet()) {
+                    builder.startObject(indexEntry.getKey());
+
+                    // Aggregated stats across all shards of this index
+                    builder.startObject("composite");
+                    CompositeShardStats aggregated = CompositeShardStats.aggregate(indexEntry.getValue().values());
+                    aggregated.toXContent(builder, request);
+                    builder.endObject();
+
+                    // Shard-level detail, keyed by shard id, only when level=shards was requested
+                    if (shardLevel) {
+                        builder.startObject("shards");
+                        for (Map.Entry<ShardId, CompositeShardStats> shardEntry : indexEntry.getValue().entrySet()) {
+                            builder.startObject(String.valueOf(shardEntry.getKey().id()));
+                            shardEntry.getValue().toXContent(builder, request);
+                            builder.endObject();
+                        }
+                        builder.endObject();
+                    }
+
+                    builder.endObject();
+                }
+                builder.endObject();
+                builder.endObject();
+                channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
+            } catch (Exception e) {
+                channel.sendResponse(new BytesRestResponse(channel, e));
+            }
+        };
+    }
+}
diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMergeExecutor.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMergeExecutor.java index caf75785175db..d973765d32a19 100644 --- a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMergeExecutor.java +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMergeExecutor.java @@ -9,6 +9,7 @@ package org.opensearch.composite.merge; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.composite.stats.CompositeShardStats; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.MergeInput; import org.opensearch.index.engine.dataformat.MergeResult; @@ -23,6 +24,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; /** * Executes a composite merge: primary format first, then secondaries using the @@ -35,9 +37,11 @@ public class CompositeMergeExecutor { private final Map mergers; + private final CompositeShardStats stats; - public CompositeMergeExecutor(Map mergers) { + public CompositeMergeExecutor(Map mergers, CompositeShardStats stats) { this.mergers = Map.copyOf(mergers); + this.stats = stats; } /** @@ -47,9 +51,13 @@ public CompositeMergeExecutor(Map mergers) { * @return the combined merge result across all formats */ public MergeResult execute(MergePlan plan) { + long startNanos = System.nanoTime(); List completed = new ArrayList<>(); try { + long primaryStart = System.nanoTime(); FormatMergeResult primaryResult =
mergeFormat(plan, plan.primaryFormat(), null); + stats.getOrCreateFormatStats(plan.primaryFormat().name()) + .addMergeTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - primaryStart)); completed.add(primaryResult); RowIdMapping mapping = plan.hasSecondaries() @@ -58,14 +66,33 @@ public MergeResult execute(MergePlan plan) { : null; for (DataFormat secondary : plan.secondaryFormats()) { - completed.add(mergeFormat(plan, secondary, mapping)); + long secStart = System.nanoTime(); + try { + FormatMergeResult secResult = mergeFormat(plan, secondary, mapping); + stats.getOrCreateFormatStats(secondary.name()) + .addMergeTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - secStart)); + completed.add(secResult); + } catch (Exception e) { + stats.getOrCreateFormatStats(secondary.name()).incMergeFailures(); + throw e; + } } + // Count total input segments across all formats + long inputCount = 0; + for (DataFormat format : plan.filesByFormat().keySet()) { + inputCount += plan.filesFor(format).size(); + } + stats.addMergeSegmentsInputTotal(inputCount); + return toMergeResult(completed, mapping); } catch (Exception e) { completed.forEach(FormatMergeResult::cleanup); if (e instanceof RuntimeException re) throw re; throw new UncheckedIOException((IOException) e); + } finally { + stats.incMergeTotal(); + stats.addMergeTimeMillis(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); } } diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMerger.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMerger.java index b32d50a1368f1..5ecb30e4faf80 100644 --- a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMerger.java +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/merge/CompositeMerger.java @@ -11,6 +11,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import 
org.opensearch.composite.CompositeDataFormat; import org.opensearch.composite.CompositeIndexingExecutionEngine; +import org.opensearch.composite.stats.CompositeShardStats; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.dataformat.IndexingExecutionEngine; import org.opensearch.index.engine.dataformat.MergeInput; @@ -41,10 +42,10 @@ public class CompositeMerger implements Merger { private final List secondaryFormats; private final CompositeMergeExecutor executor; - public CompositeMerger(CompositeIndexingExecutionEngine engine, CompositeDataFormat compositeDataFormat) { + public CompositeMerger(CompositeIndexingExecutionEngine engine, CompositeDataFormat compositeDataFormat, CompositeShardStats stats) { this.primaryFormat = compositeDataFormat.getPrimaryDataFormat(); this.secondaryFormats = resolveSecondaryFormats(compositeDataFormat, primaryFormat); - this.executor = new CompositeMergeExecutor(buildMergerMap(engine)); + this.executor = new CompositeMergeExecutor(buildMergerMap(engine), stats); } @Override
diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeShardStats.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeShardStats.java
new file mode 100644
index 0000000000000..e1d2506b06c85
--- /dev/null
+++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeShardStats.java
@@ -0,0 +1,390 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.composite.stats;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.common.io.stream.Writeable;
+import org.opensearch.core.xcontent.ToXContentFragment;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Shard-level statistics collector for the Composite data format plugin.
+ * Tracks aggregate counters across all formats and per-format breakdown stats.
+ * Uses LongAdder for high-throughput counters and AtomicLong for gauges, so the mutators may be
+ * called concurrently; values read while writers are active are not an atomic cross-field snapshot.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class CompositeShardStats implements ToXContentFragment, Writeable {
+
+    // Indexing counters (aggregate)
+    private final LongAdder docsIndexedTotal = new LongAdder();
+    private final LongAdder indexTimeMillis = new LongAdder();
+
+    // Refresh counters (aggregate)
+    private final LongAdder refreshTotal = new LongAdder();
+    private final LongAdder refreshTimeMillis = new LongAdder();
+
+    // Merge counters (aggregate)
+    private final LongAdder mergeTotal = new LongAdder();
+    private final LongAdder mergeTimeMillis = new LongAdder();
+    private final LongAdder mergeSegmentsInputTotal = new LongAdder();
+
+    // Flush counters (aggregate)
+    private final LongAdder flushTotal = new LongAdder();
+    private final LongAdder flushTimeMillis = new LongAdder();
+
+    // Sync counters (aggregate)
+    private final LongAdder syncTotal = new LongAdder();
+    private final LongAdder syncTimeMillis = new LongAdder();
+
+    // Memory gauge
+    private final AtomicLong nativeBytesUsed = new AtomicLong();
+
+    // Per-format breakdown, keyed by format name
+    private final ConcurrentHashMap<String, FormatStats> perFormatStats = new ConcurrentHashMap<>();
+
+    public CompositeShardStats() {}
+
+    /** Reads the fields in the order in which {@link #writeTo(StreamOutput)} writes them. */
+    public CompositeShardStats(StreamInput in) throws IOException {
+        // Indexing
+        docsIndexedTotal.add(in.readVLong());
+        indexTimeMillis.add(in.readVLong());
+
+        // Refresh
+        refreshTotal.add(in.readVLong());
+        refreshTimeMillis.add(in.readVLong());
+
+        // Merge
+        mergeTotal.add(in.readVLong());
+        mergeTimeMillis.add(in.readVLong());
+        mergeSegmentsInputTotal.add(in.readVLong());
+
+        // Flush
+        flushTotal.add(in.readVLong());
+        flushTimeMillis.add(in.readVLong());
+
+        // Sync
+        syncTotal.add(in.readVLong());
+        syncTimeMillis.add(in.readVLong());
+
+        // Memory
+        nativeBytesUsed.set(in.readVLong());
+
+        // Per-format
+        int size = in.readVInt();
+        for (int i = 0; i < size; i++) {
+            String formatName = in.readString();
+            perFormatStats.put(formatName, new FormatStats(in));
+        }
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        // Indexing
+        out.writeVLong(docsIndexedTotal.sum());
+        out.writeVLong(indexTimeMillis.sum());
+
+        // Refresh
+        out.writeVLong(refreshTotal.sum());
+        out.writeVLong(refreshTimeMillis.sum());
+
+        // Merge
+        out.writeVLong(mergeTotal.sum());
+        out.writeVLong(mergeTimeMillis.sum());
+        out.writeVLong(mergeSegmentsInputTotal.sum());
+
+        // Flush
+        out.writeVLong(flushTotal.sum());
+        out.writeVLong(flushTimeMillis.sum());
+
+        // Sync
+        out.writeVLong(syncTotal.sum());
+        out.writeVLong(syncTimeMillis.sum());
+
+        // Memory
+        out.writeVLong(nativeBytesUsed.get());
+
+        // Per-format
+        out.writeVInt(perFormatStats.size());
+        for (Map.Entry<String, FormatStats> entry : perFormatStats.entrySet()) {
+            out.writeString(entry.getKey());
+            entry.getValue().writeTo(out);
+        }
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        // Indexing
+        builder.startObject("indexing");
+        builder.field("docs_indexed_total", docsIndexedTotal.sum());
+        builder.field("index_time_millis", indexTimeMillis.sum());
+        builder.endObject();
+
+        // Refresh
+        builder.startObject("refresh");
+        builder.field("refresh_total", refreshTotal.sum());
+        builder.field("refresh_time_millis", refreshTimeMillis.sum());
+        builder.endObject();
+
+        // Merge
+        builder.startObject("merge");
+        builder.field("merge_total", mergeTotal.sum());
+        builder.field("merge_time_millis", mergeTimeMillis.sum());
+        builder.field("merge_segments_input_total", mergeSegmentsInputTotal.sum());
+        builder.endObject();
+
+        // Flush
+        builder.startObject("flush");
+        builder.field("flush_total", flushTotal.sum());
+        builder.field("flush_time_millis", flushTimeMillis.sum());
+        builder.endObject();
+
+        // Sync
+        builder.startObject("sync");
+        builder.field("sync_total", syncTotal.sum());
+        builder.field("sync_time_millis", syncTimeMillis.sum());
+        builder.endObject();
+
+        // Memory
+        builder.startObject("memory");
+        builder.field("native_bytes_used", nativeBytesUsed.get());
+        builder.endObject();
+
+        // Per-format breakdown
+        builder.startObject("per_format");
+        for (Map.Entry<String, FormatStats> entry : perFormatStats.entrySet()) {
+            builder.startObject(entry.getKey());
+            entry.getValue().toXContent(builder, params);
+            builder.endObject();
+        }
+        builder.endObject();
+
+        return builder;
+    }
+
+    /**
+     * Returns this instance, not a copy: the returned object keeps changing as the shard records more
+     * activity. Each counter is summed at the moment it is serialized or rendered.
+     */
+    public CompositeShardStats snapshot() {
+        return this;
+    }
+
+    /**
+     * Creates a new stats instance holding the sum of all counters, of the native-memory gauge and of the
+     * per-format stats across the given shard stats. The inputs are not modified.
+     */
+    public static CompositeShardStats aggregate(Collection<CompositeShardStats> shardStats) {
+        CompositeShardStats agg = new CompositeShardStats();
+        for (CompositeShardStats s : shardStats) {
+            agg.docsIndexedTotal.add(s.docsIndexedTotal.sum());
+            agg.indexTimeMillis.add(s.indexTimeMillis.sum());
+            agg.refreshTotal.add(s.refreshTotal.sum());
+            agg.refreshTimeMillis.add(s.refreshTimeMillis.sum());
+            agg.mergeTotal.add(s.mergeTotal.sum());
+            agg.mergeTimeMillis.add(s.mergeTimeMillis.sum());
+            agg.mergeSegmentsInputTotal.add(s.mergeSegmentsInputTotal.sum());
+            agg.flushTotal.add(s.flushTotal.sum());
+            agg.flushTimeMillis.add(s.flushTimeMillis.sum());
+            agg.syncTotal.add(s.syncTotal.sum());
+            agg.syncTimeMillis.add(s.syncTimeMillis.sum());
+            agg.nativeBytesUsed.addAndGet(s.nativeBytesUsed.get());
+            for (Map.Entry<String, FormatStats> entry : s.perFormatStats.entrySet()) {
+                agg.getOrCreateFormatStats(entry.getKey()).addFrom(entry.getValue());
+            }
+        }
+        return agg;
+    }
+
+    /**
+     * Lazily creates and returns per-format stats for the given format name.
+     */
+    public FormatStats getOrCreateFormatStats(String formatName) {
+        return perFormatStats.computeIfAbsent(formatName, k -> new FormatStats());
+    }
+
+    // --- Indexing methods ---
+
+    public void addDocsIndexed(long n) {
+        docsIndexedTotal.add(n);
+    }
+
+    public void addIndexTimeMillis(long ms) {
+        indexTimeMillis.add(ms);
+    }
+
+    // --- Refresh methods ---
+
+    public void incRefreshTotal() {
+        refreshTotal.increment();
+    }
+
+    public void addRefreshTimeMillis(long ms) {
+        refreshTimeMillis.add(ms);
+    }
+
+    // --- Merge methods ---
+
+    public void incMergeTotal() {
+        mergeTotal.increment();
+    }
+
+    public void addMergeTimeMillis(long ms) {
+        mergeTimeMillis.add(ms);
+    }
+
+    public void addMergeSegmentsInputTotal(long n) {
+        mergeSegmentsInputTotal.add(n);
+    }
+
+    // --- Flush methods ---
+
+    public void incFlushTotal() {
+        flushTotal.increment();
+    }
+
+    public void addFlushTimeMillis(long ms) {
+        flushTimeMillis.add(ms);
+    }
+
+    // --- Sync methods ---
+
+    public void incSyncTotal() {
+        syncTotal.increment();
+    }
+
+    public void addSyncTimeMillis(long ms) {
+        syncTimeMillis.add(ms);
+    }
+
+    // --- Memory gauge methods ---
+
+    /**
+     * Sets the native-memory gauge. Negative values are stored as 0 because {@link #writeTo(StreamOutput)}
+     * serializes the gauge with {@code writeVLong}, which rejects negative numbers.
+     */
+    public void setNativeBytesUsed(long bytes) {
+        nativeBytesUsed.set(Math.max(0L, bytes));
+    }
+
+    /**
+     * Per-format statistics breakdown. Tracks indexing, refresh, merge, flush,
+     * and sort metrics for an individual data format within the composite engine.
+     */
+    @ExperimentalApi
+    public static class FormatStats implements ToXContentFragment, Writeable {
+
+        private final LongAdder docsIndexedTotal = new LongAdder();
+        private final LongAdder indexTimeMillis = new LongAdder();
+        private final LongAdder indexFailures = new LongAdder();
+        private final LongAdder refreshTimeMillis = new LongAdder();
+        private final LongAdder mergeTimeMillis = new LongAdder();
+        private final LongAdder mergeFailures = new LongAdder();
+        private final LongAdder flushTimeMillis = new LongAdder();
+        private final LongAdder sortTimeMillis = new LongAdder();
+
+        public FormatStats() {}
+
+        public FormatStats(StreamInput in) throws IOException {
+            docsIndexedTotal.add(in.readVLong());
+            indexTimeMillis.add(in.readVLong());
+            indexFailures.add(in.readVLong());
+            refreshTimeMillis.add(in.readVLong());
+            mergeTimeMillis.add(in.readVLong());
+            mergeFailures.add(in.readVLong());
+            flushTimeMillis.add(in.readVLong());
+            sortTimeMillis.add(in.readVLong());
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeVLong(docsIndexedTotal.sum());
+            out.writeVLong(indexTimeMillis.sum());
+            out.writeVLong(indexFailures.sum());
+            out.writeVLong(refreshTimeMillis.sum());
+            out.writeVLong(mergeTimeMillis.sum());
+            out.writeVLong(mergeFailures.sum());
+            out.writeVLong(flushTimeMillis.sum());
+            out.writeVLong(sortTimeMillis.sum());
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            builder.field("docs_indexed_total", docsIndexedTotal.sum());
+            builder.field("index_time_millis", indexTimeMillis.sum());
+            builder.field("index_failures", indexFailures.sum());
+            builder.field("refresh_time_millis", refreshTimeMillis.sum());
+            builder.field("merge_time_millis", mergeTimeMillis.sum());
+            builder.field("merge_failures", mergeFailures.sum());
+            builder.field("flush_time_millis", flushTimeMillis.sum());
+            builder.field("sort_time_millis", sortTimeMillis.sum());
+            return builder;
+        }
+
+        // --- FormatStats mutators ---
+
+        public void addDocsIndexed(long n) {
+            docsIndexedTotal.add(n);
+        }
+
+        public void addIndexTimeMillis(long ms) {
+            indexTimeMillis.add(ms);
+        }
+
+        public void incIndexFailures() {
+            indexFailures.increment();
+        }
+
+        public void addRefreshTimeMillis(long ms) {
+            refreshTimeMillis.add(ms);
+        }
+
+        public void addMergeTimeMillis(long ms) {
+            mergeTimeMillis.add(ms);
+        }
+
+        public void incMergeFailures() {
+            mergeFailures.increment();
+        }
+
+        public void addFlushTimeMillis(long ms) {
+            flushTimeMillis.add(ms);
+        }
+
+        public void addSortTimeMillis(long ms) {
+            sortTimeMillis.add(ms);
+        }
+
+        /**
+         * Adds all counters from another FormatStats instance into this one.
+         */
+        public void addFrom(FormatStats other) {
+            docsIndexedTotal.add(other.docsIndexedTotal.sum());
+            indexTimeMillis.add(other.indexTimeMillis.sum());
+            indexFailures.add(other.indexFailures.sum());
+            refreshTimeMillis.add(other.refreshTimeMillis.sum());
+            mergeTimeMillis.add(other.mergeTimeMillis.sum());
+            mergeFailures.add(other.mergeFailures.sum());
+            flushTimeMillis.add(other.flushTimeMillis.sum());
+            sortTimeMillis.add(other.sortTimeMillis.sum());
+        }
+    }
+}
diff --git a/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeStatsRegistry.java b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeStatsRegistry.java new file mode 100644 index 0000000000000..78bebd8235598 --- /dev/null +++ b/sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/stats/CompositeStatsRegistry.java @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */
+
+package org.opensearch.composite.stats;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.composite.CompositeIndexingExecutionEngine;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.indices.IndicesService;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Singleton registry that tracks all active {@link CompositeIndexingExecutionEngine} instances
+ * by their {@link ShardId}. Registrations are held in a ConcurrentHashMap, so methods may be called concurrently.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public final class CompositeStatsRegistry {
+
+    private static final CompositeStatsRegistry INSTANCE = new CompositeStatsRegistry();
+
+    private final ConcurrentHashMap<ShardId, CompositeIndexingExecutionEngine> engines = new ConcurrentHashMap<>();
+    private volatile IndicesService indicesService; // null until setIndicesService is called
+
+    private CompositeStatsRegistry() {}
+
+    public static CompositeStatsRegistry getInstance() {
+        return INSTANCE;
+    }
+
+    public void setIndicesService(IndicesService indicesService) {
+        this.indicesService = indicesService;
+    }
+
+    public IndicesService getIndicesService() {
+        return indicesService; // may be null; callers must check
+    }
+
+    public void register(ShardId shardId, CompositeIndexingExecutionEngine engine) {
+        engines.put(shardId, engine); // replaces any engine already registered for the same shard
+    }
+
+    public void unregister(ShardId shardId) {
+        engines.remove(shardId); // no-op when the shard is not registered
+    }
+
+    public Map<ShardId, CompositeIndexingExecutionEngine> getEngines() {
+        return Collections.unmodifiableMap(engines); // read-only live view of the registry, not a copy
+    }
+}
diff --git a/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeIndexingExecutionEngineTests.java b/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeIndexingExecutionEngineTests.java index b32f2d1a9176c..58d40daf202c4 100644 --- a/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeIndexingExecutionEngineTests.java +++ b/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeIndexingExecutionEngineTests.java @@ -247,12 +247,14 @@
private static class TrackingCommitter implements Committer { public CommitResult commit(CommitInput commitData) { commitCalled = true; lastCommitData = StreamSupport.stream(commitData.userData().spliterator(), false) - .collect(Collectors.toMap( - Map.Entry::getKey, - Map.Entry::getValue, - (existing, replacement) -> replacement, // Merge function for duplicate keys - HashMap::new - )); + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (existing, replacement) -> replacement, // Merge function for duplicate keys + HashMap::new + ) + ); return null; } diff --git a/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeWriterTests.java b/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeWriterTests.java index bb9bd32aca41e..7293fcebefef2 100644 --- a/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeWriterTests.java +++ b/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/CompositeWriterTests.java @@ -8,6 +8,7 @@ package org.opensearch.composite; +import org.opensearch.composite.stats.CompositeShardStats; import org.opensearch.index.engine.dataformat.FileInfos; import org.opensearch.index.engine.dataformat.WriterConfig; import org.opensearch.test.OpenSearchTestCase; @@ -29,20 +30,20 @@ public void setUp() throws Exception { public void testWriterGenerationIsPreserved() throws IOException { long gen = randomLongBetween(0, 1000); - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(gen)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(gen), new CompositeShardStats()); assertEquals(gen, writer.getWriterGeneration()); writer.close(); } public void testAbortedDefaultsToFalse() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); 
assertFalse(writer.isAborted()); assertEquals(CompositeWriter.WriterState.ACTIVE, writer.getState()); writer.close(); } public void testAbortSetsAbortedFlag() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.abort(); assertTrue(writer.isAborted()); assertEquals(CompositeWriter.WriterState.ABORTED, writer.getState()); @@ -50,14 +51,14 @@ public void testAbortSetsAbortedFlag() throws IOException { } public void testFlushPendingDefaultsToFalse() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); assertFalse(writer.isFlushPending()); assertEquals(CompositeWriter.WriterState.ACTIVE, writer.getState()); writer.close(); } public void testSetFlushPendingSetsFlag() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.setFlushPending(); assertTrue(writer.isFlushPending()); assertEquals(CompositeWriter.WriterState.FLUSH_PENDING, writer.getState()); @@ -65,7 +66,7 @@ public void testSetFlushPendingSetsFlag() throws IOException { } public void testAbortDoesNotTransitionFromFlushPending() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.setFlushPending(); expectThrows(IllegalStateException.class, writer::abort); assertTrue(writer.isFlushPending()); @@ -74,7 +75,7 @@ public void testAbortDoesNotTransitionFromFlushPending() throws IOException { } public void testFlushPendingDoesNotTransitionFromAborted() throws IOException { - CompositeWriter writer = new 
CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.abort(); expectThrows(IllegalStateException.class, writer::setFlushPending); assertTrue(writer.isAborted()); @@ -83,40 +84,40 @@ public void testFlushPendingDoesNotTransitionFromAborted() throws IOException { } public void testFlushReturnsFileInfos() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); FileInfos fileInfos = writer.flush(); assertNotNull(fileInfos); writer.close(); } public void testSyncDoesNotThrow() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.sync(); writer.close(); } public void testCloseDoesNotThrow() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.close(); // calling close again should also not throw writer.close(); } public void testMappingVersionSetAtConstruction() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); assertEquals(0L, writer.mappingVersion()); writer.close(); } public void testUpdateMappingVersionPropagates() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); writer.updateMappingVersion(5L); assertEquals(5L, writer.mappingVersion()); writer.close(); } public void 
testIsSchemaMutableReturnsFalseWhenAnySubWriterImmutable() throws IOException { - CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0)); + CompositeWriter writer = new CompositeWriter(engine, new WriterConfig(0), new CompositeShardStats()); assertTrue(writer.isSchemaMutable()); // Make the primary sub-writer immutable diff --git a/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/merge/CompositeMergerTests.java b/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/merge/CompositeMergerTests.java index a25b88919b26f..dc29b41b05ac8 100644 --- a/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/merge/CompositeMergerTests.java +++ b/sandbox/plugins/composite-engine/src/test/java/org/opensearch/composite/merge/CompositeMergerTests.java @@ -14,6 +14,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.composite.CompositeDataFormat; import org.opensearch.composite.CompositeIndexingExecutionEngine; +import org.opensearch.composite.stats.CompositeShardStats; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; @@ -135,7 +136,7 @@ public void testDoMergePrimaryOnlyNoSecondaries() throws IOException { MergeHandler handler = new MergeHandler( snapshotSupplier, - new CompositeMerger(engineNoSecondary, primaryOnlyFormat), + new CompositeMerger(engineNoSecondary, primaryOnlyFormat, new CompositeShardStats()), SHARD_ID, mock(MergeHandler.MergePolicy.class), mock(MergeHandler.MergeListener.class), @@ -219,7 +220,7 @@ public void testDoMergeMultipleSecondariesFailsFastOnFirstError() throws IOExcep MergeHandler handler = new MergeHandler( snapshotSupplier, - new CompositeMerger(multiEngine, multiFormat), + new CompositeMerger(multiEngine, multiFormat, new CompositeShardStats()), SHARD_ID, mock(MergeHandler.MergePolicy.class), mock(MergeHandler.MergeListener.class), @@ -370,7 +371,7 @@ public void 
testDoMergeSkipsSecondaryThatEqualsPrimary() throws IOException { MergeHandler handler = new MergeHandler( snapshotSupplier, - new CompositeMerger(dupEngine, dupFormat), + new CompositeMerger(dupEngine, dupFormat, new CompositeShardStats()), SHARD_ID, mock(MergeHandler.MergePolicy.class), mock(MergeHandler.MergeListener.class), @@ -565,7 +566,7 @@ public void testCleanupStaleMergedFilesLogsExceptionOnDeleteFailure() throws IOE private MergeHandler createHandler() { return new MergeHandler( snapshotSupplier, - new CompositeMerger(compositeEngine, compositeDataFormat), + new CompositeMerger(compositeEngine, compositeDataFormat, new CompositeShardStats()), SHARD_ID, mock(MergeHandler.MergePolicy.class), mock(MergeHandler.MergeListener.class), @@ -584,7 +585,7 @@ private MergeHandler createHandlerWithRealPolicy() { DataFormatAwareMergePolicy policy = new DataFormatAwareMergePolicy(indexSettings.getMergePolicy(true), SHARD_ID); return new MergeHandler( snapshotSupplier, - new CompositeMerger(compositeEngine, compositeDataFormat), + new CompositeMerger(compositeEngine, compositeDataFormat, new CompositeShardStats()), SHARD_ID, policy, policy, diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/ParquetDataFormatPlugin.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/ParquetDataFormatPlugin.java index c44dac2a792e9..fddc65f43d2ca 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/ParquetDataFormatPlugin.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/ParquetDataFormatPlugin.java @@ -9,9 +9,14 @@ package org.opensearch.parquet; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Module; +import org.opensearch.common.settings.ClusterSettings; +import 
org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsFilter; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -26,12 +31,17 @@ import org.opensearch.index.engine.dataformat.IndexingExecutionEngine; import org.opensearch.index.engine.dataformat.StoreStrategy; import org.opensearch.index.store.PrecomputedChecksumStrategy; +import org.opensearch.parquet.action.ParquetAnalyzeAction; +import org.opensearch.parquet.action.ParquetRegistryInitializer; import org.opensearch.parquet.engine.ParquetDataFormat; import org.opensearch.parquet.engine.ParquetIndexingEngine; import org.opensearch.parquet.fields.ArrowSchemaBuilder; import org.opensearch.parquet.store.ParquetStoreStrategy; +import org.opensearch.plugins.ActionPlugin; import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; +import org.opensearch.rest.RestController; +import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; @@ -59,7 +69,7 @@ * routing directory events, and closing native resources are all handled * there. The plugin stays purely declarative. 
*/ -public class ParquetDataFormatPlugin extends Plugin implements DataFormatPlugin { +public class ParquetDataFormatPlugin extends Plugin implements DataFormatPlugin, ActionPlugin { /** * Current parquet writer format version, long-encoded (plugin-defined namespace; the @@ -151,4 +161,22 @@ public List> getExecutorBuilders(Settings settings) { ) ); } + + @Override + public List getRestHandlers( + Settings settings, + RestController restController, + ClusterSettings clusterSettings, + IndexScopedSettings indexScopedSettings, + SettingsFilter settingsFilter, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier nodesInCluster + ) { + return List.of(new ParquetAnalyzeAction()); + } + + @Override + public Collection createGuiceModules() { + return List.of(b -> b.bind(ParquetRegistryInitializer.class).asEagerSingleton()); + } } diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetAnalyzeAction.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetAnalyzeAction.java new file mode 100644 index 0000000000000..1e0bb6383c499 --- /dev/null +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetAnalyzeAction.java @@ -0,0 +1,323 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.parquet.action;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.concurrent.GatedCloseable;
+import org.opensearch.common.xcontent.XContentHelper;
+import org.opensearch.common.xcontent.XContentType;
+import org.opensearch.core.rest.RestStatus;
+import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.engine.exec.Segment;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.engine.exec.coord.CatalogSnapshot;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.indices.IndicesService;
+import org.opensearch.parquet.bridge.RustBridge;
+import org.opensearch.parquet.engine.ParquetDataFormat;
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.BytesRestResponse;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.transport.client.node.NodeClient;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * REST handler for {@code GET /_plugins/parquet/{index}/_analyze}.
+ * Returns column-level statistics for parquet files in an index.
+ *
+ * <p>Optional query parameters: {@code shard} (integer shard id; only that shard is analyzed)
+ * and {@code file_level} (boolean, default {@code false}; adds a per-file {@code files} array).
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class ParquetAnalyzeAction extends BaseRestHandler {
+
+    @Override
+    public String getName() {
+        return "parquet_analyze_action";
+    }
+
+    @Override
+    public List<Route> routes() {
+        return List.of(new Route(RestRequest.Method.GET, "/_plugins/parquet/{index}/_analyze"));
+    }
+
+    /**
+     * Reads the request parameters and returns a consumer that walks every parquet file of the
+     * selected shards, analyzes each one through {@link RustBridge#analyzeFile(String)} and
+     * writes the aggregated statistics to the channel.
+     *
+     * @param request the REST request; {@code index}, {@code shard} and {@code file_level} are read
+     * @param client  unused
+     * @return consumer producing the analyze response, or an error response on failure
+     * @throws NumberFormatException if {@code shard} is present but not an integer
+     */
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) {
+        String indexName = request.param("index");
+        String shardParam = request.param("shard");
+        Integer shardFilter = shardParam != null ? Integer.parseInt(shardParam) : null;
+        boolean fileLevel = request.paramAsBoolean("file_level", false);
+
+        // NOTE(review): the consumer below does file-system and native calls synchronously on whichever
+        // thread invokes it; consider dispatching to a thread pool if that thread must not block.
+        return channel -> {
+            try {
+                IndicesService indicesService = ParquetRegistryInitializer.getIndicesService();
+                if (indicesService == null) {
+                    channel.sendResponse(new BytesRestResponse(RestStatus.SERVICE_UNAVAILABLE, "IndicesService not available"));
+                    return;
+                }
+
+                IndexService indexService = findIndexService(indicesService, indexName);
+                if (indexService == null) {
+                    channel.sendResponse(new BytesRestResponse(RestStatus.NOT_FOUND, "Index [" + indexName + "] not found"));
+                    return;
+                }
+
+                long totalRows = 0;
+                long totalSizeBytes = 0;
+                long totalFooterSize = 0;
+                List<Object> sortingColumns = null;
+                // Insertion-ordered so the "fields" array is emitted in a stable, first-seen order.
+                Map<String, FieldStats> fieldStatsMap = new LinkedHashMap<>();
+                List<Map<String, Object>> fileDetails = new ArrayList<>();
+
+                for (IndexShard shard : indexService) {
+                    if (shardFilter != null && shard.shardId().id() != shardFilter) {
+                        continue;
+                    }
+                    try (GatedCloseable<CatalogSnapshot> snapshotRef = shard.getCatalogSnapshot()) {
+                        CatalogSnapshot snapshot = snapshotRef.get();
+                        for (Segment segment : snapshot.getSegments()) {
+                            WriterFileSet wfs = segment.dfGroupedSearchableFiles().get(ParquetDataFormat.PARQUET_DATA_FORMAT_NAME);
+                            if (wfs == null) {
+                                continue;
+                            }
+                            totalRows += wfs.numRows();
+                            for (String fileName : wfs.files()) {
+                                Path file = Path.of(wfs.directory(), fileName);
+                                totalSizeBytes += Files.size(file);
+                                Map<String, Object> parsed = parseAnalyzeJson(RustBridge.analyzeFile(file.toString()));
+                                aggregateFields(parsed, fieldStatsMap);
+                                totalFooterSize += toLong(parsed.get("footer_size"));
+                                // Take sorting_columns from the first file
+                                if (sortingColumns == null && parsed.get("sorting_columns") instanceof List<?> sc) {
+                                    sortingColumns = new ArrayList<>(sc);
+                                }
+                                if (fileLevel) {
+                                    Map<String, Object> fileDetail = new HashMap<>();
+                                    fileDetail.put("file", fileName);
+                                    fileDetail.put("detail", parsed);
+                                    fileDetails.add(fileDetail);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                XContentBuilder builder = channel.newBuilder();
+                builder.startObject();
+                builder.field("index", indexName);
+                builder.field("total_rows", totalRows);
+                builder.field("total_size_bytes", totalSizeBytes);
+                builder.field("footer_size", totalFooterSize);
+                if (sortingColumns != null) {
+                    builder.field("sorting_columns", sortingColumns);
+                }
+                buildFieldsArray(builder, fieldStatsMap);
+                if (fileLevel) {
+                    buildFilesArray(builder, fileDetails);
+                }
+                builder.endObject();
+                channel.sendResponse(new BytesRestResponse(RestStatus.OK, builder));
+            } catch (Exception e) {
+                channel.sendResponse(new BytesRestResponse(channel, e));
+            }
+        };
+    }
+
+    /**
+     * Looks up an index by exact name among the indices held by {@code indicesService}.
+     *
+     * @return the matching service, or {@code null} when no index has that name
+     */
+    private IndexService findIndexService(IndicesService indicesService, String indexName) {
+        for (IndexService indexService : indicesService) {
+            if (indexService.index().getName().equals(indexName)) {
+                return indexService;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Parses the JSON produced by the native analyzer into a map.
+     *
+     * @return the parsed document, or an empty map when the input cannot be parsed
+     */
+    private static Map<String, Object> parseAnalyzeJson(String json) {
+        try {
+            return XContentHelper.convertToMap(XContentType.JSON.xContent(), json, false);
+        } catch (Exception ignored) {
+            // Best-effort: an unparseable document contributes no statistics.
+            // NOTE(review): such a file is indistinguishable from an empty one in the response.
+            return Map.of();
+        }
+    }
+
+    /**
+     * Folds the column entries of one analyzed file into {@code fieldStatsMap}.
+     * Columns nested under {@code row_groups} are preferred; the flat top-level {@code columns}
+     * key is only consulted when no nested column list exists, so a document carrying both
+     * shapes is not counted twice.
+     */
+    private static void aggregateFields(Map<String, Object> parsed, Map<String, FieldStats> fieldStatsMap) {
+        boolean sawNestedColumns = false;
+        // Aggregate columns from row_groups (the enhanced Rust output nests columns inside row_groups)
+        if (parsed.get("row_groups") instanceof List<?> rowGroups) {
+            for (Object rgObj : rowGroups) {
+                if (rgObj instanceof Map<?, ?> rg && rg.get("columns") instanceof List<?> nested) {
+                    sawNestedColumns = true;
+                    aggregateColumnsFromList(nested, fieldStatsMap);
+                }
+            }
+        }
+        // Flat "columns" key kept for backward compatibility with the older output shape
+        if (sawNestedColumns == false && parsed.get("columns") instanceof List<?> flat) {
+            aggregateColumnsFromList(flat, fieldStatsMap);
+        }
+    }
+
+    /**
+     * Adds every well-formed column entry of {@code columns} to the per-field accumulators.
+     * Entries that are not maps, or whose {@code name} is not a string, are skipped.
+     */
+    private static void aggregateColumnsFromList(List<?> columns, Map<String, FieldStats> fieldStatsMap) {
+        for (Object colObj : columns) {
+            if (!(colObj instanceof Map<?, ?> col)) {
+                continue;
+            }
+            if (!(col.get("name") instanceof String name)) {
+                continue;
+            }
+            FieldStats stats = fieldStatsMap.computeIfAbsent(name, k -> new FieldStats());
+            if (col.get("type") instanceof String type) {
+                stats.type = type;
+            }
+            if (col.get("compression") instanceof String compression) {
+                stats.compression = compression;
+            }
+            if (col.get("encodings") instanceof List<?> encodings) {
+                for (Object enc : encodings) {
+                    stats.encodings.add(String.valueOf(enc));
+                }
+            }
+            stats.totalCompressedBytes += toLong(col.get("compressed_bytes"));
+            stats.totalUncompressedBytes += toLong(col.get("uncompressed_bytes"));
+            stats.nullCount += toLong(col.get("null_count"));
+            stats.numValues += toLong(col.get("num_values"));
+            // Bloom filter aggregation
+            if (Boolean.TRUE.equals(col.get("has_bloom_filter"))) {
+                stats.hasBloomFilter = true;
+            }
+            stats.bloomFilterSize += toLong(col.get("bloom_filter_size"));
+            // Stats aggregation (min/max/null_count/distinct_count)
+            if (col.get("stats") instanceof Map<?, ?> colStats) {
+                // NOTE(review): min/max are merged with String.compareTo, i.e. lexicographically; for
+                // numeric columns that need not match numeric order. Confirm the native encoding.
+                String min = asString(colStats.get("min"));
+                String max = asString(colStats.get("max"));
+                if (min != null && (stats.statsMin == null || min.compareTo(stats.statsMin) < 0)) {
+                    stats.statsMin = min;
+                }
+                if (max != null && (stats.statsMax == null || max.compareTo(stats.statsMax) > 0)) {
+                    stats.statsMax = max;
+                }
+                stats.statsNullCount += toLong(colStats.get("null_count"));
+                // NOTE(review): per-chunk distinct counts are summed, which is an upper bound rather
+                // than the distinct count of the union.
+                Object distinctObj = colStats.get("distinct_count");
+                if (distinctObj == null) {
+                    // One unknown count makes the merged value unknown for good.
+                    stats.distinctCountIsNull = true;
+                } else if (!stats.distinctCountIsNull) {
+                    stats.statsDistinctCount += toLong(distinctObj);
+                }
+            }
+            // Page stats — aggregate total num_pages
+            if (col.get("page_stats") instanceof Map<?, ?> pageStats) {
+                stats.totalNumPages += toLong(pageStats.get("num_pages"));
+            }
+        }
+    }
+
+    /** Returns {@code obj} as a long when it is a {@link Number}, otherwise {@code 0}. */
+    private static long toLong(Object obj) {
+        if (obj instanceof Number n) {
+            return n.longValue();
+        }
+        return 0L;
+    }
+
+    /** Returns {@code null} for {@code null}, otherwise the value's string form. */
+    private static String asString(Object obj) {
+        return obj == null ? null : String.valueOf(obj);
+    }
+
+    /** Writes the {@code fields} array: one object per column name with its merged statistics. */
+    private static void buildFieldsArray(XContentBuilder builder, Map<String, FieldStats> fieldStatsMap) throws IOException {
+        builder.startArray("fields");
+        for (Map.Entry<String, FieldStats> entry : fieldStatsMap.entrySet()) {
+            FieldStats stats = entry.getValue();
+            builder.startObject();
+            builder.field("name", entry.getKey());
+            builder.field("type", stats.type);
+            builder.field("compression", stats.compression);
+            builder.array("encodings", stats.encodings.toArray(new String[0]));
+            builder.field("total_compressed_bytes", stats.totalCompressedBytes);
+            builder.field("total_uncompressed_bytes", stats.totalUncompressedBytes);
+            // Uncompressed-to-compressed ratio, rounded to two decimals; 0.0 when nothing was compressed.
+            double ratio = stats.totalCompressedBytes > 0 ? (double) stats.totalUncompressedBytes / stats.totalCompressedBytes : 0.0;
+            builder.field("compression_ratio", Math.round(ratio * 100.0) / 100.0);
+            builder.field("null_count", stats.nullCount);
+            builder.field("num_values", stats.numValues);
+            builder.field("has_bloom_filter", stats.hasBloomFilter);
+            builder.field("bloom_filter_size", stats.bloomFilterSize);
+            // Stats object with merged min/max/null_count/distinct_count
+            builder.startObject("stats");
+            builder.field("min", stats.statsMin);
+            builder.field("max", stats.statsMax);
+            builder.field("null_count", stats.statsNullCount);
+            if (stats.distinctCountIsNull) {
+                builder.nullField("distinct_count");
+            } else {
+                builder.field("distinct_count", stats.statsDistinctCount);
+            }
+            builder.endObject();
+            builder.field("total_num_pages", stats.totalNumPages);
+            builder.endObject();
+        }
+        builder.endArray();
+    }
+
+    /** Writes the {@code files} array: per-file footer size, sorting columns and row groups. */
+    private static void buildFilesArray(XContentBuilder builder, List<Map<String, Object>> fileDetails) throws IOException {
+        builder.startArray("files");
+        for (Map<String, Object> fileDetail : fileDetails) {
+            builder.startObject();
+            builder.field("file", fileDetail.get("file"));
+            if (fileDetail.get("detail") instanceof Map<?, ?> detailMap) {
+                // Include footer_size per file
+                if (detailMap.containsKey("footer_size")) {
+                    builder.field("footer_size", detailMap.get("footer_size"));
+                }
+                // Include sorting_columns per file
+                if (detailMap.get("sorting_columns") instanceof List<?> sc) {
+                    builder.field("sorting_columns", sc);
+                }
+                // Include full row_groups with page_stats
+                if (detailMap.get("row_groups") instanceof List<?> rgs) {
+                    builder.startArray("row_groups");
+                    for (Object rg : rgs) {
+                        builder.value(rg);
+                    }
+                    builder.endArray();
+                }
+            }
+            builder.endObject();
+        }
+        builder.endArray();
+    }
+
+    /** Mutable per-column accumulator merged across row groups, files and shards. */
+    private static class FieldStats {
+        String type = "UNKNOWN";
+        String compression = "UNKNOWN";
+        // Sorted so the emitted "encodings" array has a stable order.
+        Set<String> encodings = new TreeSet<>();
+        long totalCompressedBytes;
+        long totalUncompressedBytes;
+        long nullCount;
+        long numValues;
+        boolean hasBloomFilter;
+        long bloomFilterSize;
+        // Stats aggregation
+        String statsMin;
+        String statsMax;
+        long statsNullCount;
+        long statsDistinctCount;
+        boolean distinctCountIsNull;
+        long totalNumPages;
+    }
+}
diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetRegistryInitializer.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetRegistryInitializer.java
new file mode 100644
index 0000000000000..bb5119ed5c851
--- /dev/null
+++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/action/ParquetRegistryInitializer.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.parquet.action;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.inject.Inject;
+import org.opensearch.indices.IndicesService;
+
+/**
+ * Guice-managed eager singleton that captures {@link IndicesService} for the
+ * Parquet analyze REST action.
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class ParquetRegistryInitializer { + + private static volatile IndicesService indicesService; + + @Inject + public ParquetRegistryInitializer(IndicesService indicesService) { + ParquetRegistryInitializer.indicesService = indicesService; + } + + public static IndicesService getIndicesService() { + return indicesService; + } +} diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/NativeParquetWriter.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/NativeParquetWriter.java index 3cf94b31bb73c..f2f3d976c0e71 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/NativeParquetWriter.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/NativeParquetWriter.java @@ -9,8 +9,10 @@ package org.opensearch.parquet.bridge; import org.opensearch.common.SetOnce; +import org.opensearch.parquet.stats.ParquetShardStats; import java.io.IOException; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -33,6 +35,7 @@ public class NativeParquetWriter { private final AtomicBoolean writerFlushed = new AtomicBoolean(false); private final String filePath; private final SetOnce metadata = new SetOnce<>(); + private final ParquetShardStats stats; private volatile boolean initialized = false; /** @@ -40,9 +43,20 @@ public class NativeParquetWriter { * call {@link #initialize(String, long, ParquetSortConfig, long)} before the first write. * * @param filePath the path to the Parquet file to write + * @param stats shard-level stats collector */ - public NativeParquetWriter(String filePath) { + public NativeParquetWriter(String filePath, ParquetShardStats stats) { this.filePath = filePath; + this.stats = stats; + } + + /** + * Creates a new NativeParquetWriter handle without stats collection. 
+ * + * @param filePath the path to the Parquet file to write + */ + public NativeParquetWriter(String filePath) { + this(filePath, new ParquetShardStats()); } /** @@ -88,7 +102,17 @@ public void write(long arrayAddress, long schemaAddress) throws IOException { if (initialized == false) { throw new IllegalStateException("Writer not initialized: " + filePath); } - RustBridge.write(filePath, arrayAddress, schemaAddress); + long startNanos = System.nanoTime(); + try { + RustBridge.write(filePath, arrayAddress, schemaAddress); + stats.incNativeWriteTotal(); + } catch (IOException e) { + stats.incNativeWriteFailures(); + throw e; + } finally { + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.addNativeWriteTimeMillis(elapsed); + } } /** @@ -102,7 +126,17 @@ public void write(long arrayAddress, long schemaAddress) throws IOException { public ParquetFileMetadata flush() throws IOException { if (writerFlushed.compareAndSet(false, true)) { if (initialized) { - metadata.set(RustBridge.finalizeWriter(filePath)); + long startNanos = System.nanoTime(); + try { + metadata.set(RustBridge.finalizeWriter(filePath)); + stats.incNativeFinalizeTotal(); + } catch (IOException e) { + stats.incNativeFinalizeFailures(); + throw e; + } finally { + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.addNativeFinalizeTimeMillis(elapsed); + } } } return metadata.get(); @@ -118,7 +152,17 @@ public void sync() throws IOException { if (!writerFlushed.get()) { flush(); } - RustBridge.syncToDisk(filePath); + long startNanos = System.nanoTime(); + try { + RustBridge.syncToDisk(filePath); + stats.incNativeSyncTotal(); + } catch (IOException e) { + stats.incNativeSyncFailures(); + throw e; + } finally { + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.addNativeSyncTimeMillis(elapsed); + } } /** diff --git 
a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java index 6b8d9507cdcf4..2f10e635f606a 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/bridge/RustBridge.java @@ -43,6 +43,7 @@ public class RustBridge { private static final MethodHandle MERGE_FILES; private static final MethodHandle FREE_MERGE_RESULT; private static final MethodHandle READ_AS_JSON; + private static final MethodHandle ANALYZE_FILE; static { SymbolLookup lib = NativeLibraryLoader.symbolLookup(); @@ -185,6 +186,21 @@ public class RustBridge { ValueLayout.ADDRESS // out_len ) ); + ANALYZE_FILE = lib.find("parquet_analyze_file") + .map( + symbol -> linker.downcallHandle( + symbol, + FunctionDescriptor.of( + ValueLayout.JAVA_LONG, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, // file + ValueLayout.ADDRESS, // out_buf + ValueLayout.JAVA_LONG, // buf_capacity + ValueLayout.ADDRESS // out_len + ) + ) + ) + .orElse(null); } public static void initLogger() {} @@ -446,6 +462,24 @@ private static java.lang.foreign.MemorySegment marshalBoolList(NativeCall call, return seg; } + /** + * Analyzes a parquet file and returns column-level statistics as a JSON string. 
+     */
+    public static String analyzeFile(String file) throws IOException {
+        if (ANALYZE_FILE == null) {
+            throw new UnsupportedOperationException("parquet_analyze_file not available in native library");
+        }
+        try (var call = new NativeCall()) {
+            var f = call.str(file);
+            // Fixed-size output buffer; its capacity is passed to the native call next to the buffer itself.
+            final int bufSize = 10 * 1024 * 1024; // 10MB
+            var outBuf = call.buf(bufSize);
+            var outLen = call.longOut();
+            call.invokeIO(ANALYZE_FILE, f.segment(), f.len(), outBuf, (long) bufSize, outLen);
+            // Keep the reported length as a long and range-check it before slicing: narrowing to int
+            // first could wrap, and an out-of-range value would otherwise escape from asSlice as an
+            // unchecked IndexOutOfBoundsException instead of the declared IOException.
+            long len = outLen.get(ValueLayout.JAVA_LONG, 0);
+            if (len < 0 || len > bufSize) {
+                throw new IOException(
+                    "parquet_analyze_file reported invalid output length [" + len + "] for [" + file + "], buffer capacity is [" + bufSize + "]"
+                );
+            }
+            return new String(outBuf.asSlice(0, len).toArray(ValueLayout.JAVA_BYTE), StandardCharsets.UTF_8);
+        }
+    }
+
     /**
      * Reads a parquet file and returns its contents as a JSON string.
      */
diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/engine/ParquetIndexingEngine.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/engine/ParquetIndexingEngine.java
index d6d97a57a1d95..9db1d57a017ae 100644
--- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/engine/ParquetIndexingEngine.java
+++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/engine/ParquetIndexingEngine.java
@@ -30,6 +30,7 @@
 import org.opensearch.parquet.memory.ArrowBufferPool;
 import org.opensearch.parquet.merge.NativeParquetMergeStrategy;
 import org.opensearch.parquet.merge.ParquetMergeExecutor;
+import org.opensearch.parquet.stats.ParquetShardStats;
 import org.opensearch.parquet.writer.ParquetDocumentInput;
 import org.opensearch.parquet.writer.ParquetWriter;
 import org.opensearch.threadpool.ThreadPool;
@@ -83,6 +84,7 @@ public class ParquetIndexingEngine implements IndexingExecutionEngine createWriter(WriterConfig config) {
             bufferPool,
             indexSettings,
             threadPool,
-            checksumStrategy
+            checksumStrategy,
+            stats
         );
     }
diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/merge/NativeParquetMergeStrategy.java
b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/merge/NativeParquetMergeStrategy.java index d224976a09cb2..5fa7a423d3406 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/merge/NativeParquetMergeStrategy.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/merge/NativeParquetMergeStrategy.java @@ -23,6 +23,7 @@ import org.opensearch.parquet.bridge.ParquetFileMetadata; import org.opensearch.parquet.bridge.RustBridge; import org.opensearch.parquet.engine.ParquetIndexingEngine; +import org.opensearch.parquet.stats.ParquetShardStats; import java.nio.file.Files; import java.nio.file.Path; @@ -30,6 +31,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; /** * Implements merging of Parquet files. @@ -42,17 +44,20 @@ public class NativeParquetMergeStrategy implements ParquetMergeStrategy { private final String indexName; private final ShardPath shardPath; private final TriConsumer checksumUpdater; + private final ParquetShardStats stats; public NativeParquetMergeStrategy( DataFormat dataFormat, String indexName, ShardPath shardPath, - TriConsumer checksumUpdater + TriConsumer checksumUpdater, + ParquetShardStats stats ) { this.dataFormat = dataFormat; this.indexName = indexName; this.shardPath = shardPath; this.checksumUpdater = checksumUpdater; + this.stats = stats; } @Override @@ -78,6 +83,7 @@ public MergeResult mergeParquetFiles(MergeInput mergeInput) { Path mergedFilePath = ParquetIndexingEngine.buildParquetFilePath(shardPath, writerGeneration, "merged"); String mergedFileName = mergedFilePath.getFileName().toString(); + long startNanos = System.nanoTime(); try { // Merge files in Rust MergeFilesResult merged = RustBridge.mergeParquetFilesInRust(filePaths, mergedFilePath.toString(), indexName); @@ -107,9 +113,18 @@ public MergeResult mergeParquetFiles(MergeInput mergeInput) { ); Map mergedWriterFileSetMap = 
Collections.singletonMap(dataFormat, mergedWriterFileSet); + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.incMergeTotal(); + stats.addMergeTimeMillis(elapsed); + stats.addMergeInputFilesTotal(filePaths.size()); + stats.addMergeOutputRowsTotal(mergeMetadata.numRows()); + return new MergeResult(mergedWriterFileSetMap, rowIdMapping); } catch (Exception exception) { + stats.incMergeFailures(); + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.addMergeTimeMillis(elapsed); logger.error(() -> new ParameterizedMessage("Merge failed while creating merged file [{}]", mergedFilePath), exception); try { Files.deleteIfExists(mergedFilePath); diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/stats/ParquetShardStats.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/stats/ParquetShardStats.java new file mode 100644 index 0000000000000..90fe4cd6672ea --- /dev/null +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/stats/ParquetShardStats.java @@ -0,0 +1,401 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.parquet.stats; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; + +/** + * Shard-level statistics collector for the Parquet data format plugin. 
+ * Uses LongAdder for high-throughput counters and AtomicLong for gauges. + * Serves as both the live collector and the serializable snapshot. + */ +@ExperimentalApi +public class ParquetShardStats implements ToXContentFragment, Writeable { + + // Indexing counters + private final LongAdder docsIndexedTotal = new LongAdder(); + private final LongAdder docsIndexedFailures = new LongAdder(); + private final LongAdder indexTimeMillis = new LongAdder(); + + // VSR Pipeline counters + gauge + private final LongAdder vsrRotationsTotal = new LongAdder(); + private final LongAdder vsrRotationWaitMillis = new LongAdder(); + private final AtomicLong vsrRowsCurrent = new AtomicLong(); + + // Native Write counters + private final LongAdder nativeWriteTotal = new LongAdder(); + private final LongAdder nativeWriteTimeMillis = new LongAdder(); + private final LongAdder nativeWriteFailures = new LongAdder(); + private final LongAdder nativeFinalizeTotal = new LongAdder(); + private final LongAdder nativeFinalizeTimeMillis = new LongAdder(); + private final LongAdder nativeFinalizeFailures = new LongAdder(); + private final LongAdder nativeSyncTotal = new LongAdder(); + private final LongAdder nativeSyncTimeMillis = new LongAdder(); + private final LongAdder nativeSyncFailures = new LongAdder(); + + // Sort counters + private final LongAdder sortTotal = new LongAdder(); + private final LongAdder sortTimeMillis = new LongAdder(); + private final LongAdder sortInMemoryTotal = new LongAdder(); + private final LongAdder sortStreamingTotal = new LongAdder(); + + // Merge counters + private final LongAdder mergeTotal = new LongAdder(); + private final LongAdder mergeTimeMillis = new LongAdder(); + private final LongAdder mergeFailures = new LongAdder(); + private final LongAdder mergeInputFilesTotal = new LongAdder(); + private final LongAdder mergeOutputRowsTotal = new LongAdder(); + + // Rate Limiting counters + private final LongAdder rateLimitPauseTimeMillis = new LongAdder(); + 
private final LongAdder rateLimitBytesWritten = new LongAdder(); + + // Memory gauges + private final AtomicLong arrowAllocatedBytes = new AtomicLong(); + private final AtomicLong arrowMaxBytes = new AtomicLong(); + private final AtomicLong rustWriterMemoryBytes = new AtomicLong(); + + // Background Write counters + private final LongAdder backgroundWriteTotal = new LongAdder(); + private final LongAdder backgroundWriteWaitMillis = new LongAdder(); + private final LongAdder backgroundWriteTimeouts = new LongAdder(); + + public ParquetShardStats() {} + + public ParquetShardStats(StreamInput in) throws IOException { + // Indexing + docsIndexedTotal.add(in.readVLong()); + docsIndexedFailures.add(in.readVLong()); + indexTimeMillis.add(in.readVLong()); + + // VSR + vsrRotationsTotal.add(in.readVLong()); + vsrRotationWaitMillis.add(in.readVLong()); + vsrRowsCurrent.set(in.readVLong()); + + // Native Write + nativeWriteTotal.add(in.readVLong()); + nativeWriteTimeMillis.add(in.readVLong()); + nativeWriteFailures.add(in.readVLong()); + nativeFinalizeTotal.add(in.readVLong()); + nativeFinalizeTimeMillis.add(in.readVLong()); + nativeFinalizeFailures.add(in.readVLong()); + nativeSyncTotal.add(in.readVLong()); + nativeSyncTimeMillis.add(in.readVLong()); + nativeSyncFailures.add(in.readVLong()); + + // Sort + sortTotal.add(in.readVLong()); + sortTimeMillis.add(in.readVLong()); + sortInMemoryTotal.add(in.readVLong()); + sortStreamingTotal.add(in.readVLong()); + + // Merge + mergeTotal.add(in.readVLong()); + mergeTimeMillis.add(in.readVLong()); + mergeFailures.add(in.readVLong()); + mergeInputFilesTotal.add(in.readVLong()); + mergeOutputRowsTotal.add(in.readVLong()); + + // Rate Limiting + rateLimitPauseTimeMillis.add(in.readVLong()); + rateLimitBytesWritten.add(in.readVLong()); + + // Memory + arrowAllocatedBytes.set(in.readVLong()); + arrowMaxBytes.set(in.readVLong()); + rustWriterMemoryBytes.set(in.readVLong()); + + // Background Write + 
backgroundWriteTotal.add(in.readVLong()); + backgroundWriteWaitMillis.add(in.readVLong()); + backgroundWriteTimeouts.add(in.readVLong()); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // Indexing + out.writeVLong(docsIndexedTotal.sum()); + out.writeVLong(docsIndexedFailures.sum()); + out.writeVLong(indexTimeMillis.sum()); + + // VSR + out.writeVLong(vsrRotationsTotal.sum()); + out.writeVLong(vsrRotationWaitMillis.sum()); + out.writeVLong(vsrRowsCurrent.get()); + + // Native Write + out.writeVLong(nativeWriteTotal.sum()); + out.writeVLong(nativeWriteTimeMillis.sum()); + out.writeVLong(nativeWriteFailures.sum()); + out.writeVLong(nativeFinalizeTotal.sum()); + out.writeVLong(nativeFinalizeTimeMillis.sum()); + out.writeVLong(nativeFinalizeFailures.sum()); + out.writeVLong(nativeSyncTotal.sum()); + out.writeVLong(nativeSyncTimeMillis.sum()); + out.writeVLong(nativeSyncFailures.sum()); + + // Sort + out.writeVLong(sortTotal.sum()); + out.writeVLong(sortTimeMillis.sum()); + out.writeVLong(sortInMemoryTotal.sum()); + out.writeVLong(sortStreamingTotal.sum()); + + // Merge + out.writeVLong(mergeTotal.sum()); + out.writeVLong(mergeTimeMillis.sum()); + out.writeVLong(mergeFailures.sum()); + out.writeVLong(mergeInputFilesTotal.sum()); + out.writeVLong(mergeOutputRowsTotal.sum()); + + // Rate Limiting + out.writeVLong(rateLimitPauseTimeMillis.sum()); + out.writeVLong(rateLimitBytesWritten.sum()); + + // Memory + out.writeVLong(arrowAllocatedBytes.get()); + out.writeVLong(arrowMaxBytes.get()); + out.writeVLong(rustWriterMemoryBytes.get()); + + // Background Write + out.writeVLong(backgroundWriteTotal.sum()); + out.writeVLong(backgroundWriteWaitMillis.sum()); + out.writeVLong(backgroundWriteTimeouts.sum()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // Indexing + builder.startObject("indexing"); + builder.field("docs_indexed_total", docsIndexedTotal.sum()); + 
builder.field("docs_indexed_failures", docsIndexedFailures.sum()); + builder.field("index_time_millis", indexTimeMillis.sum()); + builder.endObject(); + + // VSR + builder.startObject("vsr"); + builder.field("vsr_rotations_total", vsrRotationsTotal.sum()); + builder.field("vsr_rotation_wait_millis", vsrRotationWaitMillis.sum()); + builder.field("vsr_rows_current", vsrRowsCurrent.get()); + builder.endObject(); + + // Native Write + builder.startObject("native_write"); + builder.field("native_write_total", nativeWriteTotal.sum()); + builder.field("native_write_time_millis", nativeWriteTimeMillis.sum()); + builder.field("native_write_failures", nativeWriteFailures.sum()); + builder.field("native_finalize_total", nativeFinalizeTotal.sum()); + builder.field("native_finalize_time_millis", nativeFinalizeTimeMillis.sum()); + builder.field("native_finalize_failures", nativeFinalizeFailures.sum()); + builder.field("native_sync_total", nativeSyncTotal.sum()); + builder.field("native_sync_time_millis", nativeSyncTimeMillis.sum()); + builder.field("native_sync_failures", nativeSyncFailures.sum()); + builder.endObject(); + + // Sort + builder.startObject("sort"); + builder.field("sort_total", sortTotal.sum()); + builder.field("sort_time_millis", sortTimeMillis.sum()); + builder.field("sort_in_memory_total", sortInMemoryTotal.sum()); + builder.field("sort_streaming_total", sortStreamingTotal.sum()); + builder.endObject(); + + // Merge + builder.startObject("merge"); + builder.field("merge_total", mergeTotal.sum()); + builder.field("merge_time_millis", mergeTimeMillis.sum()); + builder.field("merge_failures", mergeFailures.sum()); + builder.field("merge_input_files_total", mergeInputFilesTotal.sum()); + builder.field("merge_output_rows_total", mergeOutputRowsTotal.sum()); + builder.endObject(); + + // Rate Limiting + builder.startObject("rate_limit"); + builder.field("rate_limit_pause_time_millis", rateLimitPauseTimeMillis.sum()); + builder.field("rate_limit_bytes_written", 
rateLimitBytesWritten.sum());
+        builder.endObject();
+
+        // Memory (gauges: each field reports the last value stored through its setter)
+        builder.startObject("memory");
+        builder.field("arrow_allocated_bytes", arrowAllocatedBytes.get());
+        builder.field("arrow_max_bytes", arrowMaxBytes.get());
+        builder.field("rust_writer_memory_bytes", rustWriterMemoryBytes.get());
+        builder.endObject();
+
+        // Background Write (counters: each field reports the running sum of its LongAdder)
+        builder.startObject("background_write");
+        builder.field("background_write_total", backgroundWriteTotal.sum());
+        builder.field("background_write_wait_millis", backgroundWriteWaitMillis.sum());
+        builder.field("background_write_timeouts", backgroundWriteTimeouts.sum());
+        builder.endObject();
+
+        return builder;
+    }
+
+    /**
+     * Returns this same live instance; no copy is made.
+     * <p>
+     * The returned object keeps changing as writers update the shard, so values are only
+     * fixed at the moment they are read by {@code writeTo} or {@code toXContent}.
+     * NOTE(review): {@code LongAdder.sum()} is documented as not being an atomic snapshot
+     * under concurrent updates, so fields serialized together may not be mutually
+     * consistent. Callers that need a frozen view must copy the values themselves.
+     *
+     * @return {@code this}
+     */
+    public ParquetShardStats snapshot() {
+        return this;
+    }
+
+    // --- Indexing methods --- (counters: values are only ever added; this class has no reset)
+
+    public void addDocsIndexed(long n) {
+        docsIndexedTotal.add(n);
+    }
+
+    public void incDocsIndexedFailures() {
+        docsIndexedFailures.increment();
+    }
+
+    public void addIndexTimeMillis(long ms) {
+        indexTimeMillis.add(ms);
+    }
+
+    // --- VSR Pipeline methods --- (two counters plus the vsrRowsCurrent gauge, which is overwritten on each set)
+
+    public void incVsrRotations() {
+        vsrRotationsTotal.increment();
+    }
+
+    public void addVsrRotationWaitMillis(long ms) {
+        vsrRotationWaitMillis.add(ms);
+    }
+
+    public void setVsrRowsCurrent(long rows) {
+        vsrRowsCurrent.set(rows);
+    }
+
+    // --- Native Write methods --- (counters for native write and finalize: total, time, failures)
+
+    public void incNativeWriteTotal() {
+        nativeWriteTotal.increment();
+    }
+
+    public void addNativeWriteTimeMillis(long ms) {
+        nativeWriteTimeMillis.add(ms);
+    }
+
+    public void incNativeWriteFailures() {
+        nativeWriteFailures.increment();
+    }
+
+    public void incNativeFinalizeTotal() {
+        nativeFinalizeTotal.increment();
+    }
+
+    public void addNativeFinalizeTimeMillis(long ms) {
+        nativeFinalizeTimeMillis.add(ms);
+    }
+
+    public void incNativeFinalizeFailures() {
+        nativeFinalizeFailures.increment();
+    }
+ + public void incNativeSyncTotal() { + nativeSyncTotal.increment(); + } + + public void addNativeSyncTimeMillis(long ms) { + nativeSyncTimeMillis.add(ms); + } + + public void incNativeSyncFailures() { + nativeSyncFailures.increment(); + } + + // --- Sort methods --- + + public void incSortTotal() { + sortTotal.increment(); + } + + public void addSortTimeMillis(long ms) { + sortTimeMillis.add(ms); + } + + public void incSortInMemoryTotal() { + sortInMemoryTotal.increment(); + } + + public void incSortStreamingTotal() { + sortStreamingTotal.increment(); + } + + // --- Merge methods --- + + public void incMergeTotal() { + mergeTotal.increment(); + } + + public void addMergeTimeMillis(long ms) { + mergeTimeMillis.add(ms); + } + + public void incMergeFailures() { + mergeFailures.increment(); + } + + public void addMergeInputFilesTotal(long n) { + mergeInputFilesTotal.add(n); + } + + public void addMergeOutputRowsTotal(long n) { + mergeOutputRowsTotal.add(n); + } + + // --- Rate Limiting methods --- + + public void addRateLimitPauseTimeMillis(long ms) { + rateLimitPauseTimeMillis.add(ms); + } + + public void addRateLimitBytesWritten(long bytes) { + rateLimitBytesWritten.add(bytes); + } + + // --- Memory gauge methods --- + + public void setArrowAllocatedBytes(long bytes) { + arrowAllocatedBytes.set(bytes); + } + + public void setArrowMaxBytes(long bytes) { + arrowMaxBytes.set(bytes); + } + + public void setRustWriterMemoryBytes(long bytes) { + rustWriterMemoryBytes.set(bytes); + } + + // --- Background Write methods --- + + public void incBackgroundWriteTotal() { + backgroundWriteTotal.increment(); + } + + public void addBackgroundWriteWaitMillis(long ms) { + backgroundWriteWaitMillis.add(ms); + } + + public void incBackgroundWriteTimeouts() { + backgroundWriteTimeouts.increment(); + } +} diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/vsr/VSRManager.java 
b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/vsr/VSRManager.java index 74e7c3c65cac5..7be98c7b1f4aa 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/vsr/VSRManager.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/vsr/VSRManager.java @@ -26,6 +26,7 @@ import org.opensearch.parquet.fields.ArrowFieldRegistry; import org.opensearch.parquet.fields.ParquetField; import org.opensearch.parquet.memory.ArrowBufferPool; +import org.opensearch.parquet.stats.ParquetShardStats; import org.opensearch.parquet.writer.FieldValuePair; import org.opensearch.parquet.writer.ParquetDocumentInput; import org.opensearch.threadpool.ThreadPool; @@ -68,6 +69,7 @@ public class VSRManager implements AutoCloseable { private final ThreadPool threadPool; private final String vsrRotationThread; private final long writerGeneration; + private final ParquetShardStats stats; private volatile Future pendingWrite; private NativeParquetWriter writer; private final int ROTATION_TIMEOUT = 120; @@ -76,6 +78,22 @@ public class VSRManager implements AutoCloseable { /** * Creates a new VSRManager with asynchronous background writes (production default). */ + public VSRManager( + String fileName, + IndexSettings indexSettings, + Schema schema, + ArrowBufferPool bufferPool, + int maxRowsPerVSR, + ThreadPool threadPool, + long writerGeneration, + ParquetShardStats stats + ) { + this(fileName, indexSettings, schema, bufferPool, maxRowsPerVSR, threadPool, true, writerGeneration, stats); + } + + /** + * Creates a new VSRManager with asynchronous background writes and no stats collection. 
+ */ public VSRManager( String fileName, IndexSettings indexSettings, @@ -85,7 +103,23 @@ public VSRManager( ThreadPool threadPool, long writerGeneration ) { - this(fileName, indexSettings, schema, bufferPool, maxRowsPerVSR, threadPool, true, writerGeneration); + this(fileName, indexSettings, schema, bufferPool, maxRowsPerVSR, threadPool, true, writerGeneration, new ParquetShardStats()); + } + + /** + * Creates a new VSRManager without stats collection. + */ + public VSRManager( + String fileName, + IndexSettings indexSettings, + Schema schema, + ArrowBufferPool bufferPool, + int maxRowsPerVSR, + ThreadPool threadPool, + boolean runAsync, + long writerGeneration + ) { + this(fileName, indexSettings, schema, bufferPool, maxRowsPerVSR, threadPool, runAsync, writerGeneration, new ParquetShardStats()); } /** @@ -100,6 +134,7 @@ public VSRManager( * @param runAsync if true, frozen VSR writes run on the background thread pool; * if false, they run on the calling thread (for benchmarks/tests) * @param writerGeneration the writer generation to store in file metadata + * @param stats shard-level stats collector */ public VSRManager( String fileName, @@ -109,16 +144,18 @@ public VSRManager( int maxRowsPerVSR, ThreadPool threadPool, boolean runAsync, - long writerGeneration + long writerGeneration, + ParquetShardStats stats ) { this.fileName = fileName; this.indexSettings = indexSettings; this.writerGeneration = writerGeneration; + this.stats = stats; this.vsrPool = new VSRPool("pool-" + fileName, schema, bufferPool, maxRowsPerVSR); this.threadPool = threadPool; this.vsrRotationThread = runAsync ? 
ParquetDataFormatPlugin.PARQUET_THREAD_POOL_NAME : ThreadPool.Names.SAME; this.managedVSR.set(vsrPool.getActiveVSR()); - this.writer = new NativeParquetWriter(fileName); + this.writer = new NativeParquetWriter(fileName, stats); } /** @@ -166,6 +203,7 @@ public void maybeRotateActiveVSR() throws IOException { if (rotated == false) { return; } + stats.incVsrRotations(); logger.debug("VSR rotation occurred for {}", fileName); ManagedVSR frozenVSR = vsrPool.getFrozenVSR(); if (frozenVSR != null) { @@ -264,13 +302,16 @@ private void awaitPendingWrite(long timeoutSeconds, boolean ignoreTimeout) throw if (pendingWrite == null) { return; } + long startNanos = System.nanoTime(); try { if (timeoutSeconds > 0) { pendingWrite.get(timeoutSeconds, TimeUnit.SECONDS); } else { pendingWrite.get(); } + stats.incBackgroundWriteTotal(); } catch (TimeoutException e) { + stats.incBackgroundWriteTimeouts(); if (ignoreTimeout) { logger.warn("Timed out waiting for background VSR write for {}", fileName); } else { @@ -279,6 +320,8 @@ private void awaitPendingWrite(long timeoutSeconds, boolean ignoreTimeout) throw } catch (Exception e) { throw new IOException("Background VSR write failed for " + fileName, e.getCause()); } finally { + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.addBackgroundWriteWaitMillis(elapsed); pendingWrite = null; } } diff --git a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/writer/ParquetWriter.java b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/writer/ParquetWriter.java index 1dc96ed5a2857..81e97bcef08b7 100644 --- a/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/writer/ParquetWriter.java +++ b/sandbox/plugins/parquet-data-format/src/main/java/org/opensearch/parquet/writer/ParquetWriter.java @@ -21,11 +21,13 @@ import org.opensearch.parquet.bridge.ParquetFileMetadata; import org.opensearch.parquet.engine.ParquetDataFormat; import 
org.opensearch.parquet.memory.ArrowBufferPool; +import org.opensearch.parquet.stats.ParquetShardStats; import org.opensearch.parquet.vsr.VSRManager; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; import java.nio.file.Path; +import java.util.concurrent.TimeUnit; /** * Parquet file writer integrating OpenSearch's {@link Writer} interface with the VSR batching layer. @@ -47,6 +49,7 @@ public class ParquetWriter implements Writer { private final ParquetDataFormat dataFormat; private final VSRManager vsrManager; private final FormatChecksumStrategy checksumStrategy; + private final ParquetShardStats stats; private long mappingVersion; /** @@ -61,6 +64,7 @@ public class ParquetWriter implements Writer { * @param indexSettings index settings for writer configuration * @param threadPool the thread pool for background native writes * @param checksumStrategy strategy to register pre-computed checksums on + * @param stats shard-level stats collector */ public ParquetWriter( String file, @@ -71,13 +75,15 @@ public ParquetWriter( ArrowBufferPool bufferPool, IndexSettings indexSettings, ThreadPool threadPool, - FormatChecksumStrategy checksumStrategy + FormatChecksumStrategy checksumStrategy, + ParquetShardStats stats ) { this.file = file; this.writerGeneration = writerGeneration; this.mappingVersion = mappingVersion; this.dataFormat = dataFormat; this.checksumStrategy = checksumStrategy; + this.stats = stats; this.vsrManager = new VSRManager( file, indexSettings, @@ -85,14 +91,50 @@ public ParquetWriter( bufferPool, ParquetSettings.MAX_ROWS_PER_VSR.get(indexSettings.getSettings()), threadPool, - writerGeneration + writerGeneration, + stats + ); + } + + /** + * Creates a new ParquetWriter without stats collection. 
+ */ + public ParquetWriter( + String file, + long writerGeneration, + long mappingVersion, + ParquetDataFormat dataFormat, + Schema schema, + ArrowBufferPool bufferPool, + IndexSettings indexSettings, + ThreadPool threadPool, + FormatChecksumStrategy checksumStrategy + ) { + this( + file, + writerGeneration, + mappingVersion, + dataFormat, + schema, + bufferPool, + indexSettings, + threadPool, + checksumStrategy, + new ParquetShardStats() ); } @Override public WriteResult addDoc(ParquetDocumentInput d) throws IOException { - vsrManager.addDocument(d); - return new WriteResult.Success(1L, 1L, 1L); + long startNanos = System.nanoTime(); + try { + vsrManager.addDocument(d); + return new WriteResult.Success(1L, 1L, 1L); + } finally { + long elapsed = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + stats.addDocsIndexed(1); + stats.addIndexTimeMillis(elapsed); + } } @Override diff --git a/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs b/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs index ab53939e6c596..faf9b07ea0116 100644 --- a/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs +++ b/sandbox/plugins/parquet-data-format/src/main/rust/src/ffm.rs @@ -483,3 +483,310 @@ pub unsafe extern "C" fn parquet_read_as_json( *out_len = bytes.len() as i64; Ok(0) } + +// --------------------------------------------------------------------------- +// Parquet file analysis +// --------------------------------------------------------------------------- + +/// Analyzes a parquet file and returns detailed metadata as JSON including +/// row group information, column statistics, compression, encodings, +/// bloom filter info, sort order, page-level stats, and footer size. +/// The JSON bytes are written into `out_buf`, actual length into `out_len`. +/// Returns 0 on success. 
+#[ffm_safe] +#[no_mangle] +pub unsafe extern "C" fn parquet_analyze_file( + file_ptr: *const u8, + file_len: i64, + out_buf: *mut u8, + buf_capacity: i64, + out_len: *mut i64, +) -> i64 { + use parquet::file::metadata::ParquetMetaDataReader; + + let filename = str_from_raw(file_ptr, file_len) + .map_err(|e| format!("parquet_analyze_file: {}", e))?.to_string(); + + let file = std::fs::File::open(&filename) + .map_err(|e| format!("Failed to open {}: {}", filename, e))?; + let file_size = file.metadata() + .map(|m| m.len() as i64) + .unwrap_or(-1); + + // Use ParquetMetaDataReader to load page index (column_index + offset_index) + let metadata = ParquetMetaDataReader::new() + .with_page_indexes(true) + .parse_and_finish(&file) + .map_err(|e| format!("Failed to read parquet metadata: {}", e))?; + let file_meta = metadata.file_metadata(); + + // Compute footer size: file_size - (data start + total data) + let footer_size = if metadata.num_row_groups() > 0 { + let first_rg = metadata.row_group(0); + let total_data: i64 = (0..metadata.num_row_groups()) + .map(|i| metadata.row_group(i).compressed_size()) + .sum(); + let first_data_offset = first_rg.column(0).data_page_offset(); + file_size - (first_data_offset + total_data) + } else { + -1 + }; + + // Extract sorting columns from first row group (file-level sort order) + let sorting_columns = metadata.row_group(0).sorting_columns().map(|cols| { + let rg = metadata.row_group(0); + cols.iter().map(|sc| { + let col_name = if (sc.column_idx as usize) < rg.num_columns() { + rg.column(sc.column_idx as usize).column_path().to_string() + } else { + format!("column_{}", sc.column_idx) + }; + serde_json::json!({ + "column_idx": sc.column_idx, + "column_name": col_name, + "descending": sc.descending, + "nulls_first": sc.nulls_first, + }) + }).collect::>() + }); + + // Page-level indexes + let col_index = metadata.column_index(); + let off_index = metadata.offset_index(); + + let mut row_groups = Vec::new(); + for rg_idx in 
0..metadata.num_row_groups() { + let rg = metadata.row_group(rg_idx); + let mut columns = Vec::new(); + for col_idx in 0..rg.num_columns() { + let col = rg.column(col_idx); + + // Column statistics (min/max/null_count/distinct_count) + let stats = col.statistics().map(|s| { + let min_val = s.min_bytes_opt().map(|b| format_stat_bytes(b, col.column_type())); + let max_val = s.max_bytes_opt().map(|b| format_stat_bytes(b, col.column_type())); + serde_json::json!({ + "min": min_val, + "max": max_val, + "null_count": s.null_count_opt(), + "distinct_count": s.distinct_count_opt(), + "min_is_exact": s.min_is_exact(), + "max_is_exact": s.max_is_exact(), + }) + }); + + // Bloom filter info + let has_bloom_filter = col.bloom_filter_offset().is_some(); + let bloom_filter_size = col.bloom_filter_length().unwrap_or(0) as i64; + + // Page-level stats for this column + let page_stats = build_page_stats(col_index, off_index, rg_idx, col_idx, col.column_type()); + + let null_count = col.statistics() + .and_then(|s| s.null_count_opt()) + .map(|n| n as i64) + .unwrap_or(-1); + + columns.push(serde_json::json!({ + "name": col.column_path().to_string(), + "type": format!("{:?}", col.column_type()), + "compression": format!("{:?}", col.compression()), + "encodings": col.encodings().map(|e| format!("{:?}", e)).collect::>(), + "compressed_bytes": col.compressed_size(), + "uncompressed_bytes": col.uncompressed_size(), + "null_count": null_count, + "num_values": col.num_values(), + "has_bloom_filter": has_bloom_filter, + "bloom_filter_size": bloom_filter_size, + "stats": stats, + "page_stats": page_stats, + })); + } + + // Per-row-group sorting columns + let rg_sorting = rg.sorting_columns().map(|cols| { + cols.iter().map(|sc| { + let col_name = if (sc.column_idx as usize) < rg.num_columns() { + rg.column(sc.column_idx as usize).column_path().to_string() + } else { + format!("column_{}", sc.column_idx) + }; + serde_json::json!({ + "column_idx": sc.column_idx, + "column_name": col_name, + 
"descending": sc.descending, + "nulls_first": sc.nulls_first, + }) + }).collect::>() + }); + + row_groups.push(serde_json::json!({ + "ordinal": rg_idx, + "num_rows": rg.num_rows(), + "total_compressed_bytes": rg.compressed_size(), + "total_uncompressed_bytes": rg.total_byte_size(), + "sorting_columns": rg_sorting, + "columns": columns, + })); + } + + let result = serde_json::json!({ + "num_row_groups": metadata.num_row_groups(), + "num_rows": file_meta.num_rows(), + "version": file_meta.version(), + "created_by": file_meta.created_by().unwrap_or("unknown"), + "footer_size": footer_size, + "sorting_columns": sorting_columns, + "row_groups": row_groups, + }); + + let json_str = serde_json::to_string(&result) + .map_err(|e| format!("JSON serialization failed: {}", e))?; + let bytes = json_str.as_bytes(); + if bytes.len() > buf_capacity as usize { + return Err(format!("JSON output ({} bytes) exceeds buffer capacity ({})", bytes.len(), buf_capacity)); + } + std::ptr::copy_nonoverlapping(bytes.as_ptr(), out_buf, bytes.len()); + *out_len = bytes.len() as i64; + Ok(0) +} + +/// Formats raw statistics bytes into a human-readable string based on physical type. 
+fn format_stat_bytes(bytes: &[u8], physical_type: parquet::basic::Type) -> String {
+    use parquet::basic::Type;
+
+    // Leading 4 / 8 bytes as fixed-size arrays; None when the buffer is too short.
+    let head4 = || -> Option<[u8; 4]> { bytes.get(..4)?.try_into().ok() };
+    let head8 = || -> Option<[u8; 8]> { bytes.get(..8)?.try_into().ok() };
+    // Fallback rendering for anything that cannot be decoded as its physical type.
+    let hex = || bytes_to_hex(bytes);
+
+    match physical_type {
+        // Only the first byte is inspected; an empty buffer renders as "null".
+        Type::BOOLEAN => match bytes.first() {
+            Some(flag) => (*flag != 0).to_string(),
+            None => "null".to_string(),
+        },
+        // Numeric types are decoded little-endian from the leading bytes.
+        Type::INT32 => head4().map_or_else(hex, |raw| i32::from_le_bytes(raw).to_string()),
+        Type::INT64 => head8().map_or_else(hex, |raw| i64::from_le_bytes(raw).to_string()),
+        Type::FLOAT => head4().map_or_else(hex, |raw| f32::from_le_bytes(raw).to_string()),
+        Type::DOUBLE => head8().map_or_else(hex, |raw| f64::from_le_bytes(raw).to_string()),
+        // Byte arrays are shown as text when they are valid UTF-8, otherwise as hex.
+        Type::BYTE_ARRAY | Type::FIXED_LEN_BYTE_ARRAY => match std::str::from_utf8(bytes) {
+            Ok(text) => text.to_owned(),
+            Err(_) => hex(),
+        },
+        Type::INT96 => hex(),
+    }
+}
+
+/// Converts bytes to hex string without external dependency.
+fn bytes_to_hex(bytes: &[u8]) -> String {
+    use std::fmt::Write;
+
+    // Two lowercase hex digits per input byte.
+    let mut out = String::with_capacity(bytes.len() * 2);
+    for byte in bytes {
+        // Writing into a String cannot fail.
+        let _ = write!(out, "{:02x}", byte);
+    }
+    out
+}
+
+/// Builds page-level statistics for a column from column_index and offset_index.
+///
+/// Returns `None` when no column index entry exists for (`rg_idx`, `col_idx`) or when it
+/// reports zero pages. The offset index is optional: when absent, or shorter than the
+/// column index, pages are emitted without `first_row_index` / `compressed_size`.
+fn build_page_stats(
+    col_index: Option<&parquet::file::metadata::ParquetColumnIndex>,
+    off_index: Option<&parquet::file::metadata::ParquetOffsetIndex>,
+    rg_idx: usize,
+    col_idx: usize,
+    physical_type: parquet::basic::Type,
+) -> Option<serde_json::Value> {
+    let ci = col_index?.get(rg_idx)?.get(col_idx)?;
+
+    let num_pages = ci.num_pages() as usize;
+    if num_pages == 0 {
+        return None;
+    }
+
+    let boundary_order = ci.get_boundary_order().map(|bo| format!("{:?}", bo))
+        .unwrap_or_else(|| "UNORDERED".to_string());
+
+    // Resolve this column's page locations once instead of once per page.
+    let locations = off_index
+        .and_then(|o| o.get(rg_idx))
+        .and_then(|r| r.get(col_idx))
+        .map(|oi_meta| oi_meta.page_locations());
+
+    let pages: Vec<serde_json::Value> = (0..num_pages).map(|page_idx| {
+        let mut page_json = serde_json::json!({
+            "page_idx": page_idx,
+            "min": format_page_min(ci, page_idx, physical_type),
+            "max": format_page_max(ci, page_idx, physical_type),
+            "null_count": ci.null_count(page_idx),
+        });
+
+        // Location fields are added only when the offset index covers this page.
+        if let Some(loc) = locations.and_then(|all| all.get(page_idx)) {
+            page_json["first_row_index"] = serde_json::json!(loc.first_row_index);
+            page_json["compressed_size"] = serde_json::json!(loc.compressed_page_size);
+        }
+        page_json
+    }).collect();
+
+    Some(serde_json::json!({
+        "num_pages": num_pages,
+        "boundary_order": boundary_order,
+        "pages": pages,
+    }))
+}
+
+/// Extracts the min value for a page from a ColumnIndexMetaData.
+///
+/// Returns `None` for the `NONE` variant or when the index has no min for `page_idx`.
+/// Byte-array values are rendered as text when valid UTF-8, otherwise as hex.
+/// `_physical_type` is unused; the index variant already determines the type.
+fn format_page_min(
+    ci: &parquet::file::page_index::column_index::ColumnIndexMetaData,
+    page_idx: usize,
+    _physical_type: parquet::basic::Type,
+) -> Option<String> {
+    use parquet::file::page_index::column_index::ColumnIndexMetaData;
+
+    // Shared rendering for both byte-array variants.
+    let render_bytes = |b: &[u8]| String::from_utf8(b.to_vec()).unwrap_or_else(|_| bytes_to_hex(b));
+
+    match ci {
+        ColumnIndexMetaData::NONE => None,
+        ColumnIndexMetaData::BOOLEAN(idx) => idx.min_value(page_idx).map(|v| format!("{}", v)),
+        ColumnIndexMetaData::INT32(idx) => idx.min_value(page_idx).map(|v| format!("{}", v)),
+        ColumnIndexMetaData::INT64(idx) => idx.min_value(page_idx).map(|v| format!("{}", v)),
+        // INT96 is printed with its Debug form.
+        ColumnIndexMetaData::INT96(idx) => idx.min_value(page_idx).map(|v| format!("{:?}", v)),
+        ColumnIndexMetaData::FLOAT(idx) => idx.min_value(page_idx).map(|v| format!("{}", v)),
+        ColumnIndexMetaData::DOUBLE(idx) => idx.min_value(page_idx).map(|v| format!("{}", v)),
+        ColumnIndexMetaData::BYTE_ARRAY(idx) => idx.min_value(page_idx).map(render_bytes),
+        ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(idx) => idx.min_value(page_idx).map(render_bytes),
+    }
+}
+
+/// Extracts the max value for a page from a ColumnIndexMetaData.
+fn format_page_max( + ci: &parquet::file::page_index::column_index::ColumnIndexMetaData, + page_idx: usize, + _physical_type: parquet::basic::Type, +) -> Option { + use parquet::file::page_index::column_index::ColumnIndexMetaData; + match ci { + ColumnIndexMetaData::NONE => None, + ColumnIndexMetaData::BOOLEAN(idx) => idx.max_value(page_idx).map(|v| format!("{}", v)), + ColumnIndexMetaData::INT32(idx) => idx.max_value(page_idx).map(|v| format!("{}", v)), + ColumnIndexMetaData::INT64(idx) => idx.max_value(page_idx).map(|v| format!("{}", v)), + ColumnIndexMetaData::INT96(idx) => idx.max_value(page_idx).map(|v| format!("{:?}", v)), + ColumnIndexMetaData::FLOAT(idx) => idx.max_value(page_idx).map(|v| format!("{}", v)), + ColumnIndexMetaData::DOUBLE(idx) => idx.max_value(page_idx).map(|v| format!("{}", v)), + ColumnIndexMetaData::BYTE_ARRAY(idx) => idx.max_value(page_idx).map(|b: &[u8]| { + String::from_utf8(b.to_vec()).unwrap_or_else(|_| bytes_to_hex(b)) + }), + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(idx) => idx.max_value(page_idx).map(|b: &[u8]| { + String::from_utf8(b.to_vec()).unwrap_or_else(|_| bytes_to_hex(b)) + }), + } +} \ No newline at end of file diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java index a2e26a5c72c30..f9a83c903d0e6 100644 --- a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java @@ -809,7 +809,7 @@ public void refresh(String source) throws EngineException { catalogSnapshotManager.commitNewSnapshot(result.refreshedSegments()); } else if ("flush".equals(source)) { - catalogSnapshotManager.bumpGeneration(); + catalogSnapshotManager.bumpGeneration(); } notifyRefreshListenersAfter(refreshed); } finally { @@ -919,8 +919,11 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { : "local checkpoint in 
commit data must be >= -1"; assert Long.parseLong(commitData.get(SequenceNumbers.MAX_SEQ_NO)) >= -1 : "max seq no in commit data must be >= -1"; - // We do an additional commit on engine start due to no catalog snapshot present in earlier commit during empty recovery - Committer.CommitResult commitResult = committer.commit(new Committer.CommitInput(commitData.entrySet(), snapshot, 0)); + // We do an additional commit on engine start due to no catalog snapshot present in earlier commit during empty + // recovery + Committer.CommitResult commitResult = committer.commit( + new Committer.CommitInput(commitData.entrySet(), snapshot, 0) + ); if (commitResult != null && snapshot instanceof DataformatAwareCatalogSnapshot dfaSnapshot) { // If the catalog snapshot changed during the flush, this will ensure the latest one diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngine.java index 4d9da80719d01..85f2882f9bf40 100644 --- a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngine.java @@ -142,7 +142,8 @@ public DataFormatAwareNRTReplicationEngine(EngineConfig engineConfig) { boolean success = false; Committer constructingCommitter = null; try { - this.committer = constructingCommitter = engineConfig.getCommitterFactory().getCommitter(new CommitterConfig(engineConfig, () -> {}, true)); + this.committer = constructingCommitter = engineConfig.getCommitterFactory() + .getCommitter(new CommitterConfig(engineConfig, () -> {}, true)); // Bootstrap an empty commit if no segments file exists (fresh replica). 
Map userData = committer.getLastCommittedData(); @@ -161,27 +162,17 @@ public DataFormatAwareNRTReplicationEngine(EngineConfig engineConfig) { Map> aggregated = new HashMap<>(); for (String formatName : allDescriptors.keySet()) { DataFormat format = registry.format(formatName); - aggregated.putAll( - registry.getReaderManager( - new ReaderManagerConfig( - Optional.of(new IndexStoreProvider() { - @Override - public FormatStore getStore(DataFormat dataFormat) { - return new FormatStore() { - @Override - public Store store() { - return store; - } - }; - } - }), - format, - registry, - store.shardPath(), - store.getDataformatAwareStoreHandles() - ) - ) - ); + aggregated.putAll(registry.getReaderManager(new ReaderManagerConfig(Optional.of(new IndexStoreProvider() { + @Override + public FormatStore getStore(DataFormat dataFormat) { + return new FormatStore() { + @Override + public Store store() { + return store; + } + }; + } + }), format, registry, store.shardPath(), store.getDataformatAwareStoreHandles()))); } readerManagersRef = Map.copyOf(aggregated); @@ -308,8 +299,12 @@ private void commitCatalogSnapshot(boolean bumpSICounter) throws IOException { snapshot.setUserData(commitData, true); commitData.put(CatalogSnapshot.CATALOG_SNAPSHOT_KEY, snapshot.serializeToString()); - CommitResult commitResult = committer.commit(new CommitInput(commitData.entrySet(), snapshot, bumpSICounter ? SI_COUNTER_INCREMENT : 0)); - catalogSnapshotManager.updateLastCommitInfo(commitResult); + CommitResult commitResult = committer.commit( + new CommitInput(commitData.entrySet(), snapshot, bumpSICounter ? SI_COUNTER_INCREMENT : 0) + ); + if (commitResult != null) { + catalogSnapshotManager.updateLastCommitInfo(commitResult); + } snapshotRef.markSuccess(); } translogManager.syncTranslog(); @@ -882,7 +877,11 @@ private void maybeFailEngine(String source, Exception e) { * {@link org.opensearch.index.engine.exec.coord}. */ // Visible for testing. 
- static Map buildReplicaFileDeleters(ShardPath shardPath, DataFormatRegistry registry, CommitFileManager commitFileManager) { + static Map buildReplicaFileDeleters( + ShardPath shardPath, + DataFormatRegistry registry, + CommitFileManager commitFileManager + ) { Map deleters = new HashMap<>(); for (DataFormat format : registry.getRegisteredFormats()) { final String formatName = format.name(); @@ -979,7 +978,7 @@ public List onInit(List commits) { public synchronized List onCommit(List commits) { lastSnapshot = commits.getLast(); List toDelete = new ArrayList<>(); - for (int i = 0; i < commits.size() - 1; i ++) { + for (int i = 0; i < commits.size() - 1; i++) { CatalogSnapshot currentCommit = commits.get(i); if (!acquiredSnapshots.contains(currentCommit)) { toDelete.add(currentCommit); @@ -991,9 +990,7 @@ public synchronized List onCommit(List commits @Override public synchronized GatedCloseable acquireCommittedSnapshot(boolean acquiringSafe) { acquiredSnapshots.add(lastSnapshot); - return new GatedCloseable<>(lastSnapshot, () -> { - acquiredSnapshots.remove(lastSnapshot); - }); + return new GatedCloseable<>(lastSnapshot, () -> { acquiredSnapshots.remove(lastSnapshot); }); } /** diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeletionPolicy.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeletionPolicy.java index 44fae5ba8db76..c1a069ffb3b37 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeletionPolicy.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeletionPolicy.java @@ -82,4 +82,3 @@ default CatalogSnapshot findSafeCommit(List commits) throws IOE throw new UnsupportedOperationException("findSafeCommit not supported by this policy"); } } - diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java index 
9a3cec432e3b1..fd240775fd7a1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java @@ -48,6 +48,14 @@ public interface Committer extends CommitFileManager, Closeable { record CommitResult(String commitFileName, long generation, long commitDataFormatVersion) { } + /** + * Input to a commit operation, bundling the user data to persist, the associated catalog snapshot, + * and a bump counter for generation advancement without content changes (e.g., force flush). + * + * @param userData key-value pairs to persist as commit metadata + * @param catalogSnapshot the catalog snapshot associated with this commit, or {@code null} for lightweight commits + * @param bumpCounter number of generation bumps to apply; 0 for normal commits + */ @ExperimentalApi record CommitInput(Iterable> userData, CatalogSnapshot catalogSnapshot, int bumpCounter) { @@ -98,4 +106,3 @@ public CommitInput(Iterable> userData, CatalogSnapshot */ List listCommittedSnapshots() throws IOException; } - diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java index 8855f364db44b..622bf5da88fcc 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java @@ -24,7 +24,7 @@ import org.opensearch.index.engine.exec.FilesListener; import org.opensearch.index.engine.exec.Segment; import org.opensearch.index.engine.exec.WriterFileSet; -import org.opensearch.index.engine.exec.commit.Committer; +import org.opensearch.index.engine.exec.commit.Committer.CommitResult; import org.opensearch.index.shard.ShardPath; import java.io.Closeable; @@ -36,9 +36,6 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import 
java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; - -import static org.opensearch.index.engine.exec.commit.Committer.*; /** * Manages the lifecycle of {@link CatalogSnapshot} instances for the composite multi-format engine @@ -270,11 +267,9 @@ assert assertSegmentGenerationFileConsistency(refreshedSegments) assert refreshedSegments.stream().flatMap(s -> s.dfGroupedSearchableFiles().values().stream()).allMatch(wfs -> wfs.numRows() > 0) : "every WriterFileSet must have a positive row count"; - installSnapshot(newSnapshot); } - /** * Replaces the current snapshot with one received from the primary via segment replication. * Replica-only: does not fire beforeRefresh/afterRefresh since the catalog snapshot @@ -325,7 +320,8 @@ public synchronized void bumpGeneration() throws IOException { DataformatAwareCatalogSnapshot newSnapshot = new DataformatAwareCatalogSnapshot( latestCatalogSnapshot.getId() + 1, // This is unique for each catalog snapshot managed by this manager. - latestCatalogSnapshot.getGeneration() + 1, // This is for commit generation tracking. So this should increase as well. Handles force flush cases + latestCatalogSnapshot.getGeneration() + 1, // This is for commit generation tracking. So this should increase as well. Handles + // force flush cases latestCatalogSnapshot.getVersion(), // This increases if there is an actual change in the snapshot. latestCatalogSnapshot.getSegments(), latestCatalogSnapshot.getLastWriterGeneration() + 1, @@ -338,8 +334,20 @@ public synchronized void bumpGeneration() throws IOException { installSnapshot(newSnapshot); } + /** + * Updates the latest catalog snapshot with commit metadata from a successful flush. + * Called by the engine after {@link org.opensearch.index.engine.exec.commit.Committer#commit} + * returns a non-null result, recording the segments file name, Lucene generation, and + * data format version so that replicas and recovery can identify the commit point. 
+ * + * @param commitResult the result of the commit containing the segments_N filename, generation, and format version + */ public synchronized void updateLastCommitInfo(CommitResult commitResult) { - latestCatalogSnapshot.setLastCommitInfo(commitResult.commitFileName(), commitResult.generation(), commitResult.commitDataFormatVersion()); + latestCatalogSnapshot.setLastCommitInfo( + commitResult.commitFileName(), + commitResult.generation(), + commitResult.commitDataFormatVersion() + ); } /** diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/DataformatAwareCatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/DataformatAwareCatalogSnapshot.java index 535fb103f2f9b..4aeb472ca3cb3 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/DataformatAwareCatalogSnapshot.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/DataformatAwareCatalogSnapshot.java @@ -80,7 +80,6 @@ public class DataformatAwareCatalogSnapshot extends CatalogSnapshot { */ private volatile Object replicatingCommitData; - /** * Constructs a new DataformatAwareCatalogSnapshot. 
* @@ -333,7 +332,8 @@ public synchronized String getLastCommitFileName() { @Override public long getLastCommitGeneration() { - assert lastCommitGeneration >= 0 : "Before this is obtained, at least one commit should have been obtained"; + assert lastCommitGeneration >= 0 + : "Before this is obtained, at least one snapshot should've been committed: " + this; return lastCommitGeneration; } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java index 4e7bc44e56067..eb02e752cdc56 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java @@ -69,7 +69,6 @@ public class IndexFileDeleter { */ private final Map> pendingDeletes; - /** * Callback invoked when a CatalogSnapshot's refCount reaches 0 via the deletion policy * path ({@link #onCommit}, {@link #revisitPolicy}, or init). 
Allows the owning @@ -78,6 +77,17 @@ public class IndexFileDeleter { */ private final Consumer onSnapshotDeletedCallback; + public IndexFileDeleter( + CatalogSnapshotDeletionPolicy deletionPolicy, + FileDeleter fileDeleter, + Map filesListeners, + List initialCommittedSnapshots, + ShardPath shardPath, + CommitFileManager commitFileManager + ) throws IOException { + this(deletionPolicy, fileDeleter, filesListeners, initialCommittedSnapshots, shardPath, commitFileManager, s -> {}); + } + public IndexFileDeleter( CatalogSnapshotDeletionPolicy deletionPolicy, FileDeleter fileDeleter, diff --git a/server/src/test/java/org/opensearch/index/engine/CombinedDeletionPolicyTests.java b/server/src/test/java/org/opensearch/index/engine/CombinedDeletionPolicyTests.java index dd469644709ec..55713c83cc6ab 100644 --- a/server/src/test/java/org/opensearch/index/engine/CombinedDeletionPolicyTests.java +++ b/server/src/test/java/org/opensearch/index/engine/CombinedDeletionPolicyTests.java @@ -67,8 +67,7 @@ public void testKeepCommitsAfterGlobalCheckpoint() throws Exception { final SoftDeletesPolicy softDeletesPolicy = new SoftDeletesPolicy( globalCheckpoint::get, NO_OPS_PERFORMED, - extraRetainedOps, - () -> RetentionLeases.EMPTY + extraRetainedOps, () -> RetentionLeases.EMPTY ); TranslogDeletionPolicy translogPolicy = createTranslogDeletionPolicy(); CombinedDeletionPolicy indexPolicy = newCombinedDeletionPolicy(translogPolicy, softDeletesPolicy, globalCheckpoint); diff --git a/server/src/test/java/org/opensearch/index/engine/DataFormatAwareEngineRecoveryTests.java b/server/src/test/java/org/opensearch/index/engine/DataFormatAwareEngineRecoveryTests.java index 6a9bd0d858405..7c0267b210829 100644 --- a/server/src/test/java/org/opensearch/index/engine/DataFormatAwareEngineRecoveryTests.java +++ b/server/src/test/java/org/opensearch/index/engine/DataFormatAwareEngineRecoveryTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.opensearch.Version; @@ -35,6 +36,7 @@ import org.opensearch.index.engine.exec.commit.Committer.CommitResult; import org.opensearch.index.engine.exec.commit.CommitterFactory; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.coord.CatalogSnapshotManager; import org.opensearch.index.engine.exec.coord.DataformatAwareCatalogSnapshot; import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.MapperService; @@ -129,18 +131,26 @@ static class PersistentCommitter implements Committer { private final Store store; private volatile Map committedData; private volatile CatalogSnapshot lastCommittedSnapshot; + private final long initialCommitGeneration; PersistentCommitter(Store store) throws IOException { this.store = store; - this.committedData = Map.copyOf(store.readLastCommittedSegmentsInfo().getUserData()); + SegmentInfos segmentInfos = store.readLastCommittedSegmentsInfo(); + this.committedData = Map.copyOf(segmentInfos.getUserData()); + this.initialCommitGeneration = segmentInfos.getGeneration(); // Deserialize existing catalog snapshot if present String serialized = committedData.get(CatalogSnapshot.CATALOG_SNAPSHOT_KEY); if (serialized != null) { - try { - this.lastCommittedSnapshot = DataformatAwareCatalogSnapshot.deserializeFromString(serialized, dir -> dir); - } catch (IOException e) { - // Deserialization failed — start without committed snapshot - } + this.lastCommittedSnapshot = DataformatAwareCatalogSnapshot.deserializeFromString( + serialized, + store.shardFormatDirectoryResolver() + ); + // Carry forward the commit info from the Lucene commit + ((DataformatAwareCatalogSnapshot) this.lastCommittedSnapshot).setLastCommitInfo( + segmentInfos.getSegmentsFileName(), 
+ segmentInfos.getGeneration(), + 0L + ); } } @@ -157,20 +167,21 @@ public CommitResult commit(CommitInput commitInput) throws IOException { writer.commit(); } this.committedData = StreamSupport.stream(commitInput.userData().spliterator(), false) - .collect(Collectors.toMap( - Map.Entry::getKey, - Map.Entry::getValue, - (existing, replacement) -> replacement, // Merge function for duplicate keys - HashMap::new - )); + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (existing, replacement) -> replacement, // Merge function for duplicate keys + HashMap::new + ) + ); // Store the catalog snapshot if present in commit data String serialized = committedData.get(CatalogSnapshot.CATALOG_SNAPSHOT_KEY); if (serialized != null) { - try { - this.lastCommittedSnapshot = DataformatAwareCatalogSnapshot.deserializeFromString(serialized, dir -> dir); - } catch (IOException e) { - // If deserialization fails, keep the previous snapshot - } + this.lastCommittedSnapshot = DataformatAwareCatalogSnapshot.deserializeFromString( + serialized, + store.shardFormatDirectoryResolver() + ); } return new CommitResult("segments_1", 1L, 0L); } @@ -195,7 +206,18 @@ public List listCommittedSnapshots() { if (lastCommittedSnapshot != null) { return List.of(lastCommittedSnapshot); } - return List.of(); + // Fresh index — no committed catalog snapshot yet. Provide an initial empty + // snapshot so CatalogSnapshotManager can be constructed (it requires non-empty). 
+ DataformatAwareCatalogSnapshot initial = (DataformatAwareCatalogSnapshot) CatalogSnapshotManager.createInitialSnapshot( + 0L, + 0L, + 0L, + List.of(), + -1L, + committedData + ); + initial.setLastCommitInfo("segments_" + initialCommitGeneration, initialCommitGeneration, 0L); + return List.of(initial); } @Override @@ -1052,4 +1074,35 @@ public void testSegmentGenerationsAreUniqueAfterRecovery() throws IOException { } } } + + /** + * Simulates the production scenario where a Lucene commit exists without a serialized + * CatalogSnapshot (e.g. the initial commit from {@code store.createEmpty()} or a commit + * written by a non-DFA engine before migration). The PersistentCommitter must synthesize + * an initial empty snapshot with correct lastCommitGeneration so the engine can open. + *

+ * This mirrors the handling in {@code LuceneCommitter.loadCommittedSnapshots()} where + * commits without {@code _catalog_snapshot_} get a synthetic initial snapshot seeded + * from the on-disk segments_N generation. + */ + public void testEngineOpensWithCommitLackingCatalogSnapshot() throws IOException { + // Bootstrap creates a commit with NO _catalog_snapshot_ key — simulates store.createEmpty() + // or a pre-DFA engine commit. + try (DataFormatAwareEngine engine = createEngine()) { + // recoverFromTranslog triggers onAfterTranslogRecovery → flush, which writes + // the first commit WITH _catalog_snapshot_. But the engine must have opened + // successfully from the bootstrap commit that lacked it. + engine.translogManager().recoverFromTranslog(ignore -> 0, engine.getProcessedLocalCheckpoint(), Long.MAX_VALUE); + + // Engine is functional — can index and refresh + for (int i = 0; i < 5; i++) { + Engine.IndexResult result = engine.index(indexOp(Integer.toString(i))); + assertThat(result.getResultType(), equalTo(Engine.Result.Type.SUCCESS)); + } + engine.refresh("test"); + try (GatedCloseable ref = engine.acquireSnapshot()) { + assertThat(ref.get().getSegments().size(), greaterThan(0)); + } + } + } } diff --git a/server/src/test/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngineTests.java index 96a98e28cc388..201fe85517a2b 100644 --- a/server/src/test/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/DataFormatAwareNRTReplicationEngineTests.java @@ -225,7 +225,7 @@ private DataformatAwareCatalogSnapshot buildReplicationSnapshot(long id, long ge if (historyUUID != null) { userData.put(Engine.HISTORY_UUID_KEY, historyUUID); } - return (DataformatAwareCatalogSnapshot) CatalogSnapshotManager.createInitialSnapshot( + DataformatAwareCatalogSnapshot snapshot = 
(DataformatAwareCatalogSnapshot) CatalogSnapshotManager.createInitialSnapshot( id, gen, gen, @@ -233,6 +233,9 @@ private DataformatAwareCatalogSnapshot buildReplicationSnapshot(long id, long ge gen, userData ); + // Simulate the primary having committed this snapshot (sets lastCommitGeneration). + snapshot.setLastCommitInfo("segments_" + gen, gen, 0L); + return snapshot; } // ---------- Tests ---------- @@ -337,7 +340,8 @@ public void testBuildReplicaFileDeletersCoversLuceneAndNonDefaultFormats() throw java.nio.file.Files.createDirectories(parquetDir); ShardPath shardPath = new ShardPath(false, root, root, shardId); - Map deleters = DataFormatAwareNRTReplicationEngine.buildReplicaFileDeleters(shardPath, registry); + bootstrapStoreWithMetadata(store, UUID.randomUUID().toString()); + Map deleters = DataFormatAwareNRTReplicationEngine.buildReplicaFileDeleters(shardPath, registry, new InMemoryCommitter(store)); assertTrue("parquet deleter must be present", deleters.containsKey("parquet")); assertTrue("lucene deleter must be present", deleters.containsKey("lucene")); diff --git a/server/src/test/java/org/opensearch/index/engine/exec/coord/IndexFileDeleterTests.java b/server/src/test/java/org/opensearch/index/engine/exec/coord/IndexFileDeleterTests.java index eba6dd145c0e3..15db993a44f4e 100644 --- a/server/src/test/java/org/opensearch/index/engine/exec/coord/IndexFileDeleterTests.java +++ b/server/src/test/java/org/opensearch/index/engine/exec/coord/IndexFileDeleterTests.java @@ -83,7 +83,8 @@ public void testAddFileReferencesTracksNewFiles() throws IOException { Map.of(), List.of(cs1), null, - null + null, + cs -> {} ); Map> newFiles = deleter.addFileReferences( diff --git a/test/framework/src/main/java/org/opensearch/index/engine/dataformat/stub/InMemoryCommitter.java b/test/framework/src/main/java/org/opensearch/index/engine/dataformat/stub/InMemoryCommitter.java index 768478d450713..be3f9a54ab48e 100644 --- 
a/test/framework/src/main/java/org/opensearch/index/engine/dataformat/stub/InMemoryCommitter.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/dataformat/stub/InMemoryCommitter.java @@ -8,13 +8,18 @@ package org.opensearch.index.engine.dataformat.stub; +import org.apache.lucene.index.SegmentInfos; import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.exec.commit.Committer; +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.coord.CatalogSnapshotManager; +import org.opensearch.index.engine.exec.coord.DataformatAwareCatalogSnapshot; import org.opensearch.index.store.Store; import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -25,20 +30,25 @@ */ public class InMemoryCommitter implements Committer { private volatile Map committedData; + private final long initialCommitGeneration; public InMemoryCommitter(Store store) throws IOException { - this.committedData = Map.copyOf(store.readLastCommittedSegmentsInfo().getUserData()); + SegmentInfos segmentInfos = store.readLastCommittedSegmentsInfo(); + this.committedData = Map.copyOf(segmentInfos.getUserData()); + this.initialCommitGeneration = segmentInfos.getGeneration(); } @Override public CommitResult commit(CommitInput commitData) { this.committedData = StreamSupport.stream(commitData.userData().spliterator(), false) - .collect(Collectors.toMap( - Map.Entry::getKey, - Map.Entry::getValue, - (existing, replacement) -> replacement, // Merge function for duplicate keys - HashMap::new - )); + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (existing, replacement) -> replacement, // Merge function for duplicate keys + HashMap::new + ) + ); return null; } @@ -61,12 +71,21 @@ public SafeCommitInfo getSafeCommitInfo() { public void 
close() {} @Override - public java.util.List listCommittedSnapshots() { - return java.util.List.of(); + public List listCommittedSnapshots() { + DataformatAwareCatalogSnapshot snapshot = (DataformatAwareCatalogSnapshot) CatalogSnapshotManager.createInitialSnapshot( + 0L, + 0L, + 0L, + List.of(), + -1L, + committedData + ); + snapshot.setLastCommitInfo("segments_" + initialCommitGeneration, initialCommitGeneration, 0L); + return List.of(snapshot); } @Override - public void deleteCommit(org.opensearch.index.engine.exec.coord.CatalogSnapshot snapshot) {} + public void deleteCommit(CatalogSnapshot snapshot) {} @Override public boolean isCommitManagedFile(String fileName) { @@ -74,7 +93,7 @@ public boolean isCommitManagedFile(String fileName) { } @Override - public byte[] serializeToCommitFormat(org.opensearch.index.engine.exec.coord.CatalogSnapshot snapshot) { + public byte[] serializeToCommitFormat(CatalogSnapshot snapshot) { // Test stub does not upload to remote store. throw new UnsupportedOperationException("InMemoryCommitter does not serialize commits"); }