apache · steFaiz · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BucketedDvMaintainer.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/BucketedDvMaintainer.java
@@ -35,12 +35,13 @@
 public class BucketedDvMaintainer {
 
     private final DeletionVectorsIndexFile dvIndexFile;
-    private final Map<String, DeletionVector> deletionVectors;
+    private final Map<DeletionFileKey, DeletionVector> deletionVectors;
     protected final boolean bitmap64;
     private boolean modified;
 
     private BucketedDvMaintainer(
-            DeletionVectorsIndexFile dvIndexFile, Map<String, DeletionVector> deletionVectors) {
+            DeletionVectorsIndexFile dvIndexFile,
+            Map<DeletionFileKey, DeletionVector> deletionVectors) {
         this.dvIndexFile = dvIndexFile;
         this.deletionVectors = deletionVectors;
         this.bitmap64 = dvIndexFile.bitmap64();
@@ -59,8 +60,12 @@ private DeletionVector createNewDeletionVector() {
      * @param position The row position within the file that has been deleted.
      */
     public void notifyNewDeletion(String fileName, long position) {
+        notifyNewDeletion(DeletionFileKey.ofFileName(fileName), position);
+    }
+
+    public void notifyNewDeletion(DeletionFileKey key, long position) {
         DeletionVector deletionVector =
-                deletionVectors.computeIfAbsent(fileName, k -> createNewDeletionVector());
+                deletionVectors.computeIfAbsent(key, k -> createNewDeletionVector());
         if (deletionVector.checkedDelete(position)) {
             modified = true;
         }
@@ -73,7 +78,11 @@ public void notifyNewDeletion(String fileName, long position) {
      * @param deletionVector The deletion vector
      */
     public void notifyNewDeletion(String fileName, DeletionVector deletionVector) {
-        deletionVectors.put(fileName, deletionVector);
+        notifyNewDeletion(DeletionFileKey.ofFileName(fileName), deletionVector);
+    }
+
+    /**
+     * Registers {@code deletionVector} as the deletion vector for {@code key}, replacing any
+     * vector previously stored under that key, and marks this maintainer as modified.
+     *
+     * @param key The key identifying the file the deletion vector belongs to.
+     * @param deletionVector The deletion vector
+     */
+    public void notifyNewDeletion(DeletionFileKey key, DeletionVector deletionVector) {
+        deletionVectors.put(key, deletionVector);
         modified = true;
     }
 
@@ -85,11 +94,15 @@ public void notifyNewDeletion(String fileName, DeletionVector deletionVector) {
      * @param deletionVector The deletion vector
      */
     public void mergeNewDeletion(String fileName, DeletionVector deletionVector) {
-        DeletionVector old = deletionVectors.get(fileName);
+        mergeNewDeletion(DeletionFileKey.ofFileName(fileName), deletionVector);
+    }
+
+    /**
+     * Stores {@code deletionVector} under {@code key} after merging the vector currently stored
+     * for that key (if any) into it, and marks this maintainer as modified.
+     *
+     * <p>Note that the passed-in {@code deletionVector} instance itself is the merge target and is
+     * the object kept in the map afterwards.
+     *
+     * @param key The key identifying the file the deletion vector belongs to.
+     * @param deletionVector The deletion vector
+     */
+    public void mergeNewDeletion(DeletionFileKey key, DeletionVector deletionVector) {
+        DeletionVector old = deletionVectors.get(key);
         if (old != null) {
             deletionVector.merge(old);
         }
-        deletionVectors.put(fileName, deletionVector);
+        deletionVectors.put(key, deletionVector);
         modified = true;
     }
 
@@ -100,8 +113,12 @@ public void mergeNewDeletion(String fileName, DeletionVector deletionVector) {
      * @param fileName The name of the file whose deletion vector should be removed.
      */
     public void removeDeletionVectorOf(String fileName) {
-        if (deletionVectors.containsKey(fileName)) {
-            deletionVectors.remove(fileName);
+        removeDeletionVectorOf(DeletionFileKey.ofFileName(fileName));
+    }
+
+    /**
+     * Removes the deletion vector stored under {@code key}. This maintainer is marked as modified
+     * only if an entry for the key actually existed.
+     *
+     * @param key The key whose deletion vector should be removed.
+     */
+    public void removeDeletionVectorOf(DeletionFileKey key) {
+        if (deletionVectors.containsKey(key)) {
+            deletionVectors.remove(key);
             modified = true;
         }
     }
@@ -128,15 +145,19 @@ public Optional<IndexFileMeta> writeDeletionVectorsIndex() {
      *     Optional} if not.
      */
     public Optional<DeletionVector> deletionVectorOf(String fileName) {
-        return Optional.ofNullable(deletionVectors.get(fileName));
+        return deletionVectorOf(DeletionFileKey.ofFileName(fileName));
+    }
+
+    /**
+     * Looks up the deletion vector stored under {@code key}.
+     *
+     * @param key The key to look up.
+     * @return An {@code Optional} containing the deletion vector if one is stored for the key, or
+     *     an empty {@code Optional} if not.
+     */
+    public Optional<DeletionVector> deletionVectorOf(DeletionFileKey key) {
+        return Optional.ofNullable(deletionVectors.get(key));
     }
 
+    /** Returns the deletion vectors index file this maintainer was constructed with. */
     public DeletionVectorsIndexFile dvIndexFile() {
         return dvIndexFile;
     }
 
+    /**
+     * Returns the internal key-to-deletion-vector map itself (not a copy); changes made through
+     * the returned map are not reflected in the {@code modified} flag.
+     */
     @VisibleForTesting
-    public Map<String, DeletionVector> deletionVectors() {
+    public Map<DeletionFileKey, DeletionVector> deletionVectors() {
         return deletionVectors;
     }
 
@@ -166,13 +187,15 @@ public BucketedDvMaintainer create(
             if (restoredFiles == null) {
                 restoredFiles = Collections.emptyList();
             }
-            Map<String, DeletionVector> deletionVectors =
+            Map<DeletionFileKey, DeletionVector> deletionVectors =
                     new HashMap<>(handler.readAllDeletionVectors(partition, bucket, restoredFiles));
             return create(partition, bucket, deletionVectors);
         }
 
+        /**
+         * Creates a maintainer for the given partition and bucket on top of the given deletion
+         * vectors. The map is handed to the maintainer as is (no copy is made here).
+         *
+         * @param partition partition used to resolve the deletion vectors index
+         * @param bucket bucket used to resolve the deletion vectors index
+         * @param deletionVectors initial deletion vectors by key
+         */
         public BucketedDvMaintainer create(
-                BinaryRow partition, int bucket, Map<String, DeletionVector> deletionVectors) {
+                BinaryRow partition,
+                int bucket,
+                Map<DeletionFileKey, DeletionVector> deletionVectors) {
             return new BucketedDvMaintainer(handler.dvIndex(partition, bucket), deletionVectors);
         }
     }

diff --git a/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DataEvolutionApplyDvReader.java b/paimon-core/src/main/java/org/apache/paimon/deletionvectors/DataEvolutionApplyDvReader.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.deletionvectors;
+
+import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.reader.RecordReader;
+import org.apache.paimon.table.SpecialFields;
+import org.apache.paimon.table.source.DeletionFile;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.Preconditions;
+import org.apache.paimon.utils.ProjectedRow;
+import org.apache.paimon.utils.Range;
+
+import javax.annotation.Nullable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The RecordReader to apply deletion vectors for data evolution tables. First, the read type is
+ * enriched with `_ROW_ID` (if not already present); then rows whose row id is deleted in a DV are
+ * filtered out.
+ *
+ * <p>This reader assumes that the underlying reader will return monotonically incrementing
+ * _ROW_IDs, which is guaranteed by the current implementation.
+ */
+public class DataEvolutionApplyDvReader implements RecordReader<InternalRow> {
+
+    private final RecordReader<InternalRow> reader;
+    private final List<RowRangeDeletionVector> deletionVectors;
+    @Nullable private final ProjectedRow projectedRow;
+    private final int rowIdIndex;
+
+    private long lastRowId = -1;
+    private int nextDvIndex;
+    private RowRangeDeletionVector currentDv;
+
+    /**
+     * Creates a reader that drops the rows of {@code reader} which are marked deleted by the
+     * deletion vectors held in {@code info}.
+     *
+     * @param reader the underlying reader; its rows must carry the row id at the row id index
+     *     recorded in {@code info}
+     * @param info the result of {@link #readInfo}; it must hold at least one deletion vector, see
+     *     {@link Info#hasDeletionVectors()}
+     * @throws IllegalArgumentException if {@code info} holds no deletion vectors
+     */
+    public DataEvolutionApplyDvReader(RecordReader<InternalRow> reader, Info info) {
+        // Fail fast with a clear message: an Info without deletion vectors has no usable row id
+        // index, and deletionVectors.get(0) below would otherwise throw a bare
+        // IndexOutOfBoundsException.
+        Preconditions.checkArgument(
+                info.hasDeletionVectors(),
+                "DataEvolutionApplyDvReader requires at least one deletion vector.");
+
+        this.reader = reader;
+        // Copy before sorting so that the list held by Info is left untouched.
+        this.deletionVectors = new ArrayList<>(info.deletionVectors);
+        this.deletionVectors.sort(Comparator.comparingLong(dv -> dv.range.from));
+        this.rowIdIndex = info.rowIdIndex;
+        this.projectedRow = info.projectedRow;
+
+        // Start on the vector with the smallest range start; nextDvIndex points at its successor.
+        this.nextDvIndex = 1;
+        this.currentDv = deletionVectors.get(0);
+    }
+
+    /**
+     * Reads the next batch from the wrapped reader and exposes it through an iterator that skips
+     * deleted rows.
+     *
+     * @return a filtering iterator, or {@code null} when the wrapped reader returns {@code null}
+     */
+    @Nullable
+    @Override
+    public RecordIterator<InternalRow> readBatch() throws IOException {
+        final RecordIterator<InternalRow> batch = reader.readBatch();
+        if (batch == null) {
+            return null;
+        }
+
+        return new RecordIterator<InternalRow>() {
+
+            @Nullable
+            @Override
+            public InternalRow next() throws IOException {
+                InternalRow row;
+                while ((row = batch.next()) != null) {
+                    if (isDeleted(row)) {
+                        continue;
+                    }
+                    // A projection is only set when the row id was added to the read type for
+                    // filtering; it maps the row back to the originally requested fields.
+                    return projectedRow == null ? row : projectedRow.replaceRow(row);
+                }
+                return null;
+            }
+
+            @Override
+            public void releaseBatch() {
+                batch.releaseBatch();
+            }
+        };
+    }
+
+    /**
+     * Tells whether {@code row} is marked deleted. Reads the row id, verifies it is larger than
+     * the previous one, advances to the deletion vector that may cover it and consults that
+     * vector.
+     */
+    private boolean isDeleted(InternalRow row) {
+        final long rowId = row.getLong(rowIdIndex);
+        checkRowIdMonotonicity(rowId);
+        moveToPossibleDv(rowId);
+
+        // currentDv is null once all vectors end below rowId; a non-null vector may still start
+        // after rowId, hence the range check before the lookup.
+        return currentDv != null && currentDv.mayContains(rowId) && currentDv.isDeleted(rowId);
+    }
+
+    /**
+     * Verifies that {@code rowId} is strictly greater than the previously seen row id and then
+     * remembers it. The check is skipped while {@code lastRowId} is negative (it starts at -1).
+     */
+    private void checkRowIdMonotonicity(long rowId) {
+        boolean nothingToCompare = lastRowId < 0;
+        Preconditions.checkState(
+                nothingToCompare || rowId > lastRowId,
+                "This reader works only if underlying reader produces incremental _ROW_IDs.");
+        lastRowId = rowId;
+    }
+
+    /**
+     * Advances {@code currentDv} past every deletion vector whose range ends before {@code
+     * rowId}. Once the sorted list is exhausted, {@code currentDv} becomes {@code null} and stays
+     * so.
+     */
+    private void moveToPossibleDv(long rowId) {
+        while (currentDv != null && rowId > currentDv.range.to) {
+            if (nextDvIndex < deletionVectors.size()) {
+                currentDv = deletionVectors.get(nextDvIndex++);
+            } else {
+                currentDv = null;
+            }
+        }
+    }
+
+    /** Closes the wrapped reader. */
+    @Override
+    public void close() throws IOException {
+        reader.close();
+    }
+
+    /**
+     * Loads the deletion vectors referenced by {@code deletionFiles} and works out how the row id
+     * has to be read so that they can be applied.
+     *
+     * <p>Empty deletion vectors are discarded. If none remains, or if {@code deletionFiles} is
+     * {@code null} or empty, the returned {@link Info} reports no deletion vectors and keeps
+     * {@code readRowType} unchanged.
+     *
+     * @param fileIO file IO handed to {@code DeletionVector.read}
+     * @param readRowType row type requested by the caller
+     * @param deletionFiles deletion file per row id range, may be {@code null}
+     * @return the loaded vectors together with the row type to read and the row id position
+     * @throws IOException propagated from reading the deletion files
+     */
+    public static Info readInfo(
+            FileIO fileIO, RowType readRowType, Map<Range, DeletionFile> deletionFiles)
+            throws IOException {
+        if (deletionFiles == null || deletionFiles.isEmpty()) {
+            return Info.noDeletionVectors(readRowType);
+        }
+
+        List<RowRangeDeletionVector> loaded = new ArrayList<>(deletionFiles.size());
+        for (Map.Entry<Range, DeletionFile> rangeAndFile : deletionFiles.entrySet()) {
+            DeletionVector dv = DeletionVector.read(fileIO, rangeAndFile.getValue());
+            if (dv.isEmpty()) {
+                continue;
+            }
+            loaded.add(new RowRangeDeletionVector(rangeAndFile.getKey(), dv));
+        }
+        if (loaded.isEmpty()) {
+            return Info.noDeletionVectors(readRowType);
+        }
+
+        int existingRowIdIndex = readRowType.getFieldIndex(SpecialFields.ROW_ID.name());
+        if (existingRowIdIndex != -1) {
+            // The caller already reads the row id: no extra field and no projection needed.
+            return new Info(loaded, existingRowIdIndex, readRowType, null);
+        }
+
+        // The row id is not requested: read it additionally (it is taken to be the last field of
+        // the enriched type) and project rows back to the requested leading fields.
+        RowType withRowId = SpecialFields.rowTypeWithRowId(readRowType);
+        int requestedFieldCount = readRowType.getFieldCount();
+        int[] identityMapping = new int[requestedFieldCount];
+        for (int pos = 0; pos < requestedFieldCount; pos++) {
+            identityMapping[pos] = pos;
+        }
+        return new Info(
+                loaded,
+                withRowId.getFieldCount() - 1,
+                withRowId,
+                ProjectedRow.from(identityMapping));
+    }
+
+    /**
+     * Information for data evolution deletion vector applying. Instances are produced by {@link
+     * DataEvolutionApplyDvReader#readInfo}.
+     */
+    public static class Info {
+
+        // Non-empty deletion vectors with their row id ranges; empty when there is nothing to apply.
+        private final List<RowRangeDeletionVector> deletionVectors;
+        // Position of the row id field in actualReadType; -1 when there are no deletion vectors.
+        private final int rowIdIndex;
+        // Row type the underlying reader should read: the requested type, extended with the row
+        // id field when deletion vectors exist and the requested type does not contain it.
+        public final RowType actualReadType;
+        // Projection back to the requested fields; set only when the row id field was added.
+        @Nullable private final ProjectedRow projectedRow;
+
+        private Info(
+                List<RowRangeDeletionVector> deletionVectors,
+                int rowIdIndex,
+                RowType actualReadType,
+                @Nullable ProjectedRow projectedRow) {
+            this.deletionVectors = deletionVectors;
+            this.rowIdIndex = rowIdIndex;
+            this.actualReadType = actualReadType;
+            this.projectedRow = projectedRow;
+        }
+
+        /** Creates an Info without deletion vectors that leaves {@code readRowType} unchanged. */
+        private static Info noDeletionVectors(RowType readRowType) {
+            return new Info(Collections.emptyList(), -1, readRowType, null);
+        }
+
+        /** Returns whether at least one deletion vector has to be applied. */
+        public boolean hasDeletionVectors() {
+            return !deletionVectors.isEmpty();
+        }
+    }
+
+    /** Pairs a deletion vector with the row id range it was registered for. */
+    private static class RowRangeDeletionVector {
+
+        private final Range range;
+        private final DeletionVector deletionVector;
+
+        private RowRangeDeletionVector(Range range, DeletionVector deletionVector) {
+            this.range = range;
+            this.deletionVector = deletionVector;
+        }
+
+        /** Returns whether {@code rowId} lies within the range, both bounds included. */
+        boolean mayContains(long rowId) {
+            return range.from <= rowId && rowId <= range.to;
+        }
+
+        /** Checks the vector at the position of {@code rowId} relative to the range start. */
+        boolean isDeleted(long rowId) {
+            long positionInRange = rowId - range.from;
+            return deletionVector.isDeleted(positionInRange);
+        }
+    }
+}