diff --git a/build.sh b/build.sh
index acef0765b43baf..205682b39f8dc7 100755
--- a/build.sh
+++ b/build.sh
@@ -1094,6 +1094,23 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
done
unset CONN_PLUGIN_DIR conn_module conn_plugin_target conn_module_dir conn_zip
+ # RC-4: self-contain the paimon connector plugin for OSS. The connector sets
+ # fs.oss.impl=com.aliyun.jindodata.oss.JindoOssFileSystem; that impl lives in the jindofs jars,
+ # which are packaged from thirdparty by post-build.sh into fe/lib/jindofs (NOT a maven artifact).
+ # The plugin runs child-first, so without its OWN copy JindoOssFileSystem resolves from the parent
+ # 'app' classloader and cannot be cast to the plugin's child-loaded org.apache.hadoop.fs.FileSystem.
+ # Copy the jindofs jars into the paimon plugin lib so JindoOssFileSystem loads child-first alongside
+ # the plugin's own hadoop FileSystem (same self-contained intent as the bundled hadoop-aws/S3A).
+ # Naturally gated: a no-op unless jindofs was packaged (--jindofs / DISABLE_BUILD_JINDOFS=OFF).
+ # CAVEAT (docker-gated, enablePaimonTest=true): jindo-core ships a native lib that can bind to only one
+ # classloader per JVM, so this is safe only while no concurrent non-paimon path loads jindo from
+ # fe/lib/jindofs in the same FE process.
+    PAIMON_CONN_LIB="${DORIS_OUTPUT}/fe/plugins/connector/paimon/lib"
+    if [[ -d "${PAIMON_CONN_LIB}" ]] && compgen -G "${DORIS_OUTPUT}/fe/lib/jindofs/*.jar" >/dev/null; then # skip unless >=1 jindofs jar was packaged
+        cp -p "${DORIS_OUTPUT}/fe/lib/jindofs/"*.jar "${PAIMON_CONN_LIB}/" || echo "WARNING: failed to copy jindofs jars into ${PAIMON_CONN_LIB}" >&2
+    fi
+    unset PAIMON_CONN_LIB
+
if [ "${TARGET_SYSTEM}" = "Darwin" ] || [ "${TARGET_SYSTEM}" = "Linux" ]; then
mkdir -p "${DORIS_OUTPUT}/fe/arthas"
rm -rf "${DORIS_OUTPUT}/fe/arthas/*"
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/maxcompute/MCUtils.java b/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MCUtils.java
similarity index 97%
rename from fe/fe-common/src/main/java/org/apache/doris/common/maxcompute/MCUtils.java
rename to fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MCUtils.java
index fc7f47fc2689a8..225f953b82e753 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/maxcompute/MCUtils.java
+++ b/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MCUtils.java
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-package org.apache.doris.common.maxcompute;
+package org.apache.doris.maxcompute;
+
+import org.apache.doris.common.maxcompute.MCProperties;
import com.aliyun.auth.credentials.Credential;
import com.aliyun.auth.credentials.provider.EcsRamRoleCredentialProvider;
diff --git a/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java b/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java
index 336991f3802726..fad4c82a9245da 100644
--- a/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java
+++ b/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java
@@ -19,7 +19,6 @@
import org.apache.doris.common.jni.JniScanner;
import org.apache.doris.common.jni.vec.ColumnType;
-import org.apache.doris.common.maxcompute.MCUtils;
import com.aliyun.odps.Odps;
import com.aliyun.odps.table.configuration.CompressionCodec;
diff --git a/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniWriter.java b/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniWriter.java
index 9788184057ee74..c13d5cdc4f3a9e 100644
--- a/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniWriter.java
+++ b/fe/be-java-extensions/max-compute-connector/src/main/java/org/apache/doris/maxcompute/MaxComputeJniWriter.java
@@ -21,7 +21,6 @@
import org.apache.doris.common.jni.vec.VectorColumn;
import org.apache.doris.common.jni.vec.VectorTable;
import org.apache.doris.common.maxcompute.MCProperties;
-import org.apache.doris.common.maxcompute.MCUtils;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsType;
diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
index b40ff54fbd829c..7ae509152a4be3 100644
--- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
+++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
@@ -37,6 +37,7 @@
import java.io.IOException;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
@@ -134,6 +135,12 @@ private int[] getProjected() {
}
private List getPredicates() {
+ // Backstop for a missing paimon_predicate param (scan with no pushed-down filter): a null here means
+ // "no filter", not an error. Guard the unconditional deserialize so the JNI reader never NPEs on
+ // deserialize(null) ("encodedStr is null"). The FE producer also always emits an (empty) predicate now.
+ if (paimonPredicate == null) {
+ return Collections.emptyList();
+ }
List predicates = PaimonUtils.deserialize(paimonPredicate);
if (LOG.isDebugEnabled()) {
LOG.debug("predicates:{}", predicates);
diff --git a/fe/be-java-extensions/preload-extensions/pom.xml b/fe/be-java-extensions/preload-extensions/pom.xml
index 6ec9b1e6158d7f..7ffc2ea15c3a37 100644
--- a/fe/be-java-extensions/preload-extensions/pom.xml
+++ b/fe/be-java-extensions/preload-extensions/pom.xml
@@ -62,6 +62,18 @@ under the License.
commons-io
${commons-io.version}
+
+
+ commons-lang
+ commons-lang
+ runtime
+
org.apache.arrow
arrow-memory-unsafe
diff --git a/fe/fe-common/pom.xml b/fe/fe-common/pom.xml
index 3452c3e596775c..35dc8860944560 100644
--- a/fe/fe-common/pom.xml
+++ b/fe/fe-common/pom.xml
@@ -134,23 +134,15 @@ under the License.
antlr4-runtime
${antlr4.version}
+
- com.aliyun.odps
- odps-sdk-core
-
-
- org.apache.arrow
- arrow-vector
-
-
- org.ini4j
- ini4j
-
-
- org.bouncycastle
- bcprov-jdk18on
-
-
+ io.netty
+ netty-all
+
+
+ com.google.protobuf
+ protobuf-java
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/Connector.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/Connector.java
index cd2b1766adaec2..a3c8eafa415dbd 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/Connector.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/Connector.java
@@ -18,11 +18,13 @@
package org.apache.doris.connector.api;
import org.apache.doris.connector.api.scan.ConnectorScanPlanProvider;
+import org.apache.doris.connector.api.write.ConnectorWritePlanProvider;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
+import java.util.OptionalLong;
import java.util.Set;
/**
@@ -41,6 +43,14 @@ default ConnectorScanPlanProvider getScanPlanProvider() {
return null;
}
+ /**
+ * Returns the write plan provider for sink ({@code TDataSink}) generation,
+ * or {@code null} if this connector does not support writes.
+ */
+ default ConnectorWritePlanProvider getWritePlanProvider() {
+ return null;
+ }
+
/** Returns the set of capabilities this connector supports. */
default Set getCapabilities() {
return Collections.emptySet();
@@ -118,4 +128,28 @@ default void close() throws IOException {
default String executeRestRequest(String path, String body) {
throw new UnsupportedOperationException("REST passthrough not supported by this connector");
}
+
+ /**
+ * Invalidates any connector-side per-table cache (e.g. a latest-snapshot/version cache) so a subsequent
+ * read reflects the latest external state. Called by the engine on {@code REFRESH TABLE}. The names are
+ * the REMOTE db/table names (as seen by the connector). Default no-op for connectors that cache nothing.
+ */
+ default void invalidateTable(String dbName, String tableName) {
+ }
+
+ /** Invalidates all connector-side per-table caches. Default no-op. */
+ default void invalidateAll() {
+ }
+
+ /**
+ * Optional per-connector override of the catalog's schema-cache TTL (in seconds), consulted generically by
+ * the engine when sizing the schema meta-cache. Semantics match {@code schema.cache.ttl-second}:
+ * {@code 0} disables schema caching (always read fresh), {@code -1} means no expiration, {@code > 0} is a TTL.
+ * Lets a connector make its own cache knob also govern schema freshness (e.g. paimon's
+ * {@code meta.cache.paimon.table.ttl-second}, which the legacy implementation used for the whole table
+ * cache). An explicit user {@code schema.cache.ttl-second} always wins. Default: empty (no override).
+ */
+ default OptionalLong schemaCacheTtlSecondOverride() {
+ return OptionalLong.empty();
+ }
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorCapability.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorCapability.java
index 53337ed656a3c2..c5e89dfd2cadae 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorCapability.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorCapability.java
@@ -49,6 +49,37 @@ public enum ConnectorCapability {
* parallel writers should declare this capability.
*/
SUPPORTS_PARALLEL_WRITE,
+ /**
+ * Indicates the connector requires dynamic-partition writes to be hash-distributed by
+ * partition columns and locally sorted by them before reaching the sink.
+ *
+ * Streaming partition writers (e.g. the MaxCompute Storage API) close the previous
+ * partition writer as soon as a new partition value appears; un-grouped (unsorted)
+ * multi-partition rows therefore cause "writer has been closed" errors. The planner uses
+ * this capability to require a hash-by-partition distribution plus a mandatory local sort
+ * on the partition columns for dynamic-partition writes.
+ *
+ * A connector declaring this is expected to also declare
+ * {@link #SUPPORTS_PARALLEL_WRITE} (hash distribution is inherently parallel) and
+ * {@link #SINK_REQUIRE_FULL_SCHEMA_ORDER}: the sink distribution locates partition columns by their
+ * full-schema position in the child output, which only holds when the bind layer projects the
+ * write to full-schema order (the projection gated by {@code SINK_REQUIRE_FULL_SCHEMA_ORDER}). A
+ * connector declaring this without {@code SINK_REQUIRE_FULL_SCHEMA_ORDER} would shuffle/sort by the
+ * wrong column whenever cols order diverges from the full schema.
+ */
+ SINK_REQUIRE_PARTITION_LOCAL_SORT,
+ /**
+ * Indicates the connector's write path maps data columns positionally against the full
+ * table schema (e.g. MaxCompute's columnar Storage API / JNI writer), rather than by column name.
+ *
+ * For such connectors the sink's output rows must be projected to full table schema order
+ * with any unmentioned columns filled (NULL / default) — exactly like the legacy MaxCompute bind
+ * path — so that a reordered or partial explicit column list does not land values in the wrong
+ * remote columns. Name-mapped connectors (e.g. JDBC, which builds an {@code INSERT INTO t (cols)}
+ * statement) must NOT declare this capability: their data stays in user/cols order to match the
+ * generated column list.
+ */
+ SINK_REQUIRE_FULL_SCHEMA_ORDER,
/**
* Indicates the connector supports passthrough query via the {@code query()} TVF.
*
@@ -56,5 +87,16 @@ public enum ConnectorCapability {
* {@link ConnectorTableOps#getColumnsFromQuery} to provide column metadata
* for arbitrary SQL queries passed through to the remote data source.
*/
- SUPPORTS_PASSTHROUGH_QUERY
+ SUPPORTS_PASSTHROUGH_QUERY,
+ /**
+ * Indicates the connector exposes per-partition statistics (record count, on-disk size,
+ * file count) via {@link ConnectorTableOps#listPartitions}.
+ *
+ * {@code SHOW PARTITIONS} renders a rich multi-column result (Partition / PartitionKey /
+ * RecordCount / FileSizeInBytes / FileCount) for connectors declaring this capability, instead
+ * of the single partition-name column used by connectors that only implement
+ * {@code listPartitionNames}. This is distinct from {@link #SUPPORTS_STATISTICS}, which is
+ * table-level statistics for the optimizer.
+ */
+ SUPPORTS_PARTITION_STATS
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorColumn.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorColumn.java
index 5b8b537d0a3841..5e1e2db86162ef 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorColumn.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorColumn.java
@@ -30,6 +30,13 @@ public final class ConnectorColumn {
private final boolean nullable;
private final String defaultValue;
private final boolean isKey;
+ private final boolean isAutoInc;
+ private final boolean isAggregated;
+ // Marks a "with local time zone" timestamp column. fe-core's ConnectorColumnConverter translates
+ // this into Column.setWithTZExtraInfo() so DESC shows the WITH_TIMEZONE "Extra" marker, matching
+ // legacy PaimonExternalTable/PaimonSysExternalTable/IcebergUtils which set it from the SOURCE type
+ // root regardless of the timestamp_tz mapping flag. Defaults false; set via withTimeZone().
+ private final boolean withTimeZone;
public ConnectorColumn(String name, ConnectorType type, String comment,
boolean nullable, String defaultValue) {
@@ -38,12 +45,42 @@ public ConnectorColumn(String name, ConnectorType type, String comment,
public ConnectorColumn(String name, ConnectorType type, String comment,
boolean nullable, String defaultValue, boolean isKey) {
+ this(name, type, comment, nullable, defaultValue, isKey, false);
+ }
+
+ public ConnectorColumn(String name, ConnectorType type, String comment,
+ boolean nullable, String defaultValue, boolean isKey, boolean isAutoInc) {
+ this(name, type, comment, nullable, defaultValue, isKey, isAutoInc, false);
+ }
+
+ public ConnectorColumn(String name, ConnectorType type, String comment,
+ boolean nullable, String defaultValue, boolean isKey, boolean isAutoInc,
+ boolean isAggregated) {
+ this(name, type, comment, nullable, defaultValue, isKey, isAutoInc, isAggregated, false);
+ }
+
+ private ConnectorColumn(String name, ConnectorType type, String comment,
+ boolean nullable, String defaultValue, boolean isKey, boolean isAutoInc,
+ boolean isAggregated, boolean withTimeZone) {
this.name = Objects.requireNonNull(name, "name");
this.type = Objects.requireNonNull(type, "type");
this.comment = comment;
this.nullable = nullable;
this.defaultValue = defaultValue;
this.isKey = isKey;
+ this.isAutoInc = isAutoInc;
+ this.isAggregated = isAggregated;
+ this.withTimeZone = withTimeZone;
+ }
+
+ /**
+ * Returns a copy of this column marked as a "with local time zone" timestamp. See
+ * {@link #isWithTimeZone()}; the marker is intentionally orthogonal to the mapped {@link #getType()}
+ * so it survives even when the column is mapped to a plain DATETIME (timestamp_tz mapping off).
+ */
+ public ConnectorColumn withTimeZone() {
+ return new ConnectorColumn(name, type, comment, nullable, defaultValue,
+ isKey, isAutoInc, isAggregated, true);
}
public String getName() {
@@ -70,6 +107,18 @@ public boolean isKey() {
return isKey;
}
+ public boolean isAutoInc() {
+ return isAutoInc;
+ }
+
+ public boolean isAggregated() {
+ return isAggregated;
+ }
+
+ public boolean isWithTimeZone() {
+ return withTimeZone;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -81,6 +130,9 @@ public boolean equals(Object o) {
ConnectorColumn that = (ConnectorColumn) o;
return nullable == that.nullable
&& isKey == that.isKey
+ && isAutoInc == that.isAutoInc
+ && isAggregated == that.isAggregated
+ && withTimeZone == that.withTimeZone
&& name.equals(that.name)
&& type.equals(that.type)
&& Objects.equals(comment, that.comment)
@@ -89,7 +141,8 @@ public boolean equals(Object o) {
@Override
public int hashCode() {
- return Objects.hash(name, type, comment, nullable, defaultValue, isKey);
+ return Objects.hash(name, type, comment, nullable, defaultValue, isKey, isAutoInc, isAggregated,
+ withTimeZone);
}
@Override
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorMetadata.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorMetadata.java
index 56adb847880e80..6b1bf79b65de87 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorMetadata.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorMetadata.java
@@ -17,10 +17,15 @@
package org.apache.doris.connector.api;
+import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.mvcc.ConnectorMvccSnapshot;
+import org.apache.doris.connector.api.mvcc.ConnectorTimeTravelSpec;
+
import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
+import java.util.Optional;
/**
* Central metadata interface that a connector must implement.
@@ -44,6 +49,59 @@ default Map getProperties() {
return Collections.emptyMap();
}
+ // ──────────────────── MVCC Snapshots ────────────────────
+
+ /**
+ * Returns the current snapshot at query begin time, used as the MVCC pin
+ * for all subsequent reads of {@code handle}.
+ *
+ * Returning {@link Optional#empty()} means the connector does not
+ * support MVCC and reads see whatever is current.
+ */
+ default Optional beginQuerySnapshot(
+ ConnectorSession session, ConnectorTableHandle handle) {
+ return Optional.empty();
+ }
+
+ /**
+ * Resolves an explicit time-travel spec (extracted from {@code FOR TIME AS OF} /
+ * {@code FOR VERSION AS OF}, or the {@code @tag} / {@code @branch} / {@code @incr}
+ * scan params) into a pinned snapshot.
+ *
+ * The connector owns all provider-specific parsing of {@code spec} (snapshot-id
+ * lookup, datetime parsing, tag/branch resolution, incremental-window validation).
+ * The returned snapshot's {@link ConnectorMvccSnapshot#getProperties()} carries the
+ * connector's scan options and its {@link ConnectorMvccSnapshot#getSchemaId()} is the
+ * resolved schema version.
+ *
+ * Returns {@link Optional#empty()} when the spec is unsupported or the target is not
+ * found, in which case the engine surfaces a user error. The default returns empty:
+ * connectors without time-travel do not honor explicit specs.
+ */
+ default Optional resolveTimeTravel(
+ ConnectorSession session, ConnectorTableHandle handle,
+ ConnectorTimeTravelSpec spec) {
+ return Optional.empty();
+ }
+
+ /**
+ * Threads a pinned MVCC / time-travel {@code snapshot} into the table handle BEFORE
+ * {@code planScan}, so an MVCC-capable connector can return a handle that reads at that
+ * snapshot (mirrors the {@code applyFilter} / {@code applyProjection} handle-update pattern).
+ *
+ * Contract for MVCC connectors: thread the FULL {@code snapshot.getProperties()}
+ * (the scan-options map) into the returned handle so the read path sees exactly the
+ * connector-resolved options. When {@code properties} is empty, fall back to setting
+ * {@code scan.snapshot-id = snapshot.getSnapshotId()} (latest-pin parity).
+ *
+ * The default returns {@code handle} unchanged: connectors without time-travel ignore the
+ * pin and read whatever is current.
+ */
+ default ConnectorTableHandle applySnapshot(ConnectorSession session,
+ ConnectorTableHandle handle, ConnectorMvccSnapshot snapshot) {
+ return handle; // default: connectors without time-travel ignore the pin
+ }
+
@Override
default void close() throws IOException {
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorPartitionInfo.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorPartitionInfo.java
index fb8d8879ee420a..fe345d0b620bce 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorPartitionInfo.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorPartitionInfo.java
@@ -26,13 +26,32 @@
*/
public final class ConnectorPartitionInfo {
+ /** Sentinel for "unknown" on the numeric stats fields. */
+ public static final long UNKNOWN = -1L;
+
private final String partitionName;
private final Map partitionValues;
private final Map properties;
+ private final long rowCount;
+ private final long sizeBytes;
+ private final long lastModifiedMillis;
+ private final long fileCount;
+ /**
+ * Backward-compatible constructor. Numeric stats fields are set to
+ * {@link #UNKNOWN}.
+ */
public ConnectorPartitionInfo(String partitionName,
Map partitionValues,
Map properties) {
+ this(partitionName, partitionValues, properties,
+ UNKNOWN, UNKNOWN, UNKNOWN, UNKNOWN);
+ }
+
+ public ConnectorPartitionInfo(String partitionName,
+ Map partitionValues,
+ Map properties,
+ long rowCount, long sizeBytes, long lastModifiedMillis, long fileCount) {
this.partitionName = Objects.requireNonNull(
partitionName, "partitionName");
this.partitionValues = partitionValues == null
@@ -41,6 +60,10 @@ public ConnectorPartitionInfo(String partitionName,
this.properties = properties == null
? Collections.emptyMap()
: Collections.unmodifiableMap(properties);
+ this.rowCount = rowCount;
+ this.sizeBytes = sizeBytes;
+ this.lastModifiedMillis = lastModifiedMillis;
+ this.fileCount = fileCount;
}
public String getPartitionName() {
@@ -55,6 +78,26 @@ public Map getProperties() {
return properties;
}
+ /** @return row count, or {@link #UNKNOWN} when not collected. */
+ public long getRowCount() {
+ return rowCount;
+ }
+
+ /** @return on-disk size in bytes, or {@link #UNKNOWN}. */
+ public long getSizeBytes() {
+ return sizeBytes;
+ }
+
+ /** @return last-modified epoch millis, or {@link #UNKNOWN}. */
+ public long getLastModifiedMillis() {
+ return lastModifiedMillis;
+ }
+
+ /** @return number of data files in the partition, or {@link #UNKNOWN}. */
+ public long getFileCount() {
+ return fileCount;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -64,19 +107,27 @@ public boolean equals(Object o) {
return false;
}
ConnectorPartitionInfo that = (ConnectorPartitionInfo) o;
- return partitionName.equals(that.partitionName)
+ return rowCount == that.rowCount
+ && sizeBytes == that.sizeBytes
+ && lastModifiedMillis == that.lastModifiedMillis
+ && fileCount == that.fileCount
+ && partitionName.equals(that.partitionName)
&& partitionValues.equals(that.partitionValues)
&& properties.equals(that.properties);
}
@Override
public int hashCode() {
- return Objects.hash(partitionName, partitionValues, properties);
+ return Objects.hash(partitionName, partitionValues, properties,
+ rowCount, sizeBytes, lastModifiedMillis, fileCount);
}
@Override
public String toString() {
return "ConnectorPartitionInfo{name='" + partitionName
- + "', values=" + partitionValues + "}";
+ + "', values=" + partitionValues
+ + ", rowCount=" + rowCount // numeric stats print as -1 (UNKNOWN) when not collected
+ + ", sizeBytes=" + sizeBytes
+ + ", lastModifiedMillis=" + lastModifiedMillis + ", fileCount=" + fileCount + "}";
}
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSchemaOps.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSchemaOps.java
index addb6d929ac20f..da6bfeac408266 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSchemaOps.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSchemaOps.java
@@ -44,6 +44,16 @@ default ConnectorDatabaseMetadata getDatabase(
"getDatabase not implemented");
}
+ /**
+ * Whether this connector supports CREATE DATABASE. Defaults to false so the FE
+ * {@code CREATE DATABASE IF NOT EXISTS} remote existence precheck applies only to
+ * connectors that can actually create databases; connectors that cannot keep their
+ * existing "CREATE DATABASE not supported" behavior unchanged.
+ */
+ default boolean supportsCreateDatabase() {
+ return false;
+ }
+
/** Creates a new database with the given name and properties. */
default void createDatabase(ConnectorSession session,
String dbName, Map properties) {
@@ -57,4 +67,15 @@ default void dropDatabase(ConnectorSession session,
throw new DorisConnectorException(
"DROP DATABASE not supported");
}
+
+ /**
+ * Drops the specified database, cascading to its tables when {@code force} is
+ * true. The default delegates to the non-cascading 3-arg form, so connectors
+ * that do not support cascade keep their current behavior with zero change;
+ * a connector that supports FORCE overrides this overload.
+ */
+ default void dropDatabase(ConnectorSession session,
+ String dbName, boolean ifExists, boolean force) {
+ dropDatabase(session, dbName, ifExists);
+ }
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSession.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSession.java
index 16a471b7dbd4b1..5e151ccb7da4eb 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSession.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorSession.java
@@ -17,7 +17,10 @@
package org.apache.doris.connector.api;
+import org.apache.doris.connector.api.handle.ConnectorTransaction;
+
import java.util.Map;
+import java.util.Optional;
/**
* Session context passed to every connector operation.
@@ -60,4 +63,43 @@ public interface ConnectorSession {
default Map getSessionProperties() {
return java.util.Collections.emptyMap();
}
+
+ /**
+ * Returns the transaction this session is currently bound to, if any.
+ *
+ * Used by connectors whose {@code begin*} write operations need to
+ * attach work to an outer transaction opened by
+ * {@link ConnectorWriteOps#beginTransaction(ConnectorSession)}.
+ * Connectors with statement-scoped writes (e.g. JDBC auto-commit) can
+ * ignore this method; the default returns {@link Optional#empty()}.
+ */
+ default Optional getCurrentTransaction() {
+ return Optional.empty();
+ }
+
+ /**
+ * Binds a transaction to this session so that connector {@code begin*} /
+ * {@code planWrite} operations can attach their work to it. Mutable session
+ * implementations (e.g. the engine's {@code ConnectorSessionImpl}) override
+ * this; the default rejects binding, matching the empty default of
+ * {@link #getCurrentTransaction()}.
+ */
+ default void setCurrentTransaction(ConnectorTransaction txn) {
+ throw new UnsupportedOperationException("setCurrentTransaction is not supported by this session");
+ }
+
+ /**
+ * Allocates a globally-unique engine (Doris) transaction id for a connector
+ * transaction opened via {@link ConnectorWriteOps#beginTransaction(ConnectorSession)}.
+ *
+ * The id is the engine-side transaction id: it is registered in the engine
+ * transaction registry and stamped into the connector's data sink, so a
+ * connector must obtain it from the engine rather than mint its own. The
+ * default throws; the engine session implementation overrides it.
+ *
+ * @return a fresh engine transaction id
+ */
+ default long allocateTransactionId() {
+ throw new UnsupportedOperationException("transaction id allocation not supported");
+ }
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorTableOps.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorTableOps.java
index 8a6caa7cb84f6f..c397a0e18d7b2d 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorTableOps.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorTableOps.java
@@ -17,8 +17,11 @@
package org.apache.doris.connector.api;
+import org.apache.doris.connector.api.ddl.ConnectorCreateTableRequest;
import org.apache.doris.connector.api.handle.ConnectorColumnHandle;
import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.mvcc.ConnectorMvccSnapshot;
+import org.apache.doris.connector.api.pushdown.ConnectorExpression;
import java.util.Collections;
import java.util.List;
@@ -37,6 +40,32 @@ default Optional getTableHandle(
return Optional.empty();
}
+ /**
+ * Lists the system-table names supported for the given base table
+ * (e.g. ["snapshots", "schemas", "options", "audit_log", "binlog"]).
+ *
+ * The names are WITHOUT any "$" prefix; fe-core composes the
+ * "{baseTable}${sysName}" reference name. Default: empty (no system
+ * tables). Implemented by connectors that expose system tables.
+ */
+ default List listSupportedSysTables(ConnectorSession session,
+ ConnectorTableHandle baseTableHandle) {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Returns a handle for the named system table of the given base table,
+ * or empty if this connector does not expose that system table.
+ *
+ * The returned handle is connector-internal and carries whatever the
+ * connector needs (system-table name, scan-routing hints, etc.); it is
+ * opaque to fe-core. {@code sysName} is the bare name (no "$").
+ */
+ default Optional getSysTableHandle(ConnectorSession session,
+ ConnectorTableHandle baseTableHandle, String sysName) {
+ return Optional.empty();
+ }
+
/** Returns the schema (columns, format, etc.) for the given table. */
default ConnectorTableSchema getTableSchema(
ConnectorSession session, ConnectorTableHandle handle) {
@@ -44,6 +73,20 @@ default ConnectorTableSchema getTableSchema(
"getTableSchema not implemented");
}
+ /**
+ * Returns the schema AT {@code snapshot.getSchemaId()} — the schema as of the
+ * pinned snapshot, for time-travel reads under schema evolution.
+ *
+ * The default ignores the snapshot and returns the latest schema via
+ * {@link #getTableSchema(ConnectorSession, ConnectorTableHandle)}. A connector that
+ * supports schema-at-snapshot overrides this to resolve the schema version.
+ */
+ default ConnectorTableSchema getTableSchema(
+ ConnectorSession session, ConnectorTableHandle handle,
+ ConnectorMvccSnapshot snapshot) {
+ return getTableSchema(session, handle);
+ }
+
/** Returns a name-to-handle map for all columns of the table. */
default Map getColumnHandles(
ConnectorSession session, ConnectorTableHandle handle) {
@@ -65,6 +108,27 @@ default void createTable(ConnectorSession session,
"CREATE TABLE not supported");
}
+ /**
+ * Creates a table with full DDL semantics (partition, bucket, external,
+ * {@code IF NOT EXISTS}).
+ *
+ * Connectors should override this when they support advanced
+ * {@code CREATE TABLE} options. The default degrades to the legacy
+ * {@link #createTable(ConnectorSession, ConnectorTableSchema, Map)},
+ * dropping partition / bucket / external / {@code ifNotExists} info.
+ *
+ * @throws DorisConnectorException if the connector cannot honor the request
+ */
+ default void createTable(ConnectorSession session,
+ ConnectorCreateTableRequest request) {
+ ConnectorTableSchema schema = new ConnectorTableSchema(
+ request.getTableName(),
+ request.getColumns(),
+ null,
+ request.getProperties());
+ createTable(session, schema, request.getProperties());
+ }
+
/** Drops the specified table. */
default void dropTable(ConnectorSession session,
ConnectorTableHandle handle) {
@@ -126,4 +190,38 @@ default org.apache.doris.thrift.TTableDescriptor buildTableDescriptor(
String remoteName, int numCols, long catalogId) {
return null;
}
+
+ /**
+ * Lists all partition display names (e.g., {@code "year=2024/month=01"}).
+ *
+ * <p>Should be cheap and avoid loading per-partition metadata. The default returns an empty list.
+ */
+ default List listPartitionNames(ConnectorSession session,
+ ConnectorTableHandle handle) {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Lists partitions matching the optional filter, with full metadata.
+ *
+ * <p>Connectors should push the filter into the metastore / catalog when
+ * possible. {@code filter} is empty when the caller wants the full list. The default returns an empty list.
+ */
+ default List listPartitions(ConnectorSession session,
+ ConnectorTableHandle handle,
+ Optional filter) {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Lists distinct partition column value combinations for the given columns.
+ *
+ * <p>Used by the {@code partition_values()} TVF and by column-distinct-value
+ * optimizations. Inner list order matches {@code partitionColumns}. The default returns an empty list.
+ */
+ default List> listPartitionValues(ConnectorSession session,
+ ConnectorTableHandle handle,
+ List partitionColumns) {
+ return Collections.emptyList();
+ }
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorWriteOps.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorWriteOps.java
index 8c20247867d3ee..c30c845f11022a 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorWriteOps.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ConnectorWriteOps.java
@@ -21,6 +21,7 @@
import org.apache.doris.connector.api.handle.ConnectorInsertHandle;
import org.apache.doris.connector.api.handle.ConnectorMergeHandle;
import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.handle.ConnectorTransaction;
import org.apache.doris.connector.api.write.ConnectorWriteConfig;
import java.util.Collection;
@@ -47,6 +48,16 @@ default boolean supportsInsert() {
return false;
}
+ /**
+ * Returns {@code true} if this connector supports INSERT OVERWRITE (truncate-and-insert)
+ * semantics. A connector that supports plain INSERT but not overwrite must keep this
+ * {@code false} so callers reject the command up front (fail loud) instead of silently
+ * degrading OVERWRITE to a plain append. Defaults to {@code false}.
+ */
+ default boolean supportsInsertOverwrite() {
+ return false;
+ }
+
/** Returns {@code true} if this connector supports DELETE operations. */
default boolean supportsDelete() {
return false;
@@ -57,6 +68,22 @@ default boolean supportsMerge() {
return false;
}
+ /**
+ * Returns {@code true} if this connector uses the SPI transaction model: the engine
+ * opens a {@link ConnectorTransaction} via
+ * {@link #beginTransaction(ConnectorSession)}, binds it to the {@link ConnectorSession},
+ * and the connector's write plan attaches to that transaction (e.g. maxcompute).
+ * Connectors with statement-scoped / auto-commit writes (e.g. jdbc) leave this
+ * {@code false} (the default) and use the {@code beginInsert} / {@code finishInsert} handle model.
+ *
+ * <p>The executor routes on this before touching any throwing-default write
+ * method, so connectors that only support the transaction model need not implement
+ * {@code getWriteConfig} / {@code beginInsert}.
+ */
+ default boolean usesConnectorTransaction() {
+ return false;
+ }
+
// ──────────────────── Write Configuration ────────────────────
/**
@@ -197,4 +224,20 @@ default void abortMerge(ConnectorSession session,
ConnectorMergeHandle handle) {
// default: no-op
}
+
+ // ──────────────────── TRANSACTION ────────────────────
+
+ /**
+ * Begins a new transaction scoped to a single SQL statement (auto-commit)
+ * or to an explicit BEGIN..COMMIT block. The returned transaction is passed
+ * to subsequent {@code begin*} / {@code finish*} / {@code abort*} calls via
+ * the same {@link ConnectorSession}.
+ *
+ * <p>Connectors that do not support multi-statement transactions can either
+ * return a no-op transaction whose commit/rollback do nothing, or throw (as this default does,
+ * with a {@link DorisConnectorException}), in which case the engine treats every statement as auto-commit.
+ */
+ default ConnectorTransaction beginTransaction(ConnectorSession session) {
+ throw new DorisConnectorException("Transactions not supported");
+ }
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorBucketSpec.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorBucketSpec.java
new file mode 100644
index 00000000000000..32c5381a279658
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorBucketSpec.java
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.ddl;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Bucket / distribution specification carried by {@link ConnectorCreateTableRequest}.
+ * The column list is copied at construction, so later changes to the caller's list are not observed.
+ *
+ * <p>{@code algorithm} is a connector-known, non-null string. Common values:
+ * <ul>
+ * <li>{@code "hive_hash"} — Hive-compatible 32-bit hash.</li>
+ * <li>{@code "iceberg_bucket"} — Iceberg bucket transform.</li>
+ * <li>{@code "doris_default"} — Doris CRC32 distribution.</li>
+ * </ul>
+ */
+public final class ConnectorBucketSpec {
+
+ private final List columns;
+ private final int numBuckets;
+ private final String algorithm;
+
+ public ConnectorBucketSpec(List columns, int numBuckets,
+ String algorithm) {
+ this.columns = columns == null
+ ? Collections.emptyList()
+ : Collections.unmodifiableList(new java.util.ArrayList<>(columns)); // defensive copy
+ this.numBuckets = numBuckets;
+ this.algorithm = Objects.requireNonNull(algorithm, "algorithm");
+ }
+
+ public List getColumns() {
+ return columns;
+ }
+
+ public int getNumBuckets() {
+ return numBuckets;
+ }
+
+ public String getAlgorithm() {
+ return algorithm;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ConnectorBucketSpec)) {
+ return false;
+ }
+ ConnectorBucketSpec that = (ConnectorBucketSpec) o;
+ return numBuckets == that.numBuckets
+ && columns.equals(that.columns)
+ && algorithm.equals(that.algorithm);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(columns, numBuckets, algorithm);
+ }
+
+ @Override
+ public String toString() {
+ return "ConnectorBucketSpec{algorithm=" + algorithm
+ + ", columns=" + columns
+ + ", numBuckets=" + numBuckets + "}";
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorCreateTableRequest.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorCreateTableRequest.java
new file mode 100644
index 00000000000000..b3c9efe54cfa95
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorCreateTableRequest.java
@@ -0,0 +1,183 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.ddl;
+
+import org.apache.doris.connector.api.ConnectorColumn;
+
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Full {@code CREATE TABLE} payload passed to
+ * {@code ConnectorTableOps.createTable(session, request)}.
+ *
+ * <p>Carries partition / bucket / external / {@code IF NOT EXISTS} information
+ * absent from the legacy {@code createTable(session, ConnectorTableSchema, Map)} signature.
+ * Instances are created through {@link #builder()}; {@code dbName} and {@code tableName} are
+ * required, while {@code columns} and {@code properties} default to empty and are copied.
+ *
+ * <p>{@code partitionSpec} and {@code bucketSpec} are nullable when the
+ * underlying DDL omits them.
+ */
+public final class ConnectorCreateTableRequest {
+
+ private final String dbName;
+ private final String tableName;
+ private final List columns;
+ private final ConnectorPartitionSpec partitionSpec;
+ private final ConnectorBucketSpec bucketSpec;
+ private final String comment;
+ private final Map properties;
+ private final boolean ifNotExists;
+ private final boolean external;
+
+ private ConnectorCreateTableRequest(Builder b) {
+ this.dbName = Objects.requireNonNull(b.dbName, "dbName");
+ this.tableName = Objects.requireNonNull(b.tableName, "tableName");
+ this.columns = b.columns == null
+ ? Collections.emptyList()
+ : Collections.unmodifiableList(new java.util.ArrayList<>(b.columns)); // defensive copy
+ this.partitionSpec = b.partitionSpec;
+ this.bucketSpec = b.bucketSpec;
+ this.comment = b.comment;
+ this.properties = b.properties == null
+ ? Collections.emptyMap()
+ : Collections.unmodifiableMap(b.properties);
+ this.ifNotExists = b.ifNotExists;
+ this.external = b.external;
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public List getColumns() {
+ return columns;
+ }
+
+ /** @return partition spec, or {@code null} for non-partitioned tables. */
+ public ConnectorPartitionSpec getPartitionSpec() {
+ return partitionSpec;
+ }
+
+ /** @return bucket spec, or {@code null} when no bucketing is declared. */
+ public ConnectorBucketSpec getBucketSpec() {
+ return bucketSpec;
+ }
+
+ public String getComment() {
+ return comment;
+ }
+
+ public Map getProperties() {
+ return properties;
+ }
+
+ public boolean isIfNotExists() {
+ return ifNotExists;
+ }
+
+ public boolean isExternal() {
+ return external;
+ }
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ @Override
+ public String toString() {
+ return "ConnectorCreateTableRequest{" + dbName + "." + tableName
+ + ", cols=" + columns.size()
+ + ", partition=" + partitionSpec
+ + ", bucket=" + bucketSpec
+ + ", external=" + external
+ + ", ifNotExists=" + ifNotExists + "}";
+ }
+
+ public static final class Builder {
+ private String dbName;
+ private String tableName;
+ private List columns;
+ private ConnectorPartitionSpec partitionSpec;
+ private ConnectorBucketSpec bucketSpec;
+ private String comment;
+ private Map properties;
+ private boolean ifNotExists;
+ private boolean external;
+
+ public Builder dbName(String dbName) {
+ this.dbName = dbName;
+ return this;
+ }
+
+ public Builder tableName(String tableName) {
+ this.tableName = tableName;
+ return this;
+ }
+
+ public Builder columns(List columns) {
+ this.columns = columns;
+ return this;
+ }
+
+ public Builder partitionSpec(ConnectorPartitionSpec partitionSpec) {
+ this.partitionSpec = partitionSpec;
+ return this;
+ }
+
+ public Builder bucketSpec(ConnectorBucketSpec bucketSpec) {
+ this.bucketSpec = bucketSpec;
+ return this;
+ }
+
+ public Builder comment(String comment) {
+ this.comment = comment;
+ return this;
+ }
+
+ public Builder properties(Map properties) {
+ // defensive copy: later changes to the caller's map cannot leak in; insertion order is kept
+ this.properties = properties == null
+ ? null
+ : new LinkedHashMap<>(properties);
+ return this;
+ }
+
+ public Builder ifNotExists(boolean ifNotExists) {
+ this.ifNotExists = ifNotExists;
+ return this;
+ }
+
+ public Builder external(boolean external) {
+ this.external = external;
+ return this;
+ }
+
+ public ConnectorCreateTableRequest build() {
+ return new ConnectorCreateTableRequest(this);
+ }
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionField.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionField.java
new file mode 100644
index 00000000000000..ce16c29973440a
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionField.java
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.ddl;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A single field in a {@link ConnectorPartitionSpec}.
+ *
+ * <p>The {@code transform} string follows Appendix B of the connector SPI RFC:
+ * {@code identity / year / month / day / hour / bucket / truncate / list / range}.
+ * Unlisted values are treated as {@code CUSTOM} and interpreted by the connector.
+ *
+ * <p>{@code transformArgs} carries numeric parameters (e.g., {@code [16]} for
+ * {@code bucket(16, col)} or {@code [10]} for {@code truncate(10, col)}); {@code null} becomes an empty list.
+ */
+public final class ConnectorPartitionField {
+
+ private final String columnName;
+ private final String transform;
+ private final List transformArgs;
+
+ public ConnectorPartitionField(String columnName, String transform,
+ List transformArgs) {
+ this.columnName = Objects.requireNonNull(columnName, "columnName");
+ this.transform = Objects.requireNonNull(transform, "transform");
+ this.transformArgs = transformArgs == null
+ ? Collections.emptyList()
+ : Collections.unmodifiableList(new java.util.ArrayList<>(transformArgs)); // defensive copy
+ }
+
+ public String getColumnName() {
+ return columnName;
+ }
+
+ public String getTransform() {
+ return transform;
+ }
+
+ public List getTransformArgs() {
+ return transformArgs;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ConnectorPartitionField)) {
+ return false;
+ }
+ ConnectorPartitionField that = (ConnectorPartitionField) o;
+ return columnName.equals(that.columnName)
+ && transform.equals(that.transform)
+ && transformArgs.equals(that.transformArgs);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(columnName, transform, transformArgs);
+ }
+
+ @Override
+ public String toString() {
+ if (transformArgs.isEmpty()) {
+ return transform + "(" + columnName + ")";
+ }
+ return transform + transformArgs + "(" + columnName + ")"; // e.g. bucket[16](c)
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionSpec.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionSpec.java
new file mode 100644
index 00000000000000..2414661f3ed87f
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionSpec.java
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.ddl;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Partition specification carried by {@link ConnectorCreateTableRequest}.
+ *
+ * <p>{@link Style} distinguishes the four supported partition flavors:
+ * <ul>
+ * <li>{@code IDENTITY} — Hive style: {@code PARTITIONED BY (col1, col2)}.</li>
+ * <li>{@code TRANSFORM} — Iceberg style: {@code PARTITIONED BY (bucket(16, c), year(d))}.</li>
+ * <li>{@code LIST} — Doris {@code PARTITION BY LIST} with explicit value definitions.</li>
+ * <li>{@code RANGE} — Doris {@code PARTITION BY RANGE} with [lower, upper) tuples.</li>
+ * </ul>
+ *
+ * <p>{@code initialValues} is only meaningful for {@code LIST} / {@code RANGE} styles. Both lists are copied.
+ */
+public final class ConnectorPartitionSpec {
+
+ public enum Style {
+ IDENTITY,
+ TRANSFORM,
+ LIST,
+ RANGE,
+ }
+
+ private final Style style;
+ private final List fields;
+ private final List initialValues;
+
+ public ConnectorPartitionSpec(Style style,
+ List fields,
+ List initialValues) {
+ this.style = Objects.requireNonNull(style, "style");
+ this.fields = fields == null
+ ? Collections.emptyList()
+ : Collections.unmodifiableList(new java.util.ArrayList<>(fields)); // defensive copy
+ this.initialValues = initialValues == null
+ ? Collections.emptyList()
+ : Collections.unmodifiableList(new java.util.ArrayList<>(initialValues)); // defensive copy
+ }
+
+ public Style getStyle() {
+ return style;
+ }
+
+ public List getFields() {
+ return fields;
+ }
+
+ public List getInitialValues() {
+ return initialValues;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ConnectorPartitionSpec)) {
+ return false;
+ }
+ ConnectorPartitionSpec that = (ConnectorPartitionSpec) o;
+ return style == that.style
+ && fields.equals(that.fields)
+ && initialValues.equals(that.initialValues);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(style, fields, initialValues);
+ }
+
+ @Override
+ public String toString() {
+ return "ConnectorPartitionSpec{style=" + style
+ + ", fields=" + fields
+ + ", initialValues=" + initialValues.size() + "}";
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionValueDef.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionValueDef.java
new file mode 100644
index 00000000000000..e86acaa242b4fb
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/ddl/ConnectorPartitionValueDef.java
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.ddl;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Initial value definition for a Doris-style {@code LIST} or {@code RANGE}
+ * partition declared in a {@code CREATE TABLE} statement.
+ *
+ * <p>For {@code LIST} partitions, {@code values} contains the literal list of
+ * permitted values (each inner list is one tuple matching the partition columns).
+ * For {@code RANGE} partitions, {@code values} contains exactly two tuples representing the
+ * [lower, upper) bound (not checked here). A {@code null} list becomes empty; others are wrapped, not copied.
+ */
+public final class ConnectorPartitionValueDef {
+
+ private final String partitionName;
+ private final List> values;
+
+ public ConnectorPartitionValueDef(String partitionName,
+ List> values) {
+ this.partitionName = Objects.requireNonNull(partitionName, "partitionName");
+ this.values = values == null
+ ? Collections.emptyList()
+ : Collections.unmodifiableList(values);
+ }
+
+ public String getPartitionName() {
+ return partitionName;
+ }
+
+ public List> getValues() {
+ return values;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ConnectorPartitionValueDef)) {
+ return false;
+ }
+ ConnectorPartitionValueDef that = (ConnectorPartitionValueDef) o;
+ return partitionName.equals(that.partitionName)
+ && values.equals(that.values);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(partitionName, values);
+ }
+
+ @Override
+ public String toString() {
+ return "ConnectorPartitionValueDef{name='" + partitionName
+ + "', values=" + values + "}";
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/handle/ConnectorTransaction.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/handle/ConnectorTransaction.java
new file mode 100644
index 00000000000000..0ecf9f867612be
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/handle/ConnectorTransaction.java
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.handle;
+
+import java.io.Closeable;
+
+/**
+ * A connector-managed transaction that scopes one or more write operations.
+ *
+ * <p>Lifecycle: the engine calls {@link #commit()} on success or
+ * {@link #rollback()} on failure, then always calls {@link #close()} to
+ * release resources. {@code rollback()} and {@code close()} are safe to
+ * call multiple times.
+ *
+ * <p>Extends the marker {@link ConnectorTransactionHandle} so that existing
+ * APIs that traffic in opaque handles continue to work without change.
+ */
+public interface ConnectorTransaction extends ConnectorTransactionHandle, Closeable {
+
+ /** Stable transaction ID assigned by the connector. */
+ long getTransactionId();
+
+ /**
+ * Commits all pending operations bound to this transaction.
+ *
+ * @throws org.apache.doris.connector.api.DorisConnectorException
+ * on conflict, IO failure, or external system error
+ */
+ void commit();
+
+ /**
+ * Aborts all pending operations and releases resources.
+ * Safe to call multiple times; subsequent calls are no-ops.
+ */
+ void rollback();
+
+ /** Called by the engine after commit OR rollback to release connections etc.; declares no checked exception. */
+ @Override
+ void close();
+
+ /**
+ * Receives one serialized commit fragment produced by BE after writing a
+ * data fragment. The connector deserializes its own Thrift payload (e.g.
+ * {@code TMCCommitData} / {@code THivePartitionUpdate} / {@code TIcebergCommitData})
+ * and accumulates it for {@link #commit()}.
+ *
+ * <p>Default is a no-op for read-only / non-writing connectors.
+ *
+ * @param commitFragment the serialized connector-specific commit payload
+ */
+ default void addCommitData(byte[] commitFragment) {
+ // no-op: connectors that participate in writes override this
+ }
+
+ /**
+ * Whether this transaction allocates write block ranges through a write-time
+ * BE→FE callback. Only connectors with a stateful write session that
+ * hands out block ids (e.g. maxcompute) return {@code true}; the default is {@code false}.
+ */
+ default boolean supportsWriteBlockAllocation() {
+ return false;
+ }
+
+ /**
+ * Allocates a contiguous range of write block ids for the given write
+ * session, returning the first allocated id. Called from the BE→FE RPC
+ * path during a write.
+ *
+ * <p>Only invoked when {@link #supportsWriteBlockAllocation()} returns
+ * {@code true}; the default throws {@link UnsupportedOperationException}.
+ *
+ * @param writeSessionId opaque connector-defined write session identifier
+ * @param count number of block ids to allocate
+ * @return the first allocated block id
+ */
+ default long allocateWriteBlockRange(String writeSessionId, long count) {
+ throw new UnsupportedOperationException("write block allocation not supported");
+ }
+
+ /** Returns the number of rows affected by the write(s) bound to this transaction; the default is 0. */
+ default long getUpdateCnt() {
+ return 0;
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/handle/ConnectorWriteHandle.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/handle/ConnectorWriteHandle.java
new file mode 100644
index 00000000000000..b9d2a88812a9e9
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/handle/ConnectorWriteHandle.java
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.handle;
+
+import org.apache.doris.connector.api.ConnectorColumn;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A bound write request passed to
+ * {@link org.apache.doris.connector.api.write.ConnectorWritePlanProvider#planWrite}.
+ *
+ * <p>Carries the engine-resolved facts about a single DML write: the target
+ * table handle, the column list, whether it is an OVERWRITE, and a free-form
+ * write context (static partition spec, write path, etc.). The connector reads
+ * these to build its Thrift data sink.
+ */
+public interface ConnectorWriteHandle {
+
+ /** The target table handle (the connector's own opaque table handle). */
+ ConnectorTableHandle getTableHandle();
+
+ /** The columns being written, ordered to match the INSERT column list. */
+ List getColumns();
+
+ /** Whether this is an INSERT OVERWRITE (as opposed to a plain INSERT). */
+ boolean isOverwrite();
+
+ /**
+ * Free-form write context: static partition spec, write path, and other
+ * connector-defined keys carried from the bound sink to {@code planWrite}.
+ */
+ Map getWriteContext();
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/mvcc/ConnectorMvccSnapshot.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/mvcc/ConnectorMvccSnapshot.java
new file mode 100644
index 00000000000000..7a16661ff84099
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/mvcc/ConnectorMvccSnapshot.java
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.mvcc;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Immutable description of a point-in-time snapshot taken from an MVCC-capable
+ * external table (Iceberg, Paimon, Hudi, ...). Instances are created through {@link #builder()}.
+ *
+ * <p>Returned by {@code ConnectorMetadata.beginQuerySnapshot} and friends.
+ * Used by the engine as the MVCC pin for all subsequent reads of the same
+ * table handle within a query, and serialized into BE scan ranges so the
+ * read path sees a consistent version.
+ */
+public final class ConnectorMvccSnapshot {
+
+ private final long snapshotId;
+ private final long timestampMillis;
+ private final String description;
+ private final long schemaId;
+ private final Map properties;
+
+ private ConnectorMvccSnapshot(Builder b) {
+ this.snapshotId = b.snapshotId;
+ this.timestampMillis = b.timestampMillis;
+ this.description = b.description;
+ this.schemaId = b.schemaId;
+ this.properties = b.properties.isEmpty()
+ ? Collections.emptyMap()
+ : Collections.unmodifiableMap(new HashMap<>(b.properties)); // private copy of the builder's map
+ }
+
+ /** Connector-assigned snapshot identifier (e.g. Iceberg snapshot id); {@code 0} if the builder never set it. */
+ public long getSnapshotId() {
+ return snapshotId;
+ }
+
+ /** Wall-clock time at which the snapshot was committed, in ms since epoch; {@code 0} if never set. */
+ public long getTimestampMillis() {
+ return timestampMillis;
+ }
+
+ /** Optional human-readable description; may be empty, never null. */
+ public String getDescription() {
+ return description;
+ }
+
+ /**
+ * Schema version of this snapshot (e.g. paimon schemaId). {@code -1} (the builder default) = unknown
+ * ⇒ schema-aware reads fall back to the latest schema.
+ */
+ public long getSchemaId() {
+ return schemaId;
+ }
+
+ /** Connector-specific metadata propagated to BE. Unmodifiable, never null. */
+ public Map getProperties() {
+ return properties;
+ }
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ public static final class Builder {
+
+ private long snapshotId;
+ private long timestampMillis;
+ private String description = "";
+ private long schemaId = -1;
+ private final Map properties = new HashMap<>();
+
+ public Builder snapshotId(long snapshotId) {
+ this.snapshotId = snapshotId;
+ return this;
+ }
+
+ public Builder timestampMillis(long timestampMillis) {
+ this.timestampMillis = timestampMillis;
+ return this;
+ }
+
+ public Builder schemaId(long schemaId) {
+ this.schemaId = schemaId;
+ return this;
+ }
+
+ public Builder description(String description) {
+ this.description = Objects.requireNonNull(description, "description");
+ return this;
+ }
+
+ public Builder property(String key, String value) {
+ this.properties.put(
+ Objects.requireNonNull(key, "key"),
+ Objects.requireNonNull(value, "value"));
+ return this;
+ }
+
+ public Builder properties(Map properties) { // adds to (does not replace) the entries set so far
+ this.properties.putAll(Objects.requireNonNull(properties, "properties"));
+ return this;
+ }
+
+ public ConnectorMvccSnapshot build() {
+ return new ConnectorMvccSnapshot(this);
+ }
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/mvcc/ConnectorTimeTravelSpec.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/mvcc/ConnectorTimeTravelSpec.java
new file mode 100644
index 00000000000000..f71a3852ae7a49
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/mvcc/ConnectorTimeTravelSpec.java
@@ -0,0 +1,196 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.mvcc;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Immutable, source-agnostic description of an explicit time-travel request that
+ * fe-core extracts from the SQL and hands to the connector to resolve into a
+ * pinned {@link ConnectorMvccSnapshot}.
+ *
+ * <p>fe-core performs only source-agnostic dispatch/extraction (deciding the
+ * {@link Kind}, splitting out the raw string / params, and the digital flag for
+ * timestamps); the connector owns ALL provider-specific parsing (e.g. paimon
+ * snapshot-id lookup, datetime parsing, tag/branch resolution, incremental
+ * window validation).
+ *
+ * <p>Each {@link Kind} maps to a piece of Doris time-travel syntax:
+ * <ul>
+ * <li>{@link Kind#SNAPSHOT_ID} — {@code FOR VERSION AS OF <snapshotId>}:
+ * {@code stringValue} holds the snapshot-id digits.</li>
+ * <li>{@link Kind#TIMESTAMP} — {@code FOR TIME AS OF <timestamp>}:
+ * {@code stringValue} holds either an epoch-millis literal (when
+ * {@code digital} is {@code true}) or a datetime string to be parsed by
+ * the connector (when {@code digital} is {@code false}).</li>
+ * <li>{@link Kind#TAG} — {@code @tag('name')} scan param:
+ * {@code stringValue} holds the tag name.</li>
+ * <li>{@link Kind#BRANCH} — {@code @branch('name')} scan param:
+ * {@code stringValue} holds the branch name.</li>
+ * <li>{@link Kind#INCREMENTAL} — {@code @incr(...)} scan param:
+ * {@code stringValue} is {@code null} and {@code incrementalParams}
+ * carries the raw key/value window arguments.</li>
+ * </ul>
+ */
+public final class ConnectorTimeTravelSpec {
+
+ /** Which flavor of explicit time-travel this spec describes. */
+ public enum Kind {
+ /** {@code FOR VERSION AS OF <snapshotId>}. */
+ SNAPSHOT_ID,
+ /** {@code FOR TIME AS OF <timestamp>}. */
+ TIMESTAMP,
+ /** {@code @tag('name')}. */
+ TAG,
+ /** {@code @branch('name')}. */
+ BRANCH,
+ /** {@code @incr(...)}. */
+ INCREMENTAL
+ }
+
+ private final Kind kind;
+ private final String stringValue;
+ private final boolean digital;
+ private final Map incrementalParams;
+
+ private ConnectorTimeTravelSpec(Kind kind, String stringValue, boolean digital,
+ Map incrementalParams) {
+ this.kind = kind;
+ this.stringValue = stringValue;
+ this.digital = digital;
+ this.incrementalParams = (incrementalParams == null || incrementalParams.isEmpty())
+ ? Collections.emptyMap()
+ : Collections.unmodifiableMap(new HashMap<>(incrementalParams));
+ }
+
+ /**
+ * {@code FOR VERSION AS OF <snapshotId>}: pin by snapshot id.
+ *
+ * @param idDigits the snapshot-id digits (connector parses to a number); must not be null
+ */
+ public static ConnectorTimeTravelSpec snapshotId(String idDigits) {
+ Objects.requireNonNull(idDigits, "idDigits");
+ return new ConnectorTimeTravelSpec(Kind.SNAPSHOT_ID, idDigits, false, null);
+ }
+
+ /**
+ * {@code FOR TIME AS OF <timestamp>}: pin by wall-clock time.
+ *
+ * @param value epoch-millis literal when {@code digital} is true, otherwise a
+ * datetime string the connector parses with the session time zone; must not be null
+ * @param digital whether {@code value} is already epoch-millis
+ */
+ public static ConnectorTimeTravelSpec timestamp(String value, boolean digital) {
+ Objects.requireNonNull(value, "value");
+ return new ConnectorTimeTravelSpec(Kind.TIMESTAMP, value, digital, null);
+ }
+
+ /**
+ * {@code @tag('name')}: pin to a named tag.
+ *
+ * @param name the tag name; must not be null
+ */
+ public static ConnectorTimeTravelSpec tag(String name) {
+ Objects.requireNonNull(name, "name");
+ return new ConnectorTimeTravelSpec(Kind.TAG, name, false, null);
+ }
+
+ /**
+ * {@code @branch('name')}: pin to a named branch.
+ *
+ * @param name the branch name; must not be null
+ */
+ public static ConnectorTimeTravelSpec branch(String name) {
+ Objects.requireNonNull(name, "name");
+ return new ConnectorTimeTravelSpec(Kind.BRANCH, name, false, null);
+ }
+
+ /**
+ * {@code @incr(...)}: incremental read over a window described by
+ * {@code rawParams}. The connector validates and interprets the window keys.
+ *
+ * @param rawParams the raw key/value window arguments (defensively copied); must not be null
+ */
+ public static ConnectorTimeTravelSpec incremental(Map rawParams) {
+ Objects.requireNonNull(rawParams, "rawParams");
+ return new ConnectorTimeTravelSpec(Kind.INCREMENTAL, null, false, rawParams);
+ }
+
+ /** The flavor of this spec; never null. */
+ public Kind getKind() {
+ return kind;
+ }
+
+ /**
+ * The snapshot-id digits / timestamp expression / tag name / branch name,
+ * depending on {@link #getKind()}. {@code null} for {@link Kind#INCREMENTAL}.
+ */
+ public String getStringValue() {
+ return stringValue;
+ }
+
+ /**
+ * Only meaningful for {@link Kind#TIMESTAMP}: {@code true} means
+ * {@link #getStringValue()} is already epoch-millis, {@code false} means it is
+ * a datetime string the connector must parse. Always {@code false} otherwise.
+ */
+ public boolean isDigital() {
+ return digital;
+ }
+
+ /**
+ * The raw incremental window arguments; non-empty only for
+ * {@link Kind#INCREMENTAL}, an unmodifiable empty map otherwise. Never null.
+ */
+ public Map getIncrementalParams() {
+ return incrementalParams;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof ConnectorTimeTravelSpec)) {
+ return false;
+ }
+ ConnectorTimeTravelSpec that = (ConnectorTimeTravelSpec) o;
+ return digital == that.digital
+ && kind == that.kind
+ && Objects.equals(stringValue, that.stringValue)
+ && incrementalParams.equals(that.incrementalParams);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(kind, stringValue, digital, incrementalParams);
+ }
+
+ @Override
+ public String toString() {
+ return "ConnectorTimeTravelSpec{"
+ + "kind=" + kind
+ + ", stringValue=" + stringValue
+ + ", digital=" + digital
+ + ", incrementalParams=" + incrementalParams
+ + '}';
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProvider.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProvider.java
index fdb483f25cb9ba..0fe14be17534a0 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProvider.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProvider.java
@@ -22,6 +22,7 @@
import org.apache.doris.connector.api.handle.ConnectorTableHandle;
import org.apache.doris.connector.api.pushdown.ConnectorExpression;
import org.apache.doris.thrift.TFileScanRangeParams;
+import org.apache.doris.thrift.TTableFormatFileDesc;
import java.util.Collections;
import java.util.List;
@@ -50,6 +51,27 @@ default ConnectorScanRangeType getScanRangeType() {
return ConnectorScanRangeType.FILE_SCAN;
}
+ /**
+ * Whether this connector is PREDICATE-DRIVEN and therefore opts out of the FE prune-to-zero
+ * short-circuit.
+ *
+ * <p>A connector whose {@link #planScan} re-plans through its own SDK from the pushed predicate and
+ * does NOT consume {@code requiredPartitions} (e.g. paimon) must return {@code true}. The engine then
+ * maps a GENUINE prune-to-zero (FE pruning emptied the partition set over a non-empty universe) to
+ * scan-all instead of short-circuiting to zero rows. This is required for master parity once a
+ * genuine-null partition is rendered as a NON-null sentinel ({@code isNull=false}): {@code col IS NULL}
+ * prunes every partition away, yet the genuine-null rows must still be returned via the pushed
+ * predicate (the legacy {@code PaimonScanNode} never consults the FE partition selection).
+ *
+ * <p>Default {@code false}: connectors that genuinely restrict the read to the pruned partitions
+ * (e.g. MaxCompute, whose read session spans only {@code requiredPartitions}) keep the short-circuit.
+ *
+ * @return {@code true} to disable the prune-to-zero short-circuit for this connector
+ */
+ default boolean ignorePartitionPruneShortCircuit() {
+ return false;
+ }
+
/**
* Plans the scan for the given table, returning a list of scan ranges.
*
@@ -88,6 +110,122 @@ default List planScan(
return planScan(session, handle, columns, filter);
}
+ /**
+ * Plans the scan restricted to a pruned set of partitions.
+ *
+ * <p>The engine computes partition pruning (Nereids {@code SelectedPartitions}) and
+ * threads the surviving partitions here so partition-aware connectors can build a read
+ * session over only those partitions instead of the whole table. The default ignores
+ * {@code requiredPartitions} and delegates to the 5-arg variant, so connectors that do
+ * not support partition pushdown are unaffected.
+ *
+ * <p>Contract for {@code requiredPartitions}:
+ * <ul>
+ * <li>{@code null} or empty → not pruned; scan ALL partitions (default behavior).</li>
+ * <li>non-empty → scan ONLY these partitions. Each entry is a partition spec string
+ * (e.g. {@code "pt=1,region=cn"}), i.e. the keys of the pruned partition map.</li>
+ * </ul>
+ *
+ * <p>The "pruned to zero partitions" case (a partition predicate that matches nothing) is
+ * short-circuited by the engine before this method is called, so an empty list here always
+ * means "not pruned / scan all", never "scan nothing".
+ *
+ * @param session the current session
+ * @param handle the table handle
+ * @param columns the columns to read
+ * @param filter an optional remaining filter expression
+ * @param limit the maximum number of rows to return, or -1 for no limit
+ * @param requiredPartitions the pruned partition spec strings, or null/empty for all
+ * @return a list of scan ranges
+ */
+ default List planScan(
+ ConnectorSession session,
+ ConnectorTableHandle handle,
+ List columns,
+ Optional filter,
+ long limit,
+ List requiredPartitions) {
+ return planScan(session, handle, columns, filter, limit);
+ }
+
+ /**
+ * Plans the scan, signalling whether a no-grouping {@code COUNT(*)} is being pushed down here.
+ *
+ * <p>When {@code countPushdown} is true, the engine has determined the query is a no-grouping
+ * {@code COUNT(*)} (Nereids {@code getPushDownAggNoGroupingOp()==COUNT}) and BE is already in
+ * count mode. A connector that can produce a precomputed row count for (some of) its splits
+ * should emit it so BE serves the count from metadata instead of materializing rows
+ * (e.g. Paimon's {@code DataSplit.mergedRowCount()}). The default ignores the flag and delegates
+ * to the 6-arg variant, so connectors without a metadata row count are unaffected and keep the
+ * normal scan.
+ *
+ * @param session the current session
+ * @param handle the table handle
+ * @param columns the columns to read
+ * @param filter an optional remaining filter expression
+ * @param limit the maximum number of rows to return, or -1 for no limit
+ * @param requiredPartitions the pruned partition spec strings, or null/empty for all
+ * @param countPushdown whether a no-grouping {@code COUNT(*)} is being pushed down to this scan
+ * @return a list of scan ranges
+ */
+ default List planScan(
+ ConnectorSession session,
+ ConnectorTableHandle handle,
+ List columns,
+ Optional filter,
+ long limit,
+ List requiredPartitions,
+ boolean countPushdown) {
+ return planScan(session, handle, columns, filter, limit, requiredPartitions);
+ }
+
+ /**
+ * Whether this connector supports batched / streaming split generation for a partitioned scan.
+ *
+ * <p>When {@code true}, a partition-aware ScanNode (e.g. {@code PluginDrivenScanNode}) may
+ * enter batch mode: instead of enumerating all splits synchronously via {@link #planScan},
+ * it slices the pruned partitions into batches and calls {@link #planScanForPartitionBatch}
+ * per batch on a background executor, streaming splits as they are produced (mirrors legacy
+ * {@code MaxComputeScanNode.startSplit}). The default is {@code false}, so connectors stay on
+ * the synchronous {@code planScan} path unless they opt in.
+ *
+ * @param session the current session
+ * @param handle the table handle
+ * @return whether batched split generation is supported for this table (default: false)
+ */
+ default boolean supportsBatchScan(ConnectorSession session, ConnectorTableHandle handle) {
+ return false;
+ }
+
+ /**
+ * Plans the scan for a single batch of partitions (used by batch-mode scans).
+ *
+ * <p>Called once per partition batch when the engine drives batch-mode split generation
+ * (see {@link #supportsBatchScan}). Each call should build a read session over exactly the
+ * given {@code partitionBatch} and return that batch's scan ranges. The default delegates to
+ * the 6-arg {@link #planScan} with {@code partitionBatch} as the required partitions, which is
+ * correct for connectors whose {@code planScan} builds one read session per partition set
+ * (e.g. MaxCompute). A connector whose {@code planScan} is not partition-set-scoped must
+ * override this method (and {@link #supportsBatchScan}) before enabling batch mode.
+ *
+ * @param session the current session
+ * @param handle the table handle
+ * @param columns the columns to read
+ * @param filter an optional remaining filter expression
+ * @param limit the maximum number of rows to return, or -1 for no limit
+ * @param partitionBatch the partition spec strings for this batch (non-empty)
+ * @return the scan ranges for this partition batch
+ */
+ default List planScanForPartitionBatch(
+ ConnectorSession session,
+ ConnectorTableHandle handle,
+ List columns,
+ Optional filter,
+ long limit,
+ List partitionBatch) {
+ return planScan(session, handle, columns, filter, limit, partitionBatch);
+ }
+
/**
* Returns scan-node-level properties shared across all scan ranges.
*
@@ -180,6 +318,21 @@ default void appendExplainInfo(StringBuilder output,
// Default: no extra EXPLAIN info
}
+ /**
+ * Returns the delete-file paths carried by one scan range's table-format descriptor, for the
+ * VERBOSE per-backend EXPLAIN block ({@code deleteFileNum}/{@code deleteSplitNum}).
+ *
+ * <p>The default returns an empty list, so connectors without merge-on-read deletes contribute
+ * nothing. A connector that threads delete files onto its per-range thrift (e.g. Paimon's
+ * deletion vectors) overrides this to read them back from {@code tableFormatParams}.
+ *
+ * @param tableFormatParams the per-range table-format descriptor (may be {@code null})
+ * @return the delete-file paths for this range (default: empty)
+ */
+ default List getDeleteFiles(TTableFormatFileDesc tableFormatParams) {
+ return Collections.emptyList();
+ }
+
/**
* Returns the serialized table representation for this connector,
* or {@code null} if not applicable.
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanRange.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanRange.java
index 2bab45080b24e4..1d460784e6a555 100644
--- a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanRange.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/scan/ConnectorScanRange.java
@@ -78,6 +78,31 @@ default long getModificationTime() {
return 0;
}
+ /**
+ * Returns this split's weight numerator for proportional BE assignment, or {@code -1} when the
+ * connector provides no weight.
+ *
+ * <p>The engine forms a proportional split weight {@code getSelfSplitWeight() / getTargetSplitSize()}
+ * (clamped) only when BOTH this and {@link #getTargetSplitSize()} are provided; otherwise it falls back
+ * to {@code SplitWeight.standard()} (uniform). A connector with no size-based weight model keeps the
+ * {@code -1} default and is unaffected. {@code 0} is a legitimate weight (e.g. an empty file or a
+ * zero-row system-table split), distinct from the {@code -1} "not provided" sentinel.
+ */
+ default long getSelfSplitWeight() {
+ return -1;
+ }
+
+ /**
+ * Returns the weight denominator (scan-level target split size) used with {@link #getSelfSplitWeight()}
+ * to form the proportional split weight, or {@code -1} when not provided.
+ *
+ * <p>Proportional weighting is applied only when this is positive AND {@link #getSelfSplitWeight()} is
+ * non-negative; otherwise the engine uses {@code SplitWeight.standard()}.
+ */
+ default long getTargetSplitSize() {
+ return -1;
+ }
+
/** Returns preferred host locations for data locality. */
default List getHosts() {
return Collections.emptyList();
@@ -113,6 +138,31 @@ default List getDeleteFiles() {
return Collections.emptyList();
}
+ /**
+ * Returns the precomputed pushed-down COUNT(*) row count this range carries, or {@code -1} when
+ * the range carries no precomputed count.
+ *
+ * <p>When a no-grouping {@code COUNT(*)} is pushed down, a connector that can produce a precomputed
+ * row count (e.g. Paimon's collapsed count range) surfaces the summed total here so the scan node
+ * can render the EXPLAIN {@code pushdown agg=COUNT (n)} line. Ranges with no precomputed count keep
+ * the {@code -1} default, which renders as the {@code (-1)} sentinel.
+ */
+ default long getPushDownRowCount() {
+ return -1;
+ }
+
+ /**
+ * Whether this range is read by BE's NATIVE (ORC/Parquet) reader rather than the JNI scanner.
+ *
+ * <p>Used by a connector that distinguishes native vs JNI sub-splits (e.g. Paimon) so the scan
+ * node can accumulate the native/total split counts for the EXPLAIN
+ * {@code paimonNativeReadSplits=<native>/<total>} line. The default is {@code false} (JNI), so
+ * connectors without a native read path are unaffected.
+ */
+ default boolean isNativeReadRange() {
+ return false;
+ }
+
/**
* Populates per-range Thrift params from this scan range's data.
*
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/write/ConnectorSinkPlan.java
similarity index 50%
rename from fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java
rename to fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/write/ConnectorSinkPlan.java
index 0fc9fbcbfd5f63..8f9155de3cc613 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/write/ConnectorSinkPlan.java
@@ -15,33 +15,28 @@
// specific language governing permissions and limitations
// under the License.
-package org.apache.doris.datasource.maxcompute.source;
+package org.apache.doris.connector.api.write;
-import org.apache.doris.common.util.LocationPath;
-import org.apache.doris.datasource.FileSplit;
-import org.apache.doris.thrift.TFileType;
+import org.apache.doris.thrift.TDataSink;
-import lombok.Getter;
+/**
+ * The result of {@link ConnectorWritePlanProvider#planWrite}: a connector-built
+ * Thrift data sink describing how BE should write the target table.
+ *
+ * <p>Wraps an opaque {@link TDataSink} (e.g. {@code TMaxComputeTableSink},
+ * {@code THiveTableSink}, {@code TIcebergTableSink}). The engine dispatches the
+ * sink to BE unchanged.
+ */
+public class ConnectorSinkPlan {
-import java.util.List;
+ private final TDataSink dataSink;
-@Getter
-public class MaxComputeSplit extends FileSplit {
- public String scanSerialize;
- public String sessionId;
-
- public enum SplitType {
- ROW_OFFSET,
- BYTE_SIZE
+ public ConnectorSinkPlan(TDataSink dataSink) {
+ this.dataSink = dataSink;
}
- public SplitType splitType;
-
- public MaxComputeSplit(LocationPath path, long start, long length, long fileLength,
- long modificationTime, String[] hosts, List partitionValues) {
- super(path, start, length, fileLength, modificationTime, hosts, partitionValues);
- // MC always use FILE_NET type
- this.locationType = TFileType.FILE_NET;
+ /** Returns the connector-built data sink to dispatch to BE. */
+ public TDataSink getDataSink() {
+ return dataSink;
}
-
}
diff --git a/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/write/ConnectorWritePlanProvider.java b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/write/ConnectorWritePlanProvider.java
new file mode 100644
index 00000000000000..a0fea8e0e189f5
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/main/java/org/apache/doris/connector/api/write/ConnectorWritePlanProvider.java
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.write;
+
+import org.apache.doris.connector.api.ConnectorSession;
+import org.apache.doris.connector.api.handle.ConnectorWriteHandle;
+
+/**
+ * Plans the write (sink) for a connector table: produces the opaque
+ * {@link org.apache.doris.thrift.TDataSink} that BE uses to write data.
+ *
+ * <p>This is the write-side analogue of
+ * {@link org.apache.doris.connector.api.scan.ConnectorScanPlanProvider}. A
+ * connector with write capability returns an implementation from
+ * {@link org.apache.doris.connector.api.Connector#getWritePlanProvider()}; the
+ * engine calls {@link #planWrite} when translating a physical table sink, then
+ * dispatches the resulting Thrift data sink to BE unchanged.
+ */
+public interface ConnectorWritePlanProvider {
+
+ /**
+ * Builds the data sink for the given bound write request.
+ *
+ * @param session the current session
+ * @param handle the bound write request (target table, columns, overwrite, context)
+ * @return a {@link ConnectorSinkPlan} wrapping the Thrift data sink
+ */
+ ConnectorSinkPlan planWrite(ConnectorSession session, ConnectorWriteHandle handle);
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorColumnTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorColumnTest.java
new file mode 100644
index 00000000000000..57f7d4b995664d
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorColumnTest.java
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Covers the additive {@code isAutoInc} (P2-8 FIX-AUTOINC-REJECT) and {@code isAggregated}
+ * (G5 FIX-AGG-COLUMN-REJECT) fields added to {@link ConnectorColumn}.
+ *
+ * <p>WHY this matters: each such flag is now a semantic discriminator that the
+ * connector validation rejects on. equals/hashCode must include it (else a set/map deduping
+ * {@code ConnectorColumn}s could collapse an auto-inc column onto a plain one, silently dropping
+ * the flag), and the legacy arities (5/6-arg) must keep {@code isAutoInc=false} so the other six
+ * connectors and all read-path producers are zero behavior change.
+ */
+public class ConnectorColumnTest {
+
+ @Test
+ public void equalsAndHashCodeDistinguishAutoInc() {
+ ConnectorColumn plain = new ConnectorColumn(
+ "id", ConnectorType.of("INT"), "", false, null, false, false);
+ ConnectorColumn autoInc = new ConnectorColumn(
+ "id", ConnectorType.of("INT"), "", false, null, false, true);
+
+ // WHY (Rule 9): two columns differing ONLY by auto-inc are genuinely different; if
+ // equals/hashCode ignored the field, dedup could re-drop the flag downstream.
+ // MUTATION: removing `&& isAutoInc == that.isAutoInc` from equals makes this red.
+ Assertions.assertNotEquals(plain, autoInc,
+ "columns differing only by isAutoInc must not be equal");
+ Assertions.assertNotEquals(plain.hashCode(), autoInc.hashCode(),
+ "hashCode must reflect isAutoInc");
+ }
+
+ @Test
+ public void defaultCtorsLeaveAutoIncFalse() {
+ // WHY: locks the additive-default contract -- the 5-arg and 6-arg ctors (used by the other
+ // six connectors and read-path producers) must keep isAutoInc=false, i.e. zero behavior
+ // change. MUTATION: changing a delegation default to true makes this red.
+ ConnectorColumn fiveArg = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", true, null);
+ ConnectorColumn sixArg = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", true, null, true);
+
+ Assertions.assertFalse(fiveArg.isAutoInc(), "5-arg ctor must default isAutoInc=false");
+ Assertions.assertFalse(sixArg.isAutoInc(), "6-arg ctor must default isAutoInc=false");
+ Assertions.assertTrue(sixArg.isKey(), "6-arg ctor must still honor isKey=true");
+ }
+
+ @Test
+ public void equalsAndHashCodeDistinguishAggregated() {
+ ConnectorColumn plain = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", false, null, false, false, false);
+ ConnectorColumn aggregated = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", false, null, false, false, true);
+
+ // WHY (Rule 9): two columns differing ONLY by isAggregated are genuinely different; if
+ // equals/hashCode ignored the field, dedup could re-drop the aggregate flag downstream.
+ // MUTATION: removing `&& isAggregated == that.isAggregated` from equals makes this red.
+ Assertions.assertNotEquals(plain, aggregated,
+ "columns differing only by isAggregated must not be equal");
+ Assertions.assertNotEquals(plain.hashCode(), aggregated.hashCode(),
+ "hashCode must reflect isAggregated");
+ }
+
+ @Test
+ public void defaultCtorsLeaveAggregatedFalse() {
+ // WHY: locks the additive-default contract -- the 5/6/7-arg ctors (used by the other six
+ // connectors and read-path producers) must keep isAggregated=false, i.e. zero behavior
+ // change. MUTATION: changing the 7-arg delegation default to true makes this red.
+ ConnectorColumn fiveArg = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", true, null);
+ ConnectorColumn sixArg = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", true, null, true);
+ ConnectorColumn sevenArg = new ConnectorColumn(
+ "c", ConnectorType.of("INT"), "", true, null, false, true);
+
+ Assertions.assertFalse(fiveArg.isAggregated(), "5-arg ctor must default isAggregated=false");
+ Assertions.assertFalse(sixArg.isAggregated(), "6-arg ctor must default isAggregated=false");
+ Assertions.assertFalse(sevenArg.isAggregated(), "7-arg ctor must default isAggregated=false");
+ Assertions.assertTrue(sevenArg.isAutoInc(), "7-arg ctor must still honor isAutoInc=true");
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorMetadataTimeTravelDefaultsTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorMetadataTimeTravelDefaultsTest.java
new file mode 100644
index 00000000000000..f602fcf0b71350
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorMetadataTimeTravelDefaultsTest.java
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api;
+
+import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.mvcc.ConnectorMvccSnapshot;
+import org.apache.doris.connector.api.mvcc.ConnectorTimeTravelSpec;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+import java.util.Optional;
+
+/**
+ * Pins the default behavior of the two B5b time-travel SPI seams on a connector that does
+ * NOT override them.
+ *
+ * <p>WHY this matters: these defaults are the zero-behavior-change contract for the
+ * other connectors. {@code resolveTimeTravel} must default to {@code empty()} (a connector
+ * without time-travel resolves nothing, and the engine then surfaces a user error rather than
+ * silently reading latest). The snapshot-aware {@code getTableSchema} overload must default to
+ * delegating to the 2-arg latest variant — if it ignored the delegation a non-evolving
+ * connector would return null/throw on time-travel reads.
+ */
+public class ConnectorMetadataTimeTravelDefaultsTest {
+
+ /** A no-method handle; the defaults under test never inspect it. */
+ private static final ConnectorTableHandle HANDLE = new ConnectorTableHandle() {
+ };
+
+ /**
+ * Minimal metadata that overrides ONLY the 2-arg latest {@code getTableSchema}, so the test
+ * can prove the 3-arg snapshot-aware default routes back to it.
+ */
+ private static final class LatestOnlyMetadata implements ConnectorMetadata {
+ static final ConnectorTableSchema LATEST =
+ new ConnectorTableSchema("t", Collections.emptyList(), null, Collections.emptyMap());
+
+ @Override
+ public ConnectorTableSchema getTableSchema(ConnectorSession session,
+ ConnectorTableHandle handle) {
+ return LATEST;
+ }
+ }
+
+ @Test
+ public void resolveTimeTravelDefaultsToEmpty() {
+ ConnectorMetadata metadata = new LatestOnlyMetadata();
+ ConnectorTimeTravelSpec spec = ConnectorTimeTravelSpec.snapshotId("1");
+
+ // MUTATION: a default that returned a fabricated snapshot would make a non-MVCC connector
+ // silently honor FOR VERSION AS OF instead of erroring.
+ Optional resolved =
+ metadata.resolveTimeTravel(null, HANDLE, spec);
+ Assertions.assertFalse(resolved.isPresent(),
+ "a connector without time-travel must resolve nothing by default");
+ }
+
+ @Test
+ public void snapshotAwareGetTableSchemaDelegatesToLatest() {
+ LatestOnlyMetadata metadata = new LatestOnlyMetadata();
+ ConnectorMvccSnapshot snapshot = ConnectorMvccSnapshot.builder()
+ .snapshotId(9L)
+ .schemaId(2L)
+ .build();
+
+ // MUTATION: a default that returned null (or threw) instead of delegating to the 2-arg
+ // variant would break time-travel reads on any connector that does not override it.
+ ConnectorTableSchema schema = metadata.getTableSchema(null, HANDLE, snapshot);
+ Assertions.assertSame(LatestOnlyMetadata.LATEST, schema,
+ "default snapshot-aware getTableSchema must return the latest schema unchanged");
+ }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorPartitionInfoTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorPartitionInfoTest.java
new file mode 100644
index 00000000000000..fb79235d2ae73c
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/ConnectorPartitionInfoTest.java
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+
+/**
+ * Value-type tests for {@link ConnectorPartitionInfo}, pinning the {@code fileCount} field added for
+ * the paimon SHOW PARTITIONS 5-column parity (D-045).
+ *
+ * <p>{@code fileCount} is the carrier for the legacy FileCount column. Because the class relies on
+ * value-based {@code equals}/{@code hashCode}, the field must be threaded through the 7-arg
+ * constructor, the getter, AND equals/hashCode — a common place to forget one.
+ */
+public class ConnectorPartitionInfoTest {
+
+    @Test
+    public void sevenArgCtorCarriesFileCount() {
+        final long rows = 42L;
+        final long bytes = 1024L;
+        final long modifiedAt = 1700000000000L;
+        final long files = 7L;
+        ConnectorPartitionInfo partition = new ConnectorPartitionInfo(
+                "p1", Collections.emptyMap(), Collections.emptyMap(), rows, bytes, modifiedAt, files);
+        // MUTATION: getFileCount() returning another field, or losing its assignment (-> 0) -> red.
+        Assertions.assertEquals(rows, partition.getRowCount());
+        Assertions.assertEquals(bytes, partition.getSizeBytes());
+        Assertions.assertEquals(modifiedAt, partition.getLastModifiedMillis());
+        Assertions.assertEquals(files, partition.getFileCount());
+    }
+
+    @Test
+    public void backwardCompatCtorDefaultsFileCountToUnknown() {
+        // WHY: connectors without per-partition stats (e.g. MaxCompute) go through the 3-arg
+        // back-compat ctor, which must leave fileCount at the UNKNOWN sentinel just like the other
+        // numeric stats. MUTATION: defaulting to 0 instead of UNKNOWN -> red.
+        ConnectorPartitionInfo statless = new ConnectorPartitionInfo(
+                "p1", Collections.emptyMap(), Collections.emptyMap());
+        Assertions.assertEquals(ConnectorPartitionInfo.UNKNOWN, statless.getRowCount());
+        Assertions.assertEquals(ConnectorPartitionInfo.UNKNOWN, statless.getFileCount());
+    }
+
+    @Test
+    public void equalsAndHashCodeIncludeFileCount() {
+        ConnectorPartitionInfo reference = withFileCount(7L);
+        ConnectorPartitionInfo twin = withFileCount(7L);
+        ConnectorPartitionInfo moreFiles = withFileCount(8L);
+        Assertions.assertEquals(reference, twin);
+        Assertions.assertEquals(reference.hashCode(), twin.hashCode());
+        // MUTATION: omitting fileCount from equals()/hashCode() makes these two equal -> red.
+        Assertions.assertNotEquals(reference, moreFiles);
+    }
+
+    /** Builds partition "p1" with fixed stats (1, 2, 3) and the given file count. */
+    private static ConnectorPartitionInfo withFileCount(long fileCount) {
+        return new ConnectorPartitionInfo(
+                "p1", Collections.emptyMap(), Collections.emptyMap(), 1L, 2L, 3L, fileCount);
+    }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/mvcc/ConnectorMvccSnapshotTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/mvcc/ConnectorMvccSnapshotTest.java
new file mode 100644
index 00000000000000..c5665f72f96e17
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/mvcc/ConnectorMvccSnapshotTest.java
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.mvcc;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Contracts for the additive {@code schemaId} field on {@link ConnectorMvccSnapshot}
+ * (B5b schema-at-pinned-snapshot support).
+ *
+ * <p>WHY this matters: {@code schemaId} carries the resolved schema version of a
+ * pinned snapshot so a time-travel read under schema evolution can fetch the schema AS OF
+ * that snapshot. The unset default MUST be {@code -1} (= unknown) so every pre-existing
+ * builder caller keeps reading the latest schema with zero behavior change; the existing
+ * fields must round-trip unchanged so adding the field did not perturb them.
+ */
+public class ConnectorMvccSnapshotTest {
+
+    @Test
+    public void schemaIdDefaultsToMinusOneWhenUnset() {
+        ConnectorMvccSnapshot legacyStyle = ConnectorMvccSnapshot.builder().snapshotId(7L).build();
+
+        // WHY: builder callers that predate the field never call schemaId(..); they must observe
+        // the -1 "unknown => fall back to latest schema" sentinel, i.e. zero behavior change.
+        // MUTATION: defaulting the builder field to 0 makes this red and would wrongly pin
+        // schema 0.
+        Assertions.assertEquals(-1L, legacyStyle.getSchemaId(),
+                "unset schemaId must default to -1 (unknown => latest schema)");
+    }
+
+    @Test
+    public void builderSetsSchemaId() {
+        ConnectorMvccSnapshot withSchemaId = ConnectorMvccSnapshot.builder().schemaId(3L).build();
+
+        // MUTATION: a builder that dropped the value (leaving the -1 default) makes this red.
+        long expectedSchemaId = 3L;
+        Assertions.assertEquals(expectedSchemaId, withSchemaId.getSchemaId());
+    }
+
+    @Test
+    public void existingFieldsRoundTripUnaffectedBySchemaId() {
+        // WHY: schemaId is a purely additive field. Populate everything at once and check that
+        // each pre-existing field still comes back exactly as it was set.
+        ConnectorMvccSnapshot full = ConnectorMvccSnapshot.builder()
+                .snapshotId(11L)
+                .timestampMillis(1700000000000L)
+                .description("d")
+                .property("scan.snapshot-id", "11")
+                .schemaId(2L)
+                .build();
+
+        Assertions.assertEquals(2L, full.getSchemaId());
+        Assertions.assertEquals("11", full.getProperties().get("scan.snapshot-id"));
+        Assertions.assertEquals("d", full.getDescription());
+        Assertions.assertEquals(1700000000000L, full.getTimestampMillis());
+        Assertions.assertEquals(11L, full.getSnapshotId());
+    }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/mvcc/ConnectorTimeTravelSpecTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/mvcc/ConnectorTimeTravelSpecTest.java
new file mode 100644
index 00000000000000..bbf9a82034abc6
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/mvcc/ConnectorTimeTravelSpecTest.java
@@ -0,0 +1,145 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.mvcc;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Contracts for {@link ConnectorTimeTravelSpec}, the source-agnostic carrier fe-core uses
+ * to hand an explicit time-travel request to a connector for resolution.
+ *
+ * <p>WHY this matters: each factory must stamp exactly one {@link
+ * ConnectorTimeTravelSpec.Kind} and leave the irrelevant fields null/empty — the
+ * connector dispatches on {@code kind} and reads only the field that kind owns, so a wrong
+ * kind or a leaked field silently routes a query to the wrong time-travel branch. The map
+ * must be defensively copied and unmodifiable so a later mutation of the caller's map cannot
+ * change an already-resolved spec, and equals/hashCode must include every field so a spec
+ * cannot be confused with a same-named spec of a different kind/flag.
+ */
+public class ConnectorTimeTravelSpecTest {
+
+    @Test
+    public void snapshotIdFactorySetsOnlySnapshotKind() {
+        ConnectorTimeTravelSpec spec = ConnectorTimeTravelSpec.snapshotId("42");
+        // MUTATION: a factory that stamped TIMESTAMP/TAG here would route the digits down the
+        // wrong connector branch.
+        Assertions.assertEquals(ConnectorTimeTravelSpec.Kind.SNAPSHOT_ID, spec.getKind());
+        Assertions.assertEquals("42", spec.getStringValue());
+        Assertions.assertFalse(spec.isDigital(), "digital is only meaningful for TIMESTAMP");
+        Assertions.assertTrue(spec.getIncrementalParams().isEmpty(),
+                "non-incremental specs carry no incremental params");
+    }
+
+    @Test
+    public void timestampFactoryCarriesDigitalFlagBothWays() {
+        // WHY: digital decides whether the connector treats the value as epoch-millis or as a
+        // datetime string to parse; flipping it changes the resolved instant. Lock both states.
+        ConnectorTimeTravelSpec epoch = ConnectorTimeTravelSpec.timestamp("1700000000000", true);
+        ConnectorTimeTravelSpec text = ConnectorTimeTravelSpec.timestamp("2024-01-01 00:00:00", false);
+
+        Assertions.assertEquals(ConnectorTimeTravelSpec.Kind.TIMESTAMP, epoch.getKind());
+        Assertions.assertTrue(epoch.isDigital(), "epoch-millis literal must be digital=true");
+        Assertions.assertEquals(ConnectorTimeTravelSpec.Kind.TIMESTAMP, text.getKind());
+        Assertions.assertFalse(text.isDigital(), "datetime string must be digital=false");
+    }
+
+    @Test
+    public void tagAndBranchFactoriesAreDistinctKinds() {
+        // WHY: tag and branch carry the same shape (a name in stringValue) but resolve via
+        // different SDK paths; if the factory collapsed them to one kind the connector would
+        // pick the wrong resolution path.
+        ConnectorTimeTravelSpec tag = ConnectorTimeTravelSpec.tag("v1");
+        ConnectorTimeTravelSpec branch = ConnectorTimeTravelSpec.branch("v1");
+
+        Assertions.assertEquals(ConnectorTimeTravelSpec.Kind.TAG, tag.getKind());
+        Assertions.assertEquals(ConnectorTimeTravelSpec.Kind.BRANCH, branch.getKind());
+        Assertions.assertEquals("v1", tag.getStringValue());
+        Assertions.assertEquals("v1", branch.getStringValue());
+        // Same name, different kind => must not be equal (else a tag query reuses a branch result).
+        Assertions.assertNotEquals(tag, branch);
+    }
+
+    @Test
+    public void incrementalFactoryHasNullStringValueAndParams() {
+        Map<String, String> raw = new HashMap<>();
+        raw.put("startSnapshotId", "1");
+        raw.put("endSnapshotId", "5");
+        ConnectorTimeTravelSpec spec = ConnectorTimeTravelSpec.incremental(raw);
+
+        Assertions.assertEquals(ConnectorTimeTravelSpec.Kind.INCREMENTAL, spec.getKind());
+        // MUTATION: stuffing a stringValue for INCREMENTAL would mislead a connector that keys
+        // off stringValue presence.
+        Assertions.assertNull(spec.getStringValue(),
+                "INCREMENTAL carries its args in the params map, not stringValue");
+        Assertions.assertEquals(raw, spec.getIncrementalParams());
+    }
+
+    @Test
+    public void incrementalParamsAreDefensivelyCopiedAndUnmodifiable() {
+        Map<String, String> raw = new HashMap<>();
+        raw.put("startSnapshotId", "1");
+        ConnectorTimeTravelSpec spec = ConnectorTimeTravelSpec.incremental(raw);
+
+        // WHY (Rule 9): a spec is a resolved request; mutating the caller's source map afterwards
+        // must NOT retroactively change the spec the engine already dispatched on.
+        // MUTATION: storing the map by reference (no copy) makes this assertion red.
+        raw.put("endSnapshotId", "5");
+        Assertions.assertFalse(spec.getIncrementalParams().containsKey("endSnapshotId"),
+                "spec must snapshot the params at construction, not alias the caller's map");
+
+        Assertions.assertThrows(UnsupportedOperationException.class,
+                () -> spec.getIncrementalParams().put("x", "y"),
+                "exposed params map must be unmodifiable");
+    }
+
+    @Test
+    public void equalsAndHashCodeIncludeAllFields() {
+        // WHY: two specs that differ in digital alone (or kind alone) are genuinely different
+        // time-travel targets; equals/hashCode must separate them or a cache could serve the wrong
+        // pinned snapshot.
+        ConnectorTimeTravelSpec a = ConnectorTimeTravelSpec.timestamp("100", true);
+        ConnectorTimeTravelSpec b = ConnectorTimeTravelSpec.timestamp("100", true);
+        ConnectorTimeTravelSpec digitalFlipped = ConnectorTimeTravelSpec.timestamp("100", false);
+
+        Assertions.assertEquals(a, b);
+        Assertions.assertEquals(a.hashCode(), b.hashCode());
+        // MUTATION: dropping `digital ==` from equals makes this red.
+        Assertions.assertNotEquals(a, digitalFlipped,
+                "specs differing only by the digital flag must not be equal");
+    }
+
+    @Test
+    public void factoriesRejectNullMeaningfulArgs() {
+        // WHY: a null where a snapshot id / name / params map is required is a programming error in
+        // the fe-core extractor; fail loud at construction rather than NPE deep in the connector.
+        Assertions.assertThrows(NullPointerException.class,
+                () -> ConnectorTimeTravelSpec.snapshotId(null));
+        Assertions.assertThrows(NullPointerException.class,
+                () -> ConnectorTimeTravelSpec.timestamp(null, true));
+        Assertions.assertThrows(NullPointerException.class,
+                () -> ConnectorTimeTravelSpec.tag(null));
+        Assertions.assertThrows(NullPointerException.class,
+                () -> ConnectorTimeTravelSpec.branch(null));
+        Assertions.assertThrows(NullPointerException.class,
+                () -> ConnectorTimeTravelSpec.incremental(null));
+    }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProviderBatchScanTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProviderBatchScanTest.java
new file mode 100644
index 00000000000000..ca241402597817
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/scan/ConnectorScanPlanProviderBatchScanTest.java
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.scan;
+
+import org.apache.doris.connector.api.ConnectorSession;
+import org.apache.doris.connector.api.handle.ConnectorColumnHandle;
+import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.pushdown.ConnectorExpression;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * FIX-BATCH-MODE-SPLIT (P4-T06e / NG-7) — guards the two additive SPI defaults on
+ * {@link ConnectorScanPlanProvider}: {@code supportsBatchScan} and {@code planScanForPartitionBatch}.
+ *
+ * <p>Why this matters: these defaults are what keep the change zero-break for the other
+ * connectors (es/jdbc/hive/paimon/hudi/trino). {@code supportsBatchScan} MUST default to false so no
+ * connector silently enters batch mode without opting in; {@code planScanForPartitionBatch} MUST
+ * delegate to the 6-arg {@code planScan} with the batch as the required partitions (and forward the
+ * limit), so a connector whose {@code planScan} is partition-set-scoped — like MaxCompute — gets
+ * correct per-batch behaviour without overriding it.
+ */
+public class ConnectorScanPlanProviderBatchScanTest {
+
+    /** Records the partition list / limit the default planScanForPartitionBatch forwards. */
+    private static final class RecordingProvider implements ConnectorScanPlanProvider {
+        static final List<ConnectorScanRange> MARKER = Collections.emptyList();
+        List<String> recordedRequiredPartitions;
+        long recordedLimit = Long.MIN_VALUE;
+        boolean fourArgCalled;
+
+        @Override
+        public List<ConnectorScanRange> planScan(ConnectorSession session, ConnectorTableHandle handle,
+                List<ConnectorColumnHandle> columns, Optional<ConnectorExpression> filter) {
+            fourArgCalled = true;
+            return MARKER;
+        }
+
+        @Override
+        public List<ConnectorScanRange> planScan(ConnectorSession session, ConnectorTableHandle handle,
+                List<ConnectorColumnHandle> columns, Optional<ConnectorExpression> filter,
+                long limit, List<String> requiredPartitions) {
+            this.recordedLimit = limit;
+            this.recordedRequiredPartitions = requiredPartitions;
+            return MARKER;
+        }
+    }
+
+    @Test
+    public void testSupportsBatchScanDefaultsFalse() {
+        // Default MUST be false: any connector that does not opt in stays on the synchronous path.
+        ConnectorScanPlanProvider provider = new RecordingProvider();
+        Assertions.assertFalse(provider.supportsBatchScan(null, null));
+    }
+
+    @Test
+    public void testPlanScanForPartitionBatchDelegatesToSixArgPlanScan() {
+        // Default MUST forward the batch as requiredPartitions and pass the limit through to the
+        // 6-arg planScan, returning its result. A connector with partition-set-scoped planScan
+        // (MaxCompute) relies on this to avoid overriding the method.
+        RecordingProvider provider = new RecordingProvider();
+        List<String> batch = Arrays.asList("pt=1", "pt=2");
+
+        List<ConnectorScanRange> result =
+                provider.planScanForPartitionBatch(null, null, Collections.emptyList(),
+                        Optional.empty(), -1L, batch);
+
+        Assertions.assertSame(RecordingProvider.MARKER, result);
+        Assertions.assertSame(batch, provider.recordedRequiredPartitions);
+        Assertions.assertEquals(-1L, provider.recordedLimit);
+        // It must route through the 6-arg overload, not collapse to the 4-arg one.
+        Assertions.assertFalse(provider.fourArgCalled);
+    }
+}
diff --git a/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/scan/ConnectorScanRangeWeightDefaultsTest.java b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/scan/ConnectorScanRangeWeightDefaultsTest.java
new file mode 100644
index 00000000000000..05d625512ba0cc
--- /dev/null
+++ b/fe/fe-connector/fe-connector-api/src/test/java/org/apache/doris/connector/api/scan/ConnectorScanRangeWeightDefaultsTest.java
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.api.scan;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+import java.util.Map;
+
+/**
+ * FIX-A1: a {@link ConnectorScanRange} that does not override the split-weight getters must inherit the
+ * {@code -1} "not provided" sentinel, so the engine ({@code PluginDrivenSplit}) leaves the FileSplit
+ * scheduling fields null and keeps {@code SplitWeight.standard()} (the no-regression guarantee for
+ * connectors with no size-based weight model: jdbc / es / trino / maxcompute).
+ */
+public class ConnectorScanRangeWeightDefaultsTest {
+
+    @Test
+    public void defaultWeightGettersReturnSentinel() {
+        ConnectorScanRange range = new ConnectorScanRange() {
+            @Override
+            public ConnectorScanRangeType getRangeType() {
+                return ConnectorScanRangeType.FILE_SCAN;
+            }
+
+            @Override
+            public Map<String, String> getProperties() {
+                return Collections.emptyMap();
+            }
+        };
+
+        // MUTATION: a 0 default would pass PluginDrivenSplit's weight>=0 gate and (with a target) flip
+        // these connectors to proportional weighting -> a behavior change for every non-weighting connector.
+        Assertions.assertEquals(-1L, range.getSelfSplitWeight(),
+                "getSelfSplitWeight() default must be the -1 sentinel, not 0");
+        Assertions.assertEquals(-1L, range.getTargetSplitSize(),
+                "getTargetSplitSize() default must be the -1 sentinel, not 0");
+    }
+}
diff --git a/fe/fe-connector/fe-connector-hive/src/test/java/org/apache/doris/connector/hive/HiveConnectorMetadataPartitionPruningTest.java b/fe/fe-connector/fe-connector-hive/src/test/java/org/apache/doris/connector/hive/HiveConnectorMetadataPartitionPruningTest.java
new file mode 100644
index 00000000000000..51380bcf58e89b
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hive/src/test/java/org/apache/doris/connector/hive/HiveConnectorMetadataPartitionPruningTest.java
@@ -0,0 +1,256 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hive;
+
+import org.apache.doris.connector.api.ConnectorType;
+import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.pushdown.ConnectorAnd;
+import org.apache.doris.connector.api.pushdown.ConnectorColumnRef;
+import org.apache.doris.connector.api.pushdown.ConnectorComparison;
+import org.apache.doris.connector.api.pushdown.ConnectorExpression;
+import org.apache.doris.connector.api.pushdown.ConnectorFilterConstraint;
+import org.apache.doris.connector.api.pushdown.ConnectorIn;
+import org.apache.doris.connector.api.pushdown.ConnectorLiteral;
+import org.apache.doris.connector.api.pushdown.FilterApplicationResult;
+import org.apache.doris.connector.hms.HmsClient;
+import org.apache.doris.connector.hms.HmsDatabaseInfo;
+import org.apache.doris.connector.hms.HmsPartitionInfo;
+import org.apache.doris.connector.hms.HmsTableInfo;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Tests {@link HiveConnectorMetadata#applyFilter} partition pruning (P3-T07 batch C).
+ *
+ * <p>WHY: this is the direct analog of fe-connector-hudi's HudiPartitionPruningTest —
+ * both exercise the same EQ/IN partition-pruning helpers (the Hudi T05 fix was mirrored
+ * from this Hive code). The tests are intentionally near-identical; they differ only in
+ * the handle type and that Hive resolves matched partition NAMES to
+ * {@link HmsPartitionInfo} via {@code getPartitions} (capped at 100000), whereas Hudi
+ * keeps the matched relative paths. Consolidating the two is deferred to the P7 Hive
+ * migration. These assertions pin: EQ / IN on partition columns prune; predicates on
+ * non-partition columns never prune; a no-effect predicate leaves the handle untouched
+ * ({@code Optional.empty()}); a zero-match predicate yields an empty pruned set.
+ */
+public class HiveConnectorMetadataPartitionPruningTest {
+
+    private static final List<String> PARTITIONS = Arrays.asList(
+            "year=2023/month=12",
+            "year=2024/month=01",
+            "year=2024/month=02");
+
+    private static final List<String> PART_KEYS = Arrays.asList("year", "month");
+
+    @Test
+    public void testEqOnPartitionColumnPrunes() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), eq("year", "2024"));
+        Assertions.assertTrue(result.isPresent());
+        Assertions.assertEquals(
+                Arrays.asList("year=2024/month=01", "year=2024/month=02"),
+                prunedLocations(result));
+    }
+
+    @Test
+    public void testInOnPartitionColumnPrunes() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), in("month", "01", "12"));
+        Assertions.assertTrue(result.isPresent());
+        Assertions.assertEquals(
+                Arrays.asList("year=2023/month=12", "year=2024/month=01"),
+                prunedLocations(result));
+    }
+
+    @Test
+    public void testAndOfTwoPartitionColumnsPrunes() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), and(eq("year", "2024"), eq("month", "01")));
+        Assertions.assertTrue(result.isPresent());
+        Assertions.assertEquals(
+                Collections.singletonList("year=2024/month=01"),
+                prunedLocations(result));
+    }
+
+    @Test
+    public void testNonPartitionColumnInAndIsIgnored() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), and(eq("year", "2024"), eq("price", "100")));
+        Assertions.assertTrue(result.isPresent());
+        Assertions.assertEquals(
+                Arrays.asList("year=2024/month=01", "year=2024/month=02"),
+                prunedLocations(result));
+    }
+
+    @Test
+    public void testNonPartitionPredicateOnlyLeavesHandleUntouched() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), eq("price", "100"));
+        Assertions.assertFalse(result.isPresent());
+    }
+
+    @Test
+    public void testPredicateMatchingAllPartitionsHasNoEffect() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), in("year", "2023", "2024"));
+        Assertions.assertFalse(result.isPresent());
+    }
+
+    @Test
+    public void testPredicateMatchingNoPartitionYieldsEmptyPrunedList() {
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(partitionedHandle(), eq("year", "1999"));
+        Assertions.assertTrue(result.isPresent());
+        Assertions.assertTrue(prunedLocations(result).isEmpty());
+    }
+
+    @Test
+    public void testUnpartitionedTableIsNotTouched() {
+        HiveTableHandle handle = new HiveTableHandle.Builder("db", "t", HiveTableType.HIVE)
+                .partitionKeyNames(Collections.emptyList())
+                .build();
+        Optional<FilterApplicationResult<ConnectorTableHandle>> result =
+                applyFilter(handle, eq("year", "2024"));
+        Assertions.assertFalse(result.isPresent());
+    }
+
+    // ===== helpers =====
+
+    private Optional<FilterApplicationResult<ConnectorTableHandle>> applyFilter(
+            HiveTableHandle handle, ConnectorExpression expr) {
+        HiveConnectorMetadata metadata = new HiveConnectorMetadata(
+                new FakeHmsClient(PARTITIONS), Collections.emptyMap());
+        return metadata.applyFilter(null, handle, new ConnectorFilterConstraint(expr));
+    }
+
+    private HiveTableHandle partitionedHandle() {
+        return new HiveTableHandle.Builder("db", "t", HiveTableType.HIVE)
+                .partitionKeyNames(PART_KEYS)
+                .build();
+    }
+
+    private List<String> prunedLocations(Optional<FilterApplicationResult<ConnectorTableHandle>> result) {
+        List<HmsPartitionInfo> pruned =
+                ((HiveTableHandle) result.get().getHandle()).getPrunedPartitions();
+        List<String> locations = new ArrayList<>();
+        for (HmsPartitionInfo p : pruned) {
+            locations.add(p.getLocation());
+        }
+        return locations;
+    }
+
+    private static ConnectorColumnRef colRef(String name) {
+        return new ConnectorColumnRef(name, ConnectorType.of("STRING"));
+    }
+
+    private static ConnectorLiteral lit(String value) {
+        return new ConnectorLiteral(ConnectorType.of("STRING"), value);
+    }
+
+    private static ConnectorComparison eq(String col, String value) {
+        return new ConnectorComparison(ConnectorComparison.Operator.EQ, colRef(col), lit(value));
+    }
+
+    private static ConnectorIn in(String col, String... values) {
+        List<ConnectorExpression> inList = new ArrayList<>();
+        for (String v : values) {
+            inList.add(lit(v));
+        }
+        return new ConnectorIn(colRef(col), inList, false);
+    }
+
+    private static ConnectorAnd and(ConnectorExpression... children) {
+        return new ConnectorAnd(Arrays.asList(children));
+    }
+
+    /**
+     * Minimal {@link HmsClient} double. {@code listPartitionNames} returns a fixed list;
+     * {@code getPartitions} echoes each requested name back as an {@link HmsPartitionInfo}
+     * whose location IS the partition name (so the pruning selection can be asserted).
+     * The rest fail loud.
+     */
+    private static final class FakeHmsClient implements HmsClient {
+        private final List<String> partitionNames;
+
+        FakeHmsClient(List<String> partitionNames) {
+            this.partitionNames = partitionNames;
+        }
+
+        @Override
+        public List<String> listPartitionNames(String dbName, String tableName, int maxParts) {
+            return partitionNames;
+        }
+
+        @Override
+        public List<HmsPartitionInfo> getPartitions(String dbName, String tableName,
+                List<String> partNames) {
+            List<HmsPartitionInfo> result = new ArrayList<>();
+            for (String name : partNames) {
+                result.add(new HmsPartitionInfo(Collections.emptyList(), name,
+                        null, null, null, Collections.emptyMap()));
+            }
+            return result;
+        }
+
+        @Override
+        public List<String> listDatabases() {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public HmsDatabaseInfo getDatabase(String dbName) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public List<String> listTables(String dbName) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public boolean tableExists(String dbName, String tableName) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public HmsTableInfo getTable(String dbName, String tableName) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public Map<String, String> getDefaultColumnValues(String dbName, String tableName) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public HmsPartitionInfo getPartition(String dbName, String tableName, List<String> values) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public void close() {
+        }
+    }
+}
diff --git a/fe/fe-connector/fe-connector-hive/src/test/java/org/apache/doris/connector/hive/HiveFileFormatTest.java b/fe/fe-connector/fe-connector-hive/src/test/java/org/apache/doris/connector/hive/HiveFileFormatTest.java
new file mode 100644
index 00000000000000..d4cfe275cf48a6
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hive/src/test/java/org/apache/doris/connector/hive/HiveFileFormatTest.java
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hive;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link HiveFileFormat} detection (first test for fe-connector-hive; P3-T07 batch C).
+ *
+ * WHY: the BE file reader that runs (parquet/orc/text/json scanner) is selected from the
+ * detected format, so a misdetection causes read failures or silent corruption. Detection
+ * is a case-insensitive substring match on the InputFormat class name with a SerDe-library
+ * fallback. These tests pin that contract, the inputFormat-wins precedence, and the
+ * splittability of each format.
+ */
+public class HiveFileFormatTest {
+
+    @Test
+    public void testFromInputFormatDetectsByContent() {
+        String parquetClass = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat";
+        String orcClass = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
+        String textClass = "org.apache.hadoop.mapred.TextInputFormat";
+        String jsonClass = "org.apache.hadoop.hive.json.JsonInputFormat";
+        Assertions.assertEquals(HiveFileFormat.PARQUET, HiveFileFormat.fromInputFormat(parquetClass));
+        Assertions.assertEquals(HiveFileFormat.ORC, HiveFileFormat.fromInputFormat(orcClass));
+        Assertions.assertEquals(HiveFileFormat.TEXT, HiveFileFormat.fromInputFormat(textClass));
+        Assertions.assertEquals(HiveFileFormat.JSON, HiveFileFormat.fromInputFormat(jsonClass));
+    }
+
+    @Test
+    public void testFromInputFormatUnknownAndNull() {
+        HiveFileFormat unknown = HiveFileFormat.UNKNOWN;
+        Assertions.assertEquals(unknown, HiveFileFormat.fromInputFormat(null));
+        Assertions.assertEquals(unknown, HiveFileFormat.fromInputFormat("com.example.CustomInputFormat"));
+    }
+
+    @Test
+    public void testFromSerDeLib() {
+        String parquetSerDe = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe";
+        String orcSerDe = "org.apache.hadoop.hive.ql.io.orc.OrcSerde";
+        String lazySimpleSerDe = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
+        String openCsvSerDe = "org.apache.hadoop.hive.serde2.OpenCSVSerde";
+        String jsonSerDe = "org.apache.hive.hcatalog.data.JsonSerDe";
+        Assertions.assertEquals(HiveFileFormat.PARQUET, HiveFileFormat.fromSerDeLib(parquetSerDe));
+        Assertions.assertEquals(HiveFileFormat.ORC, HiveFileFormat.fromSerDeLib(orcSerDe));
+        Assertions.assertEquals(HiveFileFormat.TEXT, HiveFileFormat.fromSerDeLib(lazySimpleSerDe));
+        Assertions.assertEquals(HiveFileFormat.TEXT, HiveFileFormat.fromSerDeLib(openCsvSerDe));
+        Assertions.assertEquals(HiveFileFormat.JSON, HiveFileFormat.fromSerDeLib(jsonSerDe));
+        Assertions.assertEquals(HiveFileFormat.UNKNOWN, HiveFileFormat.fromSerDeLib(null));
+    }
+
+    @Test
+    public void testDetectPrefersInputFormatThenFallsBackToSerDe() {
+        String parquetInput = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat";
+        String unknownInput = "com.example.CustomInputFormat";
+        String orcSerDe = "org.apache.hadoop.hive.ql.io.orc.OrcSerde";
+        String csvSerDe = "org.apache.hadoop.hive.serde2.OpenCSVSerde";
+        // A recognized inputFormat decides the result even when the SerDe says otherwise...
+        Assertions.assertEquals(HiveFileFormat.PARQUET, HiveFileFormat.detect(parquetInput, orcSerDe));
+        // ...and an unrecognized inputFormat defers to the SerDe library.
+        Assertions.assertEquals(HiveFileFormat.TEXT, HiveFileFormat.detect(unknownInput, csvSerDe));
+    }
+
+    @Test
+    public void testIsSplittable() {
+        HiveFileFormat[] splittable = {HiveFileFormat.PARQUET, HiveFileFormat.ORC, HiveFileFormat.TEXT};
+        for (HiveFileFormat format : splittable) {
+            Assertions.assertTrue(format.isSplittable());
+        }
+        Assertions.assertFalse(HiveFileFormat.JSON.isSplittable());
+        Assertions.assertFalse(HiveFileFormat.UNKNOWN.isSplittable());
+    }
+
+    @Test
+    public void testFormatName() {
+        Assertions.assertEquals("parquet", HiveFileFormat.PARQUET.getFormatName());
+        Assertions.assertEquals("orc", HiveFileFormat.ORC.getFormatName());
+        Assertions.assertEquals("text", HiveFileFormat.TEXT.getFormatName());
+        Assertions.assertEquals("json", HiveFileFormat.JSON.getFormatName());
+    }
+}
diff --git a/fe/fe-connector/fe-connector-hms/src/test/java/org/apache/doris/connector/hms/HmsTypeMappingTest.java b/fe/fe-connector/fe-connector-hms/src/test/java/org/apache/doris/connector/hms/HmsTypeMappingTest.java
new file mode 100644
index 00000000000000..4c63afae1f8925
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hms/src/test/java/org/apache/doris/connector/hms/HmsTypeMappingTest.java
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hms;
+
+import org.apache.doris.connector.api.ConnectorType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests {@link HmsTypeMapping} — the Hive type-string parser shared by the hms and hive
+ * connectors (first test for fe-connector-hms; P3-T07 batch C baseline).
+ *
+ * WHY: this is the SPI-clean equivalent of fe-core
+ * {@code HiveMetaStoreClientHelper.hiveTypeToDorisType}. It is pure parsing logic where
+ * bugs hide — nested complex types, precision/scale extraction, and option-driven
+ * mappings. A wrong mapping silently mistypes every column of an HMS/Hive/Iceberg-on-HMS
+ * table. These tests pin the exact ConnectorType per Hive type string and the
+ * nesting-aware field splitting (Rule 9: encode the contract, not just the happy path).
+ */
+public class HmsTypeMappingTest {
+
+    private static ConnectorType map(String hiveType) {
+        return HmsTypeMapping.toConnectorType(hiveType);
+    }
+
+    @Test
+    public void testPrimitives() {
+        Assertions.assertEquals(ConnectorType.of("BOOLEAN"), map("boolean"));
+        Assertions.assertEquals(ConnectorType.of("TINYINT"), map("tinyint"));
+        Assertions.assertEquals(ConnectorType.of("SMALLINT"), map("smallint"));
+        Assertions.assertEquals(ConnectorType.of("INT"), map("int"));
+        Assertions.assertEquals(ConnectorType.of("BIGINT"), map("bigint"));
+        Assertions.assertEquals(ConnectorType.of("FLOAT"), map("float"));
+        Assertions.assertEquals(ConnectorType.of("DOUBLE"), map("double"));
+        Assertions.assertEquals(ConnectorType.of("STRING"), map("string"));
+        Assertions.assertEquals(ConnectorType.of("DATEV2"), map("date"));
+    }
+
+    @Test
+    public void testTimestampUsesTimeScale() {
+        // Default time scale is 6.
+        Assertions.assertEquals(ConnectorType.of("DATETIMEV2", 6, -1), map("timestamp"));
+        // A custom time scale flows through.
+        Assertions.assertEquals(ConnectorType.of("DATETIMEV2", 3, -1),
+                HmsTypeMapping.toConnectorType("timestamp", new HmsTypeMapping.Options(3, false, false)));
+    }
+
+    @Test
+    public void testBinaryDefaultAndVarbinaryOption() {
+        Assertions.assertEquals(ConnectorType.of("STRING"), map("binary"));
+        Assertions.assertEquals(ConnectorType.of("VARBINARY"),
+                HmsTypeMapping.toConnectorType("binary", new HmsTypeMapping.Options(6, true, false)));
+    }
+
+    @Test
+    public void testCharAndVarcharLength() {
+        Assertions.assertEquals(ConnectorType.of("CHAR", 10, -1), map("char(10)"));
+        Assertions.assertEquals(ConnectorType.of("VARCHAR", 255, -1), map("varchar(255)"));
+        // Missing length parameter degrades to the unparameterized type, not a crash.
+        Assertions.assertEquals(ConnectorType.of("CHAR"), map("char"));
+        Assertions.assertEquals(ConnectorType.of("VARCHAR"), map("varchar"));
+    }
+
+    @Test
+    public void testDecimalPrecisionScaleAndDefaults() {
+        Assertions.assertEquals(ConnectorType.of("DECIMALV3", 10, 2), map("decimal(10,2)"));
+        // Only precision given -> default scale 0.
+        Assertions.assertEquals(ConnectorType.of("DECIMALV3", 10, 0), map("decimal(10)"));
+        // Bare decimal -> default precision 9, scale 0.
+        Assertions.assertEquals(ConnectorType.of("DECIMALV3", 9, 0), map("decimal"));
+    }
+
+    @Test
+    public void testArrayIncludingNested() {
+        Assertions.assertEquals(ConnectorType.arrayOf(ConnectorType.of("INT")), map("array<int>"));
+        Assertions.assertEquals(
+                ConnectorType.arrayOf(ConnectorType.arrayOf(ConnectorType.of("STRING"))),
+                map("array<array<string>>"));
+    }
+
+    @Test
+    public void testMapIncludingNestedValue() {
+        Assertions.assertEquals(
+                ConnectorType.mapOf(ConnectorType.of("STRING"), ConnectorType.of("INT")),
+                map("map<string,int>"));
+        // The inner comma of the nested array value must NOT be mistaken for the key/value
+        // separator — this is exactly what findNextNestedField guards.
+        Assertions.assertEquals(
+                ConnectorType.mapOf(ConnectorType.of("INT"),
+                        ConnectorType.arrayOf(ConnectorType.of("STRING"))),
+                map("map<int,array<string>>"));
+    }
+
+    @Test
+    public void testStructIncludingNestedFields() {
+        Assertions.assertEquals(
+                ConnectorType.structOf(Arrays.asList("a", "b"),
+                        Arrays.asList(ConnectorType.of("INT"), ConnectorType.of("STRING"))),
+                map("struct<a:int,b:string>"));
+        Assertions.assertEquals(
+                ConnectorType.structOf(Arrays.asList("x", "y"),
+                        Arrays.asList(ConnectorType.arrayOf(ConnectorType.of("INT")),
+                                ConnectorType.mapOf(ConnectorType.of("STRING"), ConnectorType.of("BIGINT")))),
+                map("struct<x:array<int>,y:map<string,bigint>>"));
+    }
+
+    @Test
+    public void testTimestampWithLocalTimeZone() {
+        // Default: mapped to DATETIMEV2.
+        Assertions.assertEquals(ConnectorType.of("DATETIMEV2", 6, -1),
+                map("timestamp with local time zone"));
+        // With the timestamp-tz option: mapped to TIMESTAMPTZ.
+        Assertions.assertEquals(ConnectorType.of("TIMESTAMPTZ", 6, -1),
+                HmsTypeMapping.toConnectorType("timestamp with local time zone",
+                        new HmsTypeMapping.Options(6, false, true)));
+    }
+
+    @Test
+    public void testUnsupportedTypeIsUnsupportedNotCrash() {
+        Assertions.assertEquals(ConnectorType.of("UNSUPPORTED"), map("interval_day_time"));
+        Assertions.assertEquals(ConnectorType.of("UNSUPPORTED"), map("void"));
+    }
+
+    @Test
+    public void testCaseInsensitiveAndLowercasesNestedNames() {
+        Assertions.assertEquals(ConnectorType.of("INT"), map("INT"));
+        Assertions.assertEquals(ConnectorType.arrayOf(ConnectorType.of("STRING")), map("ARRAY<STRING>"));
+        // The whole type string is lowercased first, so struct field names are lowercased too.
+        Assertions.assertEquals(
+                ConnectorType.structOf(Arrays.asList("name"), Arrays.asList(ConnectorType.of("INT"))),
+                map("STRUCT<Name:INT>"));
+    }
+
+    @Test
+    public void testFindNextNestedFieldRespectsNesting() {
+        // Top-level comma found at the right index...
+        Assertions.assertEquals(3, HmsTypeMapping.findNextNestedField("int,string"));
+        Assertions.assertEquals(10, HmsTypeMapping.findNextNestedField("array<int>,string"));
+        // ...and a comma nested inside <> is skipped (returns the next top-level comma).
+        Assertions.assertEquals(15, HmsTypeMapping.findNextNestedField("map<string,int>,extra"));
+        // No top-level comma -> returns the length.
+        Assertions.assertEquals(3, HmsTypeMapping.findNextNestedField("int"));
+    }
+}
diff --git a/fe/fe-connector/fe-connector-hudi/src/main/assembly/plugin-zip.xml b/fe/fe-connector/fe-connector-hudi/src/main/assembly/plugin-zip.xml
index 0d29baa55b34bf..9927cb0882454c 100644
--- a/fe/fe-connector/fe-connector-hudi/src/main/assembly/plugin-zip.xml
+++ b/fe/fe-connector/fe-connector-hudi/src/main/assembly/plugin-zip.xml
@@ -46,6 +46,12 @@ under the License.
org.apache.doris:fe-connector-spi
org.apache.doris:fe-extension-spi
org.apache.doris:fe-filesystem-api
+
+ org.apache.doris:fe-thrift
+ org.apache.thrift:libthrift
org.apache.logging.log4j:*
org.slf4j:*
diff --git a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiConnectorMetadata.java b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiConnectorMetadata.java
index 7b4fe4b0b791e5..3e43b25230fbb3 100644
--- a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiConnectorMetadata.java
+++ b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiConnectorMetadata.java
@@ -24,7 +24,12 @@
import org.apache.doris.connector.api.ConnectorType;
import org.apache.doris.connector.api.handle.ConnectorColumnHandle;
import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.pushdown.ConnectorAnd;
+import org.apache.doris.connector.api.pushdown.ConnectorComparison;
+import org.apache.doris.connector.api.pushdown.ConnectorExpression;
import org.apache.doris.connector.api.pushdown.ConnectorFilterConstraint;
+import org.apache.doris.connector.api.pushdown.ConnectorIn;
+import org.apache.doris.connector.api.pushdown.ConnectorLiteral;
import org.apache.doris.connector.api.pushdown.FilterApplicationResult;
import org.apache.doris.connector.hms.HmsClient;
import org.apache.doris.connector.hms.HmsClientException;
@@ -39,10 +44,13 @@
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
import java.util.stream.Collectors;
/**
@@ -150,17 +158,38 @@ public Optional> applyFilter(
return Optional.empty();
}
- // List all partition names from HMS (e.g. "year=2024/month=01")
- // These are relative paths that double as partition identifiers
- List partitionNames = hmsClient.listPartitionNames(
+ // Extract equality/IN predicates on partition columns from the expression.
+ // No partition predicate -> leave the handle untouched so resolvePartitions
+ // falls back to Hudi's own metadata listing (HoodieTableMetadata.getAllPartitionPaths).
+ Map> partitionPredicates = extractPartitionPredicates(
+ constraint.getExpression(), partKeyNames);
+ if (partitionPredicates.isEmpty()) {
+ return Optional.empty();
+ }
+
+ // List candidate partition names from HMS (e.g. "year=2024/month=01"). These
+ // relative paths double as partition identifiers consumed by HudiScanPlanProvider.
+ // Keep maxParts=-1 (unlimited): no silent partition truncation.
+ List allPartNames = hmsClient.listPartitionNames(
hudiHandle.getDbName(), hudiHandle.getTableName(), -1);
- if (partitionNames == null || partitionNames.isEmpty()) {
+ if (allPartNames == null || allPartNames.isEmpty()) {
+ return Optional.empty();
+ }
+
+ List matchedPartNames = prunePartitionNames(
+ allPartNames, partKeyNames, partitionPredicates);
+ if (matchedPartNames.size() == allPartNames.size()) {
+ // No pruning effect
return Optional.empty();
}
- // Build updated handle with partition paths for scan planning
+ LOG.info("Partition pruning: {}.{} all={} pruned={}",
+ hudiHandle.getDbName(), hudiHandle.getTableName(),
+ allPartNames.size(), matchedPartNames.size());
+
+ // Build updated handle carrying only the matched partition paths for scan planning.
HudiTableHandle updatedHandle = hudiHandle.toBuilder()
- .prunedPartitionPaths(partitionNames)
+ .prunedPartitionPaths(matchedPartNames)
.build();
return Optional.of(new FilterApplicationResult<>(updatedHandle, constraint.getExpression(), false));
@@ -230,8 +259,11 @@ private List getSchemaFromHms(String dbName, String tableName)
/**
* Convert Avro schema fields to ConnectorColumn list.
+ *
+ * Package-private and static so it can be unit-tested directly with a
+ * hand-built Avro schema (no live HoodieTableMetaClient needed).
*/
- private List avroSchemaToColumns(Schema avroSchema) {
+ static List avroSchemaToColumns(Schema avroSchema) {
List fields = avroSchema.getFields();
List columns = new ArrayList<>(fields.size());
for (Schema.Field field : fields) {
@@ -239,7 +271,12 @@ private List avroSchemaToColumns(Schema avroSchema) {
Schema fieldSchema = unwrapNullable(field.schema());
ConnectorType connectorType = HudiTypeMapping.fromAvroSchema(fieldSchema);
String comment = field.doc() != null ? field.doc() : "";
- columns.add(new ConnectorColumn(field.name(), connectorType, comment, nullable, null));
+ // Lower-case the top-level column name to mirror legacy
+ // HMSExternalTable.initHudiSchema (name().toLowerCase(Locale.ROOT)).
+ // Nested struct field names are left as-is here and in HudiTypeMapping,
+ // matching legacy (which lowercases only the top-level column name).
+ String columnName = field.name().toLowerCase(Locale.ROOT);
+ columns.add(new ConnectorColumn(columnName, connectorType, comment, nullable, null));
}
return columns;
}
@@ -303,4 +340,113 @@ private Configuration buildHadoopConf() {
}
return conf;
}
+
+ // ========== Partition pruning helpers ==========
+ // Mirrors HiveConnectorMetadata's EQ/IN partition pruning. Duplicated rather than
+ // shared because fe-connector-hudi depends on fe-connector-hms, not fe-connector-hive;
+ // consolidate during the Hive (P7) migration. See P3-T05 design.
+
+    /**
+     * Collects, per partition column, the literal values the filter allows for that column.
+     *
+     * Delegates the tree walk to {@code extractPredicatesRecursive}, which recognizes
+     * {@code col = 'value'}, non-negated {@code col IN ('v1', 'v2', ...)} and AND
+     * combinations of those. Values found for the same column are accumulated in one list.
+     *
+     * @param expr the pushed-down filter expression
+     * @param partKeyNames names of the table's partition columns
+     * @return partition column name -> allowed values (as strings); empty when no
+     *         supported predicate references a partition column
+     */
+    private Map<String, List<String>> extractPartitionPredicates(
+            ConnectorExpression expr, List<String> partKeyNames) {
+        Set<String> partKeySet = partKeyNames.stream().collect(Collectors.toSet());
+        Map<String, List<String>> result = new HashMap<>();
+        extractPredicatesRecursive(expr, partKeySet, result);
+        return result;
+    }
+
+    /**
+     * Recursively collects partition-column predicates from {@code expr} into {@code result}.
+     *
+     * Recognized shapes: AND (every conjunct is visited), {@code columnRef = literal}
+     * (column on the left side only) and non-negated {@code columnRef IN (literal, ...)}.
+     * Every other node is ignored, which only means less pruning.
+     *
+     * Values from several predicates on the same column are appended to the same list, so
+     * the list is the union of what the individual predicates allow. That may keep more
+     * partitions than strictly necessary but never fewer.
+     *
+     * @param expr expression node to inspect
+     * @param partKeySet names of the partition columns
+     * @param result accumulator: partition column name -> allowed values
+     */
+    private void extractPredicatesRecursive(ConnectorExpression expr,
+            Set<String> partKeySet, Map<String, List<String>> result) {
+        if (expr instanceof ConnectorAnd) {
+            for (ConnectorExpression child : ((ConnectorAnd) expr).getConjuncts()) {
+                extractPredicatesRecursive(child, partKeySet, result);
+            }
+        } else if (expr instanceof ConnectorComparison) {
+            ConnectorComparison cmp = (ConnectorComparison) expr;
+            if (cmp.getOperator() == ConnectorComparison.Operator.EQ) {
+                String colName = extractColumnName(cmp.getLeft());
+                String value = extractLiteralValue(cmp.getRight());
+                if (colName != null && value != null && partKeySet.contains(colName)) {
+                    result.computeIfAbsent(colName, k -> new ArrayList<>()).add(value);
+                }
+            }
+        } else if (expr instanceof ConnectorIn) {
+            ConnectorIn inExpr = (ConnectorIn) expr;
+            if (inExpr.isNegated()) {
+                return;
+            }
+            String colName = extractColumnName(inExpr.getValue());
+            if (colName == null || !partKeySet.contains(colName)) {
+                return;
+            }
+            List<String> values = new ArrayList<>();
+            for (ConnectorExpression item : inExpr.getInList()) {
+                if (!(item instanceof ConnectorLiteral)) {
+                    // A non-literal item may evaluate to any value. Pruning on the remaining
+                    // literals alone could drop partitions that hold matching rows, so the
+                    // whole IN predicate is left out of pruning.
+                    return;
+                }
+                // A literal holding null yields no value: it cannot equal any partition value.
+                String val = extractLiteralValue(item);
+                if (val != null) {
+                    values.add(val);
+                }
+            }
+            if (!values.isEmpty()) {
+                result.computeIfAbsent(colName, k -> new ArrayList<>()).addAll(values);
+            }
+        }
+    }
+
+    /** Returns the referenced column name when {@code expr} is a ConnectorColumnRef, otherwise null. */
+    private String extractColumnName(ConnectorExpression expr) {
+        boolean isColumnRef = expr instanceof org.apache.doris.connector.api.pushdown.ConnectorColumnRef;
+        if (!isColumnRef) {
+            return null;
+        }
+        return ((org.apache.doris.connector.api.pushdown.ConnectorColumnRef) expr).getColumnName();
+    }
+
+    /**
+     * Returns the {@code String.valueOf} form of a literal's value, or null when
+     * {@code expr} is not a ConnectorLiteral or the literal holds a null value.
+     */
+    private String extractLiteralValue(ConnectorExpression expr) {
+        if (!(expr instanceof ConnectorLiteral)) {
+            return null;
+        }
+        Object literalValue = ((ConnectorLiteral) expr).getValue();
+        if (literalValue == null) {
+            return null;
+        }
+        return String.valueOf(literalValue);
+    }
+
+    /**
+     * Prunes partition names based on extracted equality predicates.
+     * Partition names follow the Hive convention: key1=val1/key2=val2
+     *
+     * @param allPartNames candidate partition names
+     * @param partKeyNames partition column names, forwarded to parsePartitionName
+     * @param predicates partition column name -> allowed values
+     * @return the names from allPartNames, in their original order, whose parsed values
+     *         satisfy every predicate
+     */
+    private List<String> prunePartitionNames(List<String> allPartNames,
+            List<String> partKeyNames, Map<String, List<String>> predicates) {
+        List<String> matched = new ArrayList<>();
+        for (String partName : allPartNames) {
+            Map<String, String> partValues = parsePartitionName(partName, partKeyNames);
+            if (matchesPredicates(partValues, predicates)) {
+                matched.add(partName);
+            }
+        }
+        return matched;
+    }
+
+    /**
+     * Parses a Hive-style partition name ({@code key1=val1/key2=val2}) into a key -> value map.
+     *
+     * Segments that contain no '=' or start with '=' are skipped. Keys and values are
+     * percent-decoded: Hive escapes special characters in partition names (for example
+     * ':' becomes "%3A", see Hive's FileUtils.escapePathName), while predicate literals are
+     * plain text. Comparing the escaped text would never match such a value and the
+     * partition would be pruned although it holds matching rows.
+     *
+     * @param partName partition name as listed by the metastore
+     * @param partKeyNames partition column names; not used by the parsing itself
+     * @return decoded partition values keyed by partition column name
+     */
+    private Map<String, String> parsePartitionName(String partName,
+            List<String> partKeyNames) {
+        Map<String, String> values = new HashMap<>();
+        for (String part : partName.split("/")) {
+            int eq = part.indexOf('=');
+            if (eq > 0) {
+                values.put(unescapePartitionPathName(part.substring(0, eq)),
+                        unescapePartitionPathName(part.substring(eq + 1)));
+            }
+        }
+        return values;
+    }
+
+    /**
+     * Decodes "%XX" hex escapes in one partition path component. A '%' that is not
+     * followed by two hex digits is kept unchanged.
+     */
+    private static String unescapePartitionPathName(String text) {
+        if (text.indexOf('%') < 0) {
+            return text;
+        }
+        StringBuilder decoded = new StringBuilder(text.length());
+        for (int i = 0; i < text.length(); i++) {
+            char c = text.charAt(i);
+            if (c == '%' && i + 2 < text.length()) {
+                int hi = Character.digit(text.charAt(i + 1), 16);
+                int lo = Character.digit(text.charAt(i + 2), 16);
+                if (hi >= 0 && lo >= 0) {
+                    decoded.append((char) (hi * 16 + lo));
+                    i += 2;
+                    continue;
+                }
+            }
+            decoded.append(c);
+        }
+        return decoded.toString();
+    }
+
+    /**
+     * Tells whether a partition satisfies every predicate.
+     *
+     * @param partValues the partition's values keyed by partition column name
+     * @param predicates partition column name -> allowed values
+     * @return false as soon as one predicate column is missing from partValues or its
+     *         value is not among the allowed ones; true otherwise (including when
+     *         predicates is empty)
+     */
+    private boolean matchesPredicates(Map<String, String> partValues,
+            Map<String, List<String>> predicates) {
+        for (Map.Entry<String, List<String>> entry : predicates.entrySet()) {
+            String colName = entry.getKey();
+            List<String> allowedValues = entry.getValue();
+            String actualValue = partValues.get(colName);
+            if (actualValue == null || !allowedValues.contains(actualValue)) {
+                return false;
+            }
+        }
+        return true;
+    }
}
diff --git a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanPlanProvider.java b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanPlanProvider.java
index d5f6b3628ddc66..9df29b5166889a 100644
--- a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanPlanProvider.java
+++ b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanPlanProvider.java
@@ -116,7 +116,7 @@ public List planScan(
columnNames = avroSchema.getFields().stream()
.map(Schema.Field::name).collect(Collectors.toList());
columnTypes = avroSchema.getFields().stream()
- .map(f -> HudiTypeMapping.fromAvroSchema(unwrapNullable(f.schema())).getTypeName())
+ .map(f -> HudiTypeMapping.toHiveTypeString(f.schema()))
.collect(Collectors.toList());
} catch (Exception e) {
LOG.warn("Failed to resolve Hudi schema for JNI reader, JNI splits may fail: {}",
@@ -347,17 +347,6 @@ private static String detectFileFormat(String filePath) {
return "parquet";
}
- private static Schema unwrapNullable(Schema schema) {
- if (schema.getType() == Schema.Type.UNION) {
- for (Schema s : schema.getTypes()) {
- if (s.getType() != Schema.Type.NULL) {
- return s;
- }
- }
- }
- return schema;
- }
-
private Configuration buildHadoopConf() {
Configuration conf = new Configuration();
for (Map.Entry entry : properties.entrySet()) {
diff --git a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanRange.java b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanRange.java
index 3e2526a261adc4..7566f9ae1b9084 100644
--- a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanRange.java
+++ b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiScanRange.java
@@ -26,7 +26,6 @@
import org.apache.doris.thrift.TTableFormatFileDesc;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -56,6 +55,15 @@ public class HudiScanRange implements ConnectorScanRange {
private final String fileFormat;
private final Map partitionValues;
private final Map properties;
+ // JNI reader list fields. Kept as typed lists (NOT joined into the
+ // properties map) because Hive type strings contain commas
+ // (e.g. decimal(10,2), struct<a:int,b:string>): a comma join+split
+ // round-trip would shatter them and misalign column_names/column_types.
+ // BE (hudi_jni_reader.cpp) joins these lists itself with the correct
+ // delimiters (names ',', types '#', delta logs ',').
+ private final List deltaLogs;
+ private final List columnNames;
+ private final List columnTypes;
private HudiScanRange(Builder builder) {
this.path = builder.path;
@@ -85,16 +93,17 @@ private HudiScanRange(Builder builder) {
props.put("hudi.data_file_path", builder.dataFilePath);
}
props.put("hudi.data_file_length", String.valueOf(builder.dataFileLength));
- if (builder.deltaLogs != null && !builder.deltaLogs.isEmpty()) {
- props.put("hudi.delta_logs", String.join(",", builder.deltaLogs));
- }
- if (builder.columnNames != null && !builder.columnNames.isEmpty()) {
- props.put("hudi.column_names", String.join(",", builder.columnNames));
- }
- if (builder.columnTypes != null && !builder.columnTypes.isEmpty()) {
- props.put("hudi.column_types", String.join(",", builder.columnTypes));
- }
this.properties = Collections.unmodifiableMap(props);
+
+ this.deltaLogs = builder.deltaLogs != null
+ ? Collections.unmodifiableList(new ArrayList<>(builder.deltaLogs))
+ : Collections.emptyList();
+ this.columnNames = builder.columnNames != null
+ ? Collections.unmodifiableList(new ArrayList<>(builder.columnNames))
+ : Collections.emptyList();
+ this.columnTypes = builder.columnTypes != null
+ ? Collections.unmodifiableList(new ArrayList<>(builder.columnTypes))
+ : Collections.emptyList();
}
@Override
@@ -158,8 +167,7 @@ public void populateRangeParams(TTableFormatFileDesc formatDesc,
// Dynamic format downgrade: if JNI but no delta logs, use native reader
if (isJni) {
- String deltaLogs = props.get("hudi.delta_logs");
- if (deltaLogs == null || deltaLogs.isEmpty()) {
+ if (deltaLogs.isEmpty()) {
String dataFilePath = props.getOrDefault(
"hudi.data_file_path", "");
if (!dataFilePath.isEmpty()) {
@@ -188,20 +196,18 @@ public void populateRangeParams(TTableFormatFileDesc formatDesc,
fileDesc.setDataFileLength(Long.parseLong(
props.getOrDefault("hudi.data_file_length", "0")));
- String deltaLogs = props.get("hudi.delta_logs");
- if (deltaLogs != null && !deltaLogs.isEmpty()) {
- fileDesc.setDeltaLogs(
- Arrays.asList(deltaLogs.split(",")));
+ // Set typed lists directly. BE (hudi_jni_reader.cpp) joins them with
+ // the correct delimiters: column_names ',', column_types '#', delta
+ // logs ','. Joining/splitting here would shatter comma-bearing Hive
+ // type strings (decimal(10,2), struct<...>).
+ if (!deltaLogs.isEmpty()) {
+ fileDesc.setDeltaLogs(deltaLogs);
}
- String colNames = props.get("hudi.column_names");
- if (colNames != null && !colNames.isEmpty()) {
- fileDesc.setColumnNames(
- Arrays.asList(colNames.split(",")));
+ if (!columnNames.isEmpty()) {
+ fileDesc.setColumnNames(columnNames);
}
- String colTypes = props.get("hudi.column_types");
- if (colTypes != null && !colTypes.isEmpty()) {
- fileDesc.setColumnTypes(
- Arrays.asList(colTypes.split(",")));
+ if (!columnTypes.isEmpty()) {
+ fileDesc.setColumnTypes(columnTypes);
}
}
diff --git a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiTypeMapping.java b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiTypeMapping.java
index 3e3d10bff7ad8c..3581bc2d1893c2 100644
--- a/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiTypeMapping.java
+++ b/fe/fe-connector/fe-connector-hudi/src/main/java/org/apache/doris/connector/hudi/HudiTypeMapping.java
@@ -78,6 +78,99 @@ public static ConnectorType fromAvroSchema(Schema avroSchema) {
}
}
+    /**
+     * Convert an Avro schema to a Hive type string, mirroring fe-core
+     * {@code HudiUtils.convertAvroToHiveType}.
+     *
+     * <p>This feeds the BE Hudi JNI scanner's {@code hudi_column_types} param.
+     * The BE joins the per-column type list with {@code '#'} and the scanner
+     * ({@code HadoopHudiJniScanner}) splits it back on {@code '#'} — so each
+     * returned string is a single list element and may safely contain commas
+     * (e.g. {@code decimal(10,2)}, {@code struct<a:int,b:string>},
+     * {@code map<string,int>}).
+     *
+     * <p>This is distinct from {@link #fromAvroSchema}, which maps Avro to a
+     * Doris {@link ConnectorType} for schema reporting. The JNI reader needs
+     * Hive type strings, not Doris type names.
+     *
+     * <p>A union is accepted only when it has exactly one non-null branch; that
+     * branch's type is returned. Map types are always rendered with a
+     * {@code string} key, because Avro map keys are strings.
+     *
+     * @param schema the Avro schema of one column (or of a nested element)
+     * @return the Hive type string for {@code schema}
+     * @throws IllegalArgumentException for unsupported types (matches the
+     *         legacy fail-loud behavior)
+     */
+    public static String toHiveTypeString(Schema schema) {
+        Schema.Type type = schema.getType();
+        LogicalType logicalType = schema.getLogicalType();
+
+        switch (type) {
+            case BOOLEAN:
+                return "boolean";
+            case INT:
+                if (logicalType instanceof LogicalTypes.Date) {
+                    return "date";
+                }
+                if (logicalType instanceof LogicalTypes.TimeMillis) {
+                    throw unsupportedLogicalType(schema);
+                }
+                return "int";
+            case LONG:
+                if (logicalType instanceof LogicalTypes.TimestampMillis
+                        || logicalType instanceof LogicalTypes.TimestampMicros) {
+                    return "timestamp";
+                }
+                if (logicalType instanceof LogicalTypes.TimeMicros) {
+                    throw unsupportedLogicalType(schema);
+                }
+                return "bigint";
+            case FLOAT:
+                return "float";
+            case DOUBLE:
+                return "double";
+            case STRING:
+                return "string";
+            case FIXED:
+            case BYTES:
+                if (logicalType instanceof LogicalTypes.Decimal) {
+                    LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
+                    return String.format("decimal(%d,%d)",
+                            decimalType.getPrecision(), decimalType.getScale());
+                }
+                // Non-decimal fixed/bytes columns are reported as string.
+                return "string";
+            case ARRAY:
+                return String.format("array<%s>",
+                        toHiveTypeString(schema.getElementType()));
+            case RECORD:
+                List<Schema.Field> recordFields = schema.getFields();
+                if (recordFields.isEmpty()) {
+                    throw new IllegalArgumentException("Record must have fields");
+                }
+                String structFields = recordFields.stream()
+                        .map(field -> String.format("%s:%s", field.name(),
+                                toHiveTypeString(field.schema())))
+                        .collect(Collectors.joining(","));
+                return String.format("struct<%s>", structFields);
+            case MAP:
+                // Avro map keys are always strings, so only the value type varies.
+                return String.format("map<string,%s>",
+                        toHiveTypeString(schema.getValueType()));
+            case UNION:
+                List<Schema> unionTypes = schema.getTypes().stream()
+                        .filter(s -> s.getType() != Schema.Type.NULL)
+                        .collect(Collectors.toList());
+                if (unionTypes.size() == 1) {
+                    return toHiveTypeString(unionTypes.get(0));
+                }
+                // Unions with zero or several non-null branches fall through to the error below.
+                break;
+            default:
+                break;
+        }
+
+        throw new IllegalArgumentException(String.format(
+                "Unsupported type: %s for column: %s", type.getName(), schema.getName()));
+    }
+
+    /** Builds (does not throw) the exception reported for an Avro logical type that has no Hive mapping. */
+    private static IllegalArgumentException unsupportedLogicalType(Schema schema) {
+        String message = String.format("Unsupported logical type: %s", schema.getLogicalType());
+        return new IllegalArgumentException(message);
+    }
+
private static ConnectorType mapIntType(LogicalType logicalType) {
if (logicalType instanceof LogicalTypes.Date) {
return ConnectorType.of("DATEV2");
diff --git a/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiPartitionPruningTest.java b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiPartitionPruningTest.java
new file mode 100644
index 00000000000000..af6b59a532be0b
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiPartitionPruningTest.java
@@ -0,0 +1,265 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hudi;
+
+import org.apache.doris.connector.api.ConnectorType;
+import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.pushdown.ConnectorAnd;
+import org.apache.doris.connector.api.pushdown.ConnectorColumnRef;
+import org.apache.doris.connector.api.pushdown.ConnectorComparison;
+import org.apache.doris.connector.api.pushdown.ConnectorExpression;
+import org.apache.doris.connector.api.pushdown.ConnectorFilterConstraint;
+import org.apache.doris.connector.api.pushdown.ConnectorIn;
+import org.apache.doris.connector.api.pushdown.ConnectorLiteral;
+import org.apache.doris.connector.api.pushdown.FilterApplicationResult;
+import org.apache.doris.connector.hms.HmsClient;
+import org.apache.doris.connector.hms.HmsDatabaseInfo;
+import org.apache.doris.connector.hms.HmsPartitionInfo;
+import org.apache.doris.connector.hms.HmsTableInfo;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Tests {@link HudiConnectorMetadata#applyFilter} partition pruning (P3-T05).
+ *
+ * WHY: the SPI Hudi path previously listed ALL partitions unconditionally and
+ * stored them as {@code prunedPartitionPaths}, doing no EQ/IN pruning at all and
+ * silently forcing the partition source to HMS for any filtered query. These tests
+ * pin the corrected behavior, mirroring {@code HiveConnectorMetadata}:
+ *
+ * - EQ / IN predicates on partition columns reduce the scanned partition set;
+ * - predicates on non-partition columns (or range predicates) never prune;
+ * - when no partition predicate applies, the handle is left untouched
+ * ({@code Optional.empty()}) so scan planning falls back to Hudi's own listing;
+ * - a predicate that matches every / no partition is handled correctly.
+ *
+ * A test that passed against the old stub (which always returned all partitions)
+ * would be wrong — each assertion checks the precise pruned set.
+ */
+public class HudiPartitionPruningTest {
+
+ private static final List PARTITIONS = Arrays.asList(
+ "year=2023/month=12",
+ "year=2024/month=01",
+ "year=2024/month=02");
+
+ private static final List PART_KEYS = Arrays.asList("year", "month");
+
+ @Test
+ public void testEqOnPartitionColumnPrunes() {
+ // year = '2024' -> only the two 2024 partitions
+ Optional> result =
+ applyFilter(partitionedHandle(), eq("year", "2024"));
+
+ Assertions.assertTrue(result.isPresent());
+ Assertions.assertEquals(
+ Arrays.asList("year=2024/month=01", "year=2024/month=02"),
+ prunedPaths(result));
+ }
+
+ @Test
+ public void testInOnPartitionColumnPrunes() {
+ // month IN ('01', '12') -> spans years, keeps original order
+ Optional> result =
+ applyFilter(partitionedHandle(), in("month", "01", "12"));
+
+ Assertions.assertTrue(result.isPresent());
+ Assertions.assertEquals(
+ Arrays.asList("year=2023/month=12", "year=2024/month=01"),
+ prunedPaths(result));
+ }
+
+ @Test
+ public void testAndOfTwoPartitionColumnsPrunes() {
+ // year = '2024' AND month = '01' -> a single partition
+ ConnectorExpression expr = and(eq("year", "2024"), eq("month", "01"));
+ Optional> result =
+ applyFilter(partitionedHandle(), expr);
+
+ Assertions.assertTrue(result.isPresent());
+ Assertions.assertEquals(
+ Collections.singletonList("year=2024/month=01"),
+ prunedPaths(result));
+ }
+
+ @Test
+ public void testNonPartitionColumnInAndIsIgnored() {
+ // year = '2024' AND price = '100' -> prune on year only; non-partition pred ignored
+ ConnectorExpression expr = and(eq("year", "2024"), eq("price", "100"));
+ Optional> result =
+ applyFilter(partitionedHandle(), expr);
+
+ Assertions.assertTrue(result.isPresent());
+ Assertions.assertEquals(
+ Arrays.asList("year=2024/month=01", "year=2024/month=02"),
+ prunedPaths(result));
+ }
+
+ @Test
+ public void testNonPartitionPredicateOnlyLeavesHandleUntouched() {
+ // price = '100' -> no partition predicate -> Optional.empty() (no source switch)
+ Optional> result =
+ applyFilter(partitionedHandle(), eq("price", "100"));
+
+ Assertions.assertFalse(result.isPresent());
+ }
+
+ @Test
+ public void testPredicateMatchingAllPartitionsHasNoEffect() {
+ // year IN ('2023', '2024') -> matches every partition -> Optional.empty()
+ Optional> result =
+ applyFilter(partitionedHandle(), in("year", "2023", "2024"));
+
+ Assertions.assertFalse(result.isPresent());
+ }
+
+ @Test
+ public void testPredicateMatchingNoPartitionYieldsEmptyPrunedList() {
+ // year = '1999' -> matches nothing -> present handle with empty pruned set (scan 0)
+ Optional> result =
+ applyFilter(partitionedHandle(), eq("year", "1999"));
+
+ Assertions.assertTrue(result.isPresent());
+ Assertions.assertTrue(prunedPaths(result).isEmpty());
+ }
+
+ @Test
+ public void testUnpartitionedTableIsNotTouched() {
+ HudiTableHandle handle = new HudiTableHandle.Builder("db", "t", "s3://b/t", "COPY_ON_WRITE")
+ .partitionKeyNames(Collections.emptyList())
+ .build();
+ Optional> result =
+ applyFilter(handle, eq("year", "2024"));
+
+ Assertions.assertFalse(result.isPresent());
+ }
+
+ // ========== helpers ==========
+
+ private Optional> applyFilter(
+ HudiTableHandle handle, ConnectorExpression expr) {
+ HudiConnectorMetadata metadata = new HudiConnectorMetadata(
+ new FakeHmsClient(PARTITIONS), Collections.emptyMap());
+ return metadata.applyFilter(null, handle, new ConnectorFilterConstraint(expr));
+ }
+
+ private HudiTableHandle partitionedHandle() {
+ return new HudiTableHandle.Builder("db", "t", "s3://b/t", "COPY_ON_WRITE")
+ .partitionKeyNames(PART_KEYS)
+ .build();
+ }
+
+ @SuppressWarnings("unchecked")
+ private List prunedPaths(Optional> result) {
+ return ((HudiTableHandle) result.get().getHandle()).getPrunedPartitionPaths();
+ }
+
+ private static ConnectorColumnRef colRef(String name) {
+ return new ConnectorColumnRef(name, ConnectorType.of("STRING"));
+ }
+
+ private static ConnectorLiteral lit(String value) {
+ return new ConnectorLiteral(ConnectorType.of("STRING"), value);
+ }
+
+ private static ConnectorComparison eq(String col, String value) {
+ return new ConnectorComparison(ConnectorComparison.Operator.EQ, colRef(col), lit(value));
+ }
+
+ private static ConnectorIn in(String col, String... values) {
+ List inList = new ArrayList<>();
+ for (String v : values) {
+ inList.add(lit(v));
+ }
+ return new ConnectorIn(colRef(col), inList, false);
+ }
+
+ private static ConnectorAnd and(ConnectorExpression... children) {
+ return new ConnectorAnd(Arrays.asList(children));
+ }
+
+ /**
+ * Minimal {@link HmsClient} double returning a fixed partition-name list.
+ * Only {@code listPartitionNames} is exercised by partition pruning; the rest fail loud.
+ */
+ private static final class FakeHmsClient implements HmsClient {
+ private final List partitionNames;
+
+ FakeHmsClient(List partitionNames) {
+ this.partitionNames = partitionNames;
+ }
+
+ @Override
+ public List listPartitionNames(String dbName, String tableName, int maxParts) {
+ return partitionNames;
+ }
+
+ @Override
+ public List listDatabases() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HmsDatabaseInfo getDatabase(String dbName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List listTables(String dbName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean tableExists(String dbName, String tableName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HmsTableInfo getTable(String dbName, String tableName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Map getDefaultColumnValues(String dbName, String tableName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List getPartitions(String dbName, String tableName,
+ List partNames) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HmsPartitionInfo getPartition(String dbName, String tableName, List values) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+}
diff --git a/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiScanRangeTest.java b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiScanRangeTest.java
new file mode 100644
index 00000000000000..7f8aeeebee8d0e
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiScanRangeTest.java
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hudi;
+
+import org.apache.doris.thrift.TFileFormatType;
+import org.apache.doris.thrift.TFileRangeDesc;
+import org.apache.doris.thrift.THudiFileDesc;
+import org.apache.doris.thrift.TTableFormatFileDesc;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests {@link HudiScanRange#populateRangeParams}.
+ *
+ * WHY: column_names/column_types/delta_logs are thrift {@code list<string>};
+ * BE ({@code hudi_jni_reader.cpp}) joins them with distinct delimiters
+ * (names ',', types '#', delta logs ','). The FE must pass each per-column type
+ * as a single list element. The previous code joined them with ',' and split
+ * back by ',', which shattered comma-bearing Hive type strings
+ * ({@code decimal(10,2)}, {@code struct<...>}) and misaligned names/types.
+ * These tests pin that the typed lists survive intact and aligned.
+ */
+public class HudiScanRangeTest {
+
+    @Test
+    public void testJniListsSurviveIntactAndAligned() {
+        // Two of the three type strings contain a comma (decimal and struct), so any
+        // comma join+split on the way to thrift would change the element count.
+        HudiScanRange range = new HudiScanRange.Builder()
+                .path("s3://bucket/t/file")
+                .fileFormat("jni")
+                .instantTime("20240101000000000")
+                .serde("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")
+                .inputFormat("org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat")
+                .basePath("s3://bucket/t")
+                .dataFilePath("s3://bucket/t/base.parquet")
+                .dataFileLength(123L)
+                .deltaLogs(Arrays.asList("s3://bucket/t/.f.log.1_0", "s3://bucket/t/.f.log.2_0"))
+                .columnNames(Arrays.asList("x", "y", "z"))
+                .columnTypes(Arrays.asList("int", "decimal(10,2)", "struct<a:int,b:string>"))
+                .build();
+
+        TTableFormatFileDesc formatDesc = new TTableFormatFileDesc();
+        TFileRangeDesc rangeDesc = new TFileRangeDesc();
+        range.populateRangeParams(formatDesc, rangeDesc);
+
+        THudiFileDesc fileDesc = formatDesc.getHudiParams();
+
+        // Types must NOT be shattered: 3 columns -> 3 type strings (old bug
+        // produced 5: "int","decimal(10","2)","struct<a:int","b:string>").
+        Assertions.assertEquals(Arrays.asList("int", "decimal(10,2)", "struct<a:int,b:string>"),
+                fileDesc.getColumnTypes());
+        Assertions.assertEquals(Arrays.asList("x", "y", "z"), fileDesc.getColumnNames());
+        Assertions.assertEquals(Arrays.asList("s3://bucket/t/.f.log.1_0", "s3://bucket/t/.f.log.2_0"),
+                fileDesc.getDeltaLogs());
+
+        // names <-> types alignment (the JNI scanner zips them positionally).
+        Assertions.assertEquals(fileDesc.getColumnNames().size(), fileDesc.getColumnTypes().size());
+    }
+
+    @Test
+    public void testNoDeltaLogsDowngradesToNativeParquet() {
+        // MOR file slice with no delta logs -> native parquet reader; no JNI lists set.
+        HudiScanRange range = new HudiScanRange.Builder()
+                .path("s3://bucket/t/base.parquet")
+                .fileFormat("jni")
+                .dataFilePath("s3://bucket/t/base.parquet")
+                .dataFileLength(456L)
+                .build();
+
+        TTableFormatFileDesc formatDesc = new TTableFormatFileDesc();
+        TFileRangeDesc rangeDesc = new TFileRangeDesc();
+        range.populateRangeParams(formatDesc, rangeDesc);
+
+        // Requested format was "jni", yet the range must report parquet and leave
+        // the column name/type lists unset.
+        Assertions.assertEquals(TFileFormatType.FORMAT_PARQUET, rangeDesc.getFormatType());
+        Assertions.assertFalse(formatDesc.getHudiParams().isSetColumnTypes());
+        Assertions.assertFalse(formatDesc.getHudiParams().isSetColumnNames());
+    }
+}
diff --git a/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiSchemaParityTest.java b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiSchemaParityTest.java
new file mode 100644
index 00000000000000..9ae752484e5efc
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiSchemaParityTest.java
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hudi;
+
+import org.apache.doris.connector.api.ConnectorColumn;
+import org.apache.doris.connector.api.ConnectorType;
+
+import org.apache.avro.Schema;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Schema-level parity for the SPI Hudi metadata path (P3-T07, batch C).
+ *
+ * WHY: {@code getTableSchema} derives its column list from the Hudi Avro schema
+ * via {@link HudiConnectorMetadata#avroSchemaToColumns}. This must produce the same
+ * column set — names, order, Doris types, nullability — and the same per-column
+ * Hive type strings ({@code colTypes}) as legacy fe-core
+ * {@code HMSExternalTable.initHudiSchema} (:740-753) +
+ * {@code HudiUtils.fromAvroHudiTypeToDorisType} / {@code convertAvroToHiveType}.
+ * Because no compile path sees both modules (fe-core does not depend on the concrete
+ * connector modules), parity is asserted against golden values transcribed from —
+ * and annotated with — the legacy contract.
+ *
+ * COW vs MOR: schema derivation is table-type-agnostic on BOTH sides (neither
+ * consults COW/MOR), so a single golden schema covers both; the COW/MOR distinction
+ * lives only in scan planning and is pinned separately by {@link HudiTableTypeTest}.
+ *
+ * Two assertions deliberately encode the P3-T07 column-name-casing fix: the
+ * top-level column name is lower-cased (legacy {@code toLowerCase(Locale.ROOT)} at
+ * {@code HMSExternalTable.java:745}), while a NESTED struct field name keeps its
+ * original case (legacy lowercases only the top-level column). A test that passed
+ * with the old raw-case behavior would be wrong.
+ */
+public class HudiSchemaParityTest {
+
+    // A representative Hudi table schema in Avro JSON (the form Hudi actually stores).
+    // Mixed-case top-level names (Id, Name, Addr) and a mixed-case nested field
+    // (Street) exercise the casing boundary; the type variety mirrors the legacy
+    // type matrix (primitive, decimal, date, timestamp, nullable, array, map, struct).
+    private static final String SCHEMA_JSON =
+            "{\"type\":\"record\",\"name\":\"hudi_t\",\"fields\":["
+            + "{\"name\":\"Id\",\"type\":\"long\"},"
+            + "{\"name\":\"Name\",\"type\":[\"null\",\"string\"],\"default\":null},"
+            + "{\"name\":\"price\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\","
+            + "\"precision\":10,\"scale\":2}},"
+            + "{\"name\":\"event_date\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},"
+            + "{\"name\":\"created_at\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}},"
+            + "{\"name\":\"tags\",\"type\":{\"type\":\"array\",\"items\":\"string\"}},"
+            + "{\"name\":\"props\",\"type\":{\"type\":\"map\",\"values\":\"int\"}},"
+            + "{\"name\":\"Addr\",\"type\":{\"type\":\"record\",\"name\":\"AddrRec\",\"fields\":["
+            + "{\"name\":\"Street\",\"type\":\"string\"},{\"name\":\"zip\",\"type\":\"int\"}]}}"
+            + "]}";
+
+    // Golden column contract, mirroring legacy initHudiSchema field-by-field.
+    private static final List<String> EXPECTED_NAMES = Arrays.asList(
+            "id", "name", "price", "event_date", "created_at", "tags", "props", "addr");
+
+    private static final List<ConnectorType> EXPECTED_TYPES = Arrays.asList(
+            ConnectorType.of("BIGINT"),
+            ConnectorType.of("STRING"),
+            ConnectorType.of("DECIMALV3", 10, 2),
+            ConnectorType.of("DATEV2"),
+            ConnectorType.of("DATETIMEV2", 6, 0),
+            ConnectorType.arrayOf(ConnectorType.of("STRING")),
+            ConnectorType.mapOf(ConnectorType.of("STRING"), ConnectorType.of("INT")),
+            ConnectorType.structOf(Arrays.asList("Street", "zip"),
+                    Arrays.asList(ConnectorType.of("STRING"), ConnectorType.of("INT"))));
+
+    // Only the union-typed "Name" field is nullable; the flag must track the union,
+    // not be a constant.
+    private static final List<Boolean> EXPECTED_NULLABLE = Arrays.asList(
+            false, true, false, false, false, false, false, false);
+
+    // Hive type strings = legacy colTypes (convertAvroToHiveType per field).
+    // Complex types carry their element/field types; struct field names keep their case.
+    private static final List<String> EXPECTED_HIVE_TYPES = Arrays.asList(
+            "bigint", "string", "decimal(10,2)", "date", "timestamp",
+            "array<string>", "map<string,int>", "struct<Street:string,zip:int>");
+
+    /** Parses {@link #SCHEMA_JSON} with a fresh parser on every call. */
+    private static Schema schema() {
+        return new Schema.Parser().parse(SCHEMA_JSON);
+    }
+
+    @Test
+    public void testSchemaColumnsMirrorLegacyContract() {
+        List<ConnectorColumn> columns = HudiConnectorMetadata.avroSchemaToColumns(schema());
+        Assertions.assertEquals(EXPECTED_NAMES.size(), columns.size());
+        // Compare position by position so a reordered column list is also caught.
+        for (int i = 0; i < columns.size(); i++) {
+            ConnectorColumn col = columns.get(i);
+            Assertions.assertEquals(EXPECTED_NAMES.get(i), col.getName(), "name[" + i + "]");
+            Assertions.assertEquals(EXPECTED_TYPES.get(i), col.getType(), "type[" + i + "]");
+            Assertions.assertEquals(EXPECTED_NULLABLE.get(i), col.isNullable(), "nullable[" + i + "]");
+        }
+    }
+
+    @Test
+    public void testColumnTypeStringsMirrorLegacyColTypes() {
+        List<Schema.Field> fields = schema().getFields();
+        Assertions.assertEquals(EXPECTED_HIVE_TYPES.size(), fields.size());
+        for (int i = 0; i < fields.size(); i++) {
+            Assertions.assertEquals(EXPECTED_HIVE_TYPES.get(i),
+                    HudiTypeMapping.toHiveTypeString(fields.get(i).schema()), "colType[" + i + "]");
+        }
+    }
+
+    @Test
+    public void testTopLevelNameLoweredButNestedStructNamePreserved() {
+        List<ConnectorColumn> columns = HudiConnectorMetadata.avroSchemaToColumns(schema());
+        ConnectorColumn addr = columns.get(7);
+        // top-level "Addr" -> "addr"
+        Assertions.assertEquals("addr", addr.getName());
+        // nested struct field "Street" keeps its case (legacy lowercases only top-level)
+        Assertions.assertEquals(Arrays.asList("Street", "zip"), addr.getType().getFieldNames());
+        Assertions.assertEquals("struct<Street:string,zip:int>",
+                HudiTypeMapping.toHiveTypeString(schema().getFields().get(7).schema()));
+    }
+}
diff --git a/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiTableTypeTest.java b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiTableTypeTest.java
new file mode 100644
index 00000000000000..ef172b9dc17ce6
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiTableTypeTest.java
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hudi;
+
+import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.hms.HmsClient;
+import org.apache.doris.connector.hms.HmsDatabaseInfo;
+import org.apache.doris.connector.hms.HmsPartitionInfo;
+import org.apache.doris.connector.hms.HmsTableInfo;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * COW vs MOR table-type classification on the SPI Hudi metadata path (P3-T07, batch C).
+ *
+ * WHY: schema derivation is table-type-agnostic, so the ONLY place the metadata SPI
+ * distinguishes Copy-On-Write from Merge-On-Read is {@code detectHudiTableType}, surfaced
+ * through {@code getTableHandle}. Misclassifying the type routes scan planning to the wrong
+ * split/reader strategy. These tests pin the detection from the HMS input format and the
+ * Spark provider table parameter — the "COW & MOR each one" parity requirement — plus the
+ * UNKNOWN fallback when no Hudi signal is present.
+ */
+public class HudiTableTypeTest {
+
+ private String detect(String inputFormat, Map parameters) {
+ HmsTableInfo info = HmsTableInfo.builder()
+ .dbName("db").tableName("t")
+ .location("s3://b/t")
+ .inputFormat(inputFormat)
+ .parameters(parameters)
+ .build();
+ HudiConnectorMetadata metadata =
+ new HudiConnectorMetadata(new FakeHmsClient(info), Collections.emptyMap());
+ Optional handle = metadata.getTableHandle(null, "db", "t");
+ Assertions.assertTrue(handle.isPresent());
+ return ((HudiTableHandle) handle.get()).getHudiTableType();
+ }
+
+ @Test
+ public void testCowDetectedFromInputFormat() {
+ Assertions.assertEquals("COPY_ON_WRITE",
+ detect("org.apache.hudi.hadoop.HoodieParquetInputFormat", Collections.emptyMap()));
+ }
+
+ @Test
+ public void testCowDetectedFromSparkProviderParam() {
+ // A Spark-registered Hudi table may carry no Hudi input format; the provider
+ // parameter still identifies it as COW.
+ Assertions.assertEquals("COPY_ON_WRITE",
+ detect(null, Collections.singletonMap("spark.sql.sources.provider", "hudi")));
+ }
+
+ @Test
+ public void testMorDetectedFromRealtimeInputFormat() {
+ Assertions.assertEquals("MERGE_ON_READ",
+ detect("org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat",
+ Collections.emptyMap()));
+ }
+
+ @Test
+ public void testUnknownWhenNoHudiSignal() {
+ Assertions.assertEquals("UNKNOWN",
+ detect("org.apache.hadoop.mapred.TextInputFormat", Collections.emptyMap()));
+ }
+
+ /**
+ * Minimal {@link HmsClient} double returning a fixed table. Only {@code tableExists}
+ * and {@code getTable} are exercised by {@code getTableHandle}; the rest fail loud.
+ */
+ private static final class FakeHmsClient implements HmsClient {
+ private final HmsTableInfo tableInfo;
+
+ FakeHmsClient(HmsTableInfo tableInfo) {
+ this.tableInfo = tableInfo;
+ }
+
+ @Override
+ public boolean tableExists(String dbName, String tableName) {
+ return true;
+ }
+
+ @Override
+ public HmsTableInfo getTable(String dbName, String tableName) {
+ return tableInfo;
+ }
+
+ @Override
+ public List listDatabases() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HmsDatabaseInfo getDatabase(String dbName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List listTables(String dbName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Map getDefaultColumnValues(String dbName, String tableName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List listPartitionNames(String dbName, String tableName, int maxParts) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List getPartitions(String dbName, String tableName,
+ List partNames) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public HmsPartitionInfo getPartition(String dbName, String tableName, List values) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+}
diff --git a/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiTypeMappingTest.java b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiTypeMappingTest.java
new file mode 100644
index 00000000000000..669d5f4f96b9b3
--- /dev/null
+++ b/fe/fe-connector/fe-connector-hudi/src/test/java/org/apache/doris/connector/hudi/HudiTypeMappingTest.java
@@ -0,0 +1,220 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.connector.hudi;
+
+import org.apache.doris.connector.api.ConnectorType;
+
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+
+/**
+ * Tests {@link HudiTypeMapping#toHiveTypeString} and {@link HudiTypeMapping#fromAvroSchema}.
+ *
+ * WHY (toHiveTypeString): the BE Hudi JNI scanner ({@code HadoopHudiJniScanner})
+ * parses {@code hudi_column_types} as Hive type strings split on {@code '#'}. The FE
+ * must therefore emit full Hive type strings carrying precision/scale and
+ * subtypes — not Doris type names — or the scanner reads wrong/null columns.
+ * These tests pin the exact strings, matching fe-core
+ * {@code HudiUtils.convertAvroToHiveType}.
+ *
+ * WHY (fromAvroSchema): {@code getTableSchema} reports each column's
+ * {@link ConnectorType} from this mapper. These tests pin the Doris type per Avro
+ * type, matching fe-core {@code HudiUtils.fromAvroHudiTypeToDorisType} (P3-T07
+ * parity baseline — previously uncovered). Note the deliberate asymmetry: time
+ * types map to {@code TIMEV2} here but fail loud in {@code toHiveTypeString},
+ * exactly as the two legacy converters diverge.
+ */
+public class HudiTypeMappingTest {
+
+ @Test
+ public void testPrimitives() {
+ Assertions.assertEquals("boolean", HudiTypeMapping.toHiveTypeString(Schema.create(Schema.Type.BOOLEAN)));
+ Assertions.assertEquals("int", HudiTypeMapping.toHiveTypeString(Schema.create(Schema.Type.INT)));
+ Assertions.assertEquals("bigint", HudiTypeMapping.toHiveTypeString(Schema.create(Schema.Type.LONG)));
+ Assertions.assertEquals("float", HudiTypeMapping.toHiveTypeString(Schema.create(Schema.Type.FLOAT)));
+ Assertions.assertEquals("double", HudiTypeMapping.toHiveTypeString(Schema.create(Schema.Type.DOUBLE)));
+ Assertions.assertEquals("string", HudiTypeMapping.toHiveTypeString(Schema.create(Schema.Type.STRING)));
+ }
+
+ @Test
+ public void testDateAndTimestampLogicalTypes() {
+     // date is carried on INT; both timestamp precisions are carried on LONG and
+     // are expected to map to the same Hive "timestamp" string.
+     Schema dateSchema = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT));
+     Schema millisSchema = LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG));
+     Schema microsSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG));
+
+     Assertions.assertEquals("date", HudiTypeMapping.toHiveTypeString(dateSchema));
+     Assertions.assertEquals("timestamp", HudiTypeMapping.toHiveTypeString(millisSchema));
+     Assertions.assertEquals("timestamp", HudiTypeMapping.toHiveTypeString(microsSchema));
+ }
+
+ @Test
+ public void testDecimalKeepsPrecisionAndScale() {
+     // Directly targets bug (a): getTypeName() previously dropped precision/scale.
+     // Case 1: decimal logical type on a BYTES schema.
+     Schema bytesBase = Schema.create(Schema.Type.BYTES);
+     Schema bytesDecimal = LogicalTypes.decimal(10, 2).addToSchema(bytesBase);
+     String bytesHiveType = HudiTypeMapping.toHiveTypeString(bytesDecimal);
+     Assertions.assertEquals("decimal(10,2)", bytesHiveType);
+
+     // Case 2: decimal logical type on a 16-byte FIXED schema.
+     Schema fixedBase = Schema.createFixed("d", null, null, 16);
+     Schema fixedDecimal = LogicalTypes.decimal(38, 18).addToSchema(fixedBase);
+     String fixedHiveType = HudiTypeMapping.toHiveTypeString(fixedDecimal);
+     Assertions.assertEquals("decimal(38,18)", fixedHiveType);
+ }
+
+ @Test
+ public void testArray() {
+     // The element type must be carried inside the angle brackets: array of INT -> array<int>.
+     Schema arr = Schema.createArray(Schema.create(Schema.Type.INT));
+     Assertions.assertEquals("array<int>", HudiTypeMapping.toHiveTypeString(arr));
+ }
+
+ @Test
+ public void testMap() {
+     // Avro maps always have string keys, so the Hive key type is fixed to "string";
+     // the LONG value type is spelled "bigint".
+     Schema map = Schema.createMap(Schema.create(Schema.Type.LONG));
+     Assertions.assertEquals("map<string,bigint>", HudiTypeMapping.toHiveTypeString(map));
+ }
+
+ @Test
+ public void testStructContainsCommas() {
+     // Directly targets bug (b): the comma in struct<...> must survive as a
+     // single type string; a comma join+split would shatter it.
+     Schema struct = Schema.createRecord("r", null, null, false, Arrays.asList(
+             new Schema.Field("a", Schema.create(Schema.Type.INT)),
+             new Schema.Field("b", Schema.create(Schema.Type.STRING))));
+     // Fields render as name:type, comma-separated, in declaration order.
+     Assertions.assertEquals("struct<a:int,b:string>", HudiTypeMapping.toHiveTypeString(struct));
+ }
+
+ @Test
+ public void testNestedComplexType() {
+     // array of struct{id: long, amount: decimal(12,4)}: nesting must be rendered
+     // recursively, keeping the decimal's precision/scale inside the struct.
+     Schema struct = Schema.createRecord("r", null, null, false, Arrays.asList(
+             new Schema.Field("id", Schema.create(Schema.Type.LONG)),
+             new Schema.Field("amount",
+                     LogicalTypes.decimal(12, 4).addToSchema(Schema.create(Schema.Type.BYTES)))));
+     Schema arrOfStruct = Schema.createArray(struct);
+     Assertions.assertEquals("array<struct<id:bigint,amount:decimal(12,4)>>",
+             HudiTypeMapping.toHiveTypeString(arrOfStruct));
+ }
+
+ @Test
+ public void testNullableUnionIsUnwrapped() {
+     // A [null, int] union has exactly one non-null branch, so it must render as that branch.
+     Schema nullBranch = Schema.create(Schema.Type.NULL);
+     Schema intBranch = Schema.create(Schema.Type.INT);
+     Schema optionalInt = Schema.createUnion(Arrays.asList(nullBranch, intBranch));
+
+     String hiveType = HudiTypeMapping.toHiveTypeString(optionalInt);
+     Assertions.assertEquals("int", hiveType);
+ }
+
+ @Test
+ public void testUnsupportedLogicalTypeFailsLoud() {
+     // Matches legacy fail-loud: time types are unsupported.
+     Schema intBase = Schema.create(Schema.Type.INT);
+     Schema timeMillisSchema = LogicalTypes.timeMillis().addToSchema(intBase);
+     Assertions.assertThrows(IllegalArgumentException.class, () -> {
+         HudiTypeMapping.toHiveTypeString(timeMillisSchema);
+     });
+ }
+
+ // ===== fromAvroSchema -> ConnectorType (parity with HudiUtils.fromAvroHudiTypeToDorisType) =====
+
+    /** Each Avro primitive maps to the Doris type name at the same index below. */
+    @Test
+    public void testFromAvroSchemaPrimitives() {
+        Schema.Type[] avroTypes = {
+            Schema.Type.BOOLEAN,
+            Schema.Type.INT,
+            Schema.Type.LONG,
+            Schema.Type.FLOAT,
+            Schema.Type.DOUBLE,
+            Schema.Type.STRING,
+            // Avro bytes/fixed without a decimal logical type degrade to STRING (legacy parity).
+            Schema.Type.BYTES,
+        };
+        String[] dorisTypeNames = {
+            "BOOLEAN", "INT", "BIGINT", "FLOAT", "DOUBLE", "STRING", "STRING",
+        };
+        for (int i = 0; i < avroTypes.length; i++) {
+            Assertions.assertEquals(ConnectorType.of(dorisTypeNames[i]),
+                    HudiTypeMapping.fromAvroSchema(Schema.create(avroTypes[i])));
+        }
+    }
+
+    /** Date, timestamp and time logical types map to DATEV2 / DATETIMEV2 / TIMEV2. */
+    @Test
+    public void testFromAvroSchemaLogicalTypes() {
+        // Every logical type gets its own freshly created base schema.
+        Schema date = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT));
+        Assertions.assertEquals(ConnectorType.of("DATEV2"), HudiTypeMapping.fromAvroSchema(date));
+
+        Schema tsMillis = LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG));
+        Assertions.assertEquals(ConnectorType.of("DATETIMEV2", 3, 0),
+                HudiTypeMapping.fromAvroSchema(tsMillis));
+
+        Schema tsMicros = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG));
+        Assertions.assertEquals(ConnectorType.of("DATETIMEV2", 6, 0),
+                HudiTypeMapping.fromAvroSchema(tsMicros));
+
+        // Time types map to TIMEV2 here, unlike toHiveTypeString which fails loud -
+        // matching legacy HudiUtils.fromAvroHudiTypeToDorisType.
+        Schema timeMillis = LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT));
+        Assertions.assertEquals(ConnectorType.of("TIMEV2", 3, 0),
+                HudiTypeMapping.fromAvroSchema(timeMillis));
+
+        Schema timeMicros = LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG));
+        Assertions.assertEquals(ConnectorType.of("TIMEV2", 6, 0),
+                HudiTypeMapping.fromAvroSchema(timeMicros));
+    }
+
+    /** A bytes-backed decimal(10, 2) must keep both precision and scale as DECIMALV3. */
+    @Test
+    public void testFromAvroSchemaDecimalKeepsPrecisionAndScale() {
+        Schema bytesBase = Schema.create(Schema.Type.BYTES);
+        Schema tenTwo = LogicalTypes.decimal(10, 2).addToSchema(bytesBase);
+        ConnectorType expected = ConnectorType.of("DECIMALV3", 10, 2);
+        Assertions.assertEquals(expected, HudiTypeMapping.fromAvroSchema(tenTwo));
+    }
+
+    /** Arrays, maps and records map to the matching nested ConnectorType. */
+    @Test
+    public void testFromAvroSchemaComplexTypes() {
+        Schema intArray = Schema.createArray(Schema.create(Schema.Type.INT));
+        Assertions.assertEquals(
+                ConnectorType.arrayOf(ConnectorType.of("INT")),
+                HudiTypeMapping.fromAvroSchema(intArray));
+
+        // Avro maps always have string keys.
+        Schema longMap = Schema.createMap(Schema.create(Schema.Type.LONG));
+        Assertions.assertEquals(
+                ConnectorType.mapOf(ConnectorType.of("STRING"), ConnectorType.of("BIGINT")),
+                HudiTypeMapping.fromAvroSchema(longMap));
+
+        Schema.Field fieldA = new Schema.Field("a", Schema.create(Schema.Type.INT));
+        Schema.Field fieldB = new Schema.Field("b", Schema.create(Schema.Type.STRING));
+        Schema record = Schema.createRecord("r", null, null, false, Arrays.asList(fieldA, fieldB));
+        ConnectorType expectedStruct = ConnectorType.structOf(
+                Arrays.asList("a", "b"),
+                Arrays.asList(ConnectorType.of("INT"), ConnectorType.of("STRING")));
+        Assertions.assertEquals(expectedStruct, HudiTypeMapping.fromAvroSchema(record));
+    }
+
+    /** A {@code [null, int]} union is expected to map to the plain INT connector type. */
+    @Test
+    public void testFromAvroSchemaNullableUnionUnwrapped() {
+        Schema nullBranch = Schema.create(Schema.Type.NULL);
+        Schema intBranch = Schema.create(Schema.Type.INT);
+        Schema optionalInt = Schema.createUnion(nullBranch, intBranch);
+        Assertions.assertEquals(
+                ConnectorType.of("INT"),
+                HudiTypeMapping.fromAvroSchema(optionalInt));
+    }
+
+    /** An Avro enum is expected to map to the STRING connector type. */
+    @Test
+    public void testFromAvroSchemaEnumMapsToString() {
+        Schema twoSymbolEnum = Schema.createEnum("e", null, null, Arrays.asList("A", "B"));
+        ConnectorType mapped = HudiTypeMapping.fromAvroSchema(twoSymbolEnum);
+        Assertions.assertEquals(ConnectorType.of("STRING"), mapped);
+    }
+
+    /** A union of two non-null members is expected to map to UNSUPPORTED. */
+    @Test
+    public void testFromAvroSchemaMultiMemberUnionUnsupported() {
+        // A true union (no single non-null member) is unsupported (legacy parity).
+        Schema intBranch = Schema.create(Schema.Type.INT);
+        Schema stringBranch = Schema.create(Schema.Type.STRING);
+        Schema intOrString = Schema.createUnion(intBranch, stringBranch);
+        Assertions.assertEquals(
+                ConnectorType.of("UNSUPPORTED"),
+                HudiTypeMapping.fromAvroSchema(intOrString));
+    }
+}
diff --git a/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCConnectorClientFactory.java b/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCConnectorClientFactory.java
index 8e3ec3b1116987..1861e18a599078 100644
--- a/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCConnectorClientFactory.java
+++ b/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCConnectorClientFactory.java
@@ -38,6 +38,9 @@ private MCConnectorClientFactory() {
/**
* Validates that required authentication properties are present.
+ * Throws {@link IllegalArgumentException} so that CREATE CATALOG property
+ * validation ({@code MaxComputeConnectorProvider.validateProperties}) surfaces
+ * a clean DdlException, consistent with the other connectors' validation.
*/
public static void checkAuthProperties(Map properties) {
String authType = properties.getOrDefault(
@@ -49,7 +52,7 @@ public static void checkAuthProperties(Map properties) {
if (!properties.containsKey(MCConnectorProperties.ACCESS_KEY)
|| !properties.containsKey(
MCConnectorProperties.SECRET_KEY)) {
- throw new RuntimeException(
+ throw new IllegalArgumentException(
"Missing access key or secret key for "
+ "AK/SK auth type");
}
@@ -60,7 +63,7 @@ public static void checkAuthProperties(Map properties) {
MCConnectorProperties.SECRET_KEY)
|| !properties.containsKey(
MCConnectorProperties.RAM_ROLE_ARN)) {
- throw new RuntimeException(
+ throw new IllegalArgumentException(
"Missing access key, secret key or role arn "
+ "for RAM Role ARN auth type");
}
@@ -68,11 +71,11 @@ public static void checkAuthProperties(Map properties) {
MCConnectorProperties.AUTH_TYPE_ECS_RAM_ROLE)) {
if (!properties.containsKey(
MCConnectorProperties.ECS_RAM_ROLE)) {
- throw new RuntimeException(
+ throw new IllegalArgumentException(
"Missing role name for ECS RAM Role auth type");
}
} else {
- throw new RuntimeException(
+ throw new IllegalArgumentException(
"Unsupported auth type: " + authType);
}
}
diff --git a/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCTypeMapping.java b/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCTypeMapping.java
index 9a238673803929..4c8f53ded6ed58 100644
--- a/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCTypeMapping.java
+++ b/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MCTypeMapping.java
@@ -18,6 +18,7 @@
package org.apache.doris.connector.maxcompute;
import org.apache.doris.connector.api.ConnectorType;
+import org.apache.doris.connector.api.DorisConnectorException;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.type.ArrayTypeInfo;
@@ -26,10 +27,12 @@
import com.aliyun.odps.type.MapTypeInfo;
import com.aliyun.odps.type.StructTypeInfo;
import com.aliyun.odps.type.TypeInfo;
+import com.aliyun.odps.type.TypeInfoFactory;
import com.aliyun.odps.type.VarcharTypeInfo;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
/**
* Maps MaxCompute (ODPS) type system to Doris ConnectorType.
@@ -46,7 +49,10 @@ public static ConnectorType toConnectorType(TypeInfo typeInfo) {
OdpsType odpsType = typeInfo.getOdpsType();
switch (odpsType) {
case VOID:
- return ConnectorType.of("NULL");
+ // "NULL_TYPE" is the token ScalarType.createType recognizes (-> Type.NULL),
+ // matching legacy MaxComputeExternalTable.mcTypeToDorisType VOID -> Type.NULL.
+ // "NULL" is NOT recognized (createType throws, swallowed to UNSUPPORTED).
+ return ConnectorType.of("NULL_TYPE");
case BOOLEAN:
return ConnectorType.of("BOOLEAN");
case TINYINT:
@@ -94,7 +100,12 @@ public static ConnectorType toConnectorType(TypeInfo typeInfo) {
case INTERVAL_YEAR_MONTH:
return ConnectorType.of("UNSUPPORTED");
default:
- return ConnectorType.of("UNSUPPORTED");
+ // Mirror legacy MaxComputeExternalTable.mcTypeToDorisType: fail-fast on a genuinely
+ // unknown OdpsType rather than silently degrading it to UNSUPPORTED. Known
+ // unsupported types (BINARY, INTERVAL_*, JSON) have explicit cases above, so this
+ // default is reached only by a future/unrecognized OdpsType.
+ throw new DorisConnectorException(
+ "Cannot transform unknown MaxCompute type: " + odpsType);
}
}
@@ -123,4 +134,84 @@ private static ConnectorType mapStructType(StructTypeInfo structType) {
}
return ConnectorType.structOf(names, fieldTypes);
}
+
+    /**
+     * Converts a {@link ConnectorType} (as produced by the CREATE TABLE request
+     * path) into a MaxCompute (ODPS) {@link TypeInfo}. Faithful reverse of the
+     * legacy {@code MaxComputeMetadataOps.dorisTypeToMcType}.
+     *
+     * <p>The type name is compared case-insensitively (upper-cased with
+     * {@link Locale#ROOT}). ARRAY, MAP and STRUCT recurse into their children;
+     * any other name is treated as a scalar, where the name is the Doris
+     * {@code PrimitiveType} name (e.g. INT, DECIMAL64, DATETIMEV2) and CHAR/VARCHAR
+     * length and DECIMAL precision/scale come from the {@link ConnectorType}
+     * precision/scale fields.
+     *
+     * @param type the connector type to convert
+     * @return the equivalent MaxCompute type
+     * @throws DorisConnectorException if the type cannot be represented in MaxCompute
+     */
+    public static TypeInfo toMcType(ConnectorType type) {
+        String upperName = type.getTypeName().toUpperCase(Locale.ROOT);
+        if ("ARRAY".equals(upperName)) {
+            // Child 0 is the element type.
+            TypeInfo elementInfo = toMcType(type.getChildren().get(0));
+            return TypeInfoFactory.getArrayTypeInfo(elementInfo);
+        }
+        if ("MAP".equals(upperName)) {
+            // Child 0 is the key type, child 1 the value type.
+            TypeInfo keyInfo = toMcType(type.getChildren().get(0));
+            TypeInfo valueInfo = toMcType(type.getChildren().get(1));
+            return TypeInfoFactory.getMapTypeInfo(keyInfo, valueInfo);
+        }
+        if ("STRUCT".equals(upperName)) {
+            return toMcStructType(type);
+        }
+        return toMcScalarType(upperName, type);
+    }
+
+ /**
+ * Maps a scalar Doris type name to the matching MaxCompute {@link TypeInfo}.
+ *
+ * @param name type name used as the switch selector; {@code toMcType} passes it upper-cased
+ * @param type source type; supplies the CHAR/VARCHAR length through {@code getPrecision()}
+ *        and the DECIMAL precision/scale through {@code getPrecision()}/{@code getScale()}
+ * @return the MaxCompute type for {@code name}
+ * @throws DorisConnectorException if {@code name} matches none of the cases below
+ */
+ private static TypeInfo toMcScalarType(String name, ConnectorType type) {
+ switch (name) {
+ case "BOOLEAN":
+ return TypeInfoFactory.BOOLEAN;
+ case "TINYINT":
+ return TypeInfoFactory.TINYINT;
+ case "SMALLINT":
+ return TypeInfoFactory.SMALLINT;
+ case "INT":
+ return TypeInfoFactory.INT;
+ case "BIGINT":
+ return TypeInfoFactory.BIGINT;
+ case "FLOAT":
+ return TypeInfoFactory.FLOAT;
+ case "DOUBLE":
+ return TypeInfoFactory.DOUBLE;
+ // The character length travels in the ConnectorType precision field.
+ case "CHAR":
+ return TypeInfoFactory.getCharTypeInfo(type.getPrecision());
+ case "VARCHAR":
+ return TypeInfoFactory.getVarcharTypeInfo(type.getPrecision());
+ case "STRING":
+ return TypeInfoFactory.STRING;
+ // All Doris decimal variants collapse to one MaxCompute decimal type.
+ case "DECIMALV2":
+ case "DECIMAL32":
+ case "DECIMAL64":
+ case "DECIMAL128":
+ case "DECIMAL256":
+ return TypeInfoFactory.getDecimalTypeInfo(
+ type.getPrecision(), type.getScale());
+ // Both date versions map to MaxCompute DATE.
+ case "DATE":
+ case "DATEV2":
+ return TypeInfoFactory.DATE;
+ // Both datetime versions map to MaxCompute DATETIME.
+ case "DATETIME":
+ case "DATETIMEV2":
+ return TypeInfoFactory.DATETIME;
+ default:
+ throw new DorisConnectorException(
+ "Unsupported type for MaxCompute: " + type);
+ }
+ }
+
+    /**
+     * Converts a STRUCT {@link ConnectorType} into a MaxCompute struct type.
+     * Each child is converted recursively through {@code toMcType}. A child with no
+     * corresponding entry in {@code getFieldNames()} is named {@code "col" + index}.
+     *
+     * @param type the struct connector type
+     * @return the MaxCompute struct type with one field per child, in child order
+     */
+    private static TypeInfo toMcStructType(ConnectorType type) {
+        // Type arguments are required: with raw lists children.get(i) is an Object
+        // and cannot be passed to toMcType(ConnectorType).
+        List<ConnectorType> children = type.getChildren();
+        List<String> names = type.getFieldNames();
+        List<String> fieldNames = new ArrayList<>(children.size());
+        List<TypeInfo> fieldTypes = new ArrayList<>(children.size());
+        for (int i = 0; i < children.size(); i++) {
+            // Fall back to a positional name when fewer names than children are supplied.
+            fieldNames.add(i < names.size() ? names.get(i) : "col" + i);
+            fieldTypes.add(toMcType(children.get(i)));
+        }
+        return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
+    }
}
diff --git a/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MaxComputeConnectorMetadata.java b/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MaxComputeConnectorMetadata.java
index 77aef9d8a9a514..0ba559f2d18ae3 100644
--- a/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MaxComputeConnectorMetadata.java
+++ b/fe/fe-connector/fe-connector-maxcompute/src/main/java/org/apache/doris/connector/maxcompute/MaxComputeConnectorMetadata.java
@@ -19,23 +19,41 @@
import org.apache.doris.connector.api.ConnectorColumn;
import org.apache.doris.connector.api.ConnectorMetadata;
+import org.apache.doris.connector.api.ConnectorPartitionInfo;
import org.apache.doris.connector.api.ConnectorSession;
import org.apache.doris.connector.api.ConnectorTableSchema;
+import org.apache.doris.connector.api.ConnectorType;
+import org.apache.doris.connector.api.DorisConnectorException;
+import org.apache.doris.connector.api.ddl.ConnectorBucketSpec;
+import org.apache.doris.connector.api.ddl.ConnectorCreateTableRequest;
+import org.apache.doris.connector.api.ddl.ConnectorPartitionField;
+import org.apache.doris.connector.api.ddl.ConnectorPartitionSpec;
import org.apache.doris.connector.api.handle.ConnectorColumnHandle;
import org.apache.doris.connector.api.handle.ConnectorTableHandle;
+import org.apache.doris.connector.api.handle.ConnectorTransaction;
+import org.apache.doris.connector.api.pushdown.ConnectorExpression;
import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
+import com.aliyun.odps.OdpsException;
+import com.aliyun.odps.Partition;
+import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.Table;
+import com.aliyun.odps.TableSchema;
+import com.aliyun.odps.Tables;
import com.aliyun.odps.table.TableIdentifier;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
/**
* ConnectorMetadata implementation for MaxCompute.
@@ -45,16 +63,32 @@ public class MaxComputeConnectorMetadata implements ConnectorMetadata {
private static final Logger LOG = LogManager.getLogger(
MaxComputeConnectorMetadata.class);
+ private static final long MAX_LIFECYCLE_DAYS = 37231;
+ private static final int MAX_BUCKET_NUM = 1024;
+ // Must stay byte-identical to the key ConnectorSessionBuilder.extractSessionProperties injects
+ // (GC1 / FIX-BLOCKID-CAP-CONFIG); = the legacy fe-core Config field name, surfaced via session
+ // properties because the connector cannot import fe-core Config.
+ private static final String MAX_COMPUTE_WRITE_MAX_BLOCK_COUNT = "max_compute_write_max_block_count";
+
private final Odps odps;
private final McStructureHelper structureHelper;
private final String defaultProject;
+ private final String endpoint;
+ private final String quota;
+ private final Map properties;
public MaxComputeConnectorMetadata(Odps odps,
McStructureHelper structureHelper,
- String defaultProject) {
+ String defaultProject,
+ String endpoint,
+ String quota,
+ Map properties) {
this.odps = odps;
this.structureHelper = structureHelper;
this.defaultProject = defaultProject;
+ this.endpoint = endpoint;
+ this.quota = quota;
+ this.properties = properties;
}
@Override
@@ -106,24 +140,22 @@ public ConnectorTableSchema getTableSchema(ConnectorSession session,
new ArrayList<>(dataColumns.size() + partColumns.size());
for (Column col : dataColumns) {
- columns.add(new ConnectorColumn(
+ columns.add(buildColumn(
col.getName(),
MCTypeMapping.toConnectorType(col.getTypeInfo()),
col.getComment(),
- col.isNullable(),
- null));
+ col.isNullable()));
}
List partitionColumnNames =
new ArrayList<>(partColumns.size());
for (Column partCol : partColumns) {
partitionColumnNames.add(partCol.getName());
- columns.add(new ConnectorColumn(
+ columns.add(buildColumn(
partCol.getName(),
MCTypeMapping.toConnectorType(partCol.getTypeInfo()),
partCol.getComment(),
- true,
- null));
+ true));
}
java.util.Map props = new java.util.HashMap<>();
@@ -135,6 +167,19 @@ public ConnectorTableSchema getTableSchema(ConnectorSession session,
mcHandle.getTableName(), columns, "MAX_COMPUTE", props);
}
+    /**
+     * Creates the {@link ConnectorColumn} for one MaxCompute external-table column, always
+     * flagged as a key column.
+     *
+     * <p>This mirrors legacy {@code MaxComputeExternalTable.initSchema}, where every column was a
+     * Doris key column. External (non-OLAP) tables have no key-based storage; the flag only
+     * drives DESCRIBE's {@code Key} display and the few non-OLAP-guarded planning/BE paths that
+     * read {@code Column.isKey()} (e.g. predicate inference, slot descriptors), all of which
+     * legacy already fed {@code true}. {@code isAutoInc} stays false.
+     *
+     * @param name column name
+     * @param type connector type of the column
+     * @param comment column comment, passed through unchanged
+     * @param nullable whether the column is nullable
+     * @return the column, built with a {@code null} fifth constructor argument and key flag set
+     */
+    static ConnectorColumn buildColumn(String name, ConnectorType type, String comment,
+            boolean nullable) {
+        final boolean keyColumn = true;
+        return new ConnectorColumn(name, type, comment, nullable, null, keyColumn);
+    }
+
@Override
public Map getColumnHandles(
ConnectorSession session, ConnectorTableHandle handle) {
@@ -152,4 +197,448 @@ public Map getColumnHandles(
}
return result;
}
+
+    /**
+     * Produces the typed MaxCompute table descriptor used by the read path.
+     *
+     * <p>The BE {@code file_scanner} static_casts {@code table_desc()} to
+     * {@code MaxComputeTableDescriptor} unconditionally for
+     * {@code table_format_type=="max_compute"}, so the descriptor MUST be
+     * {@code MAX_COMPUTE_TABLE} with {@code mcTable} set; the null / SCHEMA_TABLE
+     * fallback would produce type confusion in BE. Mirrors legacy
+     * {@code MaxComputeExternalTable.toThrift()}.
+     *
+     * <p>The thrift {@code project} is filled from {@code dbName} and {@code table} from
+     * {@code remoteName}; the SPI read session also addresses ODPS with remote names, so
+     * the descriptor must match (see design OQ-7).
+     * NOTE(review): this relies on {@code dbName} already being the remote project name -
+     * confirm against the caller. The 6th ctor arg ({@code dbName}) mirrors legacy and is
+     * unread by BE for MC reads. Thrift types are written fully qualified to match the
+     * jdbc/es overrides and avoid new connector imports.
+     *
+     * <p>{@code session} and {@code catalogId} are not read by this implementation.
+     */
+    @Override
+    public org.apache.doris.thrift.TTableDescriptor buildTableDescriptor(
+            ConnectorSession session,
+            long tableId, String tableName, String dbName,
+            String remoteName, int numCols, long catalogId) {
+        // MaxCompute-specific payload: where the table lives and how to reach it.
+        org.apache.doris.thrift.TMCTable mcTable = new org.apache.doris.thrift.TMCTable();
+        mcTable.setProject(dbName);
+        mcTable.setTable(remoteName);
+        mcTable.setEndpoint(endpoint);
+        mcTable.setQuota(quota);
+        mcTable.setProperties(properties);
+
+        org.apache.doris.thrift.TTableDescriptor descriptor =
+                new org.apache.doris.thrift.TTableDescriptor(
+                        tableId, org.apache.doris.thrift.TTableType.MAX_COMPUTE_TABLE,
+                        numCols, 0, tableName, dbName);
+        descriptor.setMcTable(mcTable);
+        return descriptor;
+    }
+
+ // ==================== Partition listing ====================
+
+    /**
+     * Returns the name of every partition of the table behind {@code handle}.
+     * Each name is the partition spec rendered with {@code toString(false, true)}.
+     *
+     * @param session unused by this implementation
+     * @param handle must be a {@link MaxComputeTableHandle}
+     * @return one name per partition, in the order returned by the structure helper
+     */
+    @Override
+    public List<String> listPartitionNames(ConnectorSession session,
+            ConnectorTableHandle handle) {
+        MaxComputeTableHandle mcHandle = (MaxComputeTableHandle) handle;
+        // Typed lists are required: a raw List cannot be iterated as Partition.
+        List<Partition> partitions = structureHelper.getPartitions(
+                odps, mcHandle.getDbName(), mcHandle.getTableName());
+        List<String> names = new ArrayList<>(partitions.size());
+        for (Partition partition : partitions) {
+            names.add(partition.getPartitionSpec().toString(false, true));
+        }
+        return names;
+    }
+
+    /**
+     * Lists all partitions. The {@code filter} is intentionally ignored: the
+     * legacy SHOW PARTITIONS path ({@code MaxComputeExternalCatalog
+     * #listPartitionNames}) returns the full partition set without pushing
+     * predicates into ODPS, and this preserves that behavior. Partitions are
+     * read directly from ODPS with no connector-side cache (P4-T02 / OQ-4).
+     *
+     * @param session unused by this implementation
+     * @param handle must be a {@link MaxComputeTableHandle}
+     * @param filter ignored
+     * @return one entry per partition: the spec rendered with {@code toString(false, true)},
+     *         its key/value pairs in spec key order, and an empty third map
+     */
+    @Override
+    public List<ConnectorPartitionInfo> listPartitions(ConnectorSession session,
+            ConnectorTableHandle handle, Optional<ConnectorExpression> filter) {
+        MaxComputeTableHandle mcHandle = (MaxComputeTableHandle) handle;
+        List<Partition> partitions = structureHelper.getPartitions(
+                odps, mcHandle.getDbName(), mcHandle.getTableName());
+        List<ConnectorPartitionInfo> result = new ArrayList<>(partitions.size());
+        for (Partition partition : partitions) {
+            PartitionSpec spec = partition.getPartitionSpec();
+            // LinkedHashMap keeps the partition keys in the order the spec reports them.
+            Map<String, String> values = new LinkedHashMap<>();
+            for (String key : spec.keys()) {
+                values.put(key, spec.get(key));
+            }
+            result.add(new ConnectorPartitionInfo(
+                    spec.toString(false, true), values, Collections.emptyMap()));
+        }
+        return result;
+    }
+
+    /**
+     * Returns, for every partition, the values of the requested partition columns.
+     *
+     * @param session unused by this implementation
+     * @param handle must be a {@link MaxComputeTableHandle}
+     * @param partitionColumns column names to look up in each partition spec
+     * @return one row per partition; each row holds {@code spec.get(column)} for the
+     *         requested columns, in the order given
+     */
+    @Override
+    public List<List<String>> listPartitionValues(ConnectorSession session,
+            ConnectorTableHandle handle, List<String> partitionColumns) {
+        MaxComputeTableHandle mcHandle = (MaxComputeTableHandle) handle;
+        List<Partition> partitions = structureHelper.getPartitions(
+                odps, mcHandle.getDbName(), mcHandle.getTableName());
+        List<List<String>> result = new ArrayList<>(partitions.size());
+        for (Partition partition : partitions) {
+            PartitionSpec spec = partition.getPartitionSpec();
+            List<String> values = new ArrayList<>(partitionColumns.size());
+            for (String column : partitionColumns) {
+                values.add(spec.get(column));
+            }
+            result.add(values);
+        }
+        return result;
+    }
+
+ // ==================== Write / Transaction (P4-T03 / P4-T04) ====================
+
+ /**
+ * Declares INSERT support so the engine routes MaxCompute writes through the
+ * plugin-driven sink path. The sink is built by
+ * {@link MaxComputeWritePlanProvider#planWrite} (P4-T04) and commit is driven by
+ * {@link MaxComputeConnectorTransaction#commit()} through the SPI transaction
+ * lifecycle, so the {@code beginInsert} / {@code finishInsert} / {@code getWriteConfig}
+ * hooks carry no MaxCompute-specific work and intentionally stay the throwing
+ * defaults; the exact executor call surface is settled at the cutover (Batch C).
+ *
+ * @return always {@code true}
+ */
+ @Override
+ public boolean supportsInsert() {
+ return true;
+ }
+
+ /**
+ * Declares INSERT OVERWRITE support.
+ *
+ * @return always {@code true}
+ */
+ @Override
+ public boolean supportsInsertOverwrite() {
+ // MaxCompute honors overwrite end-to-end: MaxComputeWritePlanProvider sets
+ // builder.overwrite(true) on the write session when the sink requests it.
+ return true;
+ }
+
+ /**
+ * Disables pushing predicates that contain implicit CAST expressions down to ODPS (F9 fix).
+ *
+ * <p>The shared {@code ExprToConnectorExpressionConverter} unwraps CAST shells, so without this
+ * a predicate like {@code CAST(str_col AS INT) = 5} would be pushed to the ODPS read session as
+ * the source-side filter {@code str_col = "5"} (quoted by the column's STRING type), which ODPS
+ * evaluates as exact string equality and drops rows like {@code "05"}/{@code " 5"} at the
+ * source - silent data loss, because BE re-evaluation can only filter the returned rows down,
+ * never recover rows ODPS never returned. Returning {@code false} makes
+ * {@code PluginDrivenScanNode.buildRemainingFilter} strip CAST-bearing conjuncts before pushdown
+ * (they stay BE-only), restoring legacy parity: legacy {@code MaxComputeScanNode} likewise never
+ * pushed CAST predicates (its {@code convertSlotRefToColumnName} threw on a CAST operand and the
+ * conjunct was dropped). Mirrors {@code JdbcConnectorMetadata} and the contract documented on
+ * {@link org.apache.doris.connector.api.ConnectorPushdownOps#supportsCastPredicatePushdown}.
+ *
+ * @param session not read by this implementation
+ * @return always {@code false}
+ */
+ @Override
+ public boolean supportsCastPredicatePushdown(ConnectorSession session) {
+ return false;
+ }
+
+ /**
+ * MaxCompute uses the SPI transaction model: the engine opens a
+ * {@link MaxComputeConnectorTransaction} via {@link #beginTransaction} and binds it to
+ * the session; the write plan ({@code MaxComputeWritePlanProvider.planWrite}) attaches the
+ * ODPS write session to it. So the executor routes through the transaction model rather
+ * than the {@code beginInsert} / {@code finishInsert} handle model (which stays throwing-default).
+ *
+ * @return always {@code true}
+ */
+ @Override
+ public boolean usesConnectorTransaction() {
+ return true;
+ }
+
+ /**
+ * Opens a connector transaction for a MaxCompute write statement. The
+ * transaction id is the engine-side id allocated through the session, so it
+ * matches the id registered in the engine transaction registry and stamped
+ * into the data sink (see {@link MaxComputeConnectorTransaction}).
+ *
+ * <p>Gate-closed / dormant until the {@code max_compute} cutover: nothing
+ * routes plugin-driven MaxCompute writes through this path yet. The ODPS
+ * write session that backs commit / block allocation is created by the write
+ * plan (P4-T04), which binds it via
+ * {@link MaxComputeConnectorTransaction#setWriteSession}.
+ *
+ * @param session supplies the session properties (block-count cap) and the transaction id
+ * @return a new transaction carrying the allocated id and the resolved block-count cap
+ */
+ @Override
+ public ConnectorTransaction beginTransaction(ConnectorSession session) {
+ long maxBlockCount = resolveMaxBlockCount(session.getSessionProperties());
+ return new MaxComputeConnectorTransaction(session.allocateTransactionId(), maxBlockCount);
+ }
+
+    /**
+     * Resolves the write block-id cap from the session properties, into which fe-core's
+     * {@code ConnectorSessionBuilder} surfaces the (tunable)
+     * {@code Config.max_compute_write_max_block_count} (the connector cannot import fe-core
+     * {@code Config}). Falls back to the legacy default when the value is absent or unparseable,
+     * so any path without the injected value keeps the current behavior. Package-private +
+     * map-typed for direct unit testing without a live session.
+     *
+     * @param sessionProperties session property map; must not be {@code null}
+     * @return the parsed value (surrounding whitespace trimmed), or
+     *         {@link MaxComputeConnectorTransaction#DEFAULT_MAX_BLOCK_COUNT} when the key is
+     *         missing or its value is not a valid {@code long}
+     */
+    static long resolveMaxBlockCount(Map<String, String> sessionProperties) {
+        // The map must be typed: a raw Map returns Object from get(), which cannot be
+        // assigned to String.
+        String value = sessionProperties.get(MAX_COMPUTE_WRITE_MAX_BLOCK_COUNT);
+        if (value == null) {
+            return MaxComputeConnectorTransaction.DEFAULT_MAX_BLOCK_COUNT;
+        }
+        try {
+            return Long.parseLong(value.trim());
+        } catch (NumberFormatException e) {
+            // Unparseable override: keep the legacy default rather than fail the write.
+            return MaxComputeConnectorTransaction.DEFAULT_MAX_BLOCK_COUNT;
+        }
+    }
+
+ // ==================== DDL: Create/Drop Table ====================
+
+ /**
+ * Creates a MaxCompute table from a CREATE TABLE request.
+ *
+ * If the table already exists, the call returns quietly when the request carries
+ * IF NOT EXISTS and throws otherwise. Otherwise the columns are validated, the schema
+ * is built from the columns and the partition columns derived from the request's
+ * partition spec, and the optional comment, lifecycle, table properties and bucket
+ * number are applied to the ODPS table creator before it is executed.
+ *
+ * NOTE(review): the local {@code List}/{@code Map} declarations below appear without
+ * type arguments; the element types are not visible here - confirm against the helper
+ * signatures ({@code identityPartitionColumns}, {@code extractMaxComputeProperties}).
+ *
+ * @param session not read by this implementation
+ * @param request the table definition
+ * @throws DorisConnectorException if the table exists without IF NOT EXISTS, or if the
+ *         ODPS create call fails
+ */
+ @Override
+ public void createTable(ConnectorSession session,
+ ConnectorCreateTableRequest request) {
+ String dbName = request.getDbName();
+ String tableName = request.getTableName();
+
+ // Early existence check: IF NOT EXISTS makes this a logged no-op, otherwise an error.
+ if (structureHelper.tableExist(odps, dbName, tableName)) {
+ if (request.isIfNotExists()) {
+ LOG.info("create table[{}.{}] which already exists",
+ dbName, tableName);
+ return;
+ }
+ throw new DorisConnectorException("Table '" + tableName
+ + "' already exists in database '" + dbName + "'");
+ }
+
+ List columns = request.getColumns();
+ validateColumns(columns);
+ List partitionColumns =
+ identityPartitionColumns(request.getPartitionSpec());
+ TableSchema schema = buildSchema(columns, partitionColumns);
+
+ // Each optional setting is extracted up front, before the creator is built.
+ Long lifecycle = extractLifecycle(request.getProperties());
+ Map mcProperties =
+ extractMaxComputeProperties(request.getProperties());
+ Integer bucketNum = extractBucketNum(request.getBucketSpec());
+
+ Tables.TableCreator creator = structureHelper.createTableCreator(
+ odps, dbName, tableName, schema);
+ // Also forwarded to the creator, in addition to the existence check above.
+ if (request.isIfNotExists()) {
+ creator.ifNotExists();
+ }
+ String comment = request.getComment();
+ if (comment != null && !comment.isEmpty()) {
+ creator.withComment(comment);
+ }
+ // A null lifecycle / bucket number means the option is not applied to the creator.
+ if (lifecycle != null) {
+ creator.withLifeCycle(lifecycle);
+ }
+ if (!mcProperties.isEmpty()) {
+ creator.withTblProperties(mcProperties);
+ }
+ if (bucketNum != null) {
+ creator.withDeltaTableBucketNum(bucketNum);
+ }
+
+ try {
+ creator.create();
+ } catch (OdpsException e) {
+ // Rethrow as the connector exception type, keeping the ODPS exception as cause.
+ throw new DorisConnectorException("Failed to create MaxCompute table '"
+ + tableName + "': " + e.getMessage(), e);
+ }
+ LOG.info("created MaxCompute table {}.{}", dbName, tableName);
+ }
+
+    /**
+     * Drops the table that {@code handle} points at.
+     *
+     * <p>The SPI signature has no {@code ifExists} parameter; fe-core resolves the handle
+     * (absent when the table does not exist) before routing here, so the remote drop is
+     * issued idempotently by passing {@code true} as the last argument to the structure helper.
+     *
+     * @param session not read by this implementation
+     * @param handle must be a {@link MaxComputeTableHandle}
+     * @throws DorisConnectorException if the ODPS drop call fails
+     */
+    @Override
+    public void dropTable(ConnectorSession session,
+            ConnectorTableHandle handle) {
+        MaxComputeTableHandle target = (MaxComputeTableHandle) handle;
+        final String db = target.getDbName();
+        final String table = target.getTableName();
+        try {
+            structureHelper.dropTable(odps, db, table, true);
+        } catch (OdpsException odpsFailure) {
+            String message = "Failed to drop MaxCompute table '"
+                    + table + "': " + odpsFailure.getMessage();
+            throw new DorisConnectorException(message, odpsFailure);
+        }
+        LOG.info("dropped MaxCompute table {}.{}", db, table);
+    }
+
+ // ==================== DDL: Create/Drop Database ====================
+
+ /**
+ * Declares CREATE DATABASE support; the work is done by {@code createDatabase}.
+ *
+ * @return always {@code true}
+ */
+ @Override
+ public boolean supportsCreateDatabase() {
+ return true;
+ }
+
+ /**
+ * Creates the MaxCompute database {@code dbName} through the structure helper.
+ * Neither {@code session} nor {@code properties} is read by this implementation.
+ * NOTE(review): the literal {@code false} passed to {@code createDb} is presumably an
+ * "if not exists" flag - confirm against {@code McStructureHelper}.
+ */
+ @Override
+ public void createDatabase(ConnectorSession session, String dbName,
+ Map properties) {
+ structureHelper.createDb(odps, dbName, false);
+ LOG.info("created MaxCompute database {}", dbName);
+ }
+
+    /**
+     * Drops the MaxCompute database {@code dbName}.
+     *
+     * <p>With {@code force}, every table listed for the database is dropped first, because
+     * the schema delete does not cascade. The database drop itself is then issued with the
+     * caller's {@code ifExists} flag.
+     *
+     * @param session not read by this implementation
+     * @param dbName database to drop
+     * @param ifExists forwarded to the structure helper's {@code dropDb}
+     * @param force when {@code true}, drop all tables before the database
+     * @throws DorisConnectorException if dropping one of the tables fails during a force drop
+     */
+    @Override
+    public void dropDatabase(ConnectorSession session, String dbName,
+            boolean ifExists, boolean force) {
+        if (force) {
+            forceDropTables(dbName);
+        }
+        structureHelper.dropDb(odps, dbName, ifExists);
+        LOG.info("dropped MaxCompute database {} (force={})", dbName, force);
+    }
+
+    /** Drops every table listed for {@code dbName}; stops at the first failure. */
+    private void forceDropTables(String dbName) {
+        // ODPS schemas().delete() does NOT auto-cascade; enumerate and drop each
+        // table first (mirrors legacy MaxComputeMetadataOps.dropDbImpl force branch,
+        // whose enumerate-loop is itself proof that the schema delete won't cascade).
+        for (String tableName : structureHelper.listTableNames(odps, dbName)) {
+            try {
+                structureHelper.dropTable(odps, dbName, tableName, true);
+            } catch (OdpsException odpsFailure) {
+                throw new DorisConnectorException("Failed to drop MaxCompute table '"
+                        + tableName + "' during force-drop of database '" + dbName
+                        + "': " + odpsFailure.getMessage(), odpsFailure);
+            }
+        }
+    }
+
+ // ==================== DDL helpers ====================
+
+ // package-private for unit test; reached only via createTable() in production.
+ void validateColumns(List columns) {
+ if (columns == null || columns.isEmpty()) {
+ throw new DorisConnectorException(
+ "Table must have at least one column.");
+ }
+ Set