Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
128 commits
Select commit Hold shift + click to select a range
d8ff6ba
[doc](connector) add project tracking system for catalog SPI migration
morningman May 25, 2026
aa2c287
[feat](connector) P0 SPI baseline + DDL/Partition + import gate (T03-…
morningman May 25, 2026
0e2865b
[P1-T03-T05] route plugin-driven scans first in nereids translator (#…
morningman May 25, 2026
508e7fe
[feat](connector) P2 migrate trino-connector to catalog SPI (T01-T13)…
morningman Jun 4, 2026
bfff78d
[feat](connector) P3 hudi connector hardening + test baseline + dispa…
morningman Jun 6, 2026
7383299
[refactor](connector) P4 maxcompute: remove legacy subsystem from fe-…
morningman Jun 9, 2026
e9c5b3e
update P5 handoff and fix compile issue
morningman Jun 9, 2026
cab2725
[doc](connector) P5 paimon recon + 设计 + plan-doc 同步(design-only,0 产线代码)
morningman Jun 9, 2026
f71bbcf
[test](connector) P5 paimon B0: test harness + parity baseline (T01-T02)
morningman Jun 9, 2026
9142f23
[feat](connector) P5 paimon B1: flavor assembly (T03-T05, all 5 flavors)
morningman Jun 9, 2026
deb30e9
[feat](connector) P5 paimon B2+B3: normal-read + DDL metadata (T06-T15)
morningman Jun 9, 2026
d5e3c0f
[P5-T16~T20] (connector) P5 paimon B4: sys-tables (E7) + MVCC (E5)
morningman Jun 10, 2026
9c72568
[P5-T22~T26,T31~T35] (connector+fe-core) P5 paimon B5+B6: MTMV/MVCC b…
morningman Jun 10, 2026
fc7a875
[P5-B7+fixes] (connector+fe-core) P5 paimon B7 cutover + 8 fullpath-r…
morningman Jun 11, 2026
77d4ed4
fix: FIX-URI-NORMALIZE — normalize native data-file + DV paths to BE …
morningman Jun 11, 2026
604bad6
docs: checkpoint rereview2 #1 done; hand off #2 FIX-STATIC-CREDS-BE
morningman Jun 11, 2026
f5389a8
fix: FIX-STATIC-CREDS-BE — normalize static object-store creds to BE-…
morningman Jun 11, 2026
2e327fe
docs: checkpoint rereview2 #2 done; hand off #3 FIX-SCHEMA-EVOLUTION
morningman Jun 11, 2026
66a447e
fix: FIX-SCHEMA-EVOLUTION — emit native current_schema_id/history_sch…
morningman Jun 11, 2026
0579464
fix: FIX-JDBC-DRIVER-URL — resolve+alias driver_url for BE; validate …
morningman Jun 11, 2026
c2f861e
docs: checkpoint rereview2 #4 done; hand off #5 FIX-MAPPING-FLAG-KEYS
morningman Jun 11, 2026
d683fce
fix: FIX-MAPPING-FLAG-KEYS — read canonical dotted catalog keys for p…
morningman Jun 11, 2026
5843171
docs: checkpoint #5 FIX-MAPPING-FLAG-KEYS done; hand off #6 FIX-KERBE…
morningman Jun 11, 2026
689903e
fix: FIX-KERBEROS-DOAS — wire fs/jdbc HDFS authenticator (M-8) + wrap…
morningman Jun 11, 2026
68629e0
docs: checkpoint #6 FIX-KERBEROS-DOAS done; hand off #7 FIX-FORCE-JNI…
morningman Jun 11, 2026
c2bbb85
fix: FIX-FORCE-JNI-SCANNER — honor force_jni_scanner session var on p…
morningman Jun 12, 2026
4db0f0b
docs: checkpoint #7 FIX-FORCE-JNI-SCANNER done; hand off #8 FIX-COUNT…
morningman Jun 12, 2026
09f28cd
fix: FIX-COUNT-PUSHDOWN — emit precomputed merged row count for COUNT…
morningman Jun 12, 2026
c948dac
fix: FIX-NATIVE-SUBSPLIT — sub-split large native ORC/Parquet paimon …
morningman Jun 12, 2026
dcc2714
docs: checkpoint #8 + #9 done (P2 perf-parity all clear); hand off P3…
morningman Jun 12, 2026
ea0da52
fix: FIX-CREATE-TABLE-LOCAL-CONFLICT — restore legacy local-conflict …
morningman Jun 12, 2026
8dcd843
docs: checkpoint — P3 coverage-gap verification complete (3 parity + …
morningman Jun 12, 2026
23d0943
fix: FIX-VARCHAR-BOUNDARY — read VARCHAR(65533) reported as STRING (P…
morningman Jun 12, 2026
f135c75
fix: FIX-PARTITION-NULL-SENTINEL — scan-path coerces literal \N parti…
morningman Jun 12, 2026
5faffbc
docs: P4 MINOR/NIT cleanup complete — 2 fix + 15 accept ([D-057]/[DV-…
morningman Jun 12, 2026
d5adb21
docs: roll HANDOFF to round-3 clean-room adversarial review (no prior…
morningman Jun 12, 2026
0c9865a
fix: FIX-REST-VENDED-URI-NORMALIZE — REST native object-store read th…
morningman Jun 12, 2026
da012e4
fix: FIX-JNI-FILE-FORMAT — JNI/count split emits file_format="jni" in…
morningman Jun 12, 2026
25106d6
docs: roll HANDOFF + task-list — FIX-1 (P9-1 BLOCKER) & FIX-2 (P7-1 M…
morningman Jun 12, 2026
3e38221
fix: FIX-INCR-SCAN-RESET — @incr drops legacy scan.snapshot-id/scan.m…
morningman Jun 12, 2026
179ab88
docs: roll HANDOFF + task-list — FIX-3 (P2-1 MAJOR) done, next FIX-4
morningman Jun 12, 2026
8f0ef3d
fix: FIX-FECONF-STORAGE-PARITY — FE-config storage reconstruction ful…
morningman Jun 12, 2026
05a56e0
docs: roll HANDOFF + task-list — FIX-4 (FIX-FECONF-STORAGE-PARITY) do…
morningman Jun 12, 2026
fe208a8
fix: FIX-PAIMON-HADOOP-CLASSLOADER — Paimon plugin self-contained Had…
morningman Jun 12, 2026
74154d6
fix: FIX-SHOWCREATE-PLUGIN-PROPS — scope SHOW CREATE TABLE LOCATION/P…
morningman Jun 12, 2026
2e2a93a
fix: FIX-PLUGIN-SYSTABLE-SCHEMA-CACHE — RC-7 paimon system-table sche…
morningman Jun 12, 2026
7be1b54
fix: FIX-PAIMON-DESC-ISKEY-PARITY — RC-6 DESC Key column parity (CI 9…
morningman Jun 12, 2026
ca3cbb3
fix: FIX-PAIMON-THRIFT-CLASSLOADER-SPLIT — RC-1 libthrift child-first…
morningman Jun 12, 2026
0c564fa
fix: FIX-PAIMON-JNI-PREDICATE-NULL — RC-2 encodedStr-null NPE on no-f…
morningman Jun 12, 2026
13fba36
docs: RCA + task-list for CI build 968828 (paimon external regression)
morningman Jun 12, 2026
abdb765
fix: FIX-PAIMON-OSS-JINDO-SELFCONTAINED — RC-4 bundle jindofs into pa…
morningman Jun 12, 2026
be66ac4
fix: FIX-PAIMON-S3A-SDK-SELFCONTAINED — RC-3 bundle AWS SDK into paim…
morningman Jun 12, 2026
ecf5acb
fix: FIX-PAIMON-HMS-CLIENT-SELFCONTAINED — RC-5 bundle hive metastore…
morningman Jun 12, 2026
e3c5f8a
docs: mark RC-3/4/5 self-contained classloader fixes done (CI 968828)
morningman Jun 12, 2026
ca8250f
fix: FIX-PAIMON-PATH-PARTITION-KEYS — RC partition-column double-fill…
morningman Jun 13, 2026
71f20ea
fix: FIX-PAIMON-S3-TRANSFER-MANAGER — bundle s3-transfer-manager, RC …
morningman Jun 13, 2026
aa6b3a5
fix: FIX-PAIMON-OBS-SELFCONTAINED — bundle hadoop-huaweicloud, RC obs…
morningman Jun 13, 2026
7a620a7
fix: FIX-PAIMON-HMS-THRIFT-SHADE — relocate paimon HMS thrift, RC TFr…
morningman Jun 13, 2026
b8cd74c
fix: FIX-PAIMON-EXPLAIN-GAP — re-emit paimon scan EXPLAIN lines dropp…
morningman Jun 13, 2026
af67643
docs: mark FIX-E done in task-list (CI 968994)
morningman Jun 13, 2026
123938d
fix: FIX-PAIMON-OBS-REPO — declare huawei-obs-sdk repo so the connect…
morningman Jun 13, 2026
321e6c4
test: fix PaimonMetadataOpsTest — build legacy catalog directly, not …
morningman Jun 13, 2026
905433d
fix: FIX-PAIMON-SCHEMA-DICT-SLOTS — build native -1 schema entry from…
morningman Jun 14, 2026
a282ae5
fix: FIX-PAIMON-HDFS-CLIENT — bundle hadoop-hdfs-client for filesyste…
morningman Jun 14, 2026
ceabc04
test: FIX-PAIMON-TEST-MAPREDUCE — add test-scope hadoop-mapreduce-cli…
morningman Jun 14, 2026
634f1ea
fix: FIX-PAIMON-RO-SCHEMA-DICT — emit schema dict for $ro (ReadOptimi…
morningman Jun 14, 2026
36e61c6
fix: FIX-PAIMON-DESC-WITH-TIMEZONE — carry TZ extra-info marker throu…
morningman Jun 14, 2026
1b0ae1e
fix: FIX-PAIMON-NULL-PARTITION-PRUNE — WHERE col IS NULL pruned the g…
morningman Jun 14, 2026
953c1d7
fix: FIX-PLUGIN-EXPLAIN-INPUTSPLITNUM — re-emit the inputSplitNum EXP…
morningman Jun 14, 2026
0fb6100
fix: FIX-PAIMON-VERBOSE-SPLITSTAT — re-emit the VERBOSE PaimonSplitSt…
morningman Jun 14, 2026
17a470c
fix test
morningman Jun 14, 2026
75fd0af
fix: FIX-PAIMON-MINIO-STORAGE — recognize minio.* storage keys in the…
morningman Jun 14, 2026
8d9b919
fix test paimon_time_travel.groovy
morningman Jun 14, 2026
70e934d
feat: extract storage property parsing into reusable fe-property modu…
morningman Jun 15, 2026
5bf6cee
[P0-T01] storage-refactor: recon verdict + DV-001/D-009 (bind-all mec…
morningman Jun 17, 2026
0f50a13
[P0-T02] fe-core: add FileSystemPluginManager.bindAll(rawMap) (D-009)
morningman Jun 17, 2026
ffd5466
[P1-T01] fe-connector-spi: ConnectorContext.getStorageProperties() + …
morningman Jun 17, 2026
5520975
[P1-T02] fe-core: DefaultConnectorContext.getStorageProperties() + Fi…
morningman Jun 17, 2026
4d190a7
[P1-T03-prep] record DV-002: T1 = common-case equal + documented supe…
morningman Jun 17, 2026
550c7d1
docs(storage-refactor): refresh HANDOFF for next session + encode per…
morningman Jun 17, 2026
f77e1df
[P1-T03] fe-connector-paimon: storage config via ctx.getStorageProper…
morningman Jun 17, 2026
60c6a50
[P1-T04] fe-connector-paimon: BE static creds via ctx.getStoragePrope…
morningman Jun 17, 2026
5b24d93
[P1-T05] fe-connector-paimon: drop the fe-property dependency edge
morningman Jun 17, 2026
a426648
[FU-T01] fe-filesystem-hdfs: HDFS typed BE model restores HDFS BE key…
morningman Jun 17, 2026
d9bedba
docs(storage-refactor): D-011 — do R-008 (FU-T02) + R-006 (FU-T03) be…
morningman Jun 17, 2026
e5b088b
[FU-T02] fe-filesystem-{oss,cos,obs}: emit AWS_CREDENTIALS_PROVIDER_T…
morningman Jun 17, 2026
da58cb4
[FU-T03] fe-filesystem-{s3,oss,cos,obs}: guard tuning defaults with e…
morningman Jun 17, 2026
f5b5090
docs(storage-refactor): D-012 — defer P1-T06 docker, start P2 (metast…
morningman Jun 17, 2026
51df4fc
[P3a-T01] fe-kerberos: neutral Kerberos facts carrier (AuthType + Ker…
morningman Jun 17, 2026
44d1fec
[P2-T01] fe-connector-metastore-api: neutral metastore connection con…
morningman Jun 17, 2026
039f48e
docs(storage-refactor): HANDOFF — fix P2-T01 commit ref (44d1fec4dcb)…
morningman Jun 17, 2026
7ea6352
[P2-T02] fe-connector-metastore-spi: shared metastore parsers + MetaS…
morningman Jun 17, 2026
b3c18b9
docs(storage-refactor): P2-T02 — record commit ref 7ea63528bc4
morningman Jun 17, 2026
3c1e118
[P2-T03] fe-connector-paimon: cut metastore connection logic over to …
morningman Jun 18, 2026
8b678cb
docs(storage-refactor): P2-T03 — record commit ref 3c1e118dcfa
morningman Jun 18, 2026
5c6b307
docs(storage-refactor): set next phase = P1-T07 delete fe-property (D…
morningman Jun 18, 2026
13d3876
[P1-T07] fe-property: delete orphan module (0 consumers after P1-T05)
morningman Jun 18, 2026
46e37ea
docs(catalog-spi): next session = paimon full-path clean-room review …
morningman Jun 18, 2026
5740d27
docs(catalog-spi): P6 paimon full-path clean-room review report + HAN…
morningman Jun 18, 2026
9967846
fix: FIX-C1-MINIO — bind minio.* keyed catalogs via shared fe-filesys…
morningman Jun 18, 2026
204c6fa
docs(catalog-spi): P6 C1 MinIO fix done → HANDOFF next = C2 (HDFS XML)
morningman Jun 18, 2026
e95128a
fix: FIX-C2-HDFS-XML — load hadoop.config.resources XML into the FE c…
morningman Jun 18, 2026
b3da482
docs(catalog-spi): P6 C2 HDFS XML fix done → HANDOFF next = R3-residual
morningman Jun 18, 2026
44499f0
fix: FIX-R3-RESIDUAL — emit VERBOSE backends block for every plugin c…
morningman Jun 19, 2026
287e7e5
docs(catalog-spi): P6 R3-residual fix done → HANDOFF next = R1-table
morningman Jun 19, 2026
f652b40
fix: FIX-R1-TABLE — report MySQL errno 1050 for CREATE TABLE on a rem…
morningman Jun 19, 2026
c83a10a
docs(catalog-spi): P6 R1-table fix done → HANDOFF next = C4/R2-catalo…
morningman Jun 19, 2026
82b6de0
fix: P6-C4/R2-catalog/R3-catalog — HMS socket-timeout, dead cache-key…
morningman Jun 19, 2026
4915798
docs(catalog-spi): P6 C4/R2/R3 fix done → HANDOFF next = P6-DEVIATION…
morningman Jun 19, 2026
7c59747
docs(catalog-spi): P6 — 5 deviations elected to fix (A1/A2/A3 + B-R2-…
morningman Jun 19, 2026
5fa47c2
fix: FIX-A3 — emit paimon.self_split_weight (incl. 0) for JNI splits
morningman Jun 19, 2026
2239a0a
docs(catalog-spi): P6 A3 (self_split_weight) fix done → HANDOFF next …
morningman Jun 19, 2026
1935748
fix: FIX-A2 — re-emit predicatesFromPaimon EXPLAIN line for paimon scans
morningman Jun 19, 2026
0ed5bd4
docs(catalog-spi): P6 A2 (predicatesFromPaimon) fix done → HANDOFF ne…
morningman Jun 19, 2026
10284ed
fix: FIX-B-MC2 — memoize time-travel schema-at-snapshot read across q…
morningman Jun 19, 2026
0213099
docs(catalog-spi): P6 B-MC2 (schema-at-snapshot memo) fix done → HAND…
morningman Jun 19, 2026
9d68714
fix: FIX-A1 — thread proportional split weight to the FE FileSplit (B…
morningman Jun 19, 2026
bb48925
docs(catalog-spi): P6 A1 (split-weight) fix done → HANDOFF next = B-R…
morningman Jun 19, 2026
60ed665
fix: FIX-B-R2-be — memoize the schema-evolution dict's per-schema-id …
morningman Jun 19, 2026
f650e0b
docs(catalog-spi): P6 B-R2-be (schema-dict memo) fix done → ALL 5 dev…
morningman Jun 19, 2026
2612af5
fix: P2-T04 — MetaStoreProviders ServiceLoader uses the SPI's own cla…
morningman Jun 19, 2026
e1d6f88
docs(catalog-spi): P2-T04 ServiceLoader 2-arg fix done (2612af5e88f);…
morningman Jun 19, 2026
e6f5d81
fix: FIX-1 (CI 973411) — pin paimon HiveConf classloader to the plugi…
morningman Jun 19, 2026
95ebfbc
fix: FIX-2 (CI 973411) — guard null connector in materializeLatest (d…
morningman Jun 19, 2026
26a8ecd
fix: FIX-3 (CI 973411) — distinct MTMV name for genuine-null partitio…
morningman Jun 19, 2026
f6d0890
fix: FIX-4 (CI 973411) — restore paimon table cache (data snapshot + …
morningman Jun 19, 2026
16b6255
fix: CI 973469 — paimon null-partition MTMV family + no-cache schema …
morningman Jun 20, 2026
8f65cdd
fix: CI 973480 — paimon no-cache schema reads latest via schemaManage…
morningman Jun 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
17 changes: 17 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,23 @@ if [[ "${BUILD_FE}" -eq 1 ]]; then
done
unset CONN_PLUGIN_DIR conn_module conn_plugin_target conn_module_dir conn_zip

# RC-4: self-contain the paimon connector plugin for OSS. The connector sets
# fs.oss.impl=com.aliyun.jindodata.oss.JindoOssFileSystem; that impl lives in the jindofs jars,
# which are packaged from thirdparty by post-build.sh into fe/lib/jindofs (NOT a maven artifact).
# The plugin runs child-first, so without its OWN copy JindoOssFileSystem resolves from the parent
# 'app' classloader and cannot be cast to the plugin's child-loaded org.apache.hadoop.fs.FileSystem.
# Copy the jindofs jars into the paimon plugin lib so JindoOssFileSystem loads child-first alongside
# the plugin's own hadoop FileSystem (same self-contained intent as the bundled hadoop-aws/S3A).
# Naturally gated: a no-op unless jindofs was packaged (--jindofs / DISABLE_BUILD_JINDOFS=OFF).
# CAVEAT (docker-gated, enablePaimonTest=true): jindo-core ships a native lib that can bind to only one
# classloader per JVM, so this is safe only while no concurrent non-paimon path loads jindo from
# fe/lib/jindofs in the same FE process.
# Destination lib directory of the paimon connector plugin; the jindofs jars are mirrored into it.
PAIMON_CONN_LIB="${DORIS_OUTPUT}/fe/plugins/connector/paimon/lib"
# Act only when the plugin lib dir AND the packaged jindofs dir both exist.
if [[ -d "${PAIMON_CONN_LIB}" ]] && [[ -d "${DORIS_OUTPUT}/fe/lib/jindofs" ]]; then
    # Best-effort copy: stderr is discarded and a failing cp (e.g. no *.jar matched) is ignored.
    if ! cp -p "${DORIS_OUTPUT}/fe/lib/jindofs/"*.jar "${PAIMON_CONN_LIB}/" 2>/dev/null; then
        :
    fi
fi
unset PAIMON_CONN_LIB

if [ "${TARGET_SYSTEM}" = "Darwin" ] || [ "${TARGET_SYSTEM}" = "Linux" ]; then
# Recreate a clean arthas directory. The glob must stay OUTSIDE the quotes: a quoted "*" is handed
# to rm as a literal file name, so stale files from a previous build would never be removed.
mkdir -p "${DORIS_OUTPUT}/fe/arthas"
rm -rf "${DORIS_OUTPUT}/fe/arthas/"*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.common.maxcompute;
package org.apache.doris.maxcompute;

import org.apache.doris.common.maxcompute.MCProperties;

import com.aliyun.auth.credentials.Credential;
import com.aliyun.auth.credentials.provider.EcsRamRoleCredentialProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import org.apache.doris.common.jni.JniScanner;
import org.apache.doris.common.jni.vec.ColumnType;
import org.apache.doris.common.maxcompute.MCUtils;

import com.aliyun.odps.Odps;
import com.aliyun.odps.table.configuration.CompressionCodec;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.doris.common.jni.vec.VectorColumn;
import org.apache.doris.common.jni.vec.VectorTable;
import org.apache.doris.common.maxcompute.MCProperties;
import org.apache.doris.common.maxcompute.MCUtils;

import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
Expand Down Expand Up @@ -134,6 +135,12 @@ private int[] getProjected() {
}

private List<Predicate> getPredicates() {
// Backstop for a missing paimon_predicate param (scan with no pushed-down filter): a null here means
// "no filter", not an error. Guard the unconditional deserialize so the JNI reader never NPEs on
// deserialize(null) ("encodedStr is null"). The FE producer also always emits an (empty) predicate now.
if (paimonPredicate == null) {
return Collections.emptyList();
}
List<Predicate> predicates = PaimonUtils.deserialize(paimonPredicate);
if (LOG.isDebugEnabled()) {
LOG.debug("predicates:{}", predicates);
Expand Down
12 changes: 12 additions & 0 deletions fe/be-java-extensions/preload-extensions/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ under the License.
<artifactId>commons-io</artifactId>
<version>${commons-io.version}</version>
</dependency>
<!-- Runtime-only: Hive-using JNI scanners (paimon-hive-connector / hudi / HiveConf)
need org.apache.commons.lang.StringUtils (commons-lang 2.x) at runtime. It used to
arrive transitively via fe-common's odps-sdk-core; after P4-T09 (a53f2b17b8d) made
fe-common odps-free it was evicted from this shared preload classpath, breaking every
scanner with NoClassDefFoundError. Restore it here (version managed by fe/pom.xml) so
it lands in the preload-extensions runtime classpath, the shared parent of all JNI
scanners, while staying out of java-udf's own jar. -->
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-unsafe</artifactId>
Expand Down
24 changes: 8 additions & 16 deletions fe/fe-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -134,23 +134,15 @@ under the License.
<artifactId>antlr4-runtime</artifactId>
<version>${antlr4.version}</version>
</dependency>
<!-- Used by DorisHttpException (netty) and GsonUtilsBase (protobuf); previously pulled in
transitively via odps-sdk-core, now declared directly so fe-common is odps-free. -->
<dependency>
<groupId>com.aliyun.odps</groupId>
<artifactId>odps-sdk-core</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
<exclusion>
<groupId>org.ini4j</groupId>
<artifactId>ini4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk18on</artifactId>
</exclusion>
</exclusions>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
</dependencies>
<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
package org.apache.doris.connector.api;

import org.apache.doris.connector.api.scan.ConnectorScanPlanProvider;
import org.apache.doris.connector.api.write.ConnectorWritePlanProvider;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.OptionalLong;
import java.util.Set;

/**
Expand All @@ -41,6 +43,14 @@ default ConnectorScanPlanProvider getScanPlanProvider() {
return null;
}

/**
* Returns the write plan provider for sink ({@code TDataSink}) generation,
* or {@code null} if this connector does not support writes.
*
* <p>The default implementation returns {@code null}.</p>
*
* @return the write plan provider, or {@code null} when writes are not supported
*/
default ConnectorWritePlanProvider getWritePlanProvider() {
return null;
}

/** Returns the set of capabilities this connector supports. */
default Set<ConnectorCapability> getCapabilities() {
return Collections.emptySet();
Expand Down Expand Up @@ -118,4 +128,28 @@ default void close() throws IOException {
default String executeRestRequest(String path, String body) {
throw new UnsupportedOperationException("REST passthrough not supported by this connector");
}

/**
* Invalidates any connector-side per-table cache (e.g. a latest-snapshot/version cache) so a subsequent
* read reflects the latest external state. Called by the engine on {@code REFRESH TABLE}. The names are
* the REMOTE db/table names (as seen by the connector). Default no-op for connectors that cache nothing.
*
* @param dbName remote database name, as seen by the connector
* @param tableName remote table name, as seen by the connector
*/
default void invalidateTable(String dbName, String tableName) {
// Intentionally empty: the default implementation does nothing.
}

/** Invalidates all connector-side per-table caches. Default no-op. */
default void invalidateAll() {
// Intentionally empty: the default implementation does nothing.
}

/**
* Optional per-connector override of the catalog's schema-cache TTL (in seconds), consulted generically by
* the engine when sizing the schema meta-cache. Semantics match {@code schema.cache.ttl-second}:
* {@code 0} disables schema caching (always read fresh), {@code -1} = no expiration, {@code > 0} = TTL.
* Lets a connector make its own cache knob also govern schema freshness (e.g. paimon's
* {@code meta.cache.paimon.table.ttl-second}, which legacy used for the whole table cache). An explicit
* user {@code schema.cache.ttl-second} always wins over this. Default: no override.
*
* @return the TTL override in seconds, or an empty {@link OptionalLong} when the connector has no
*         override (the default implementation always returns empty)
*/
default OptionalLong schemaCacheTtlSecondOverride() {
return OptionalLong.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,54 @@ public enum ConnectorCapability {
* parallel writers should declare this capability.</p>
*/
SUPPORTS_PARALLEL_WRITE,
/**
* Indicates the connector requires dynamic-partition writes to be hash-distributed by
* partition columns and locally sorted by them before reaching the sink.
*
* <p>Streaming partition writers (e.g. the MaxCompute Storage API) close the previous
* partition writer as soon as a new partition value appears; un-grouped (unsorted)
* multi-partition rows therefore cause "writer has been closed" errors. The planner uses
* this capability to require a hash-by-partition distribution plus a mandatory local sort
* on the partition columns for dynamic-partition writes.</p>
*
* <p>A connector declaring this is expected to also declare
* {@link #SUPPORTS_PARALLEL_WRITE} (hash distribution is inherently parallel) and
* {@link #SINK_REQUIRE_FULL_SCHEMA_ORDER}: the sink distribution locates partition columns by their
* <b>full-schema</b> position in the child output, which only holds when the bind layer projects the
* write to full-schema order (the projection gated by {@code SINK_REQUIRE_FULL_SCHEMA_ORDER}). A
* connector declaring this without {@code SINK_REQUIRE_FULL_SCHEMA_ORDER} would shuffle/sort by the
* wrong column whenever cols order diverges from the full schema.</p>
*/
SINK_REQUIRE_PARTITION_LOCAL_SORT,
/**
* Indicates the connector's write path maps data columns <b>positionally</b> against the full
* table schema (e.g. MaxCompute's columnar Storage API / JNI writer), rather than by column name.
*
* <p>For such connectors the sink's output rows must be projected to <b>full table schema order</b>
* with any unmentioned columns filled (NULL / default) — exactly like the legacy MaxCompute bind
* path — so that a reordered or partial explicit column list does not land values in the wrong
* remote columns. Name-mapped connectors (e.g. JDBC, which builds an {@code INSERT INTO t (cols)}
* statement) must NOT declare this capability: their data stays in user/cols order to match the
* generated column list.</p>
*/
SINK_REQUIRE_FULL_SCHEMA_ORDER,
/**
 * Indicates the connector supports passthrough query via the {@code query()} TVF.
 *
 * <p>Connectors declaring this capability must implement
 * {@link ConnectorTableOps#getColumnsFromQuery} to provide column metadata
 * for arbitrary SQL queries passed through to the remote data source.</p>
 */
// Declared exactly once and comma-terminated, because another constant follows it in the enum.
SUPPORTS_PASSTHROUGH_QUERY,
/**
* Indicates the connector exposes per-partition statistics (record count, on-disk size,
* file count) via {@link ConnectorTableOps#listPartitions}.
*
* <p>{@code SHOW PARTITIONS} renders a rich multi-column result (Partition / PartitionKey /
* RecordCount / FileSizeInBytes / FileCount) for connectors declaring this capability, instead
* of the single partition-name column used by connectors that only implement
* {@code listPartitionNames}. This is distinct from {@link #SUPPORTS_STATISTICS}, which is
* table-level statistics for the optimizer.</p>
*/
SUPPORTS_PARTITION_STATS
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ public final class ConnectorColumn {
private final boolean nullable;
private final String defaultValue;
private final boolean isKey;
private final boolean isAutoInc;
private final boolean isAggregated;
// Marks a "with local time zone" timestamp column. fe-core's ConnectorColumnConverter translates
// this into Column.setWithTZExtraInfo() so DESC shows the WITH_TIMEZONE "Extra" marker, matching
// legacy PaimonExternalTable/PaimonSysExternalTable/IcebergUtils which set it from the SOURCE type
// root regardless of the timestamp_tz mapping flag. Defaults false; set via withTimeZone().
private final boolean withTimeZone;

public ConnectorColumn(String name, ConnectorType type, String comment,
boolean nullable, String defaultValue) {
Expand All @@ -38,12 +45,42 @@ public ConnectorColumn(String name, ConnectorType type, String comment,

/**
* Creates a column with an explicit key flag.
*
* <p>Delegates to the 7-argument constructor with {@code isAutoInc = false}; through that chain
* {@code isAggregated} and the with-time-zone marker also end up {@code false}.</p>
*/
public ConnectorColumn(String name, ConnectorType type, String comment,
boolean nullable, String defaultValue, boolean isKey) {
this(name, type, comment, nullable, defaultValue, isKey, false);
}

/**
* Creates a column with explicit key and auto-increment flags.
*
* <p>Delegates to the 8-argument constructor with {@code isAggregated = false}; through that chain
* the with-time-zone marker also ends up {@code false}.</p>
*/
public ConnectorColumn(String name, ConnectorType type, String comment,
boolean nullable, String defaultValue, boolean isKey, boolean isAutoInc) {
this(name, type, comment, nullable, defaultValue, isKey, isAutoInc, false);
}

/**
* Creates a column with explicit key, auto-increment and aggregated flags.
*
* <p>Delegates to the private all-fields constructor with the with-time-zone marker set to
* {@code false}; use {@link #withTimeZone()} to obtain a copy with the marker set.</p>
*/
public ConnectorColumn(String name, ConnectorType type, String comment,
boolean nullable, String defaultValue, boolean isKey, boolean isAutoInc,
boolean isAggregated) {
this(name, type, comment, nullable, defaultValue, isKey, isAutoInc, isAggregated, false);
}

/**
 * All-fields constructor. The 8-argument public constructor and {@link #withTimeZone()} call it
 * directly; it is the only place where the with-time-zone marker can be set.
 *
 * @throws NullPointerException if {@code name} or {@code type} is {@code null}
 */
private ConnectorColumn(String name, ConnectorType type, String comment,
        boolean nullable, String defaultValue, boolean isKey, boolean isAutoInc,
        boolean isAggregated, boolean withTimeZone) {
    // Check the mandatory parts up front, name before type, so the first offender is the one reported.
    Objects.requireNonNull(name, "name");
    Objects.requireNonNull(type, "type");

    // Identity and descriptive attributes.
    this.name = name;
    this.type = type;
    this.comment = comment;
    this.defaultValue = defaultValue;

    // Boolean attributes.
    this.nullable = nullable;
    this.isKey = isKey;
    this.isAutoInc = isAutoInc;
    this.isAggregated = isAggregated;
    this.withTimeZone = withTimeZone;
}

/**
 * Builds a new column carrying the same values as this one, but with the "with local time zone"
 * marker switched on (see {@link #isWithTimeZone()}). This instance is left as it is.
 *
 * <p>The marker is deliberately independent of the mapped {@link #getType()}, so it is preserved
 * even when the column is mapped to a plain DATETIME (timestamp_tz mapping off).</p>
 *
 * @return a copy of this column whose with-time-zone marker is {@code true}
 */
public ConnectorColumn withTimeZone() {
    final boolean markedWithTimeZone = true;
    return new ConnectorColumn(this.name, this.type, this.comment, this.nullable, this.defaultValue,
            this.isKey, this.isAutoInc, this.isAggregated, markedWithTimeZone);
}

public String getName() {
Expand All @@ -70,6 +107,18 @@ public boolean isKey() {
return isKey;
}

/** Returns the {@code isAutoInc} flag this column was constructed with. */
public boolean isAutoInc() {
    return this.isAutoInc;
}

/** Returns the {@code isAggregated} flag this column was constructed with. */
public boolean isAggregated() {
    return this.isAggregated;
}

/**
 * Returns whether this column is marked as a "with local time zone" timestamp.
 *
 * <p>The marker is {@code false} for columns created through the public constructors and
 * {@code true} for copies produced by {@link #withTimeZone()}.</p>
 */
public boolean isWithTimeZone() {
    return this.withTimeZone;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand All @@ -81,6 +130,9 @@ public boolean equals(Object o) {
ConnectorColumn that = (ConnectorColumn) o;
return nullable == that.nullable
&& isKey == that.isKey
&& isAutoInc == that.isAutoInc
&& isAggregated == that.isAggregated
&& withTimeZone == that.withTimeZone
&& name.equals(that.name)
&& type.equals(that.type)
&& Objects.equals(comment, that.comment)
Expand All @@ -89,7 +141,8 @@ public boolean equals(Object o) {

/**
 * Computes the hash from the column's fields, including the {@code isAutoInc},
 * {@code isAggregated} and {@code withTimeZone} flags that {@link #equals(Object)} compares.
 *
 * <p>There is a single return statement: a second, narrower hash computation placed before it
 * would make this one unreachable.</p>
 */
@Override
public int hashCode() {
    return Objects.hash(name, type, comment, nullable, defaultValue, isKey, isAutoInc, isAggregated,
            withTimeZone);
}

@Override
Expand Down
Loading
Loading