-
Notifications
You must be signed in to change notification settings - Fork 226
[AURON #2321] Support Iceberg column rename and drop-then-add in the native scan #2322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
6c72cff
70e6956
44261cb
b2a6740
41cf5ba
854aa04
c04a037
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,13 +16,72 @@ | |
| */ | ||
| package org.apache.iceberg.spark.source | ||
|
|
||
| import scala.collection.JavaConverters._ | ||
|
|
||
| import org.apache.iceberg.types.TypeUtil | ||
|
|
||
| object AuronIcebergSourceUtil { | ||
|
|
||
/**
 * Outcome of `detectRenameOrDrop`: `topLevel` is set when a detected rename or drop involves a
 * top-level column, `nested` when it involves only nested fields.
 */
| final case class RenameOrDrop(topLevel: Boolean, nested: Boolean) | ||
|
|
||
| /** Returns the runtime class object for `SparkBatchQueryScan`. */ | ||
| def getClassOfSparkBatchQueryScan(): Class[SparkBatchQueryScan] = | ||
| classOf[SparkBatchQueryScan] | ||
|
|
||
| /** Returns the runtime class object for `SparkInputPartition`. */ | ||
| def getClassOfSparkInputPartition(): Class[SparkInputPartition] = | ||
| classOf[SparkInputPartition] | ||
|
|
||
| /** | ||
| * Maps each top-level column name of the scan's expected schema to its Iceberg field id. | ||
| * | ||
| * @param scan the scan object; it is cast to `SparkBatchQueryScan` | ||
| * @return column name -> field id for the columns of `expectedSchema()` | ||
| */ | ||
| def expectedFieldIds(scan: AnyRef): Map[String, Int] = { | ||
| val topLevelColumns = asBatchQueryScan(scan).expectedSchema().columns().asScala | ||
| (for (column <- topLevelColumns) yield column.name() -> column.fieldId()).toMap | ||
| } | ||
|
|
||
/**
 * Compares the table's current schema with every other schema returned by `table.schemas()`
 * and reports whether any field id was renamed or is no longer present.
 *
 * Field ids are the join key: a field is looked up in the current schema by the id it had in
 * the other schema. Flags only ever switch from false to true while folding over the schemas.
 *
 * @param scan the scan object; it is cast to `SparkBatchQueryScan`
 * @return the accumulated `RenameOrDrop` flags
 */
| def detectRenameOrDrop(scan: AnyRef): RenameOrDrop = {  | ||
| val table = asBatchQueryScan(scan).table() | ||
| val currentFields = collectFieldIdToName(table.schema()) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two observations on
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed both points:
|
||
|
|
||
// Skip the entry whose id equals the current schema id; only the remaining schemas are compared.
| table | ||
| .schemas() | ||
| .asScala | ||
| .filterNot(_._1 == table.schema().schemaId()) | ||
| .values | ||
| .foldLeft(RenameOrDrop(topLevel = false, nested = false)) { (result, schema) => | ||
| collectFieldIdToName(schema).foldLeft(result) { | ||
| case (currentResult, (fieldId, historicalField)) => | ||
| currentFields.get(fieldId) match { | ||
// Same id, different name: a rename. It counts as top-level if either side is top-level.
| case Some(currentField) if currentField.name != historicalField.name => | ||
| if (historicalField.topLevel || currentField.topLevel) { | ||
| currentResult.copy(topLevel = true) | ||
| } else { | ||
| currentResult.copy(nested = true) | ||
| } | ||
// Id absent from the current schema: a drop, classified by where the field used to live.
| case None => | ||
| if (historicalField.topLevel) { | ||
| currentResult.copy(topLevel = true) | ||
| } else { | ||
| currentResult.copy(nested = true) | ||
| } | ||
| case _ => | ||
| currentResult | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
// A field's name paired with whether its id belongs to one of the schema's top-level columns.
| final private case class FieldIdentity(name: String, topLevel: Boolean) | ||
|
|
||
| /** | ||
| * Indexes every field of `schema` (as returned by `TypeUtil.indexById`) by field id, recording | ||
| * its name and whether the id is one of the schema's top-level column ids. | ||
| */ | ||
| private def collectFieldIdToName(schema: org.apache.iceberg.Schema): Map[Int, FieldIdentity] = { | ||
| val rootIds = schema.columns().asScala.map(_.fieldId()).toSet | ||
| val fieldsById = TypeUtil.indexById(schema.asStruct()).asScala | ||
| fieldsById.foldLeft(Map.empty[Int, FieldIdentity]) { case (indexed, (rawId, nestedField)) => | ||
| val id = rawId.toInt | ||
| indexed.updated(id, FieldIdentity(nestedField.name(), rootIds.contains(id))) | ||
| } | ||
| } | ||
|
|
||
| /** Casts the untyped scan reference to `SparkBatchQueryScan`. */ | ||
| private def asBatchQueryScan(scan: AnyRef): SparkBatchQueryScan = { | ||
| scan.asInstanceOf[SparkBatchQueryScan] | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,7 @@ import org.apache.iceberg.spark.source.AuronIcebergSourceUtil | |
| import org.apache.spark.internal.Logging | ||
| import org.apache.spark.sql.auron.NativeConverters | ||
| import org.apache.spark.sql.catalyst.expressions.{And => SparkAnd, AttributeReference, EqualTo, Expression => SparkExpression, GreaterThan, GreaterThanOrEqual, In, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not => SparkNot, Or => SparkOr} | ||
| import org.apache.spark.sql.catalyst.trees.TreeNodeTag | ||
| import org.apache.spark.sql.connector.read.InputPartition | ||
| import org.apache.spark.sql.execution.datasources.v2.BatchScanExec | ||
| import org.apache.spark.sql.internal.SQLConf | ||
|
|
@@ -41,11 +42,24 @@ final case class IcebergScanPlan( | |
| readSchema: StructType, | ||
| fileSchema: StructType, | ||
| partitionSchema: StructType, | ||
| pruningPredicates: Seq[pb.PhysicalExprNode]) | ||
| pruningPredicates: Seq[pb.PhysicalExprNode], | ||
| fieldIdsByName: Map[String, Int]) | ||
|
|
||
| object IcebergScanSupport extends Logging { | ||
// Tag under which `plan` stores its result on the BatchScanExec node. The stored value is the
// whole Option, so a `None` planning outcome is remembered as well as a successful plan.
| private val scanPlanTag: TreeNodeTag[Option[IcebergScanPlan]] = TreeNodeTag( | ||
| "auron.iceberg.scan.plan") | ||
|
|
||
| /** | ||
| * Returns the Iceberg scan plan for `exec`, reusing the value already tagged on the node when | ||
| * present; otherwise computes it with `planUncached` and tags the node with the result. | ||
| */ | ||
| def plan(exec: BatchScanExec): Option[IcebergScanPlan] = | ||
| exec.getTagValue(scanPlanTag).getOrElse { | ||
| val freshPlan = planUncached(exec) | ||
| exec.setTagValue(scanPlanTag, freshPlan) | ||
| freshPlan | ||
| } | ||
|
|
||
| private def planUncached(exec: BatchScanExec): Option[IcebergScanPlan] = { | ||
| val scan = exec.scan | ||
| val scanClassName = scan.getClass.getName | ||
| // Only handle Iceberg scans; other sources must stay on Spark's path. | ||
|
|
@@ -75,6 +89,31 @@ object IcebergScanSupport extends Logging { | |
| partitionSchema.fields.forall(field => NativeConverters.isTypeSupported(field.dataType)), | ||
| "Has unsupported schema type.") | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The […] Consider separating them:
```scala
val fieldIdsByName = try {
  AuronIcebergSourceUtil.expectedFieldIds(scan.asInstanceOf[AnyRef])
} catch { case NonFatal(t) => logWarning(...); return None }
val renameOrDrop = try {
  AuronIcebergSourceUtil.detectRenameOrDrop(scan.asInstanceOf[AnyRef])
} catch { case NonFatal(t) =>
  logWarning(...)
  AuronIcebergSourceUtil.RenameOrDrop(topLevel = true, nested = true) // conservative
}
```
This way a transient schema-history failure can still fall back on the ORC/nested guards while preserving field-id matching for Parquet.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Split this into two independent inspection steps.
This avoids reporting a misleading nested rename/drop fallback reason when the actual issue is schema-history inspection failure.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||
| val fieldIdsByName = | ||
| try { | ||
| AuronIcebergSourceUtil.expectedFieldIds(scan.asInstanceOf[AnyRef]) | ||
| } catch { | ||
| case NonFatal(t) => | ||
| logWarning(s"Failed to inspect Iceberg field ids for $scanClassName.", t) | ||
| return None | ||
| } | ||
|
|
||
| val renameOrDrop = | ||
| try { | ||
| AuronIcebergSourceUtil.detectRenameOrDrop(scan.asInstanceOf[AnyRef]) | ||
| } catch { | ||
| case NonFatal(t) => | ||
| logWarning(s"Failed to inspect Iceberg schema history for $scanClassName.", t) | ||
| return None | ||
| } | ||
| assert(!renameOrDrop.nested, "Nested Iceberg rename or drop is not supported.") | ||
|
|
||
| val missingFieldIds = | ||
| fileSchema.fields.filterNot(field => fieldIdsByName.contains(field.name)).map(_.name) | ||
| assert( | ||
| missingFieldIds.isEmpty, | ||
| s"Missing Iceberg field ids for columns: ${missingFieldIds.mkString(", ")}") | ||
|
|
||
| val partitions = inputPartitions(exec) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Nit: the assertion message […]
```scala
val missing = fileSchema.fields.filterNot(f => fieldIdsByName.contains(f.name)).map(_.name)
assert(missing.isEmpty, s"Missing Iceberg field ids for columns: ${missing.mkString(", ")}")
```
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the assertion to be more explicit. |
||
| // Empty scan (e.g. empty table) should still build a plan to return no rows. | ||
| if (partitions.isEmpty) { | ||
|
|
@@ -86,7 +125,8 @@ object IcebergScanSupport extends Logging { | |
| readSchema, | ||
| fileSchema, | ||
| partitionSchema, | ||
| Seq.empty)) | ||
| Seq.empty, | ||
| fieldIdsByName)) | ||
| } | ||
|
|
||
| val icebergPartitions = partitions.flatMap(icebergPartition) | ||
|
|
@@ -110,6 +150,9 @@ object IcebergScanSupport extends Logging { | |
| assert( | ||
| !(format != FileFormat.PARQUET && format != FileFormat.ORC), | ||
| "Only support parquet or orc.") | ||
| assert( | ||
| !(format == FileFormat.ORC && renameOrDrop.topLevel), | ||
| "Iceberg ORC rename or drop is not supported.") | ||
|
|
||
| val pruningPredicates = collectPruningPredicates(scan.asInstanceOf[AnyRef], readSchema) | ||
| Some( | ||
|
|
@@ -119,7 +162,8 @@ object IcebergScanSupport extends Logging { | |
| readSchema, | ||
| fileSchema, | ||
| partitionSchema, | ||
| pruningPredicates)) | ||
| pruningPredicates, | ||
| fieldIdsByName)) | ||
| } | ||
|
|
||
| private def collectUnsupportedMetadataColumns(schema: StructType): Seq[String] = | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When `table_field` has a `PARQUET:field_id` but `file_field` does not, `is_some_and` returns `false` and there is no name-based fallback — the column simply doesn't match.
For spec-compliant Iceberg Parquet files this is fine (the Iceberg spec mandates field IDs, and arrow-rs populates them into Arrow metadata). But if an older Parquet writer omitted the `field_id` in the Thrift `SchemaElement`, or if a non-Iceberg Parquet file happens to be served through this path, every column would fail to match and the scan would produce all-NULL rows.
Consider falling back to name matching when `file_field` lacks a field ID:
This preserves field-id matching when both sides have IDs, but degrades gracefully to name matching otherwise.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated `fields_match` to use a nested match. When `file_field` lacks a field id, it now falls back to case-insensitive name matching.