From e5b7602aaa8b66167ecfde548841ce8d26712163 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 1 Jun 2026 21:33:15 +0000 Subject: [PATCH 1/2] [SPARK-57198][SQL] Skip the divide-by-zero check in codegen when the divisor is a non-zero literal DivModLike (Divide/Remainder/IntegralDivide) and Pmod always emit a divide-by-zero guard `if (divisor == 0) ...` in their generated code. When the divisor is a foldable, non-null, non-zero constant the guard is dead code that can never trigger, and in ANSI mode it also registers an unreachable error-context reference in the constant pool. This skips emitting the check (and its error-context reference) when the divisor is a statically non-zero constant. Behavior and results are unchanged. Across the TPC-DS queries this removes 56 dead `if (100.0D == 0) throw divideByZeroError(...)` checks. Generated-by: Claude Code (Opus 4.8) --- .../sql/catalyst/expressions/arithmetic.scala | 87 +++++++++++++------ .../ArithmeticExpressionSuite.scala | 35 ++++++++ 2 files changed, 97 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 23fffb162a8fc..14b3e4f67fd21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -653,6 +653,14 @@ trait DivModLike extends BinaryArithmetic { case _ => x => x == 0 } + // Whether the divisor is a foldable, non-null and non-zero constant. When true, the + // divide-by-zero check is dead code (it can never trigger), so codegen skips emitting both the + // check and its otherwise-unreachable error-context reference. + private lazy val divisorIsNonZero: Boolean = right.foldable && { + val divisor = right.eval(EmptyRow) + divisor != null && !isZero(divisor) + } + final override def eval(input: InternalRow): Any = { // evaluate right first as we have a chance to skip left if right is 0 val input2 = right.eval(input) @@ -705,7 +713,9 @@ trait DivModLike extends BinaryArithmetic { s"${eval2.value} == 0" } val javaType = CodeGenerator.javaType(dataType) - val errorContextCode = getContextOrNullCode(ctx, failOnError) + // Lazy so the error-context reference is only registered when actually emitted; a statically + // non-zero divisor (see divisorIsNonZero) skips the divide-by-zero check that would use it. + lazy val errorContextCode = getContextOrNullCode(ctx, failOnError) val operation = super.dataType match { case DecimalType.Fixed(precision, scale) => val castUtils = classOf[CastUtils].getName @@ -741,25 +751,34 @@ trait DivModLike extends BinaryArithmetic { // evaluate right first as we have a chance to skip left if right is 0 if (!left.nullable && !right.nullable) { - val divByZero = if (failOnError) { - s"throw ${divideByZeroErrorCode(ctx)};" + val divisionBody = + s""" + |${eval1.code} + |$checkIntegralDivideOverflow + |$operation""".stripMargin + // A statically non-zero divisor makes the zero check dead code, so emit only the division. + val guardedBody = if (divisorIsNonZero) { + divisionBody } else { - s"${ev.isNull} = true;" + val divByZero = if (failOnError) { + s"throw ${divideByZeroErrorCode(ctx)};" + } else { + s"${ev.isNull} = true;" + } + s""" + |if ($isZero) { + | $divByZero + |} else {$divisionBody + |}""".stripMargin } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if ($isZero) { - $divByZero - } else { - ${eval1.code} - $checkIntegralDivideOverflow - $operation - }""") + $guardedBody""") } else { - val nullOnErrorCondition = if (failOnError) "" else s" || $isZero" - val failOnErrorBranch = if (failOnError) { + val nullOnErrorCondition = if (failOnError || divisorIsNonZero) "" else s" || $isZero" + val failOnErrorBranch = if (failOnError && !divisorIsNonZero) { s"if ($isZero) throw ${divideByZeroErrorCode(ctx)};" } else { "" @@ -1079,6 +1098,13 @@ case class Pmod( case _ => x => x == 0 } + // Whether the divisor is a foldable, non-null and non-zero constant. When true, the + // remainder-by-zero check is dead code, so codegen skips emitting it. + private lazy val divisorIsNonZero: Boolean = right.foldable && { + val divisor = right.eval(EmptyRow) + divisor != null && !isZero(divisor) + } + private lazy val pmodFunc: (Any, Any) => Any = dataType match { case _: IntegerType => (l, r) => pmod(l.asInstanceOf[Int], r.asInstanceOf[Int]) case _: LongType => (l, r) => pmod(l.asInstanceOf[Long], r.asInstanceOf[Long]) @@ -1119,7 +1145,9 @@ case class Pmod( } val remainder = ctx.freshName("remainder") val javaType = CodeGenerator.javaType(dataType) - val errorContext = getContextOrNullCode(ctx) + // Lazy so the error-context reference is only registered when actually emitted; a statically + // non-zero divisor (see divisorIsNonZero) skips the remainder-by-zero check that would use it. + lazy val errorContext = getContextOrNullCode(ctx) val result = dataType match { case DecimalType.Fixed(precision, scale) => val decimalAdd = "$plus" @@ -1158,24 +1186,33 @@ case class Pmod( // evaluate right first as we have a chance to skip left if right is 0 if (!left.nullable && !right.nullable) { - val divByZero = if (failOnError) { - s"throw QueryExecutionErrors.remainderByZeroError($errorContext);" + val remainderBody = + s""" + |${eval1.code} + |$result""".stripMargin + // A statically non-zero divisor makes the zero check dead code, so emit only the remainder. + val guardedBody = if (divisorIsNonZero) { + remainderBody } else { - s"${ev.isNull} = true;" + val divByZero = if (failOnError) { + s"throw QueryExecutionErrors.remainderByZeroError($errorContext);" + } else { + s"${ev.isNull} = true;" + } + s""" + |if ($isZero) { + | $divByZero + |} else {$remainderBody + |}""".stripMargin } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if ($isZero) { - $divByZero - } else { - ${eval1.code} - $result - }""") + $guardedBody""") } else { - val nullOnErrorCondition = if (failOnError) "" else s" || $isZero" - val failOnErrorBranch = if (failOnError) { + val nullOnErrorCondition = if (failOnError || divisorIsNonZero) "" else s" || $isZero" + val failOnErrorBranch = if (failOnError && !divisorIsNonZero) { s"if ($isZero) throw QueryExecutionErrors.remainderByZeroError($errorContext);" } else { "" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 7d659fca5df2a..f8cdf825bc4c8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -912,6 +912,41 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } } + test("SPARK-57198: skip the divide-by-zero check when the divisor is a non-zero literal") { + def codeOf(e: Expression): String = e.genCode(new CodegenContext).code.toString + + Seq(true, false).foreach { ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + // A non-zero literal divisor makes the zero check dead code; codegen must not emit it. + Seq( + Divide(Literal(1.0), Literal(2.0)), + Remainder(Literal(7), Literal(3)), + IntegralDivide(Literal(7L), Literal(3L)), + Pmod(Literal(7), Literal(3))).foreach { e => + val code = codeOf(e) + assert(!code.contains("ByZeroError"), + s"expected no by-zero check for a non-zero literal divisor in $e:\n$code") + } + + // A variable (non-foldable) divisor keeps the check: in ANSI mode the by-zero error must + // still be generated. + Seq( + Divide( + BoundReference(0, DoubleType, nullable = false), + BoundReference(1, DoubleType, nullable = false)), + Pmod( + BoundReference(0, IntegerType, nullable = false), + BoundReference(1, IntegerType, nullable = false))).foreach { e => + val code = codeOf(e) + if (ansi) { + assert(code.contains("ByZeroError"), + s"expected a by-zero check for a variable divisor in $e:\n$code") + } + } + } + } + } + test("SPARK-34677: exact add and subtract of day-time and year-month intervals") { Seq(EvalMode.ANSI, EvalMode.LEGACY).foreach { evalMode => checkExceptionInExpression[ArithmeticException]( From f8ad5868f4fc59933d27e228e163ad4a2d9aa634 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 1 Jun 2026 21:40:32 +0000 Subject: [PATCH 2/2] [SPARK-57198][SQL] Match Literal divisor directly instead of foldable + eval A foldable divisor is already constant-folded to a Literal before codegen, so detect the non-zero constant divisor by matching `Literal` directly rather than checking `foldable` and evaluating the expression at codegen time. Generated-by: Claude Code (Opus 4.8) --- .../sql/catalyst/expressions/arithmetic.scala | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 14b3e4f67fd21..3d1af376ab96f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -653,12 +653,13 @@ trait DivModLike extends BinaryArithmetic { case _ => x => x == 0 } - // Whether the divisor is a foldable, non-null and non-zero constant. When true, the - // divide-by-zero check is dead code (it can never trigger), so codegen skips emitting both the - // check and its otherwise-unreachable error-context reference. - private lazy val divisorIsNonZero: Boolean = right.foldable && { - val divisor = right.eval(EmptyRow) - divisor != null && !isZero(divisor) + // Whether the divisor is a non-null and non-zero literal (foldable divisors are already folded + // to a literal before codegen). When true, the divide-by-zero check is dead code (it can never + // trigger), so codegen skips emitting both the check and its otherwise-unreachable error-context + // reference. + private lazy val divisorIsNonZero: Boolean = right match { + case Literal(divisor, _) => divisor != null && !isZero(divisor) + case _ => false } final override def eval(input: InternalRow): Any = { @@ -1098,11 +1099,12 @@ case class Pmod( case _ => x => x == 0 } - // Whether the divisor is a foldable, non-null and non-zero constant. When true, the - // remainder-by-zero check is dead code, so codegen skips emitting it. - private lazy val divisorIsNonZero: Boolean = right.foldable && { - val divisor = right.eval(EmptyRow) - divisor != null && !isZero(divisor) + // Whether the divisor is a non-null and non-zero literal (foldable divisors are already folded + // to a literal before codegen). When true, the remainder-by-zero check is dead code, so codegen + // skips emitting it. + private lazy val divisorIsNonZero: Boolean = right match { + case Literal(divisor, _) => divisor != null && !isZero(divisor) + case _ => false } private lazy val pmodFunc: (Any, Any) => Any = dataType match {