From 18cf4fffeb514722d1666b2da6b06d794b03e7d0 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 2 Jun 2026 21:59:21 +0200 Subject: [PATCH 1/4] [SPARK-57211][SQL] Cast strings to TIMESTAMP_NTZ(p)/TIMESTAMP_LTZ(p) Wire Cast to support CAST(<string> AS TIMESTAMP_NTZ(p)) and CAST(<string> AS TIMESTAMP_LTZ(p)) for fractional-seconds precision p in [7, 9], on both the interpreted and codegen paths and across LEGACY, ANSI and TRY eval modes. Reuses the SPARK-57032 string->nanos parse helpers on SparkDateTimeUtils, which already return a normalized TimestampNanosVal and apply per-precision truncation. - Add StringType -> Timestamp{NTZ,LTZ}NanosType arms to canCast/canAnsiCast. - Add (StringType, TimestampLTZNanosType) to Cast.needsTimeZone (NTZ string is zone-independent, mirroring micro TIMESTAMP_NTZ). - Add interpreted castToTimestamp{LTZ,NTZ}Nanos and matching codegen, dispatched with the precision taken from the target type. NTZ adopts allowTimeZone = true to match the micro TIMESTAMP_NTZ string cast. Tests cover success cases over p in [7, 9], ANSI parse errors, LEGACY/TRY null on malformed input, and a flag-off FEATURE_NOT_ENABLED guard. 
--- .../catalyst/util/SparkDateTimeUtils.scala | 7 +- .../spark/sql/catalyst/expressions/Cast.scala | 95 ++++++++++++++++++- .../catalyst/expressions/CastSuiteBase.scala | 49 +++++++++- .../expressions/CastWithAnsiOffSuite.scala | 12 +++ .../expressions/CastWithAnsiOnSuite.scala | 18 ++++ 5 files changed, 176 insertions(+), 5 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index d7200715f9374..29f280fdd09c7 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -946,9 +946,10 @@ trait SparkDateTimeUtils { s: UTF8String, precision: Int, context: QueryContext = null): TimestampNanosVal = { - // TODO(SPARK-57032): when this is wired to a user-facing CAST(... AS TIMESTAMP_NTZ(p)), the - // cast must decide `allowTimeZone` explicitly (per ANSI/legacy mode) instead of relying on - // the `true` default used here, which silently discards a zone suffix. + // CAST(... AS TIMESTAMP_NTZ(p)) intentionally uses `allowTimeZone = true` here, mirroring the + // micro `TIMESTAMP_NTZ` string cast (`stringToTimestampWithoutTimeZoneAnsi`): a zone suffix in + // the input is silently discarded rather than rejected. Callers that need strict NTZ rejection + // should call `stringToTimestampNTZNanos` directly with `allowTimeZone = false`. 
stringToTimestampNTZNanos(s, precision).getOrElse { throw ExecutionErrors.invalidInputInCastToDatetimeError( s, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index ad3e22dc22575..d59b5a44d436d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte, import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal} +import org.apache.spark.unsafe.types.{BinaryView, TimestampNanosVal, UTF8String, VariantVal} import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper} import org.apache.spark.util.ArrayImplicits._ @@ -113,6 +113,9 @@ object Cast extends QueryErrorsBase { case (DateType, TimestampNTZType) => true case (TimestampType, TimestampNTZType) => true + case (_: StringType, _: TimestampNTZNanosType) => true + case (_: StringType, _: TimestampLTZNanosType) => true + case (_: StringType, _: CalendarIntervalType) => true case (_: StringType, _: AnsiIntervalType) => true @@ -248,6 +251,9 @@ object Cast extends QueryErrorsBase { case (DateType, TimestampNTZType) => true case (TimestampType, TimestampNTZType) => true + case (_: StringType, _: TimestampNTZNanosType) => true + case (_: StringType, _: TimestampLTZNanosType) => true + case (_: StringType, DateType) => true case (_: StringType, _: TimeType) => true case (TimestampType, DateType) => true @@ -335,6 +341,9 @@ object Cast extends QueryErrorsBase { case (TimestampType, DateType) => true case (TimestampType, TimestampNTZType) => true case (TimestampNTZType, TimestampType) => true + // NTZ 
string is zone-independent (mirroring micro TIMESTAMP_NTZ, which is not listed); only + // the LTZ string parse depends on the session time zone. + case (_: StringType, _: TimestampLTZNanosType) => true case (ArrayType(fromType, _), ArrayType(toType, _)) => needsTimeZone(fromType, toType) case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) => needsTimeZone(fromKey, toKey) || needsTimeZone(fromValue, toValue) @@ -786,6 +795,30 @@ case class Cast( buildCast[Long](_, ts => convertTz(ts, ZoneOffset.UTC, zoneId)) } + private[this] def castToTimestampLTZNanos( + from: DataType, + precision: Int): Any => Any = from match { + case _: StringType => + buildCast[UTF8String](_, utfs => + if (ansiEnabled) { + DateTimeUtils.stringToTimestampLTZNanosAnsi(utfs, precision, zoneId, getContextOrNull()) + } else { + DateTimeUtils.stringToTimestampLTZNanos(utfs, precision, zoneId).orNull + }) + } + + private[this] def castToTimestampNTZNanos( + from: DataType, + precision: Int): Any => Any = from match { + case _: StringType => + buildCast[UTF8String](_, utfs => + if (ansiEnabled) { + DateTimeUtils.stringToTimestampNTZNanosAnsi(utfs, precision, getContextOrNull()) + } else { + DateTimeUtils.stringToTimestampNTZNanos(utfs, precision).orNull + }) + } + private[this] def decimalToTimestamp(d: Decimal): Long = { (d.toBigDecimal * MICROS_PER_SECOND).longValue } @@ -1299,6 +1332,8 @@ case class Cast( case decimal: DecimalType => castToDecimal(from, decimal) case TimestampType => castToTimestamp(from) case TimestampNTZType => castToTimestampNTZ(from) + case t: TimestampNTZNanosType => castToTimestampNTZNanos(from, t.precision) + case t: TimestampLTZNanosType => castToTimestampLTZNanos(from, t.precision) case CalendarIntervalType => castToInterval(from) case it: DayTimeIntervalType => castToDayTimeInterval(from, it) case it: YearMonthIntervalType => castToYearMonthInterval(from, it) @@ -1409,6 +1444,8 @@ case class Cast( case decimal: DecimalType => castToDecimalCode(from, 
decimal, ctx) case TimestampType => castToTimestampCode(from, ctx) case TimestampNTZType => castToTimestampNTZCode(from, ctx) + case t: TimestampNTZNanosType => castToTimestampNTZNanosCode(from, t.precision, ctx) + case t: TimestampLTZNanosType => castToTimestampLTZNanosCode(from, t.precision, ctx) case CalendarIntervalType => castToIntervalCode(from) case it: DayTimeIntervalType => castToDayTimeIntervalCode(from, it) case it: YearMonthIntervalType => castToYearMonthIntervalCode(from, it) @@ -1772,6 +1809,62 @@ case class Cast( code"$evPrim = $dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid);" } + private[this] def castToTimestampLTZNanosCode( + from: DataType, + precision: Int, + ctx: CodegenContext): CastFunction = from match { + case _: StringType => + val zoneIdClass = classOf[ZoneId] + val zid = JavaCode.global( + ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName), + zoneIdClass) + val tsOpt = ctx.freshVariable("tsOpt", classOf[Option[TimestampNanosVal]]) + (c, evPrim, evNull) => + if (ansiEnabled) { + val errorContext = getContextOrNullCode(ctx) + code""" + $evPrim = $dateTimeUtilsCls.stringToTimestampLTZNanosAnsi( + $c, $precision, $zid, $errorContext); + """ + } else { + code""" + scala.Option $tsOpt = + $dateTimeUtilsCls.stringToTimestampLTZNanos($c, $precision, $zid); + if ($tsOpt.isDefined()) { + $evPrim = (TimestampNanosVal) $tsOpt.get(); + } else { + $evNull = true; + } + """ + } + } + + private[this] def castToTimestampNTZNanosCode( + from: DataType, + precision: Int, + ctx: CodegenContext): CastFunction = from match { + case _: StringType => + val tsOpt = ctx.freshVariable("tsOpt", classOf[Option[TimestampNanosVal]]) + (c, evPrim, evNull) => + if (ansiEnabled) { + val errorContext = getContextOrNullCode(ctx) + code""" + $evPrim = $dateTimeUtilsCls.stringToTimestampNTZNanosAnsi( + $c, $precision, $errorContext); + """ + } else { + code""" + scala.Option $tsOpt = + $dateTimeUtilsCls.stringToTimestampNTZNanos($c, $precision, true); 
+ if ($tsOpt.isDefined()) { + $evPrim = (TimestampNanosVal) $tsOpt.get(); + } else { + $evNull = true; + } + """ + } + } + private[this] def castToIntervalCode(from: DataType): CastFunction = from match { case _: StringType => val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index e888432ef91eb..b33045ad90a83 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -22,7 +22,7 @@ import java.time.{Duration, LocalDate, LocalDateTime, LocalTime, Period} import java.time.temporal.ChronoUnit import java.util.{Calendar, Locale, TimeZone} -import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException} +import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration +import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes} @@ -1023,6 +1024,52 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { LocalDateTime.of(2021, 6, 17, 0, 0)) } + test("SPARK-57211: cast string to timestamp_ltz with nanosecond precision") { + foreachNanosPrecision { precision => + val 
truncate = nanoOfSecTruncator(precision) + outstandingZoneIds.foreach { zid => + specialNanosTs.foreach { s => + val ldt = parseSpecialNanosNTZ(s).withNano(truncate(parseSpecialNanosNTZ(s).getNano)) + val expected = instantToNanosVal(ldt.atZone(zid).toInstant) + checkEvaluation( + cast(Literal(s), TimestampLTZNanosType(precision), Option(zid.getId)), + expected) + } + } + } + } + + test("SPARK-57211: cast string to timestamp_ntz with nanosecond precision") { + foreachNanosPrecision { precision => + val truncate = nanoOfSecTruncator(precision) + specialNanosTs.foreach { s => + val ldt = parseSpecialNanosNTZ(s).withNano(truncate(parseSpecialNanosNTZ(s).getNano)) + val expected = localDateTimeToNanosVal(ldt) + // NTZ result is independent of the session time zone. + checkEvaluation(cast(Literal(s), TimestampNTZNanosType(precision)), expected) + // A zone suffix is discarded (allowTimeZone = true), mirroring micro TIMESTAMP_NTZ. + checkEvaluation(cast(Literal(s + "Z"), TimestampNTZNanosType(precision)), expected) + } + } + } + + test("SPARK-57211: nanosecond timestamp cast requires the preview flag") { + withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") { + val expectedParams = Map( + "featureName" -> "Nanosecond-precision timestamp types", + "configKey" -> "spark.sql.timestampNanosTypes.enabled", + "configValue" -> "true") + Seq(TimestampNTZNanosType(9), TimestampLTZNanosType(9)).foreach { to => + checkError( + exception = intercept[SparkException] { + cast(Literal("2020-01-01 00:00:00"), to, UTC_OPT).checkInputDataTypes() + }, + condition = "FEATURE_NOT_ENABLED", + parameters = expectedParams) + } + } + } + test("SPARK-35112: Cast string to day-time interval") { checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType()), 0L) checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala index ec347a14a9a41..f0f6cd38d725f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala @@ -56,6 +56,18 @@ class CastWithAnsiOffSuite extends CastSuiteBase { checkEvaluation(cast(123L, DecimalType(2, 0)), null) } + test("SPARK-57211: legacy mode cast malformed string to nanosecond timestamp returns null") { + Seq("123", "2015-03-18 123142", "2015-03-18X", "abdef").foreach { str => + org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils.foreachNanosPrecision { + precision => + checkEvaluation( + cast(Literal(str), TimestampLTZNanosType(precision), UTC_OPT), null) + checkEvaluation( + cast(Literal(str), TimestampNTZNanosType(precision)), null) + } + } + } + test("cast from int #2") { checkEvaluation(cast(cast(1000, TimestampType), LongType), 1000.toLong) checkEvaluation(cast(cast(-1200, TimestampType), LongType), -1200.toLong) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala index b76aec6d6ce0e..ce7850d8c9c11 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC} +import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils.foreachNanosPrecision import 
org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -800,6 +801,23 @@ class CastWithAnsiOnSuite extends CastSuiteBase with QueryErrorsBase { } } + test("SPARK-57211: ANSI mode cast string to nanosecond timestamp with parse error") { + val invalidInputs = Seq( + "123", "2015-03-18 123142", "2015-03-18X", "2015/03/18", "abdef", "2015-031-8") + DateTimeTestUtils.outstandingZoneIds.foreach { zid => + foreachNanosPrecision { precision => + invalidInputs.foreach { str => + checkExceptionInExpression[DateTimeException]( + cast(Literal(str), TimestampLTZNanosType(precision), Option(zid.getId)), + castErrMsg(str, TimestampLTZNanosType(precision))) + checkExceptionInExpression[DateTimeException]( + cast(Literal(str), TimestampNTZNanosType(precision)), + castErrMsg(str, TimestampNTZNanosType(precision))) + } + } + } + } + test("ANSI mode: cast string to date with parse error") { DateTimeTestUtils.outstandingZoneIds.foreach { zid => def checkCastWithParseError(str: String): Unit = { From 2dd88e42489feb9463b726722ebb98a2a69fc583 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 2 Jun 2026 22:11:22 +0200 Subject: [PATCH 2/4] [SPARK-57211][SQL] Add cast.sql golden checks for string to nanos timestamps Add end-to-end golden-file coverage to cast.sql for casting strings to TIMESTAMP_NTZ(p)/TIMESTAMP_LTZ(p), mirroring the existing timestamp, timestamp_ntz and TIME cast checks: - Positive cases assert the result type via typeof (the reverse direction, nanos -> string rendering, is not wired yet; tracked under SPARK-57162). - Negative cases exercise the parse-error path: ANSI mode throws CAST_INVALID_INPUT, non-ANSI returns NULL. Golden files regenerated with SPARK_GENERATE_GOLDEN_FILES=1. 
--- .../sql-tests/analyzer-results/cast.sql.out | 28 ++++++++ .../analyzer-results/nonansi/cast.sql.out | 28 ++++++++ .../test/resources/sql-tests/inputs/cast.sql | 8 +++ .../resources/sql-tests/results/cast.sql.out | 66 +++++++++++++++++++ .../sql-tests/results/nonansi/cast.sql.out | 32 +++++++++ 5 files changed, 162 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out index 053d7af3df45f..0c028b3a0d3d4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out @@ -641,6 +641,34 @@ Project [cast(a as timestamp_ntz) AS CAST(a AS TIMESTAMP_NTZ)#x] +- OneRowRelation +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))) +-- !query analysis +Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))) +-- !query analysis +Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ltz(7))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)))#x] ++- OneRowRelation + + +-- !query +select cast('a' as timestamp_ntz(9)) +-- !query analysis +Project [cast(a as timestamp_ntz(9)) AS CAST(a AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +select cast('a' as timestamp_ltz(9)) +-- !query analysis +Project [cast(a as timestamp_ltz(9)) AS CAST(a AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + -- !query select cast(cast('inf' as double) as timestamp) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out index 0113716bdf712..1dfe31bc190c4 100644 --- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out @@ -505,6 +505,34 @@ Project [cast(a as timestamp_ntz) AS CAST(a AS TIMESTAMP_NTZ)#x] +- OneRowRelation +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))) +-- !query analysis +Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x] ++- OneRowRelation + + +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))) +-- !query analysis +Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ltz(7))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)))#x] ++- OneRowRelation + + +-- !query +select cast('a' as timestamp_ntz(9)) +-- !query analysis +Project [cast(a as timestamp_ntz(9)) AS CAST(a AS TIMESTAMP_NTZ(9))#x] ++- OneRowRelation + + +-- !query +select cast('a' as timestamp_ltz(9)) +-- !query analysis +Project [cast(a as timestamp_ltz(9)) AS CAST(a AS TIMESTAMP_LTZ(9))#x] ++- OneRowRelation + + -- !query select cast(cast('inf' as double) as timestamp) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 9d191dff67028..ad6d1a09b4078 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -102,6 +102,14 @@ select cast('a' as timestamp); select cast('2022-01-01 00:00:00' as timestamp_ntz); select cast('a' as timestamp_ntz); +-- SPARK-57211: cast string to nanosecond-precision timestamps TIMESTAMP_NTZ(p)/TIMESTAMP_LTZ(p). +-- The reverse direction (nanos -> string) is not wired yet, so positive cases assert the result +-- type via typeof; negative cases exercise the ANSI parse-error path. 
+select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))); +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))); +select cast('a' as timestamp_ntz(9)); +select cast('a' as timestamp_ltz(9)); + select cast(cast('inf' as double) as timestamp); select cast(cast('inf' as float) as timestamp); diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index ca2f739113f13..c592f213d3fac 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1288,6 +1288,72 @@ org.apache.spark.SparkDateTimeException } +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))) +-- !query schema +struct +-- !query output +timestamp_ntz(9) + + +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))) +-- !query schema +struct +-- !query output +timestamp_ltz(7) + + +-- !query +select cast('a' as timestamp_ntz(9)) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkDateTimeException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'a'", + "sourceType" : "\"STRING\"", + "targetType" : "\"TIMESTAMP_NTZ(9)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "cast('a' as timestamp_ntz(9))" + } ] +} + + +-- !query +select cast('a' as timestamp_ltz(9)) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkDateTimeException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'a'", + "sourceType" : "\"STRING\"", + "targetType" : "\"TIMESTAMP_LTZ(9)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, 
+ "stopIndex" : 36, + "fragment" : "cast('a' as timestamp_ltz(9))" + } ] +} + + -- !query select cast(cast('inf' as double) as timestamp) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out index 64d7b35970551..4d5237e05b6eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out @@ -584,6 +584,38 @@ struct NULL +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))) +-- !query schema +struct +-- !query output +timestamp_ntz(9) + + +-- !query +select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))) +-- !query schema +struct +-- !query output +timestamp_ltz(7) + + +-- !query +select cast('a' as timestamp_ntz(9)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('a' as timestamp_ltz(9)) +-- !query schema +struct +-- !query output +NULL + + -- !query select cast(cast('inf' as double) as timestamp) -- !query schema From 4043bb3e2664098ee59d49978517134e8e182a99 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 08:05:52 +0200 Subject: [PATCH 3/4] [SPARK-57211][SQL] Keep cast.sql nanos result columns serializable for thrift ThriftServerQueryTestSuite failed on nonansi/cast.sql because a bare TIMESTAMP_NTZ(9)/TIMESTAMP_LTZ(9) result column cannot be mapped to a JDBC/Hive type name yet (nanos -> string serialization is out of scope, tracked under SPARK-57162). Wrap the negative cast checks in IS NULL so the result column is boolean; the ANSI parse-error path is unchanged. 
--- .../sql-tests/analyzer-results/cast.sql.out | 8 ++++---- .../sql-tests/analyzer-results/nonansi/cast.sql.out | 8 ++++---- .../src/test/resources/sql-tests/inputs/cast.sql | 7 ++++--- .../test/resources/sql-tests/results/cast.sql.out | 4 ++-- .../resources/sql-tests/results/nonansi/cast.sql.out | 12 ++++++------ 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out index 0c028b3a0d3d4..b077443a9f28c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out @@ -656,16 +656,16 @@ Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ltz(7))) AS type -- !query -select cast('a' as timestamp_ntz(9)) +select cast('a' as timestamp_ntz(9)) is null -- !query analysis -Project [cast(a as timestamp_ntz(9)) AS CAST(a AS TIMESTAMP_NTZ(9))#x] +Project [isnull(cast(a as timestamp_ntz(9))) AS (CAST(a AS TIMESTAMP_NTZ(9)) IS NULL)#x] +- OneRowRelation -- !query -select cast('a' as timestamp_ltz(9)) +select cast('a' as timestamp_ltz(9)) is null -- !query analysis -Project [cast(a as timestamp_ltz(9)) AS CAST(a AS TIMESTAMP_LTZ(9))#x] +Project [isnull(cast(a as timestamp_ltz(9))) AS (CAST(a AS TIMESTAMP_LTZ(9)) IS NULL)#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out index 1dfe31bc190c4..1255f2266629d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/cast.sql.out @@ -520,16 +520,16 @@ Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ltz(7))) AS type -- !query -select cast('a' as timestamp_ntz(9)) +select cast('a' as timestamp_ntz(9)) is null -- !query analysis -Project 
[cast(a as timestamp_ntz(9)) AS CAST(a AS TIMESTAMP_NTZ(9))#x] +Project [isnull(cast(a as timestamp_ntz(9))) AS (CAST(a AS TIMESTAMP_NTZ(9)) IS NULL)#x] +- OneRowRelation -- !query -select cast('a' as timestamp_ltz(9)) +select cast('a' as timestamp_ltz(9)) is null -- !query analysis -Project [cast(a as timestamp_ltz(9)) AS CAST(a AS TIMESTAMP_LTZ(9))#x] +Project [isnull(cast(a as timestamp_ltz(9))) AS (CAST(a AS TIMESTAMP_LTZ(9)) IS NULL)#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index ad6d1a09b4078..5065e7c335e75 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -104,11 +104,12 @@ select cast('a' as timestamp_ntz); -- SPARK-57211: cast string to nanosecond-precision timestamps TIMESTAMP_NTZ(p)/TIMESTAMP_LTZ(p). -- The reverse direction (nanos -> string) is not wired yet, so positive cases assert the result --- type via typeof; negative cases exercise the ANSI parse-error path. +-- type via typeof. Negative cases exercise the ANSI parse-error path and use IS NULL so the result +-- column stays non-nanos (a bare nanos result column is not yet serializable by JDBC/thrift). 
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9))); select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7))); -select cast('a' as timestamp_ntz(9)); -select cast('a' as timestamp_ltz(9)); +select cast('a' as timestamp_ntz(9)) is null; +select cast('a' as timestamp_ltz(9)) is null; select cast(cast('inf' as double) as timestamp); select cast(cast('inf' as float) as timestamp); diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index c592f213d3fac..10b6f4526889b 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1305,7 +1305,7 @@ timestamp_ltz(7) -- !query -select cast('a' as timestamp_ntz(9)) +select cast('a' as timestamp_ntz(9)) is null -- !query schema struct<> -- !query output @@ -1330,7 +1330,7 @@ org.apache.spark.SparkDateTimeException -- !query -select cast('a' as timestamp_ltz(9)) +select cast('a' as timestamp_ltz(9)) is null -- !query schema struct<> -- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out index 4d5237e05b6eb..2b73fe4e63da5 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/cast.sql.out @@ -601,19 +601,19 @@ timestamp_ltz(7) -- !query -select cast('a' as timestamp_ntz(9)) +select cast('a' as timestamp_ntz(9)) is null -- !query schema -struct +struct<(CAST(a AS TIMESTAMP_NTZ(9)) IS NULL):boolean> -- !query output -NULL +true -- !query -select cast('a' as timestamp_ltz(9)) +select cast('a' as timestamp_ltz(9)) is null -- !query schema -struct +struct<(CAST(a AS TIMESTAMP_LTZ(9)) IS NULL):boolean> -- !query output -NULL +true -- !query From 63b58906b94f0daee1f107fbb6985ce89f0920a5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 3 Jun 2026 
10:30:46 +0200 Subject: [PATCH 4/4] [SPARK-57211][SQL] Address review nits: explicit allowTimeZone and test import - Cast.scala: make the interpreted NTZ string parse pass allowTimeZone = true explicitly so it matches the codegen path (which must pass it since Scala default args are not visible from generated Java). - CastWithAnsiOffSuite: import foreachNanosPrecision instead of using the fully-qualified name inline, consistent with the other Cast suites. --- .../apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- .../catalyst/expressions/CastWithAnsiOffSuite.scala | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index d59b5a44d436d..a1935c7396435 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -815,7 +815,7 @@ case class Cast( if (ansiEnabled) { DateTimeUtils.stringToTimestampNTZNanosAnsi(utfs, precision, getContextOrNull()) } else { - DateTimeUtils.stringToTimestampNTZNanos(utfs, precision).orNull + DateTimeUtils.stringToTimestampNTZNanos(utfs, precision, allowTimeZone = true).orNull }) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala index f0f6cd38d725f..9f9a6f275a3fd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} import 
org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils.foreachNanosPrecision import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.{DAY, HOUR, MINUTE, SECOND} @@ -58,12 +59,11 @@ class CastWithAnsiOffSuite extends CastSuiteBase { test("SPARK-57211: legacy mode cast malformed string to nanosecond timestamp returns null") { Seq("123", "2015-03-18 123142", "2015-03-18X", "abdef").foreach { str => - org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils.foreachNanosPrecision { - precision => - checkEvaluation( - cast(Literal(str), TimestampLTZNanosType(precision), UTC_OPT), null) - checkEvaluation( - cast(Literal(str), TimestampNTZNanosType(precision)), null) + foreachNanosPrecision { precision => + checkEvaluation( + cast(Literal(str), TimestampLTZNanosType(precision), UTC_OPT), null) + checkEvaluation( + cast(Literal(str), TimestampNTZNanosType(precision)), null) } } }