Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -946,9 +946,10 @@ trait SparkDateTimeUtils {
s: UTF8String,
precision: Int,
context: QueryContext = null): TimestampNanosVal = {
// TODO(SPARK-57032): when this is wired to a user-facing CAST(... AS TIMESTAMP_NTZ(p)), the
// cast must decide `allowTimeZone` explicitly (per ANSI/legacy mode) instead of relying on
// the `true` default used here, which silently discards a zone suffix.
// CAST(... AS TIMESTAMP_NTZ(p)) intentionally uses `allowTimeZone = true` here, mirroring the
// micro `TIMESTAMP_NTZ` string cast (`stringToTimestampWithoutTimeZoneAnsi`): a zone suffix in
// the input is silently discarded rather than rejected. Callers that need strict NTZ rejection
// should call `stringToTimestampNTZNanos` directly with `allowTimeZone = false`.
stringToTimestampNTZNanos(s, precision).getOrElse {
throw ExecutionErrors.invalidInputInCastToDatetimeError(
s,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte,
import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.{BinaryView, UTF8String, VariantVal}
import org.apache.spark.unsafe.types.{BinaryView, TimestampNanosVal, UTF8String, VariantVal}
import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper}
import org.apache.spark.util.ArrayImplicits._

Expand Down Expand Up @@ -113,6 +113,9 @@ object Cast extends QueryErrorsBase {
case (DateType, TimestampNTZType) => true
case (TimestampType, TimestampNTZType) => true

case (_: StringType, _: TimestampNTZNanosType) => true
case (_: StringType, _: TimestampLTZNanosType) => true

case (_: StringType, _: CalendarIntervalType) => true
case (_: StringType, _: AnsiIntervalType) => true

Expand Down Expand Up @@ -248,6 +251,9 @@ object Cast extends QueryErrorsBase {
case (DateType, TimestampNTZType) => true
case (TimestampType, TimestampNTZType) => true

case (_: StringType, _: TimestampNTZNanosType) => true
case (_: StringType, _: TimestampLTZNanosType) => true

case (_: StringType, DateType) => true
case (_: StringType, _: TimeType) => true
case (TimestampType, DateType) => true
Expand Down Expand Up @@ -335,6 +341,9 @@ object Cast extends QueryErrorsBase {
case (TimestampType, DateType) => true
case (TimestampType, TimestampNTZType) => true
case (TimestampNTZType, TimestampType) => true
// NTZ string is zone-independent (mirroring micro TIMESTAMP_NTZ, which is not listed); only
// the LTZ string parse depends on the session time zone.
case (_: StringType, _: TimestampLTZNanosType) => true
case (ArrayType(fromType, _), ArrayType(toType, _)) => needsTimeZone(fromType, toType)
case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) =>
needsTimeZone(fromKey, toKey) || needsTimeZone(fromValue, toValue)
Expand Down Expand Up @@ -786,6 +795,30 @@ case class Cast(
buildCast[Long](_, ts => convertTz(ts, ZoneOffset.UTC, zoneId))
}

private[this] def castToTimestampLTZNanos(
from: DataType,
precision: Int): Any => Any = from match {
case _: StringType =>
buildCast[UTF8String](_, utfs =>
if (ansiEnabled) {
DateTimeUtils.stringToTimestampLTZNanosAnsi(utfs, precision, zoneId, getContextOrNull())
} else {
DateTimeUtils.stringToTimestampLTZNanos(utfs, precision, zoneId).orNull
})
}

private[this] def castToTimestampNTZNanos(
from: DataType,
precision: Int): Any => Any = from match {
case _: StringType =>
buildCast[UTF8String](_, utfs =>
if (ansiEnabled) {
DateTimeUtils.stringToTimestampNTZNanosAnsi(utfs, precision, getContextOrNull())
} else {
DateTimeUtils.stringToTimestampNTZNanos(utfs, precision, allowTimeZone = true).orNull
})
}

private[this] def decimalToTimestamp(d: Decimal): Long = {
(d.toBigDecimal * MICROS_PER_SECOND).longValue
}
Expand Down Expand Up @@ -1299,6 +1332,8 @@ case class Cast(
case decimal: DecimalType => castToDecimal(from, decimal)
case TimestampType => castToTimestamp(from)
case TimestampNTZType => castToTimestampNTZ(from)
case t: TimestampNTZNanosType => castToTimestampNTZNanos(from, t.precision)
case t: TimestampLTZNanosType => castToTimestampLTZNanos(from, t.precision)
case CalendarIntervalType => castToInterval(from)
case it: DayTimeIntervalType => castToDayTimeInterval(from, it)
case it: YearMonthIntervalType => castToYearMonthInterval(from, it)
Expand Down Expand Up @@ -1409,6 +1444,8 @@ case class Cast(
case decimal: DecimalType => castToDecimalCode(from, decimal, ctx)
case TimestampType => castToTimestampCode(from, ctx)
case TimestampNTZType => castToTimestampNTZCode(from, ctx)
case t: TimestampNTZNanosType => castToTimestampNTZNanosCode(from, t.precision, ctx)
case t: TimestampLTZNanosType => castToTimestampLTZNanosCode(from, t.precision, ctx)
case CalendarIntervalType => castToIntervalCode(from)
case it: DayTimeIntervalType => castToDayTimeIntervalCode(from, it)
case it: YearMonthIntervalType => castToYearMonthIntervalCode(from, it)
Expand Down Expand Up @@ -1772,6 +1809,62 @@ case class Cast(
code"$evPrim = $dateTimeUtilsCls.convertTz($c, java.time.ZoneOffset.UTC, $zid);"
}

private[this] def castToTimestampLTZNanosCode(
from: DataType,
precision: Int,
ctx: CodegenContext): CastFunction = from match {
case _: StringType =>
val zoneIdClass = classOf[ZoneId]
val zid = JavaCode.global(
ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName),
zoneIdClass)
val tsOpt = ctx.freshVariable("tsOpt", classOf[Option[TimestampNanosVal]])
(c, evPrim, evNull) =>
if (ansiEnabled) {
val errorContext = getContextOrNullCode(ctx)
code"""
$evPrim = $dateTimeUtilsCls.stringToTimestampLTZNanosAnsi(
$c, $precision, $zid, $errorContext);
"""
} else {
code"""
scala.Option<TimestampNanosVal> $tsOpt =
$dateTimeUtilsCls.stringToTimestampLTZNanos($c, $precision, $zid);
if ($tsOpt.isDefined()) {
$evPrim = (TimestampNanosVal) $tsOpt.get();
} else {
$evNull = true;
}
"""
}
}

private[this] def castToTimestampNTZNanosCode(
from: DataType,
precision: Int,
ctx: CodegenContext): CastFunction = from match {
case _: StringType =>
val tsOpt = ctx.freshVariable("tsOpt", classOf[Option[TimestampNanosVal]])
(c, evPrim, evNull) =>
if (ansiEnabled) {
val errorContext = getContextOrNullCode(ctx)
code"""
$evPrim = $dateTimeUtilsCls.stringToTimestampNTZNanosAnsi(
$c, $precision, $errorContext);
"""
} else {
code"""
scala.Option<TimestampNanosVal> $tsOpt =
$dateTimeUtilsCls.stringToTimestampNTZNanos($c, $precision, true);
Comment thread
uros-b marked this conversation as resolved.
if ($tsOpt.isDefined()) {
$evPrim = (TimestampNanosVal) $tsOpt.get();
} else {
$evNull = true;
}
"""
}
}

private[this] def castToIntervalCode(from: DataType): CastFunction = from match {
case _: StringType =>
val util = IntervalUtils.getClass.getCanonicalName.stripSuffix("$")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import java.time.{Duration, LocalDate, LocalDateTime, LocalTime, Period}
import java.time.temporal.ChronoUnit
import java.util.{Calendar, Locale, TimeZone}

import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException}
import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
Expand All @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration
import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes}
Expand Down Expand Up @@ -1023,6 +1024,52 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
LocalDateTime.of(2021, 6, 17, 0, 0))
}

test("SPARK-57211: cast string to timestamp_ltz with nanosecond precision") {
foreachNanosPrecision { precision =>
val truncate = nanoOfSecTruncator(precision)
outstandingZoneIds.foreach { zid =>
specialNanosTs.foreach { s =>
val ldt = parseSpecialNanosNTZ(s).withNano(truncate(parseSpecialNanosNTZ(s).getNano))
val expected = instantToNanosVal(ldt.atZone(zid).toInstant)
checkEvaluation(
cast(Literal(s), TimestampLTZNanosType(precision), Option(zid.getId)),
expected)
}
}
}
}

test("SPARK-57211: cast string to timestamp_ntz with nanosecond precision") {
foreachNanosPrecision { precision =>
val truncate = nanoOfSecTruncator(precision)
specialNanosTs.foreach { s =>
val ldt = parseSpecialNanosNTZ(s).withNano(truncate(parseSpecialNanosNTZ(s).getNano))
val expected = localDateTimeToNanosVal(ldt)
// NTZ result is independent of the session time zone.
checkEvaluation(cast(Literal(s), TimestampNTZNanosType(precision)), expected)
// A zone suffix is discarded (allowTimeZone = true), mirroring micro TIMESTAMP_NTZ.
checkEvaluation(cast(Literal(s + "Z"), TimestampNTZNanosType(precision)), expected)
}
}
}

test("SPARK-57211: nanosecond timestamp cast requires the preview flag") {
withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") {
val expectedParams = Map(
"featureName" -> "Nanosecond-precision timestamp types",
"configKey" -> "spark.sql.timestampNanosTypes.enabled",
"configValue" -> "true")
Seq(TimestampNTZNanosType(9), TimestampLTZNanosType(9)).foreach { to =>
checkError(
exception = intercept[SparkException] {
cast(Literal("2020-01-01 00:00:00"), to, UTC_OPT).checkInputDataTypes()
},
condition = "FEATURE_NOT_ENABLED",
parameters = expectedParams)
}
}
}

test("SPARK-35112: Cast string to day-time interval") {
checkEvaluation(cast(Literal.create("0 0:0:0"), DayTimeIntervalType()), 0L)
checkEvaluation(cast(Literal.create(" interval '0 0:0:0' Day TO second "),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite
import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet}
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils.foreachNanosPrecision
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DayTimeIntervalType.{DAY, HOUR, MINUTE, SECOND}
Expand Down Expand Up @@ -56,6 +57,17 @@ class CastWithAnsiOffSuite extends CastSuiteBase {
checkEvaluation(cast(123L, DecimalType(2, 0)), null)
}

test("SPARK-57211: legacy mode cast malformed string to nanosecond timestamp returns null") {
Seq("123", "2015-03-18 123142", "2015-03-18X", "abdef").foreach { str =>
foreachNanosPrecision { precision =>
checkEvaluation(
cast(Literal(str), TimestampLTZNanosType(precision), UTC_OPT), null)
checkEvaluation(
cast(Literal(str), TimestampNTZNanosType(precision)), null)
}
}
}

test("cast from int #2") {
checkEvaluation(cast(cast(1000, TimestampType), LongType), 1000.toLong)
checkEvaluation(cast(cast(-1200, TimestampType), LongType), -1200.toLong)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC}
import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils.foreachNanosPrecision
import org.apache.spark.sql.errors.QueryErrorsBase
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -800,6 +801,23 @@ class CastWithAnsiOnSuite extends CastSuiteBase with QueryErrorsBase {
}
}

test("SPARK-57211: ANSI mode cast string to nanosecond timestamp with parse error") {
val invalidInputs = Seq(
"123", "2015-03-18 123142", "2015-03-18X", "2015/03/18", "abdef", "2015-031-8")
DateTimeTestUtils.outstandingZoneIds.foreach { zid =>
foreachNanosPrecision { precision =>
invalidInputs.foreach { str =>
checkExceptionInExpression[DateTimeException](
cast(Literal(str), TimestampLTZNanosType(precision), Option(zid.getId)),
castErrMsg(str, TimestampLTZNanosType(precision)))
checkExceptionInExpression[DateTimeException](
cast(Literal(str), TimestampNTZNanosType(precision)),
castErrMsg(str, TimestampNTZNanosType(precision)))
}
}
}
}

test("ANSI mode: cast string to date with parse error") {
DateTimeTestUtils.outstandingZoneIds.foreach { zid =>
def checkCastWithParseError(str: String): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,34 @@ Project [cast(a as timestamp_ntz) AS CAST(a AS TIMESTAMP_NTZ)#x]
+- OneRowRelation


-- !query
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9)))
-- !query analysis
Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x]
+- OneRowRelation


-- !query
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7)))
-- !query analysis
Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ltz(7))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)))#x]
+- OneRowRelation


-- !query
select cast('a' as timestamp_ntz(9)) is null
-- !query analysis
Project [isnull(cast(a as timestamp_ntz(9))) AS (CAST(a AS TIMESTAMP_NTZ(9)) IS NULL)#x]
+- OneRowRelation


-- !query
select cast('a' as timestamp_ltz(9)) is null
-- !query analysis
Project [isnull(cast(a as timestamp_ltz(9))) AS (CAST(a AS TIMESTAMP_LTZ(9)) IS NULL)#x]
+- OneRowRelation


-- !query
select cast(cast('inf' as double) as timestamp)
-- !query analysis
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,34 @@ Project [cast(a as timestamp_ntz) AS CAST(a AS TIMESTAMP_NTZ)#x]
+- OneRowRelation


-- !query
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9)))
-- !query analysis
Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ntz(9))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_NTZ(9)))#x]
+- OneRowRelation


-- !query
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7)))
-- !query analysis
Project [typeof(cast(2022-01-01 00:00:00.123456789 as timestamp_ltz(7))) AS typeof(CAST(2022-01-01 00:00:00.123456789 AS TIMESTAMP_LTZ(7)))#x]
+- OneRowRelation


-- !query
select cast('a' as timestamp_ntz(9)) is null
-- !query analysis
Project [isnull(cast(a as timestamp_ntz(9))) AS (CAST(a AS TIMESTAMP_NTZ(9)) IS NULL)#x]
+- OneRowRelation


-- !query
select cast('a' as timestamp_ltz(9)) is null
-- !query analysis
Project [isnull(cast(a as timestamp_ltz(9))) AS (CAST(a AS TIMESTAMP_LTZ(9)) IS NULL)#x]
+- OneRowRelation


-- !query
select cast(cast('inf' as double) as timestamp)
-- !query analysis
Expand Down
9 changes: 9 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/cast.sql
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,15 @@ select cast('a' as timestamp);
select cast('2022-01-01 00:00:00' as timestamp_ntz);
select cast('a' as timestamp_ntz);

-- SPARK-57211: cast string to nanosecond-precision timestamps TIMESTAMP_NTZ(p)/TIMESTAMP_LTZ(p).
-- The reverse direction (nanos -> string) is not wired yet, so positive cases assert the result
-- type via typeof. Negative cases exercise the ANSI parse-error path and use IS NULL so the result
-- column stays non-nanos (a bare nanos result column is not yet serializable by JDBC/thrift).
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ntz(9)));
select typeof(cast('2022-01-01 00:00:00.123456789' as timestamp_ltz(7)));
select cast('a' as timestamp_ntz(9)) is null;
select cast('a' as timestamp_ltz(9)) is null;

select cast(cast('inf' as double) as timestamp);
select cast(cast('inf' as float) as timestamp);

Expand Down
Loading