Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,11 @@ impl<'a> BinaryTypeCoercer<'a> {
} else if let Some(numeric) = mathematics_numerical_coercion(lhs, rhs) {
// Numeric arithmetic, e.g. Int32 + Int32
Ok(Signature::uniform(numeric))
} else if let Some(numeric) = string_numeric_coercion(lhs, rhs) {
// String to numeric arithmetic, e.g. Int64 + Utf8 or Utf8 + Float64.
// The string operand is coerced to the numeric type of the other
// operand, mirroring the behavior of comparison coercion.
Ok(Signature::uniform(numeric))
} else {
plan_err!(
"Cannot coerce arithmetic expression {} {} {} to valid types", self.lhs, self.op, self.rhs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,66 @@ use datafusion_common::assert_contains;

#[test]
fn test_coercion_error() -> Result<()> {
// Boolean is neither numeric nor string, so it cannot be coerced for arithmetic.
let coercer =
BinaryTypeCoercer::new(&DataType::Float32, &Operator::Plus, &DataType::Utf8);
BinaryTypeCoercer::new(&DataType::Boolean, &Operator::Plus, &DataType::Boolean);
let result_type = coercer.get_input_types();

let e = result_type.unwrap_err();
assert_eq!(
e.strip_backtrace(),
"Error during planning: Cannot coerce arithmetic expression Float32 + Utf8 to valid types"
"Error during planning: Cannot coerce arithmetic expression Boolean + Boolean to valid types"
);
Ok(())
}

/// Tests that arithmetic operators coerce a string operand to the numeric type
/// of the other operand (string to numeric coercion).
#[test]
fn test_type_coercion_arithmetic_string_numeric() -> Result<()> {
use DataType::*;

let arithmetic_ops = [
Operator::Plus,
Operator::Minus,
Operator::Multiply,
Operator::Divide,
Operator::Modulo,
];

for op in arithmetic_ops {
// numeric `op` string -> both coerced to the numeric type
for string_type in [Utf8, LargeUtf8, Utf8View] {
let (lhs, rhs) =
BinaryTypeCoercer::new(&Int64, &op, &string_type).get_input_types()?;
assert_eq!(lhs, Int64, "Op {op}: Int64 vs {string_type} -> lhs");
assert_eq!(rhs, Int64, "Op {op}: Int64 vs {string_type} -> rhs");

// string `op` numeric -> both coerced to the numeric type
let (lhs, rhs) =
BinaryTypeCoercer::new(&string_type, &op, &Float64).get_input_types()?;
assert_eq!(lhs, Float64, "Op {op}: {string_type} vs Float64 -> lhs");
assert_eq!(rhs, Float64, "Op {op}: {string_type} vs Float64 -> rhs");
}
}

// The result type matches same-type numeric arithmetic
test_coercion_binary_rule!(Int64, Utf8, Operator::Plus, Int64);
test_coercion_binary_rule!(Utf8, Float64, Operator::Multiply, Float64);

// string `op` string remains unsupported as the target type is ambiguous
let err = BinaryTypeCoercer::new(&Utf8, &Operator::Plus, &Utf8)
.get_input_types()
.unwrap_err()
.to_string();
assert_contains!(
&err,
"Cannot coerce arithmetic expression Utf8 + Utf8 to valid types"
);

Ok(())
}

#[test]
fn test_date_timestamp_arithmetic_error() -> Result<()> {
let (lhs, rhs) = BinaryTypeCoercer::new(
Expand Down
103 changes: 103 additions & 0 deletions datafusion/sqllogictest/test_files/string_numeric_coercion.slt
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,109 @@ SELECT arrow_cast('hello', 'RunEndEncoded("run_ends": non-null Int32, "values":
----
true

# -------------------------------------------------
# Arithmetic operators use string-to-numeric coercion.
# A string operand is coerced to the numeric type of the other operand.
# See: https://github.com/apache/datafusion/issues/23041
# -------------------------------------------------

# Literal numeric + string literal
query I
SELECT 1 + '1';
----
2

query T
SELECT arrow_typeof(1 + '1');
----
Int64

# String literal on the left-hand side
query I
SELECT '1' + 1;
----
2

# All arithmetic operators against an integer column
query I rowsort
SELECT column1 + '10' FROM t_int;
----
1010
11
15
335
509

query I rowsort
SELECT column1 - '1' FROM t_int;
----
0
324
4
498
999

query I rowsort
SELECT column1 * '2' FROM t_int;
----
10
2
2000
650
998

query I rowsort
SELECT column1 / '5' FROM t_int;
----
0
1
200
65
99

query I rowsort
SELECT column1 % '2' FROM t_int;
----
0
1
1
1
1

# String operand against a float column coerces to Float64
query R rowsort
SELECT column1 + '0.5' FROM t_float;
----
1000.6
2
326.2
5.5
500.4

query T
SELECT arrow_typeof(column1 + '0.5') FROM t_float LIMIT 1;
----
Float64

# Plan shows the string literal is cast to the numeric type, not the reverse
query TT
EXPLAIN SELECT column1 + '10' FROM t_int;
----
logical_plan
01)Projection: t_int.column1 + Int64(10) AS t_int.column1 + Utf8("10")
02)--TableScan: t_int projection=[column1]
physical_plan
01)ProjectionExec: expr=[column1@0 + 10 as t_int.column1 + Utf8("10")]
02)--DataSourceExec: partitions=1, partition_sizes=[1]

# Non-numeric string operand errors at runtime when cast to the numeric type
statement error Arrow error: Cast error: Cannot cast string 'hello' to value of Int64 type
SELECT column1 + 'hello' FROM t_int;

# Two string operands remain ambiguous and are not coerced
query error Cannot coerce arithmetic expression Utf8 \+ Utf8 to valid types
SELECT '1' + '2';

# -------------------------------------------------
# Cleanup
# -------------------------------------------------
Expand Down