From 871ecc54009141d37600311a705bb392ae05c7a0 Mon Sep 17 00:00:00 2001 From: Kevin-Li-2025 <2242139@qq.com> Date: Fri, 19 Jun 2026 13:14:09 +0100 Subject: [PATCH 1/2] Add any_value aggregate function --- .../functions-aggregate/src/any_value.rs | 125 ++++++++++++++++++ datafusion/functions-aggregate/src/lib.rs | 3 + .../test_files/aggregate_any_value.slt | 49 +++++++ 3 files changed, 177 insertions(+) create mode 100644 datafusion/functions-aggregate/src/any_value.rs create mode 100644 datafusion/sqllogictest/test_files/aggregate_any_value.slt diff --git a/datafusion/functions-aggregate/src/any_value.rs b/datafusion/functions-aggregate/src/any_value.rs new file mode 100644 index 0000000000000..dc3bd23d806fc --- /dev/null +++ b/datafusion/functions-aggregate/src/any_value.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines the ANY_VALUE aggregation. 
+ +use std::fmt::Debug; +use std::hash::Hash; +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::{Result, not_impl_err}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::utils::{AggregateOrderSensitivity, format_state_name}; +use datafusion_expr::{ + Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility, +}; +use datafusion_macros::user_doc; + +use crate::first_last::TrivialFirstValueAccumulator; + +make_udaf_expr_and_func!( + AnyValue, + any_value, + expression, + "Returns an arbitrary non-null value", + any_value_udaf +); + +#[user_doc( + doc_section(label = "General Functions"), + description = "Returns an arbitrary non-null value from a group, or NULL if the group contains only NULL values.", + syntax_example = "any_value(expression)", + sql_example = r#"```sql +> SELECT any_value(column_name) FROM table_name; ++------------------------+ +| any_value(column_name) | ++------------------------+ +| arbitrary_value | ++------------------------+ +```"#, + standard_argument(name = "expression",) +)] +#[derive(PartialEq, Eq, Hash, Debug)] +pub struct AnyValue { + signature: Signature, +} + +impl Default for AnyValue { + fn default() -> Self { + Self::new() + } +} + +impl AnyValue { + pub fn new() -> Self { + Self { + signature: Signature::any(1, Volatility::Immutable), + } + } +} + +impl AggregateUDFImpl for AnyValue { + fn name(&self) -> &str { + "any_value" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { + not_impl_err!("Not called because return_field is implemented") + } + + fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> { + Ok(Arc::new( + Field::new(self.name(), arg_fields[0].data_type().clone(), true) + .with_metadata(arg_fields[0].metadata().clone()), + )) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> { + 
TrivialFirstValueAccumulator::try_new(acc_args.return_field.data_type(), true) + .map(|acc| Box::new(acc) as _) + } + + fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<FieldRef>> { + Ok(vec![ + Field::new( + format_state_name(args.name, "any_value"), + args.return_type().clone(), + true, + ) + .into(), + Field::new( + format_state_name(args.name, "any_value_is_set"), + DataType::Boolean, + true, + ) + .into(), + ]) + } + + fn order_sensitivity(&self) -> AggregateOrderSensitivity { + AggregateOrderSensitivity::Insensitive + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index 1b9996220d882..e3f2714abbf25 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -65,6 +65,7 @@ #[macro_use] pub mod macros; +pub mod any_value; pub mod approx_distinct; pub mod approx_median; pub mod approx_percentile_cont; @@ -102,6 +103,7 @@ use std::sync::Arc; /// Fluent-style API for creating `Expr`s pub mod expr_fn { + pub use super::any_value::any_value; pub use super::approx_distinct::approx_distinct; pub use super::approx_median::approx_median; pub use super::approx_percentile_cont::approx_percentile_cont; @@ -147,6 +149,7 @@ pub mod expr_fn { /// Returns all default aggregate functions pub fn all_default_aggregate_functions() -> Vec<Arc<AggregateUDF>> { vec![ + any_value::any_value_udaf(), array_agg::array_agg_udaf(), first_last::first_value_udaf(), first_last::last_value_udaf(), diff --git a/datafusion/sqllogictest/test_files/aggregate_any_value.slt b/datafusion/sqllogictest/test_files/aggregate_any_value.slt new file mode 100644 index 0000000000000..5fe18d19fa740 --- /dev/null +++ b/datafusion/sqllogictest/test_files/aggregate_any_value.slt @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +statement ok +CREATE TABLE any_value_test AS VALUES + (1, NULL, NULL), + (1, 10, 'first'), + (1, 20, 'second'), + (2, NULL, NULL), + (2, NULL, NULL), + (3, 30, 'third'); + +query I +SELECT any_value(column2) FROM any_value_test; +---- +10 + +query IIT rowsort +SELECT column1, any_value(column2), any_value(column3) +FROM any_value_test +GROUP BY column1; +---- +1 10 first +2 NULL NULL +3 30 third + +query T +SELECT arrow_typeof(any_value(column3)) FROM any_value_test; +---- +Utf8 + +query I +SELECT any_value(column2) FROM any_value_test WHERE false; +---- +NULL From a588a2ba54b3077dc54090317bb5a672823ff5c5 Mon Sep 17 00:00:00 2001 From: Kevin-Li-2025 <2242139@qq.com> Date: Sat, 20 Jun 2026 00:36:15 +0100 Subject: [PATCH 2/2] Make any_value SQL tests deterministic --- .../test_files/aggregate_any_value.slt | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate_any_value.slt b/datafusion/sqllogictest/test_files/aggregate_any_value.slt index 5fe18d19fa740..3fe6f787d346d 100644 --- a/datafusion/sqllogictest/test_files/aggregate_any_value.slt +++ b/datafusion/sqllogictest/test_files/aggregate_any_value.slt @@ -24,19 +24,22 @@ CREATE TABLE any_value_test AS VALUES (2, NULL, NULL), (3, 30, 'third'); -query I -SELECT 
any_value(column2) FROM any_value_test; +query B +SELECT any_value(column2) IN (10, 20) FROM any_value_test; ---- -10 +true -query IIT rowsort -SELECT column1, any_value(column2), any_value(column3) +query IBB rowsort +SELECT + column1, + any_value(column2) IN (10, 20, 30), + any_value(column3) IN ('first', 'second', 'third') FROM any_value_test GROUP BY column1; ---- -1 10 first +1 true true 2 NULL NULL -3 30 third +3 true true query T SELECT arrow_typeof(any_value(column3)) FROM any_value_test; @@ -47,3 +50,8 @@ query I SELECT any_value(column2) FROM any_value_test WHERE false; ---- NULL + +query I +SELECT any_value(column2) FROM any_value_test WHERE column1 = 2; +---- +NULL