From 3cb7845ce6450b088df39e4fc2dad7734ac807d2 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 20 May 2026 16:50:53 -0700 Subject: [PATCH 01/15] feat(api): add PATTERN_* settings defaults to UnifiedQueryContext PPL `patterns` command's AstBuilder reads cluster settings for method/mode/ max_sample_count/buffer_limit/show_numbered_token defaults when the query omits them. Without these in the analytics-engine path's settings map, the parser reads null, falls into `PatternMethod.valueOf("NULL")`, and every `patterns` query without an explicit `method=` or `mode=` argument fails at parse time with `No enum constant PatternMethod.NULL`. Mirrors the OpenSearchSettings defaults (SIMPLE_PATTERN / LABEL / 10 / 100000 / false). Part of the analytics-engine route support for the `patterns` command. Signed-off-by: Kai Huang --- .../sql/api/UnifiedQueryContext.java | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 38c6561b40..59d2a8efea 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -6,6 +6,11 @@ package org.opensearch.sql.api; import static org.opensearch.sql.common.setting.Settings.Key.CALCITE_ENGINE_ENABLED; +import static org.opensearch.sql.common.setting.Settings.Key.PATTERN_BUFFER_LIMIT; +import static org.opensearch.sql.common.setting.Settings.Key.PATTERN_MAX_SAMPLE_COUNT; +import static org.opensearch.sql.common.setting.Settings.Key.PATTERN_METHOD; +import static org.opensearch.sql.common.setting.Settings.Key.PATTERN_MODE; +import static org.opensearch.sql.common.setting.Settings.Key.PATTERN_SHOW_NUMBERED_TOKEN; import static org.opensearch.sql.common.setting.Settings.Key.PPL_JOIN_SUBSEARCH_MAXOUT; import static org.opensearch.sql.common.setting.Settings.Key.PPL_REX_MAX_MATCH_LIMIT; 
import static org.opensearch.sql.common.setting.Settings.Key.PPL_SUBSEARCH_MAXOUT; @@ -145,12 +150,24 @@ public static class Builder { */ private final Map settings = new HashMap( - Map.of( - QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit(), - PPL_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.subsearchLimit(), - PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.joinSubsearchLimit(), - CALCITE_ENGINE_ENABLED, true, - PPL_REX_MAX_MATCH_LIMIT, 10)); + Map.ofEntries( + Map.entry(QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit()), + Map.entry(PPL_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.subsearchLimit()), + Map.entry( + PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.joinSubsearchLimit()), + Map.entry(CALCITE_ENGINE_ENABLED, true), + Map.entry(PPL_REX_MAX_MATCH_LIMIT, 10), + // PPL `patterns` command defaults — mirror the cluster-side defaults registered in + // OpenSearchSettings (DEFAULT_PATTERN_METHOD_SETTING etc.). Without these the + // analytics-engine path's AstBuilder.visitPatternsCommand reads null from + // `settings.getSettingValue(Key.PATTERN_METHOD)`, fails with + // `PatternMethod.valueOf("NULL")` IllegalArgumentException, and every query that + // omits an explicit `method=` / `mode=` argument is rejected. + Map.entry(PATTERN_METHOD, "SIMPLE_PATTERN"), + Map.entry(PATTERN_MODE, "LABEL"), + Map.entry(PATTERN_MAX_SAMPLE_COUNT, 10), + Map.entry(PATTERN_BUFFER_LIMIT, 100000), + Map.entry(PATTERN_SHOW_NUMBERED_TOKEN, false))); /** * Sets the query language frontend to be used. From ced491be69877d18d2f9988fc76afdd5da9c81d4 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 21 May 2026 10:09:12 -0700 Subject: [PATCH 02/15] feat(core): emit 4-arg regexp_replace with 'g' flag for SIMPLE patterns `buildParseRelNode` for `ParseMethod.PATTERNS` lowered through PPL's REPLACE handler, which always emits Calcite's 3-arg `REGEXP_REPLACE_3`. 
That works on the V2 / Calcite path (Calcite's default is replace-all), but the analytics- engine route converts the call to substrait + DataFusion, and DataFusion's `regexp_replace` defaults to first-match-only without an explicit "g" flag. The dashboard test for `source = bank | patterns email mode=label` returned `<*>@pyrami.com` instead of `<*>@<*>.<*>` because only the first `[a-zA-Z0-9]+` run was replaced. Bypass the REPLACE handler for the PATTERNS branch and emit `REGEXP_REPLACE_PG_4` directly with a constant "g" flag. Same semantics on V2 / Calcite (Calcite's REGEXP_REPLACE_PG_4 with "g" = replace-all); fixes the analytics-engine path. CalcitePPLPatternsTest plan-string expectations updated to match the 4-arg form. 17/17 unit tests pass. IT result on analytics-engine route: testSimplePatternLabelMode_NotShowNumberedToken now passes. Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 23 ++++++- .../ppl/calcite/CalcitePPLPatternsTest.java | 60 +++++++++---------- 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 10c5d2aa88..7d18064ba1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -4301,9 +4301,26 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { } List newFields = new ArrayList<>(); for (String groupCandidate : groupCandidates) { - RexNode innerRex = - PPLFuncImpTable.INSTANCE.resolve( - context.rexBuilder, ParseUtils.BUILTIN_FUNCTION_MAP.get(parseMethod), rexNodeList); + RexNode innerRex; + if (ParseMethod.PATTERNS.equals(parseMethod)) { + // Emit `regexp_replace(field, pattern, replacement, "g")` directly so the replacement + // is global (every match replaced). 
DataFusion's `regexp_replace` defaults to FIRST + // match only without the "g" flag — using the 3-arg form via the REPLACE handler + // produces `<*>@pyrami.com` instead of `<*>@<*>.<*>` on the analytics-engine route. + // Calcite's REGEXP_REPLACE_PG_4 with "g" matches what `replaceAll` does, so V2 / + // Calcite-path semantics are preserved. + RexNode globalFlag = + context.rexBuilder.makeLiteral( + "g", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); + innerRex = + context.rexBuilder.makeCall( + SqlLibraryOperators.REGEXP_REPLACE_PG_4, + ArrayUtils.add(rexNodeList, globalFlag)); + } else { + innerRex = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, ParseUtils.BUILTIN_FUNCTION_MAP.get(parseMethod), rexNodeList); + } if (!ParseMethod.PATTERNS.equals(parseMethod)) { newFields.add( PPLFuncImpTable.INSTANCE.resolve( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java index c272453b82..18bdb95202 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java @@ -35,13 +35,13 @@ public void testPatternsLabelMode_NotShowNumberedToken_ForSimplePatternMethod() String expectedLogical = "LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR))])\n" + + " '<*>':VARCHAR, 'g':VARCHAR))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END `patterns_field`\n" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, 
expectedSparkSql); } @@ -56,18 +56,18 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() { String expectedLogical = "LogicalProject(ENAME=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1," + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," - + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR)), $1), 'pattern'))]," + + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR)), $1), 'tokens'))])\n" + + " '<*>':VARCHAR, 'g':VARCHAR)), $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['pattern'] AS" + + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['pattern'] AS" + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -83,18 +83,18 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa String expectedLogical = "LogicalProject(ENAME=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1," + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," - + " '[A-H]':VARCHAR, '<*>':VARCHAR)), $1), 'pattern'))]," + + " '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, 
Sarg['':VARCHAR; NULL AS" - + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR))," + + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))," + " $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['pattern'] AS STRING)" + + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['pattern'] AS STRING)" + " `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` =" - + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['tokens'] AS" + + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['tokens'] AS" + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -108,13 +108,13 @@ public void testPatternsLabelModeWithCustomField_NotShowNumberedToken_ForSimpleP String expectedLogical = "LogicalProject(ENAME=[$1], upper=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR)," - + " '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR))])\n" + + " '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END `upper`\n" + + " REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END `upper`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -130,19 +130,19 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern "LogicalProject(ENAME=[$1], DEPTNO=[$7]," + " patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, 
Sarg['':VARCHAR; NULL" + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR)), $1), 'pattern'))]," + + " '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR)), $1), 'tokens'))])\n" + + " '<*>':VARCHAR, 'g':VARCHAR)), $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME`" - + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," + + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE" + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," - + " '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" + + " '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" + " `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -248,18 +248,18 @@ public void testPatternsAggregationMode_NotShowNumberedToken_ForSimplePatternMet "LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n" + " LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL" + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR))], $f9=[10])\n" + + " '<*>':VARCHAR, 'g':VARCHAR))], $f9=[10])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," - + " '[a-zA-Z0-9]+', '<*>') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' 
THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)" + + " '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" + " `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END"; + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -276,21 +276,21 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod + " $2)])\n" + " LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL" + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR))], $f9=[10])\n" + + " '<*>':VARCHAR, 'g':VARCHAR))], $f9=[10])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['pattern']" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, 10))['pattern']" + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END) `pattern_count`," + + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END) `pattern_count`," + " TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['tokens']" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, 10))['tokens']" + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY CASE WHEN `ENAME` IS NULL 
OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END"; + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -307,22 +307,22 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa + " $3)])\n" + " LogicalProject(ENAME=[$1], DEPTNO=[$7], patterns_field=[CASE(SEARCH($1," + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," - + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))], $f9=[10])\n" + + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))], $f9=[10])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" - + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`," + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''" - + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`," + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10)" + " `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END"; + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; verifyPPLToSparkSQL(root, expectedSparkSql); } From ee8b6ab4868168a667fbb1b4549f3d47c0f02918 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 21 May 2026 15:38:54 
-0700 Subject: [PATCH 03/15] test(integ-test): add CalcitePPLDashboardPatternsIT pinning BRAIN-label dashboard query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenSearch Dashboards renders BRAIN-pattern panels with the shape: patterns ... method=BRAIN mode=label | stats count() as pattern_count, take(message, 1) as sample_logs by patterns_field | sort -pattern_count | fields patterns_field, pattern_count, sample_logs This integration test pins that shape on the analytics-engine route so regressions surface immediately. Schema-only assertions because BRAIN's clustering output is dataset-version-sensitive — the contract we care about is "the query plans, executes, and returns three columns in the right order". Currently red end-to-end pending the BRAIN label window-UDF type-cascade fix (see the OpenSearch-side WIP commit "BRAIN window UDF + dashboard query path scaffolding" — the {@code PplWindowCallRewriter} stub documents the remaining gap). 
Signed-off-by: Kai Huang --- .../remote/CalcitePPLDashboardPatternsIT.java | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java new file mode 100644 index 0000000000..ad9ab5b077 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java @@ -0,0 +1,74 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_HDFS_LOGS; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Pins the exact PPL query shape OpenSearch Dashboards uses to render BRAIN-pattern + * panels: {@code patterns ... method=BRAIN mode=label} followed by + * {@code stats count(), take(message, 1) by patterns_field | sort -pattern_count | + * fields patterns_field, pattern_count, sample_logs}. + * + *

<p>The combination exercises three pieces that all have to be wired through the + * analytics-engine route together: + * + * <ul> + *   <li>{@code INTERNAL_PATTERN} as a window function (label mode emits one + * matched wildcard pattern per row, broadcast across the partition).</li> + *   <li>{@code take(field, n)} aggregate to capture a representative sample log + * per discovered pattern group.</li> + *   <li>{@code count()} aggregate + {@code sort} on the result.</li> + * </ul> + * + * <p>
Schema-only assertions: BRAIN's clustering depends on a corpus large enough + * that the default heuristics fire, so per-row pattern strings are sensitive to + * dataset version. The schema check guarantees the query plans, executes, and + * returns the dashboard's three expected columns in the right order. + * + * @opensearch.internal + */ +public class CalcitePPLDashboardPatternsIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.HDFS_LOGS); + } + + /** + * Mirrors the canonical Dashboards BRAIN-label pattern panel query. Unfiltered + * variant — pins the end-to-end plan compiles and returns the dashboard's + * three-column shape (matched wildcard pattern + occurrence count + sample log). + */ + @Test + public void testDashboardBrainLabelStatsByPatternsField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | patterns content method=BRAIN mode=label" + + " max_sample_count=5 variable_count_threshold=5" + + " frequency_threshold_percentage=0.2" + + " | stats count() as pattern_count, take(content, 1) as sample_logs" + + " by patterns_field" + + " | sort - pattern_count" + + " | fields patterns_field, pattern_count, sample_logs", + TEST_INDEX_HDFS_LOGS)); + verifySchemaInOrder( + result, + schema("patterns_field", "string"), + schema("pattern_count", "bigint"), + schema("sample_logs", "array")); + } +} From e9ad4e594cf6e84374df61322ec0984e0b0b381d Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 10:16:46 -0700 Subject: [PATCH 04/15] style: apply spotless formatting Spotless drift from cherry-picking the analytics-engine patterns work across upstream's recent formatting touch-ups. No behavior change. 
Signed-off-by: Kai Huang Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 3 +- .../remote/CalcitePPLDashboardPatternsIT.java | 35 ++++++------ .../ppl/calcite/CalcitePPLPatternsTest.java | 57 ++++++++++--------- 3 files changed, 47 insertions(+), 48 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 7d18064ba1..5d2df0eb58 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -4314,8 +4314,7 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { "g", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); innerRex = context.rexBuilder.makeCall( - SqlLibraryOperators.REGEXP_REPLACE_PG_4, - ArrayUtils.add(rexNodeList, globalFlag)); + SqlLibraryOperators.REGEXP_REPLACE_PG_4, ArrayUtils.add(rexNodeList, globalFlag)); } else { innerRex = PPLFuncImpTable.INSTANCE.resolve( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java index ad9ab5b077..a473766cad 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java @@ -15,26 +15,25 @@ import org.opensearch.sql.ppl.PPLIntegTestCase; /** - * Pins the exact PPL query shape OpenSearch Dashboards uses to render BRAIN-pattern - * panels: {@code patterns ... method=BRAIN mode=label} followed by - * {@code stats count(), take(message, 1) by patterns_field | sort -pattern_count | - * fields patterns_field, pattern_count, sample_logs}. 
+ * Pins the exact PPL query shape OpenSearch Dashboards uses to render BRAIN-pattern panels: {@code + * patterns ... method=BRAIN mode=label} followed by {@code stats count(), take(message, 1) by + * patterns_field | sort -pattern_count | fields patterns_field, pattern_count, sample_logs}. * - *

<p>The combination exercises three pieces that all have to be wired through the - * analytics-engine route together: + * <p>The combination exercises three pieces that all have to be wired through the analytics-engine + * route together: * * <ul> - *   <li>{@code INTERNAL_PATTERN} as a window function (label mode emits one - * matched wildcard pattern per row, broadcast across the partition).</li> - *   <li>{@code take(field, n)} aggregate to capture a representative sample log - * per discovered pattern group.</li> - *   <li>{@code count()} aggregate + {@code sort} on the result.</li> + *   <li>{@code INTERNAL_PATTERN} as a window function (label mode emits one matched wildcard + * pattern per row, broadcast across the partition). + *   <li>{@code take(field, n)} aggregate to capture a representative sample log per discovered + * pattern group. + *   <li>{@code count()} aggregate + {@code sort} on the result. * </ul> * - * <p>Schema-only assertions: BRAIN's clustering depends on a corpus large enough - * that the default heuristics fire, so per-row pattern strings are sensitive to - * dataset version. The schema check guarantees the query plans, executes, and - * returns the dashboard's three expected columns in the right order. + * <p>
Schema-only assertions: BRAIN's clustering depends on a corpus large enough that the default + * heuristics fire, so per-row pattern strings are sensitive to dataset version. The schema check + * guarantees the query plans, executes, and returns the dashboard's three expected columns in the + * right order. * * @opensearch.internal */ @@ -47,9 +46,9 @@ public void init() throws Exception { } /** - * Mirrors the canonical Dashboards BRAIN-label pattern panel query. Unfiltered - * variant — pins the end-to-end plan compiles and returns the dashboard's - * three-column shape (matched wildcard pattern + occurrence count + sample log). + * Mirrors the canonical Dashboards BRAIN-label pattern panel query. Unfiltered variant — pins the + * end-to-end plan compiles and returns the dashboard's three-column shape (matched wildcard + * pattern + occurrence count + sample log). */ @Test public void testDashboardBrainLabelStatsByPatternsField() throws IOException { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java index 18bdb95202..6c1bbe03fd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java @@ -64,9 +64,9 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() { verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['pattern'] AS" - + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" + + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['pattern']" + + " AS STRING) `patterns_field`, 
TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; @@ -85,17 +85,17 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," + " '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" - + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))," - + " $1), 'tokens'))])\n" + + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR," + + " 'g':VARCHAR)), $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['pattern'] AS STRING)" - + " `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` =" - + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['tokens'] AS" - + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" + + " ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['pattern'] AS" + + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END," + + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -138,12 +138,12 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern verifyLogical(root, expectedLogical); String 
expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME`" - + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," - + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE" - + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," - + " '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" - + " `tokens`\n" + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN" + + " `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," + + " '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY" + + " >) `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -254,9 +254,9 @@ public void testPatternsAggregationMode_NotShowNumberedToken_ForSimplePatternMet String expectedSparkSql = "SELECT CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," - + " '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" - + " `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + + " '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL" + + " OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g')" + + " END) `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; @@ -282,12 +282,13 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod String expectedSparkSql = "SELECT 
TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, 10))['pattern']" - + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" - + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END) `pattern_count`," - + " TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`, 10))['tokens']" - + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`," + + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" + + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''" + + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`," + + " `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; @@ -317,9 +318,9 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''" - + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`," - + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10)" - + " `sample_logs`\n" + + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + + " `TAKE`(`ENAME`, 
10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`," + + " `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; From 90fe3b9e04a9abd73c8d8aca60eb12bce28e08fc Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 11:38:08 -0700 Subject: [PATCH 05/15] test(integ-test): update SIMPLE-patterns explain YAML for 4-arg regexp_replace CalciteExplainIT's `testPatternsSimplePatternMethodWith{out,AggPushDown}Explain` expected the old 3-arg `REGEXP_REPLACE(...)` form, but after the `feat(core)` commit emits 4-arg `REGEXP_REPLACE(..., 'g':VARCHAR)` the plan output now includes the extra operand both in the logical line and in the base64-encoded compounded script of the physical/pushdown plan. Regenerate both YAML expectations against the live planner. Signed-off-by: Kai Huang --- .../calcite/explain_patterns_simple_pattern.yaml | 4 ++-- .../calcite/explain_patterns_simple_pattern_agg_push.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml index ccf7e71efa..932af32b40 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml @@ -1,8 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))]) + LogicalProject(account_number=[$0], 
firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..10=[{inputs}], expr#11=[Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR], expr#12=[SEARCH($t9, $t11)], expr#13=['':VARCHAR], expr#14=['[a-zA-Z0-9]+':VARCHAR], expr#15=['<*>':VARCHAR], expr#16=[REGEXP_REPLACE($t9, $t14, $t15)], expr#17=[CASE($t12, $t13, $t16)], proj#0..10=[{exprs}], patterns_field=[$t17]) + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR], expr#12=[SEARCH($t9, $t11)], expr#13=['':VARCHAR], expr#14=['[a-zA-Z0-9]+':VARCHAR], expr#15=['<*>':VARCHAR], expr#16=['g':VARCHAR], expr#17=[REGEXP_REPLACE($t9, $t14, $t15, $t16)], expr#18=[CASE($t12, $t13, $t17)], proj#0..10=[{exprs}], patterns_field=[$t18]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml index 80677913ec..e32d32688f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml @@ -3,8 +3,8 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))], pattern_count=[$1], tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))], sample_logs=[$2]) LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)]) - LogicalProject(email=[$9], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))], $f18=[10]) + LogicalProject(email=[$9], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))], $f18=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQo
gICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgI
CAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_c
ompounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQIhXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgIC
AgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDcsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+
","<*>","g"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQIhXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7C
iAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDcsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>","g"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) From 467195eb15b37c2c91da25c184d4faae810c9b2c Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 11:49:07 -0700 Subject: [PATCH 06/15] fix(opensearch): collapse 4-arg REGEXP_REPLACE_PG_4 'g' to 3-arg at script pushdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `feat(core)` commit on this branch lowered PPL `patterns` to a 4-arg `REGEXP_REPLACE_PG_4(field, pattern, replacement, 'g')` so DataFusion
(which defaults to first-match-only) does global replacement on the analytics-engine route. Calcite's enumerable runtime — which the V2 / Calcite-pushdown path uses to compile the serialized RexCall into Janino bytecode — has no matching `SqlFunctions.regexpReplace(String, String, String, String)` impl (only `(String, String, String, int[, ...])` variants where the 4th arg is start position, not a flags string). Janino codegen failed with `No applicable constructor/method found` for the 4-arg-with-flags call shape, breaking the patterns.md doctest (`source=apache | patterns message method=simple_pattern mode=aggregation`). Two complementary fixes: 1. `RexStandardizer.visitCall`: before serializing for pushdown, collapse `REGEXP_REPLACE_PG_4(field, pattern, replacement, 'g')` to the 3-arg `REGEXP_REPLACE_3` form. Safe because Calcite's 3-arg variant is already replace-all (same semantics as PG_4 with `g`). Only fires when the flags literal is exactly `"g"` so any future `i`/`m`/etc. use cases pass through untouched. 2. `ExtendedRelJson.toOp`: pass operand count when looking up an operator on the deserialization side so multi-arity SQL names (REGEXP_REPLACE_3 vs REGEXP_REPLACE_PG_4 vs REGEXP_REPLACE_5 all share `name="REGEXP_REPLACE"`) resolve to the right overload. Defensive — the standardizer fix above is what actually unblocks the doctest, but the resolver was picking by name alone and would have surfaced the same bug for any other overloaded builtin. Verified locally: - doctest queries (`patterns ... method=simple_pattern mode=aggregation [...]`) now return fully-tokenized output; - `CalcitePPLDashboardPatternsIT` still 1/1 PASS; - `CalcitePPLPatternsIT` still 10/15 with the same five known-pending failures (LogicalCorrelate + `_ShowNumberedToken` BRAIN cases). 
Signed-off-by: Kai Huang --- .../storage/serde/ExtendedRelJson.java | 31 +++++++++++++++++-- .../storage/serde/RexStandardizer.java | 22 +++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java index d77dee3e29..25a1dd6840 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java @@ -460,7 +460,8 @@ RexNode toRex(RelInput relInput, @PolyNull Object o) { distinct, false); } else { - final SqlOperator operator = requireNonNull(toOp(opMap), "operator"); + final SqlOperator operator = + requireNonNull(toOp(opMap, rexOperands.size()), "operator"); final RelDataType type; if (jsonType != null) { type = toType(typeFactory, jsonType); @@ -553,6 +554,22 @@ private static T get(Map // Copied from RelJson for the usage of custom operatorTable @Nullable SqlOperator toOp(Map map) { + return toOp(map, -1); + } + + /** + * Look up an operator matching the {@code map}'s {@code name} / {@code kind} / {@code syntax}. + * + *

When {@code operandCount >= 0}, prefer an overload whose declared operand-count range + * accepts {@code operandCount} — Calcite registers multiple operators under the same SQL name + * with different arities (e.g. {@code REGEXP_REPLACE} maps to {@code REGEXP_REPLACE_3}, + * {@code REGEXP_REPLACE_PG_4}, and {@code REGEXP_REPLACE_5} in PostgreSQL library). Picking by + * name + kind alone returns whichever overload is listed first, so a 4-arg call would bind to + * the 3-arg operator and fail at runtime with {@code IllegalArgumentException: no matching + * method} during {@code RexToLixTranslator} code-gen. Fall back to the first kind match when + * no overload's arity range fits, preserving prior behavior for callers that pass {@code -1}. + */ + @Nullable SqlOperator toOp(Map map, int operandCount) { // in case different operator has the same kind, check with both name and kind. String name = get(map, "name"); String kind = get(map, "kind"); @@ -566,11 +583,21 @@ private static T get(Map sqlSyntax, operators, SqlNameMatchers.liberal()); + SqlOperator firstKindMatch = null; for (SqlOperator operator : operators) { - if (operator.kind == sqlKind) { + if (operator.kind != sqlKind) { + continue; + } + if (firstKindMatch == null) { + firstKindMatch = operator; + } + if (operandCount < 0 || operator.getOperandCountRange().isValidCount(operandCount)) { return operator; } } + if (firstKindMatch != null) { + return firstKindMatch; + } String class_ = (String) map.get("class"); if (class_ != null) { return AvaticaUtils.instantiatePlugin(SqlOperator.class, class_); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java index 78afd4aaf0..b23d9c3829 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java @@ -32,6 +32,7 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlLibraryOperators; import org.apache.calcite.sql.type.BasicSqlType; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; @@ -73,6 +74,27 @@ public RexNode visitCall(final RexCall call, ScriptParameterHelper helper) { // We can downgrade to still use `Sarg` literal instead of replacing it with parameter. } } + // PPL's `patterns` lowering emits 4-arg `REGEXP_REPLACE_PG_4(field, pattern, replacement, + // 'g')` so the analytics-engine/DataFusion path gets global replacement (DataFusion's + // `regexp_replace` defaults to first-match-only without an explicit flag). Calcite's + // 3-arg `REGEXP_REPLACE_3` is already replace-all in its enumerable runtime, but the PG_4 + // form has no matching `SqlFunctions.regexpReplace(String, String, String, String)` impl + // — Calcite's runtime only ships the `(String, String, String, int[, ...])` shapes (the + // 4-arg variant treats the 4th arg as start-position, not a flags string). The script + // pushdown path codegen would fail with `No applicable constructor/method found`. Collapse + // the 4-arg call to the 3-arg form whenever the flags literal is exactly `"g"`, which + // preserves replace-all semantics on the V2 / Calcite-pushdown side. + if (call.op == SqlLibraryOperators.REGEXP_REPLACE_PG_4 + && call.operands.size() == 4 + && call.operands.get(3) instanceof RexLiteral flagsLit + && "g".equals(flagsLit.getValueAs(String.class))) { + return helper.rexBuilder + .makeCall( + call.getType(), + SqlLibraryOperators.REGEXP_REPLACE_3, + call.operands.subList(0, 3)) + .accept(this, helper); + } // Some functions only support limited numeric type. Keep conservative here. 
boolean allowNumericTypeWiden = call.op.kind.belongsTo(SqlKind.BINARY_ARITHMETIC) From cb117fb971632ae1fccbb7ea1857103484d955b0 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 14:45:18 -0700 Subject: [PATCH 07/15] fix(opensearch): revert arity-aware toOp; restore spath/JSON_EXTRACT doctest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arity filter added to ExtendedRelJson.toOp in the previous commit broke SAFE_CAST → JSON_EXTRACT deserialization (used by `spath` lowering): the PPL JSON_EXTRACT UDF, registered as an anonymous UserDefinedFunctionBuilder subclass, doesn't expose a meaningful getOperandCountRange(), so my filter fell through to the firstKindMatch path and skipped the AvaticaUtils.instantiatePlugin "class" path that previously resolved the UDF. spath.md doctest started returning RuntimeException on `source=structured | spath input=doc_n n | eval n=cast(n as int) | stats sum(n)`. The RexStandardizer collapse (4-arg `REGEXP_REPLACE_PG_4(..., 'g')` → 3-arg `REGEXP_REPLACE_3`) already fixes the patterns.md doctest at the source side — by the time pushdown serialization runs, no 4-arg call exists for toOp to disambiguate. The arity filter was defensive only and no longer carries its weight; revert toOp to the original first-kind-match behavior, plus a spotless re-flow that came in with the same change. Verified locally on a fresh cluster: - spath.md doctest query → returns sum(n)=6 (was 500). - patterns.md doctest query → returns fully-tokenized aggregation rows. - CalcitePPLDashboardPatternsIT → 1/1 PASS. - CalcitePPLPatternsIT → 10/15 PASS (same baseline; same five known-pending BRAIN failures tracked separately). 
Signed-off-by: Kai Huang --- .../storage/serde/ExtendedRelJson.java | 31 ++----------------- .../storage/serde/RexStandardizer.java | 7 ++--- 2 files changed, 5 insertions(+), 33 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java index 25a1dd6840..d77dee3e29 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/ExtendedRelJson.java @@ -460,8 +460,7 @@ RexNode toRex(RelInput relInput, @PolyNull Object o) { distinct, false); } else { - final SqlOperator operator = - requireNonNull(toOp(opMap, rexOperands.size()), "operator"); + final SqlOperator operator = requireNonNull(toOp(opMap), "operator"); final RelDataType type; if (jsonType != null) { type = toType(typeFactory, jsonType); @@ -554,22 +553,6 @@ private static T get(Map // Copied from RelJson for the usage of custom operatorTable @Nullable SqlOperator toOp(Map map) { - return toOp(map, -1); - } - - /** - * Look up an operator matching the {@code map}'s {@code name} / {@code kind} / {@code syntax}. - * - *

When {@code operandCount >= 0}, prefer an overload whose declared operand-count range - * accepts {@code operandCount} — Calcite registers multiple operators under the same SQL name - * with different arities (e.g. {@code REGEXP_REPLACE} maps to {@code REGEXP_REPLACE_3}, - * {@code REGEXP_REPLACE_PG_4}, and {@code REGEXP_REPLACE_5} in PostgreSQL library). Picking by - * name + kind alone returns whichever overload is listed first, so a 4-arg call would bind to - * the 3-arg operator and fail at runtime with {@code IllegalArgumentException: no matching - * method} during {@code RexToLixTranslator} code-gen. Fall back to the first kind match when - * no overload's arity range fits, preserving prior behavior for callers that pass {@code -1}. - */ - @Nullable SqlOperator toOp(Map map, int operandCount) { // in case different operator has the same kind, check with both name and kind. String name = get(map, "name"); String kind = get(map, "kind"); @@ -583,21 +566,11 @@ private static T get(Map sqlSyntax, operators, SqlNameMatchers.liberal()); - SqlOperator firstKindMatch = null; for (SqlOperator operator : operators) { - if (operator.kind != sqlKind) { - continue; - } - if (firstKindMatch == null) { - firstKindMatch = operator; - } - if (operandCount < 0 || operator.getOperandCountRange().isValidCount(operandCount)) { + if (operator.kind == sqlKind) { return operator; } } - if (firstKindMatch != null) { - return firstKindMatch; - } String class_ = (String) map.get("class"); if (class_ != null) { return AvaticaUtils.instantiatePlugin(SqlOperator.class, class_); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java index b23d9c3829..3bae8d8cd4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java @@ -88,11 +88,10 @@ public RexNode visitCall(final RexCall call, ScriptParameterHelper helper) { && call.operands.size() == 4 && call.operands.get(3) instanceof RexLiteral flagsLit && "g".equals(flagsLit.getValueAs(String.class))) { - return helper.rexBuilder + return helper + .rexBuilder .makeCall( - call.getType(), - SqlLibraryOperators.REGEXP_REPLACE_3, - call.operands.subList(0, 3)) + call.getType(), SqlLibraryOperators.REGEXP_REPLACE_3, call.operands.subList(0, 3)) .accept(this, helper); } // Some functions only support limited numeric type. Keep conservative here. From 9938e737749ebb1e21ec6652c13665acef50113e Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 14:59:46 -0700 Subject: [PATCH 08/15] style: trim verbose comments per review Per @penghuo: drop the verbose multi-line explanatory comments and tighten the class/method javadoc on the new dashboard IT. Signed-off-by: Kai Huang --- .../sql/api/UnifiedQueryContext.java | 6 ---- .../sql/calcite/CalciteRelNodeVisitor.java | 8 ++--- .../remote/CalcitePPLDashboardPatternsIT.java | 29 +------------------ .../storage/serde/RexStandardizer.java | 14 +++------ 4 files changed, 7 insertions(+), 50 deletions(-) diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 59d2a8efea..82c7b3cf91 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -157,12 +157,6 @@ public static class Builder { PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.joinSubsearchLimit()), Map.entry(CALCITE_ENGINE_ENABLED, true), Map.entry(PPL_REX_MAX_MATCH_LIMIT, 10), - // PPL `patterns` command defaults — mirror the cluster-side defaults registered in - // OpenSearchSettings (DEFAULT_PATTERN_METHOD_SETTING etc.). 
Without these the - // analytics-engine path's AstBuilder.visitPatternsCommand reads null from - // `settings.getSettingValue(Key.PATTERN_METHOD)`, fails with - // `PatternMethod.valueOf("NULL")` IllegalArgumentException, and every query that - // omits an explicit `method=` / `mode=` argument is rejected. Map.entry(PATTERN_METHOD, "SIMPLE_PATTERN"), Map.entry(PATTERN_MODE, "LABEL"), Map.entry(PATTERN_MAX_SAMPLE_COUNT, 10), diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 5d2df0eb58..be68d1b240 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -4303,12 +4303,8 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { for (String groupCandidate : groupCandidates) { RexNode innerRex; if (ParseMethod.PATTERNS.equals(parseMethod)) { - // Emit `regexp_replace(field, pattern, replacement, "g")` directly so the replacement - // is global (every match replaced). DataFusion's `regexp_replace` defaults to FIRST - // match only without the "g" flag — using the 3-arg form via the REPLACE handler - // produces `<*>@pyrami.com` instead of `<*>@<*>.<*>` on the analytics-engine route. - // Calcite's REGEXP_REPLACE_PG_4 with "g" matches what `replaceAll` does, so V2 / - // Calcite-path semantics are preserved. + // Emit 4-arg REGEXP_REPLACE_PG_4 with "g" so DataFusion's regexp_replace + // (first-match-only by default) replaces every match. 
RexNode globalFlag = context.rexBuilder.makeLiteral( "g", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java index a473766cad..188391acfc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java @@ -14,29 +14,7 @@ import org.junit.Test; import org.opensearch.sql.ppl.PPLIntegTestCase; -/** - * Pins the exact PPL query shape OpenSearch Dashboards uses to render BRAIN-pattern panels: {@code - * patterns ... method=BRAIN mode=label} followed by {@code stats count(), take(message, 1) by - * patterns_field | sort -pattern_count | fields patterns_field, pattern_count, sample_logs}. - * - *

The combination exercises three pieces that all have to be wired through the analytics-engine - * route together: - * - *

    - *
  • {@code INTERNAL_PATTERN} as a window function (label mode emits one matched wildcard - * pattern per row, broadcast across the partition). - *
  • {@code take(field, n)} aggregate to capture a representative sample log per discovered - * pattern group. - *
  • {@code count()} aggregate + {@code sort} on the result. - *
- * - *

Schema-only assertions: BRAIN's clustering depends on a corpus large enough that the default - * heuristics fire, so per-row pattern strings are sensitive to dataset version. The schema check - * guarantees the query plans, executes, and returns the dashboard's three expected columns in the - * right order. - * - * @opensearch.internal - */ +/** Pins the BRAIN-label pattern panel query shape used by OpenSearch Dashboards. */ public class CalcitePPLDashboardPatternsIT extends PPLIntegTestCase { @Override public void init() throws Exception { @@ -45,11 +23,6 @@ public void init() throws Exception { loadIndex(Index.HDFS_LOGS); } - /** - * Mirrors the canonical Dashboards BRAIN-label pattern panel query. Unfiltered variant — pins the - * end-to-end plan compiles and returns the dashboard's three-column shape (matched wildcard - * pattern + occurrence count + sample log). - */ @Test public void testDashboardBrainLabelStatsByPatternsField() throws IOException { JSONObject result = diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java index 3bae8d8cd4..f516480bac 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java @@ -74,16 +74,10 @@ public RexNode visitCall(final RexCall call, ScriptParameterHelper helper) { // We can downgrade to still use `Sarg` literal instead of replacing it with parameter. } } - // PPL's `patterns` lowering emits 4-arg `REGEXP_REPLACE_PG_4(field, pattern, replacement, - // 'g')` so the analytics-engine/DataFusion path gets global replacement (DataFusion's - // `regexp_replace` defaults to first-match-only without an explicit flag). 
Calcite's - // 3-arg `REGEXP_REPLACE_3` is already replace-all in its enumerable runtime, but the PG_4 - // form has no matching `SqlFunctions.regexpReplace(String, String, String, String)` impl - // — Calcite's runtime only ships the `(String, String, String, int[, ...])` shapes (the - // 4-arg variant treats the 4th arg as start-position, not a flags string). The script - // pushdown path codegen would fail with `No applicable constructor/method found`. Collapse - // the 4-arg call to the 3-arg form whenever the flags literal is exactly `"g"`, which - // preserves replace-all semantics on the V2 / Calcite-pushdown side. + // Calcite's enumerable runtime has no regexpReplace(String, String, String, String) impl, + // so 4-arg REGEXP_REPLACE_PG_4 fails Janino codegen on the script-pushdown path. Collapse + // to 3-arg REGEXP_REPLACE_3 when the flags literal is "g" — the 3-arg form is already + // replace-all, so semantics are preserved. if (call.op == SqlLibraryOperators.REGEXP_REPLACE_PG_4 && call.operands.size() == 4 && call.operands.get(3) instanceof RexLiteral flagsLit From 6e7b6762e681566cb65693a120d95433b6382293 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 15:31:47 -0700 Subject: [PATCH 09/15] test(integ-test): add verifyDataRows to dashboard patterns IT Per @dai-chen: schema-only verification doesn't catch "query succeeds but returns 0/wrong rows". Pin the 4 BRAIN clusters with their exact patterns, counts, and sample logs against the HDFS_LOGS fixture. 
Signed-off-by: Kai Huang --- .../remote/CalcitePPLDashboardPatternsIT.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java index 188391acfc..1ab5c7fc9b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLDashboardPatternsIT.java @@ -6,9 +6,12 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_HDFS_LOGS; +import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; +import com.google.common.collect.ImmutableList; import java.io.IOException; import org.json.JSONObject; import org.junit.Test; @@ -42,5 +45,30 @@ public void testDashboardBrainLabelStatsByPatternsField() throws IOException { schema("patterns_field", "string"), schema("pattern_count", "bigint"), schema("sample_logs", "array")); + verifyDataRows( + result, + rows( + "BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size" + + " <*>", + 2, + ImmutableList.of( + "BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added" + + " to blk_-7017553867379051457 size 67108864")), + rows( + "PacketResponder failed <*> blk_<*>", + 2, + ImmutableList.of("PacketResponder failed for blk_6996194389878584395")), + rows( + "Verification succeeded <*> blk_<*>", + 2, + ImmutableList.of("Verification succeeded for blk_-1547954353065580372")), + rows( + "<*> NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_<*>_<*>_r_<*>_<*>/part<*> blk_<*>", + 2, + ImmutableList.of( + 
"BLOCK* NameSystem.allocateBlock:" + + " /user/root/sortrand/_temporary/_task_200811092030_0002_r_000296_0/part-00296." + + " blk_-6620182933895093708"))); } } From ab79941448130f4bd5bc0ad54a9057f5096108f6 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 15:38:41 -0700 Subject: [PATCH 10/15] refactor(core): fuse PATTERNS if-else in buildParseRelNode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @dai-chen: the two consecutive `if (PATTERNS)` branches in buildParseRelNode share a condition; merge into a single if/else with each branch fully co-located. Pure refactor — CalcitePPLPatternsTest (logical-plan unit test) passes. Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 42 ++++++++----------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index be68d1b240..28dd29355c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -4301,22 +4301,35 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { } List newFields = new ArrayList<>(); for (String groupCandidate : groupCandidates) { - RexNode innerRex; if (ParseMethod.PATTERNS.equals(parseMethod)) { // Emit 4-arg REGEXP_REPLACE_PG_4 with "g" so DataFusion's regexp_replace // (first-match-only by default) replaces every match. 
RexNode globalFlag = context.rexBuilder.makeLiteral( "g", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); - innerRex = + RexNode innerRex = context.rexBuilder.makeCall( SqlLibraryOperators.REGEXP_REPLACE_PG_4, ArrayUtils.add(rexNodeList, globalFlag)); + RexNode emptyString = + context.rexBuilder.makeLiteral( + "", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); + RexNode isEmptyCondition = + context.rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, sourceField, emptyString); + RexNode isNullCondition = + context.rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, sourceField); + // Calcite regexp_replace doesn't accept empty string; guard NULL / "" via CASE. + newFields.add( + context.rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + isNullCondition, + emptyString, + isEmptyCondition, + emptyString, + innerRex)); } else { - innerRex = + RexNode innerRex = PPLFuncImpTable.INSTANCE.resolve( context.rexBuilder, ParseUtils.BUILTIN_FUNCTION_MAP.get(parseMethod), rexNodeList); - } - if (!ParseMethod.PATTERNS.equals(parseMethod)) { newFields.add( PPLFuncImpTable.INSTANCE.resolve( context.rexBuilder, @@ -4326,25 +4339,6 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { groupCandidate, context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true))); - } else { - RexNode emptyString = - context.rexBuilder.makeLiteral( - "", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); - RexNode isEmptyCondition = - context.rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, sourceField, emptyString); - RexNode isNullCondition = - context.rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, sourceField); - // Calcite regexp_replace(string, string, string) doesn't accept empty string. 
- // So use case when condition here to handle corner cases - newFields.add( - context.rexBuilder.makeCall( - SqlStdOperatorTable.CASE, // case - isNullCondition, - emptyString, // when field is NULL then '' - isEmptyCondition, - emptyString, // when field = '' then '' - innerRex // else regexp_replace(field, regex, replace_string) - )); } } projectPlusOverriding(newFields, groupCandidates, context); From b35aa03b78eb1d7f41f67ee071ac07814a7a9b02 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 15:54:21 -0700 Subject: [PATCH 11/15] test(integ-test): include CalcitePPLDashboardPatternsIT in CalciteNoPushdownIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per CLAUDE.md guidance, new Calcite IT classes should be added to the no-pushdown suite. Verified locally that the dashboard query also passes with pushdown disabled (Dashboard 1/1, Patterns 10/15 — same baseline). Signed-off-by: Kai Huang --- .../java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 206ee52fca..d46649d56d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -68,6 +68,7 @@ CalcitePPLCastFunctionIT.class, CalcitePPLConditionBuiltinFunctionIT.class, CalcitePPLCryptographicFunctionIT.class, + CalcitePPLDashboardPatternsIT.class, CalcitePPLDedupIT.class, CalcitePPLEventstatsIT.class, CalciteStreamstatsCommandIT.class, From 628d660871f52772e07f5093b7a527dad12df3b2 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 16:00:19 -0700 Subject: [PATCH 12/15] test(integ-test): regenerate agg-push explain YAML for 3-arg REGEXP_REPLACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The previous YAML capture pre-dated the RexStandardizer 4-arg → 3-arg collapse landing. With the collapse, the pushed-down compounded script serializes the 3-arg form (SOURCES has 7 entries, no trailing 'g'). Signed-off-by: Kai Huang --- .../calcite/explain_patterns_simple_pattern_agg_push.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml index e32d32688f..0da68f49b2 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml @@ -7,4 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#1915:LogicalAggregate.NONE.[](input=RelSubset#1914,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQIhXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQo
gICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDcsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1779820506007195000,"SOURCES":[0,0,2,2,0,2,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>","g"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQIhXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgIC
AgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgIm
R5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDcsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1779820506007195000,"SOURCES":[0,0,2,2,0,2,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>","g"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#121523:LogicalAggregate.NONE.[](input=RelSubset#121522,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQo
gICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1779836363879052000,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAg
ICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","la
ng":"opensearch_compounded_script","params":{"utcTimestamp":1779836363879052000,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) From 31f248ab52450dd50b3c2bec38ed17b301575645 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 26 May 2026 16:23:23 -0700 Subject: [PATCH 13/15] revert(core): drop SQL-side 'g' flag for patterns; move to DataFusion adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @penghuo's review: DataFusion-specific concerns shouldn't live in SQL core. The 'g' flag is needed only because DataFusion's regexp_replace defaults to first-match-only — Calcite's 3-arg form is already replace-all on both pushdown and no-pushdown paths. Restores SQL core, RexStandardizer, the patterns unit test, and the SIMPLE- patterns explain YAMLs to their upstream/main shape. The 'g' flag is appended in opensearch-project/OpenSearch#21797's RegexpReplaceAdapter when converting 3-arg REGEXP_REPLACE to DataFusion. Same end-user behavior, smaller SQL diff, and the Calcite no-pushdown path no longer diverges from the pushdown YAML. 
Signed-off-by: Kai Huang --- .../sql/calcite/CalciteRelNodeVisitor.java | 48 +++++----- .../explain_patterns_simple_pattern.yaml | 4 +- ...lain_patterns_simple_pattern_agg_push.yaml | 4 +- .../storage/serde/RexStandardizer.java | 15 --- .../ppl/calcite/CalcitePPLPatternsTest.java | 93 +++++++++---------- 5 files changed, 71 insertions(+), 93 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 28dd29355c..10c5d2aa88 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -4301,15 +4301,20 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { } List newFields = new ArrayList<>(); for (String groupCandidate : groupCandidates) { - if (ParseMethod.PATTERNS.equals(parseMethod)) { - // Emit 4-arg REGEXP_REPLACE_PG_4 with "g" so DataFusion's regexp_replace - // (first-match-only by default) replaces every match. 
- RexNode globalFlag = - context.rexBuilder.makeLiteral( - "g", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); - RexNode innerRex = - context.rexBuilder.makeCall( - SqlLibraryOperators.REGEXP_REPLACE_PG_4, ArrayUtils.add(rexNodeList, globalFlag)); + RexNode innerRex = + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, ParseUtils.BUILTIN_FUNCTION_MAP.get(parseMethod), rexNodeList); + if (!ParseMethod.PATTERNS.equals(parseMethod)) { + newFields.add( + PPLFuncImpTable.INSTANCE.resolve( + context.rexBuilder, + BuiltinFunctionName.INTERNAL_ITEM, + innerRex, + context.rexBuilder.makeLiteral( + groupCandidate, + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), + true))); + } else { RexNode emptyString = context.rexBuilder.makeLiteral( "", context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), true); @@ -4317,28 +4322,17 @@ private void buildParseRelNode(Parse node, CalcitePlanContext context) { context.rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, sourceField, emptyString); RexNode isNullCondition = context.rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, sourceField); - // Calcite regexp_replace doesn't accept empty string; guard NULL / "" via CASE. + // Calcite regexp_replace(string, string, string) doesn't accept empty string. 
+ // So use case when condition here to handle corner cases newFields.add( context.rexBuilder.makeCall( - SqlStdOperatorTable.CASE, + SqlStdOperatorTable.CASE, // case isNullCondition, - emptyString, + emptyString, // when field is NULL then '' isEmptyCondition, - emptyString, - innerRex)); - } else { - RexNode innerRex = - PPLFuncImpTable.INSTANCE.resolve( - context.rexBuilder, ParseUtils.BUILTIN_FUNCTION_MAP.get(parseMethod), rexNodeList); - newFields.add( - PPLFuncImpTable.INSTANCE.resolve( - context.rexBuilder, - BuiltinFunctionName.INTERNAL_ITEM, - innerRex, - context.rexBuilder.makeLiteral( - groupCandidate, - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), - true))); + emptyString, // when field = '' then '' + innerRex // else regexp_replace(field, regex, replace_string) + )); } } projectPlusOverriding(newFields, groupCandidates, context); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml index 932af32b40..ccf7e71efa 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern.yaml @@ -1,8 +1,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, 
REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - EnumerableCalc(expr#0..10=[{inputs}], expr#11=[Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR], expr#12=[SEARCH($t9, $t11)], expr#13=['':VARCHAR], expr#14=['[a-zA-Z0-9]+':VARCHAR], expr#15=['<*>':VARCHAR], expr#16=['g':VARCHAR], expr#17=[REGEXP_REPLACE($t9, $t14, $t15, $t16)], expr#18=[CASE($t12, $t13, $t17)], proj#0..10=[{exprs}], patterns_field=[$t18]) + EnumerableCalc(expr#0..10=[{inputs}], expr#11=[Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR], expr#12=[SEARCH($t9, $t11)], expr#13=['':VARCHAR], expr#14=['[a-zA-Z0-9]+':VARCHAR], expr#15=['<*>':VARCHAR], expr#16=[REGEXP_REPLACE($t9, $t14, $t15)], expr#17=[CASE($t12, $t13, $t16)], proj#0..10=[{exprs}], patterns_field=[$t17]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml index 0da68f49b2..80677913ec 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_patterns_simple_pattern_agg_push.yaml @@ -3,8 +3,8 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'pattern'))], pattern_count=[$1], 
tokens=[SAFE_CAST(ITEM(PATTERN_PARSER($0, $2), 'tokens'))], sample_logs=[$2]) LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)]) - LogicalProject(email=[$9], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))], $f18=[10]) + LogicalProject(email=[$9], patterns_field=[CASE(SEARCH($9, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($9, '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))], $f18=[10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..2=[{inputs}], expr#3=[PATTERN_PARSER($t0, $t2)], expr#4=['pattern'], expr#5=[ITEM($t3, $t4)], expr#6=[SAFE_CAST($t5)], expr#7=['tokens'], expr#8=[ITEM($t3, $t7)], expr#9=[SAFE_CAST($t8)], patterns_field=[$t6], pattern_count=[$t1], tokens=[$t9], sample_logs=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#121523:LogicalAggregate.NONE.[](input=RelSubset#121522,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQo
gICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1779836363879052000,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAg
ICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","la
ng":"opensearch_compounded_script","params":{"utcTimestamp":1779836363879052000,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},pattern_count=COUNT($1),sample_logs=TAKE($0, $2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"patterns_field":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAg
ICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"pattern_count":{"value_count":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQH2nsKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiT1IiLAogICAgICAgICJraW5kIjogIk9SIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiSVMgTlVMTCIsCiAgICAgICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICAgICAic3ludGF4IjogIlBPU1RGSVgiCiAgICAgICAgICB9LAogICAgICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiPSIsCiAgICAgICAgICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAgICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICAgIF0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAzLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICA
ibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiUkVHRVhQX1JFUExBQ0UiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDQsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDYsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,2,0,2,2],"DIGESTS":["email.keyword","email.keyword","","","email.keyword","[a-zA-Z0-9]+","<*>"]}}}},"sample_logs":{"top_hits":{"from":0,"size":10,"version":false,"seq_no_primary_term":false,"explain":false,"_source":false,"fields":[{"field":"email"}]}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java index f516480bac..78afd4aaf0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/serde/RexStandardizer.java @@ -32,7 +32,6 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; -import 
org.apache.calcite.sql.fun.SqlLibraryOperators; import org.apache.calcite.sql.type.BasicSqlType; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; @@ -74,20 +73,6 @@ public RexNode visitCall(final RexCall call, ScriptParameterHelper helper) { // We can downgrade to still use `Sarg` literal instead of replacing it with parameter. } } - // Calcite's enumerable runtime has no regexpReplace(String, String, String, String) impl, - // so 4-arg REGEXP_REPLACE_PG_4 fails Janino codegen on the script-pushdown path. Collapse - // to 3-arg REGEXP_REPLACE_3 when the flags literal is "g" — the 3-arg form is already - // replace-all, so semantics are preserved. - if (call.op == SqlLibraryOperators.REGEXP_REPLACE_PG_4 - && call.operands.size() == 4 - && call.operands.get(3) instanceof RexLiteral flagsLit - && "g".equals(flagsLit.getValueAs(String.class))) { - return helper - .rexBuilder - .makeCall( - call.getType(), SqlLibraryOperators.REGEXP_REPLACE_3, call.operands.subList(0, 3)) - .accept(this, helper); - } // Some functions only support limited numeric type. Keep conservative here. 
boolean allowNumericTypeWiden = call.op.kind.belongsTo(SqlKind.BINARY_ARITHMETIC) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java index 6c1bbe03fd..c272453b82 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLPatternsTest.java @@ -35,13 +35,13 @@ public void testPatternsLabelMode_NotShowNumberedToken_ForSimplePatternMethod() String expectedLogical = "LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR, 'g':VARCHAR))])\n" + + " '<*>':VARCHAR))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`\n" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END `patterns_field`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -56,18 +56,18 @@ public void testPatternsLabelMode_ShowNumberedToken_ForSimplePatternMethod() { String expectedLogical = "LogicalProject(ENAME=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1," + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," - + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR, 'g':VARCHAR)), $1), 'tokens'))])\n" + + " '<*>':VARCHAR)), $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, 
expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" - + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['pattern']" - + " AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['pattern'] AS" + + " STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -83,19 +83,19 @@ public void testPatternsLabelModeWithCustomPattern_ShowNumberedToken_ForSimplePa String expectedLogical = "LogicalProject(ENAME=[$1], patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1," + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," - + " '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + + " '[A-H]':VARCHAR, '<*>':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" - + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR," - + " 'g':VARCHAR)), $1), 'tokens'))])\n" + + " TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR))," + + " $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" - + " ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END, `ENAME`)['pattern'] AS" - + " STRING) `patterns_field`, 
TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END," - + " `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "SELECT `ENAME`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + + " '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['pattern'] AS STRING)" + + " `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` =" + + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END, `ENAME`)['tokens'] AS" + + " MAP< VARCHAR, VARCHAR ARRAY >) `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -108,13 +108,13 @@ public void testPatternsLabelModeWithCustomField_NotShowNumberedToken_ForSimpleP String expectedLogical = "LogicalProject(ENAME=[$1], upper=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR)," - + " '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))])\n" + + " '':VARCHAR, REGEXP_REPLACE($1, '[A-H]':VARCHAR, '<*>':VARCHAR))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `ENAME`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>', 'g') END `upper`\n" + + " REGEXP_REPLACE(`ENAME`, '[A-H]', '<*>') END `upper`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -130,20 +130,20 @@ public void testPatternsLabelModeWithPartitionBy_ShowNumberedToken_SimplePattern "LogicalProject(ENAME=[$1], DEPTNO=[$7]," + " patterns_field=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL" + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR, 'g':VARCHAR)), $1), 'pattern'))]," + + " '<*>':VARCHAR)), $1), 'pattern'))]," + " tokens=[SAFE_CAST(ITEM(PATTERN_PARSER(CASE(SEARCH($1, Sarg['':VARCHAR; NULL AS" + " TRUE]:VARCHAR), '':VARCHAR, 
REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR, 'g':VARCHAR)), $1), 'tokens'))])\n" + + " '<*>':VARCHAR)), $1), 'tokens'))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` =" - + " '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," - + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE WHEN" - + " `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," - + " '[a-zA-Z0-9]+', '<*>', 'g') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY" - + " >) `tokens`\n" + "SELECT `ENAME`, `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME`" + + " = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END," + + " `ENAME`)['pattern'] AS STRING) `patterns_field`, TRY_CAST(PATTERN_PARSER(CASE" + + " WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," + + " '[a-zA-Z0-9]+', '<*>') END, `ENAME`)['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >)" + + " `tokens`\n" + "FROM `scott`.`EMP`"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -248,18 +248,18 @@ public void testPatternsAggregationMode_NotShowNumberedToken_ForSimplePatternMet "LogicalAggregate(group=[{1}], pattern_count=[COUNT($1)], sample_logs=[TAKE($0, $2)])\n" + " LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL" + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR, 'g':VARCHAR))], $f9=[10])\n" + + " '<*>':VARCHAR))], $f9=[10])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`," - + " '[a-zA-Z0-9]+', '<*>', 'g') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL" - + " OR `ENAME` = '' THEN '' ELSE 
REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g')" - + " END) `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + + " '[a-zA-Z0-9]+', '<*>') END `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)" + + " `pattern_count`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -276,22 +276,21 @@ public void testPatternsAggregationMode_ShowNumberedToken_ForSimplePatternMethod + " $2)])\n" + " LogicalProject(ENAME=[$1], patterns_field=[CASE(SEARCH($1, Sarg['':VARCHAR; NULL" + " AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1, '[a-zA-Z0-9]+':VARCHAR," - + " '<*>':VARCHAR, 'g':VARCHAR))], $f9=[10])\n" + + " '<*>':VARCHAR))], $f9=[10])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`," - + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" - + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''" - + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," - + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`," - + " `TAKE`(`ENAME`, 10) `sample_logs`\n" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['pattern']" + + " AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN" + + " '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END) `pattern_count`," + + " 
TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`, 10))['tokens']" + + " AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, `TAKE`(`ENAME`, 10) `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -308,22 +307,22 @@ public void testPatternsAggregationModeWithGroupBy_ShowNumberedToken_ForSimplePa + " $3)])\n" + " LogicalProject(ENAME=[$1], DEPTNO=[$7], patterns_field=[CASE(SEARCH($1," + " Sarg['':VARCHAR; NULL AS TRUE]:VARCHAR), '':VARCHAR, REGEXP_REPLACE($1," - + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR, 'g':VARCHAR))], $f9=[10])\n" + + " '[a-zA-Z0-9]+':VARCHAR, '<*>':VARCHAR))], $f9=[10])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `DEPTNO`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN ''" - + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END, `TAKE`(`ENAME`," + + " ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + " 10))['pattern'] AS STRING) `patterns_field`, COUNT(CASE WHEN `ENAME` IS NULL OR" - + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END)" + + " `ENAME` = '' THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END)" + " `pattern_count`, TRY_CAST(PATTERN_PARSER(CASE WHEN `ENAME` IS NULL OR `ENAME` = ''" - + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END," - + " `TAKE`(`ENAME`, 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`," - + " `TAKE`(`ENAME`, 10) `sample_logs`\n" + + " THEN '' ELSE REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END, `TAKE`(`ENAME`," + + " 10))['tokens'] AS MAP< VARCHAR, VARCHAR ARRAY >) `tokens`, 
`TAKE`(`ENAME`, 10)" + + " `sample_logs`\n" + "FROM `scott`.`EMP`\n" + "GROUP BY `DEPTNO`, CASE WHEN `ENAME` IS NULL OR `ENAME` = '' THEN '' ELSE" - + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>', 'g') END"; + + " REGEXP_REPLACE(`ENAME`, '[a-zA-Z0-9]+', '<*>') END"; verifyPPLToSparkSQL(root, expectedSparkSql); } From b83058237619c627c91b1ef708b5f8b09bcdb319 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 27 May 2026 09:51:22 -0700 Subject: [PATCH 14/15] test(api): pin UnifiedQueryContext PATTERN_* defaults via planner test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @dai-chen: verify the RelNode produced when `patterns ` is run without explicit method=/mode= args — exercises that the PATTERN_METHOD and PATTERN_MODE defaults flow through to AstBuilder.visitPatternsCommand and produce a valid SIMPLE/LABEL lowering with a `patterns_field` projection. Signed-off-by: Kai Huang --- .../sql/api/UnifiedQueryPlannerTest.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java index fbf9f54dcd..48f5193352 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java @@ -144,4 +144,18 @@ public void assertionErrorIsWrappedAsSemanticCheckException() { .assertErrorMessageEquals("Failed to plan query: invalid plan structure") .assertCauseType(AssertionError.class); } + + /** + * Without the {@code PATTERN_*} defaults in {@link UnifiedQueryContext}, a bare + * {@code patterns } (no explicit {@code method=}/{@code mode=}) dies at parse time + * with {@code PatternMethod.valueOf("NULL")} because {@code AstBuilder.visitPatternsCommand} + * reads a null from {@code settings.getSettingValue(Key.PATTERN_METHOD)}. 
With the defaults + * present, the planner lowers patterns to SIMPLE / LABEL mode and adds {@code patterns_field}. + */ + @Test + public void testPPLPatternsPicksUpDefaults() { + givenQuery("source = catalog.employees | patterns name") + .assertPlanContains("REGEXP_REPLACE") + .assertFields("id", "name", "age", "department", "patterns_field"); + } } From 42799d4da89743975d578075bea76ed4f0d3b4ae Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Wed, 27 May 2026 10:12:50 -0700 Subject: [PATCH 15/15] style: spotlessApply Signed-off-by: Kai Huang --- .../opensearch/sql/api/UnifiedQueryPlannerTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java index 48f5193352..bf9ff38d69 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java @@ -146,11 +146,11 @@ public void assertionErrorIsWrappedAsSemanticCheckException() { } /** - * Without the {@code PATTERN_*} defaults in {@link UnifiedQueryContext}, a bare - * {@code patterns } (no explicit {@code method=}/{@code mode=}) dies at parse time - * with {@code PatternMethod.valueOf("NULL")} because {@code AstBuilder.visitPatternsCommand} - * reads a null from {@code settings.getSettingValue(Key.PATTERN_METHOD)}. With the defaults - * present, the planner lowers patterns to SIMPLE / LABEL mode and adds {@code patterns_field}. + * Without the {@code PATTERN_*} defaults in {@link UnifiedQueryContext}, a bare {@code patterns + * } (no explicit {@code method=}/{@code mode=}) dies at parse time with {@code + * PatternMethod.valueOf("NULL")} because {@code AstBuilder.visitPatternsCommand} reads a null + * from {@code settings.getSettingValue(Key.PATTERN_METHOD)}. 
With the defaults present, the + * planner lowers patterns to SIMPLE / LABEL mode and adds {@code patterns_field}. */ @Test public void testPPLPatternsPicksUpDefaults() {