Skip to content

Commit fcd59d0

Browse files
committed
Use structured condition tokens for filter translation
1 parent 6b7202a commit fcd59d0

5 files changed

Lines changed: 70 additions & 43 deletions

File tree

arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowFilter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
* relational expression in Arrow.
3535
*/
3636
class ArrowFilter extends Filter implements ArrowRel {
37-
private final List<List<String>> match;
37+
private final List<List<List<String>>> match;
3838

3939
ArrowFilter(RelOptCluster cluster, RelTraitSet traitSet, RelNode input, RexNode condition) {
4040
super(cluster, traitSet, input, condition);

arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowRel.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ public interface ArrowRel extends RelNode {
4141
* {@link ArrowRel} nodes into a SQL query. */
4242
class Implementor {
4343
@Nullable List<Integer> selectFields;
44-
final List<List<String>> whereClause = new ArrayList<>();
44+
final List<List<List<String>>> whereClause = new ArrayList<>();
4545
@Nullable RelOptTable table;
4646
@Nullable ArrowTable arrowTable;
4747

4848
/** Adds new predicates.
4949
*
5050
* @param predicates Predicates in CNF form (outer list is AND, inner list is OR)
5151
*/
52-
void addFilters(List<List<String>> predicates) {
52+
void addFilters(List<List<List<String>>> predicates) {
5353
whereClause.addAll(predicates);
5454
}
5555

arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public class ArrowTable extends AbstractTable
9797
* {@link org.apache.calcite.adapter.arrow.ArrowMethod#ARROW_QUERY}. */
9898
@SuppressWarnings("unused")
9999
public Enumerable<Object> query(DataContext root, ImmutableIntList fields,
100-
List<List<String>> conditions) {
100+
List<List<List<String>>> conditions) {
101101
requireNonNull(fields, "fields");
102102
final Projector projector;
103103
final Filter filter;
@@ -120,10 +120,10 @@ public Enumerable<Object> query(DataContext root, ImmutableIntList fields,
120120
projector = null;
121121

122122
final List<TreeNode> conjuncts = new ArrayList<>(conditions.size());
123-
for (List<String> orGroup : conditions) {
123+
for (List<List<String>> orGroup : conditions) {
124124
final List<TreeNode> disjuncts = new ArrayList<>(orGroup.size());
125-
for (String condition : orGroup) {
126-
disjuncts.add(parseSingleCondition(condition));
125+
for (List<String> conditionParts : orGroup) {
126+
disjuncts.add(parseSingleCondition(conditionParts));
127127
}
128128
if (disjuncts.size() == 1) {
129129
conjuncts.add(disjuncts.get(0));
@@ -178,30 +178,39 @@ private static RelDataType deduceRowType(Schema schema,
178178
return builder.build();
179179
}
180180

181-
/** Parses a single condition string into a Gandiva {@link TreeNode}.
181+
/** Parses a single condition into a Gandiva {@link TreeNode}.
182182
*
183-
* <p>The condition string format is:
183+
* <p>The condition token format is:
184184
* <ul>
185-
* <li>Binary: {@code "fieldName operator value type"},
186-
* e.g. {@code "intField equal 12 integer"}
187-
* <li>Unary: {@code "fieldName operator"},
188-
* e.g. {@code "intField isnull"}
185+
* <li>Binary: {@code [fieldName, operator, value, type]},
186+
* e.g. {@code ["intField", "equal", "12", "integer"]}</li>
187+
* <li>Unary: {@code [fieldName, operator]},
188+
* e.g. {@code ["intField", "isnull"]}</li>
189189
* </ul>
190+
*
191+
* <p>Using structured tokens avoids string splitting and safely supports
192+
* values with spaces (for example, {@code "'literal with space'"}) and
193+
* empty-string literals ({@code "''"}).
190194
*/
191-
private TreeNode parseSingleCondition(String condition) {
192-
final String[] data = condition.split(" ");
195+
private TreeNode parseSingleCondition(List<String> conditionParts) {
196+
final int size = conditionParts.size();
197+
if (size != 2 && size != 4) {
198+
throw new IllegalArgumentException("Invalid condition: " + conditionParts);
199+
}
200+
final String fieldName = conditionParts.get(0);
201+
final String operator = conditionParts.get(1);
202+
final boolean isBinary = size == 4;
203+
193204
final List<TreeNode> treeNodes = new ArrayList<>(2);
194205
treeNodes.add(
195206
TreeBuilder.makeField(schema.getFields()
196-
.get(schema.getFields().indexOf(schema.findField(data[0])))));
207+
.get(schema.getFields().indexOf(schema.findField(fieldName)))));
197208

198-
// if the split condition has more than two parts it's a binary operator
199-
// with an additional literal node
200-
if (data.length > 2) {
201-
treeNodes.add(makeLiteralNode(data[2], data[3]));
209+
if (isBinary) {
210+
treeNodes.add(makeLiteralNode(conditionParts.get(2),
211+
conditionParts.get(3)));
202212
}
203213

204-
final String operator = data[1];
205214
return TreeBuilder.makeFunction(operator, treeNodes, new ArrowType.Bool());
206215
}
207216

arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
import org.apache.calcite.sql.type.SqlTypeName;
3030
import org.apache.calcite.util.DateString;
3131

32+
import com.google.common.collect.ImmutableList;
33+
3234
import org.checkerframework.checker.nullness.qual.Nullable;
3335

3436
import java.text.SimpleDateFormat;
@@ -41,7 +43,7 @@
4143
import static java.util.Objects.requireNonNull;
4244

4345
/**
44-
* Translates a {@link RexNode} expression to a Gandiva string.
46+
* Translates a {@link RexNode} expression to Gandiva predicate tokens.
4547
*/
4648
class ArrowTranslator {
4749
final RexBuilder rexBuilder;
@@ -65,14 +67,14 @@ public static ArrowTranslator create(RexBuilder rexBuilder,
6567
* If exceeded, the original expression is returned unchanged. */
6668
private static final int MAX_CNF_NODE_COUNT = 256;
6769

68-
List<List<String>> translateMatch(RexNode condition) {
70+
List<List<List<String>>> translateMatch(RexNode condition) {
6971
// Convert to CNF; SEARCH nodes are already expanded
7072
// by ArrowFilterRule before reaching here.
7173
final RexNode cnf = RexUtil.toCnf(rexBuilder, MAX_CNF_NODE_COUNT, condition);
7274

73-
final List<List<String>> result = new ArrayList<>();
75+
final List<List<List<String>>> result = new ArrayList<>();
7476
for (RexNode conjunct : RelOptUtil.conjunctions(cnf)) {
75-
final List<String> orGroup = new ArrayList<>();
77+
final List<List<String>> orGroup = new ArrayList<>();
7678
for (RexNode disjunct : RelOptUtil.disjunctions(conjunct)) {
7779
orGroup.add(translateMatch2(disjunct));
7880
}
@@ -109,9 +111,9 @@ private static Object literalValue(RexLiteral literal) {
109111
*
110112
* @param node A RexNode that always evaluates to a boolean expression.
111113
* Currently, this method is only called from translateAnd.
112-
* @return The translated SQL string for the relation.
114+
* @return The translated predicate tokens for the relation.
113115
*/
114-
private String translateMatch2(RexNode node) {
116+
private List<String> translateMatch2(RexNode node) {
115117
switch (node.getKind()) {
116118
case EQUALS:
117119
return translateBinary("equal", "=", (RexCall) node);
@@ -135,7 +137,7 @@ private String translateMatch2(RexNode node) {
135137
return translateUnary("isnotfalse", (RexCall) node);
136138
case INPUT_REF:
137139
final RexInputRef inputRef = (RexInputRef) node;
138-
return fieldNames.get(inputRef.getIndex()) + " istrue";
140+
return ImmutableList.of(fieldNames.get(inputRef.getIndex()), "istrue");
139141
case NOT:
140142
return translateUnary("isfalse", (RexCall) node);
141143
default:
@@ -147,10 +149,10 @@ private String translateMatch2(RexNode node) {
147149
* Translates a call to a binary operator, reversing arguments if
148150
* necessary.
149151
*/
150-
private String translateBinary(String op, String rop, RexCall call) {
152+
private List<String> translateBinary(String op, String rop, RexCall call) {
151153
final RexNode left = call.operands.get(0);
152154
final RexNode right = call.operands.get(1);
153-
@Nullable String expression = translateBinary2(op, left, right);
155+
@Nullable List<String> expression = translateBinary2(op, left, right);
154156
if (expression != null) {
155157
return expression;
156158
}
@@ -162,7 +164,7 @@ private String translateBinary(String op, String rop, RexCall call) {
162164
}
163165

164166
/** Translates a call to a binary operator. Returns null on failure. */
165-
private @Nullable String translateBinary2(String op, RexNode left, RexNode right) {
167+
private @Nullable List<String> translateBinary2(String op, RexNode left, RexNode right) {
166168
if (right.getKind() != SqlKind.LITERAL) {
167169
return null;
168170
}
@@ -180,8 +182,8 @@ private String translateBinary(String op, String rop, RexCall call) {
180182
}
181183
}
182184

183-
/** Combines a field name, operator, and literal to produce a predicate string. */
184-
private String translateOp2(String op, String name, RexLiteral right) {
185+
/** Combines a field name, operator, and literal to produce predicate tokens. */
186+
private List<String> translateOp2(String op, String name, RexLiteral right) {
185187
Object value = literalValue(right);
186188
String valueString = value.toString();
187189
String valueType = getLiteralType(right.getType());
@@ -193,13 +195,13 @@ private String translateOp2(String op, String name, RexLiteral right) {
193195
valueString = "'" + valueString + "'";
194196
}
195197
}
196-
return name + " " + op + " " + valueString + " " + valueType;
198+
return ImmutableList.of(name, op, valueString, valueType);
197199
}
198200

199201
/** Translates a call to a unary operator. */
200-
private String translateUnary(String op, RexCall call) {
202+
private List<String> translateUnary(String op, RexCall call) {
201203
final RexNode opNode = call.operands.get(0);
202-
@Nullable String expression = translateUnary2(op, opNode);
204+
@Nullable List<String> expression = translateUnary2(op, opNode);
203205

204206
if (expression != null) {
205207
return expression;
@@ -209,7 +211,7 @@ private String translateUnary(String op, RexCall call) {
209211
}
210212

211213
/** Translates a call to a unary operator. Returns null on failure. */
212-
private @Nullable String translateUnary2(String op, RexNode opNode) {
214+
private @Nullable List<String> translateUnary2(String op, RexNode opNode) {
213215
if (opNode.getKind() == SqlKind.INPUT_REF) {
214216
final RexInputRef inputRef = (RexInputRef) opNode;
215217
final String name = fieldNames.get(inputRef.getIndex());
@@ -219,9 +221,9 @@ private String translateUnary(String op, RexCall call) {
219221
return null;
220222
}
221223

222-
/** Combines a field name and a unary operator to produce a predicate string. */
223-
private static String translateUnaryOp(String op, String name) {
224-
return name + " " + op;
224+
/** Combines a field name and a unary operator to produce predicate tokens. */
225+
private static List<String> translateUnaryOp(String op, String name) {
226+
return ImmutableList.of(name, op);
225227
}
226228

227229
private static String getLiteralType(RelDataType type) {

arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -580,14 +580,13 @@ static void initializeArrowState(@TempDir Path sharedTempDir)
580580
.explainContains(plan);
581581
}
582582

583-
@Disabled("literal with space is not supported")
584583
@Test void testLiteralWithSpace() {
585584
String sql = "select \"intField\", \"stringField\" as \"my Field\"\n"
586585
+ "from arrowdata\n"
587586
+ "where \"stringField\" = 'literal with space'";
588587
String plan = "PLAN=ArrowToEnumerableConverter\n"
589-
+ " ArrowProject(intField=[$0], my Field=[$1])\n"
590-
+ " ArrowFilter(condition=[=($1, '2')])\n"
588+
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
589+
+ " ArrowFilter(condition=[=($1, 'literal with space')])\n"
591590
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
592591
String result = "";
593592

@@ -615,6 +614,23 @@ static void initializeArrowState(@TempDir Path sharedTempDir)
615614
.explainContains(plan);
616615
}
617616

617+
@Test void testLiteralWithEmptyString() {
618+
String sql = "select \"intField\", \"stringField\"\n"
619+
+ "from arrowdata\n"
620+
+ "where \"stringField\" = ''";
621+
String plan = "PLAN=ArrowToEnumerableConverter\n"
622+
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
623+
+ " ArrowFilter(condition=[=($1, '')])\n"
624+
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
625+
String result = "";
626+
627+
CalciteAssert.that()
628+
.with(arrow)
629+
.query(sql)
630+
.returns(result)
631+
.explainContains(plan);
632+
}
633+
618634
@Test void testTinyIntProject() {
619635
String sql = "select DEPTNO from DEPT";
620636
String plan = "PLAN=ArrowToEnumerableConverter\n"

0 commit comments

Comments
 (0)