Skip to content

Commit e95e250

Browse files
committed
[CALCITE-6636] Support CNF condition of Arrow adapter
1 parent ea7fb17 commit e95e250

4 files changed

Lines changed: 81 additions & 70 deletions

File tree

arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTable.java

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -121,21 +121,7 @@ public Enumerable<Object> query(DataContext root, ImmutableIntList fields,
121121

122122
final List<TreeNode> conditionNodes = new ArrayList<>(conditions.size());
123123
for (String condition : conditions) {
124-
String[] data = condition.split(" ");
125-
List<TreeNode> treeNodes = new ArrayList<>(2);
126-
treeNodes.add(
127-
TreeBuilder.makeField(schema.getFields()
128-
.get(schema.getFields().indexOf(schema.findField(data[0])))));
129-
130-
// if the split condition has more than two parts it's a binary operator
131-
// with an additional literal node
132-
if (data.length > 2) {
133-
treeNodes.add(makeLiteralNode(data[2], data[3]));
134-
}
135-
136-
String operator = data[1];
137-
conditionNodes.add(
138-
TreeBuilder.makeFunction(operator, treeNodes, new ArrowType.Bool()));
124+
conditionNodes.add(parseCondition(condition));
139125
}
140126
final Condition filterCondition;
141127
if (conditionNodes.size() == 1) {
@@ -184,6 +170,35 @@ private static RelDataType deduceRowType(Schema schema,
184170
return builder.build();
185171
}
186172

173+
private TreeNode parseCondition(String condition) {
174+
final String[] disjuncts = condition.split("\t");
175+
if (disjuncts.length == 1) {
176+
return parseSingleCondition(condition);
177+
}
178+
final List<TreeNode> orNodes = new ArrayList<>(disjuncts.length);
179+
for (String disjunct : disjuncts) {
180+
orNodes.add(parseSingleCondition(disjunct));
181+
}
182+
return TreeBuilder.makeOr(orNodes);
183+
}
184+
185+
private TreeNode parseSingleCondition(String condition) {
186+
final String[] data = condition.split(" ");
187+
final List<TreeNode> treeNodes = new ArrayList<>(2);
188+
treeNodes.add(
189+
TreeBuilder.makeField(schema.getFields()
190+
.get(schema.getFields().indexOf(schema.findField(data[0])))));
191+
192+
// if the split condition has more than two parts it's a binary operator
193+
// with an additional literal node
194+
if (data.length > 2) {
195+
treeNodes.add(makeLiteralNode(data[2], data[3]));
196+
}
197+
198+
final String operator = data[1];
199+
return TreeBuilder.makeFunction(operator, treeNodes, new ArrowType.Bool());
200+
}
201+
187202
private static TreeNode makeLiteralNode(String literal, String type) {
188203
if (type.startsWith("decimal")) {
189204
String[] typeParts =

arrow/src/main/java/org/apache/calcite/adapter/arrow/ArrowTranslator.java

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,27 @@ public static ArrowTranslator create(RexBuilder rexBuilder,
6262
}
6363

6464
List<String> translateMatch(RexNode condition) {
65-
List<RexNode> disjunctions = RelOptUtil.disjunctions(condition);
66-
if (disjunctions.size() == 1) {
67-
return translateAnd(disjunctions.get(0));
68-
} else {
69-
throw new UnsupportedOperationException("Unsupported disjunctive condition " + condition);
65+
// Expand SEARCH nodes and convert to CNF
66+
final RexNode expanded = RexUtil.expandSearch(rexBuilder, null, condition);
67+
final RexNode cnf = RexUtil.toCnf(rexBuilder, expanded);
68+
69+
final List<String> result = new ArrayList<>();
70+
for (RexNode conjunct : RelOptUtil.conjunctions(cnf)) {
71+
final List<RexNode> disjuncts = RelOptUtil.disjunctions(conjunct);
72+
if (disjuncts.size() == 1) {
73+
result.add(translateMatch2(disjuncts.get(0)));
74+
} else {
75+
final StringBuilder sb = new StringBuilder();
76+
for (int i = 0; i < disjuncts.size(); i++) {
77+
if (i > 0) {
78+
sb.append('\t');
79+
}
80+
sb.append(translateMatch2(disjuncts.get(i)));
81+
}
82+
result.add(sb.toString());
83+
}
7084
}
85+
return result;
7186
}
7287

7388
/**
@@ -93,26 +108,6 @@ private static Object literalValue(RexLiteral literal) {
93108
}
94109
}
95110

96-
/**
97-
* Translate a conjunctive predicate to a SQL string.
98-
*
99-
* @param condition A conjunctive predicate
100-
*
101-
* @return SQL string for the predicate
102-
*/
103-
private List<String> translateAnd(RexNode condition) {
104-
List<String> predicates = new ArrayList<>();
105-
for (RexNode node : RelOptUtil.conjunctions(condition)) {
106-
if (node.getKind() == SqlKind.SEARCH) {
107-
final RexNode node2 = RexUtil.expandSearch(rexBuilder, null, node);
108-
predicates.addAll(translateMatch(node2));
109-
} else {
110-
predicates.add(translateMatch2(node));
111-
}
112-
}
113-
return predicates;
114-
}
115-
116111
/**
117112
* Translates a binary or unary relation.
118113
*

arrow/src/test/java/org/apache/calcite/adapter/arrow/ArrowAdapterTest.java

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import org.apache.calcite.rel.type.RelDataTypeSystem;
2323
import org.apache.calcite.schema.Table;
2424
import org.apache.calcite.test.CalciteAssert;
25-
import org.apache.calcite.util.Bug;
2625
import org.apache.calcite.util.Sources;
2726

2827
import com.google.common.collect.ImmutableMap;
@@ -251,20 +250,10 @@ static void initializeArrowState(@TempDir Path sharedTempDir)
251250
String sql = "select \"intField\", \"stringField\"\n"
252251
+ "from arrowdata\n"
253252
+ "where \"intField\"=12 or \"stringField\"='12'";
254-
String plan;
255-
if (Bug.CALCITE_6293_FIXED) {
256-
plan = "PLAN=ArrowToEnumerableConverter\n"
257-
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
258-
+ " ArrowFilter(condition=[OR(=($0, 12), =($1, '12'))])\n"
259-
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
260-
} else {
261-
plan = "PLAN=EnumerableCalc(expr#0..1=[{inputs}], expr#2=[12], "
262-
+ "expr#3=[=($t0, $t2)], expr#4=['12':VARCHAR], expr#5=[=($t1, $t4)], "
263-
+ "expr#6=[OR($t3, $t5)], proj#0..1=[{exprs}], $condition=[$t6])\n"
264-
+ " ArrowToEnumerableConverter\n"
265-
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
266-
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
267-
}
253+
String plan = "PLAN=ArrowToEnumerableConverter\n"
254+
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
255+
+ " ArrowFilter(condition=[OR(=($0, 12), =($1, '12'))])\n"
256+
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
268257
String result = "intField=12; stringField=12\n";
269258

270259
CalciteAssert.that()
@@ -274,23 +263,35 @@ static void initializeArrowState(@TempDir Path sharedTempDir)
274263
.explainContains(plan);
275264
}
276265

266+
/** Test case for
267+
* <a href="https://issues.apache.org/jira/browse/CALCITE-6636">[CALCITE-6636]
268+
* Support CNF condition of Arrow adapter</a>. */
269+
@Test void testArrowProjectFieldsWithCnfFilter() {
270+
String sql = "select \"intField\", \"stringField\"\n"
271+
+ "from arrowdata\n"
272+
+ "where (\"intField\" > 1 and \"stringField\" = '2') or \"intField\" = 0";
273+
String plan = "PLAN=ArrowToEnumerableConverter\n"
274+
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
275+
+ " ArrowFilter(condition=[OR(AND(>($0, 1), =($1, '2')), =($0, 0))])\n"
276+
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
277+
String result = "intField=0; stringField=0\n"
278+
+ "intField=2; stringField=2\n";
279+
280+
CalciteAssert.that()
281+
.with(arrow)
282+
.query(sql)
283+
.returns(result)
284+
.explainContains(plan);
285+
}
286+
277287
@Test void testArrowProjectFieldsWithInFilter() {
278288
String sql = "select \"intField\", \"stringField\"\n"
279289
+ "from arrowdata\n"
280290
+ "where \"intField\" in (0, 1, 2)";
281-
String plan;
282-
if (Bug.CALCITE_6294_FIXED) {
283-
plan = "PLAN=ArrowToEnumerableConverter\n"
284-
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
285-
+ " ArrowFilter(condition=[OR(=($0, 0), =($0, 1), =($0, 2))])\n"
286-
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
287-
} else {
288-
plan = "PLAN=EnumerableCalc(expr#0..1=[{inputs}], expr#2=[Sarg[0, 1, 2]], "
289-
+ "expr#3=[SEARCH($t0, $t2)], proj#0..1=[{exprs}], $condition=[$t3])\n"
290-
+ " ArrowToEnumerableConverter\n"
291-
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
292-
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
293-
}
291+
String plan = "PLAN=ArrowToEnumerableConverter\n"
292+
+ " ArrowProject(intField=[$0], stringField=[$1])\n"
293+
+ " ArrowFilter(condition=[SEARCH($0, Sarg[0, 1, 2])])\n"
294+
+ " ArrowTableScan(table=[[ARROW, ARROWDATA]], fields=[[0, 1, 2, 3]])\n\n";
294295
String result = "intField=0; stringField=0\n"
295296
+ "intField=1; stringField=1\n"
296297
+ "intField=2; stringField=2\n";

core/src/main/java/org/apache/calcite/util/Bug.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,12 @@ public abstract class Bug {
207207
/** Whether
208208
* <a href="https://issues.apache.org/jira/browse/CALCITE/issues/CALCITE-6293">
209209
* [CALCITE-6293] Support OR condition in Arrow adapter</a> is fixed. */
210-
public static final boolean CALCITE_6293_FIXED = false;
210+
public static final boolean CALCITE_6293_FIXED = true;
211211

212212
/** Whether
213213
* <a href="https://issues.apache.org/jira/browse/CALCITE/issues/CALCITE-6294">
214214
* [CALCITE-6294] Support IN filter in Arrow adapter</a> is fixed. */
215-
public static final boolean CALCITE_6294_FIXED = false;
215+
public static final boolean CALCITE_6294_FIXED = true;
216216

217217
/** Whether
218218
* <a href="https://issues.apache.org/jira/browse/CALCITE-6328">[CALCITE-6328]

0 commit comments

Comments
 (0)