Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/jmh/java/io/jawk/backend/AVMExpressionBenchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ public class AVMExpressionBenchmark {
private AwkExpression fieldConcatenation;
private AwkExpression fieldRegexMatch;
private AwkExpression multiStringConcatenation;
private AwkExpression constantStringConcatenation;
private AwkExpression stringConstantStringConstantConcatenation;
private AwkExpression fourStringConcatenation;
private AwkExpression mixedExpression;

/**
Expand All @@ -78,6 +81,9 @@ public void setup() throws IOException {
this.fieldConcatenation = awk.compileExpression("$1 \" test\"");
this.fieldRegexMatch = awk.compileExpression("$1 ~ /test/");
this.multiStringConcatenation = awk.compileExpression("$1 \" test1\" \" test2\" \" test3\"");
this.constantStringConcatenation = awk.compileExpression("\"constant\" \"constant\" \"constant\" \"constant\"");
this.stringConstantStringConstantConcatenation = awk.compileExpression("$1 \"constant\" $2 \"constant\"");
this.fourStringConcatenation = awk.compileExpression("$1 $2 $3 $4");
this.mixedExpression = awk.compileExpression("($1 + $2) \":\" ($3 ~ /test/) \":\" $4");
this.avm = new AVM(new AwkSettings(), Collections.emptyMap());
this.avm.prepareForEval("42 3.14 test-value suffix");
Expand Down Expand Up @@ -159,6 +165,40 @@ public Object multiStringConcatenation() throws IOException {
return this.avm.eval(this.multiStringConcatenation);
}

/**
* Measures the optimized constant-folded case for four constant string
* operands.
*
* @return expression result
* @throws IOException if input preparation or evaluation fails
*/
@Benchmark
public Object constantStringConcatenation() throws IOException {
return this.avm.eval(this.constantStringConcatenation);
Comment thread
bertysentry marked this conversation as resolved.
}

/**
 * Benchmarks evaluation of the precompiled expression that alternates
 * field operands with constant string operands.
 *
 * @return expression result
 * @throws IOException if input preparation or evaluation fails
 */
@Benchmark
public Object stringConstantStringConstantConcatenation() throws IOException {
    // Hand the value back to the harness instead of discarding it.
    final Object result = this.avm.eval(this.stringConstantStringConstantConcatenation);
    return result;
}

/**
 * Benchmarks evaluation of the precompiled expression that concatenates
 * four field operands.
 *
 * @return expression result
 * @throws IOException if input preparation or evaluation fails
 */
@Benchmark
public Object fourStringConcatenation() throws IOException {
    // Hand the value back to the harness instead of discarding it.
    final Object result = this.avm.eval(this.fourStringConcatenation);
    return result;
}

/**
* Measures mixed numeric, string, field, and regular expression operations.
*
Expand Down
23 changes: 23 additions & 0 deletions src/main/java/io/jawk/backend/AVM.java
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,29 @@ private void executeTuples(PositionTracker position)
position.next();
break;
}
case MULTI_CONCAT: {
// arg[0] = number of stack items to concatenate
// stack[0] = last concatenation operand
CountTuple countTuple = (CountTuple) tuple;
Comment thread
bertysentry marked this conversation as resolved.
int count = (int) countTuple.getCount();
Comment thread
bertysentry marked this conversation as resolved.
// Store String references so appends run left-to-right. Converting
Comment thread
bertysentry marked this conversation as resolved.
// operands to char[] would copy them once before StringBuilder
// copies them again, and front-inserting would shift existing
// content on each operand.
String[] values = new String[count];
int resultLength = 0;
for (int i = count - 1; i >= 0; i--) {
values[i] = jrt.toAwkString(pop());
resultLength += values[i].length();
}
StringBuilder resultString = new StringBuilder(resultLength);
for (String value : values) {
resultString.append(value);
}
push(resultString.toString());
position.next();
break;
}
case ASSIGN:
case ASSIGN_NOPUSH: {
// arg[0] = offset
Expand Down
67 changes: 63 additions & 4 deletions src/main/java/io/jawk/intermediate/AwkTuples.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
Expand Down Expand Up @@ -69,7 +70,7 @@ public class AwkTuples implements Serializable {
* can be serialized and patched efficiently. A linked list would make every
* lookup O(n) and complicate address reassignment.
*/
private java.util.List<Tuple> queue = new ArrayList<Tuple>(100) {
private List<Tuple> queue = new ArrayList<Tuple>(100) {
private static final long serialVersionUID = -6334362156408598578L;

@Override
Expand Down Expand Up @@ -1878,17 +1879,36 @@ private boolean peepholeOptimizePass() {
return false;
}

java.util.List<Tuple> original = new ArrayList<Tuple>(queue);
List<Tuple> original = new ArrayList<Tuple>(queue);
int[] indexMapping = new int[originalSize];
Arrays.fill(indexMapping, -1);
java.util.List<Tuple> optimizedQueue = new ArrayList<Tuple>(originalSize);
List<Tuple> optimizedQueue = new ArrayList<Tuple>(originalSize);
boolean[] isAddressTarget = addressTargets(original, originalSize);

boolean modified = false;
int oldIndex = 0;
int newIndex = 0;
while (oldIndex < originalSize) {
Tuple tuple = original.get(oldIndex);
// If an earlier rewrite already happened in this pass, wait for the
// next pass before collapsing concat runs. That gives literal folding
// priority so fully constant chains become one PUSH_STRING instead of a
// partially folded PUSH_STRING plus MULTI_CONCAT.
ConcatRun concatRun = !modified ? concatRun(original, isAddressTarget, oldIndex) : null;
if (concatRun != null) {
// Chained concatenations compile as a run of binary CONCAT tuples
// after all operands have been pushed. Collapse that postfix run into
// one counted MULTI_CONCAT, e.g. CONCAT, CONCAT, CONCAT ->
// MULTI_CONCAT 4.
Tuple replacement = createMultiConcat(concatRun.itemCount, tuple.getLineNumber());
optimizedQueue.add(replacement);
mapFoldedRange(indexMapping, oldIndex, concatRun.tupleCount, newIndex);
oldIndex += concatRun.tupleCount;
newIndex++;
modified = true;
continue;
}

if (tuple.getOpcode() == Opcode.ASSIGN && (oldIndex + 1) < originalSize) {
Tuple nextTuple = original.get(oldIndex + 1);
// Statement assignments compile as ASSIGN followed by POP because
Expand Down Expand Up @@ -1987,7 +2007,7 @@ private boolean peepholeOptimizePass() {
return true;
}

private boolean[] addressTargets(java.util.List<Tuple> tuples, int tupleCount) {
private boolean[] addressTargets(List<Tuple> tuples, int tupleCount) {
boolean[] targets = new boolean[tupleCount];
for (Tuple tuple : tuples) {
Address address = tuple.getAddress();
Expand All @@ -2007,6 +2027,29 @@ private void mapFoldedRange(int[] indexMapping, int startIndex, int length, int
}
}

private ConcatRun concatRun(List<Tuple> original, boolean[] isAddressTarget, int oldIndex) {
Tuple tuple = original.get(oldIndex);
if (tuple.getOpcode() != Opcode.CONCAT || isAddressTarget[oldIndex]) {
return null;
}
Comment thread
bertysentry marked this conversation as resolved.

int itemCount = 2;
int tupleCount = 1;
int currentIndex = oldIndex + 1;
while (currentIndex < original.size()
&& original.get(currentIndex).getOpcode() == Opcode.CONCAT
&& !isAddressTarget[currentIndex]) {
itemCount++;
tupleCount++;
currentIndex++;
}

if (tupleCount < 2) {
return null;
}
return new ConcatRun(tupleCount, itemCount);
}

private Object literalValue(Tuple tuple) {
switch (tuple.getOpcode()) {
case PUSH_LONG:
Expand Down Expand Up @@ -2162,6 +2205,22 @@ private Tuple createGetInputFieldConst(long fieldIndex, int lineNumber) {
return tuple;
}

/**
 * Builds a {@code MULTI_CONCAT} count tuple.
 *
 * @param itemCount count argument stored in the tuple
 * @param lineNumber line number assigned to the new tuple
 * @return the newly created tuple
 */
private Tuple createMultiConcat(int itemCount, int lineNumber) {
    final Tuple multiConcat = new Tuple.CountTuple(Opcode.MULTI_CONCAT, itemCount);
    multiConcat.setLineNumber(lineNumber);
    return multiConcat;
}

/**
 * Immutable pair of counts describing a run of concatenation tuples.
 */
private static final class ConcatRun {
    /** Number of tuples that make up the run. */
    private final int tupleCount;
    /** Number of items the run concatenates. */
    private final int itemCount;

    /**
     * @param concatTuples number of tuples in the run
     * @param operands number of items concatenated by the run
     */
    private ConcatRun(int concatTuples, int operands) {
        tupleCount = concatTuples;
        itemCount = operands;
    }
}

private void remapAddresses(int[] indexMapping) {
if (indexMapping.length == 0) {
return;
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/io/jawk/intermediate/Opcode.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,17 @@ public enum Opcode {
* Stack after: x-concatenated-with-y ...
*/
CONCAT,
/**
* Pops and concatenates N values from the top-of-stack after AWK string
* conversion; pushes the result onto the stack. The number of items is passed
* in as a tuple argument.
* <p>
Comment thread
bertysentry marked this conversation as resolved.
* Argument: # of items (N)
* <p>
* Stack before: x1 x2 x3 .. xN ...<br/>
* Stack after: x1-concatenated-through-xN ...
*/
MULTI_CONCAT,
/**
* Assigns the top-of-stack to a variable and pushes the assigned value back
* onto the stack.
Expand Down
100 changes: 99 additions & 1 deletion src/test/java/io/jawk/AwkTupleOptimizationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,84 @@ public void foldsLiteralStringConcatenation() throws Exception {
AwkProgram tuples = new Awk().compile(script);
List<Opcode> opcodes = collectOpcodes(tuples);
assertFalse("Literal concatenation should eliminate CONCAT tuple", opcodes.contains(Opcode.CONCAT));
assertFalse("Literal concatenation should eliminate MULTI_CONCAT tuple", opcodes.contains(Opcode.MULTI_CONCAT));
assertTrue("Expected folded literal push of foobar", hasLiteralPush(tuples, "foobar"));
}

@Test
public void foldsChainedLiteralStringConcatenation() throws Exception {
    // Four adjacent string literals: every operand of the chain is constant.
    final String program = "BEGIN { print \"foo\" \"bar\" \"baz\" \"qux\" }\n";

    // Behavioural check: the script prints the joined literal.
    AwkTestSupport
            .awkTest("folds chained literal string concatenation")
            .script(program)
            .expect("foobarbazqux\n")
            .runAndAssert();

    // Structural check: no concatenation opcode remains, only the folded literal push.
    final AwkProgram compiled = new Awk().compile(program);
    final List<Opcode> emitted = collectOpcodes(compiled);

    final boolean keepsBinaryConcat = emitted.contains(Opcode.CONCAT);
    assertFalse("Chained literal concatenation should eliminate CONCAT tuple", keepsBinaryConcat);

    final boolean keepsMultiConcat = emitted.contains(Opcode.MULTI_CONCAT);
    assertFalse("Chained literal concatenation should eliminate MULTI_CONCAT tuple", keepsMultiConcat);

    final boolean pushesFoldedLiteral = hasLiteralPush(compiled, "foobarbazqux");
    assertTrue("Expected folded literal push of foobarbazqux", pushesFoldedLiteral);
}

@Test
public void optimizesChainedStringConcatenationAsSingleMultiConcat() throws Exception {
    // Two variables interleaved with two literals: a four-operand chain.
    final String program = "BEGIN { s1 = \"alpha\"; s2 = \"beta\"; print s1 \"-\" s2 \":\" }\n";

    // Behavioural check: output is the plain left-to-right concatenation.
    AwkTestSupport
            .awkTest("counted chained string concatenation")
            .script(program)
            .expect("alpha-beta:\n")
            .runAndAssert();

    // Structural check: exactly one MULTI_CONCAT with count 4 and no binary CONCAT.
    final AwkProgram compiled = new Awk().compile(program);

    final int fourOperandMultiConcats = countOpcodeWithCount(compiled, Opcode.MULTI_CONCAT, 4);
    assertEquals("Expected one counted MULTI_CONCAT for the mixed chain", 1, fourOperandMultiConcats);

    final int binaryConcats = countOpcode(compiled, Opcode.CONCAT);
    assertEquals("Optimized mixed chain should not keep binary CONCAT tuples", 0, binaryConcats);
}

@Test
public void keepsParserConcatenationBinaryWhenOptimizationDisabled() throws Exception {
    final String program = "BEGIN { s1 = \"alpha\"; s2 = \"beta\"; print s1 \"-\" s2 \":\" }\n";

    // NOTE(review): the boolean argument presumably turns the optimizer off,
    // as the test name suggests - confirm against Awk#compile.
    final AwkProgram unoptimized = new Awk().compile(program, true);

    // Four operands joined pairwise give three binary CONCAT tuples.
    final int binaryConcats = countOpcode(unoptimized, Opcode.CONCAT);
    assertEquals(
            "Unoptimized parser output should keep one binary CONCAT per expression pair",
            3,
            binaryConcats);

    final int multiConcats = countOpcode(unoptimized, Opcode.MULTI_CONCAT);
    assertEquals(
            "Unoptimized parser output should not emit counted chain MULTI_CONCAT",
            0,
            multiConcats);
}

/**
 * Verifies that a run of adjacent CONCAT tuples is left untouched when the
 * first CONCAT of the run is a branch target.
 * <p>
 * All operands are pushed before the two CONCAT tuples so that the CONCATs
 * are adjacent. Only adjacent CONCAT tuples can be collapsed into
 * MULTI_CONCAT, so a stream with an operand between them would pass whether
 * or not the optimizer honours branch targets.
 */
@Test
public void keepsConcatRunWhenFirstConcatIsBranchTarget() {
    AwkTuples tuples = new AwkTuples();
    tuples.pushSourceLineNumber(1);
    Address concatTarget = tuples.createAddress("concat-target");

    tuples.dereference(1, false, true);
    tuples.ifFalse(concatTarget);
    tuples.dereference(2, false, true);
    tuples.dereference(3, false, true);
    tuples.dereference(4, false, true);
    // Resolve the branch target right before the first CONCAT of the run.
    tuples.address(concatTarget);
    tuples.concat();
    tuples.concat();

    tuples.optimize();

    // The targeted CONCAT cannot start a run, and the remaining single
    // CONCAT is too short to be collapsed on its own.
    assertEquals("Targeted CONCAT run should remain binary", 2, countOpcode(tuples, Opcode.CONCAT));
    assertEquals(
            "Targeted CONCAT run should not be folded into MULTI_CONCAT",
            0,
            countOpcode(tuples, Opcode.MULTI_CONCAT));
}

@Test
public void foldsScalarAssignmentPopIntoNonPushingAssignment() throws Exception {
String script = "BEGIN { a = -2; b = 2; c = 4; print a + b + c }\n";
Expand Down Expand Up @@ -204,6 +279,11 @@ public void doesNotFoldNumericConcatenation() throws Exception {
AwkProgram tuples = new Awk().compile(script);
List<Opcode> opcodes = collectOpcodes(tuples);
assertTrue("Numeric literal concatenation should preserve CONCAT tuple", opcodes.contains(Opcode.CONCAT));
assertEquals("Numeric literal concatenation should remain binary", 1, countOpcode(tuples, Opcode.CONCAT));
assertEquals(
"Binary numeric literal concatenation should not use MULTI_CONCAT",
0,
countOpcode(tuples, Opcode.MULTI_CONCAT));
assertFalse("Optimizer should not fold numeric/string concatenation", hasLiteralPush(tuples, "1x"));
}

Expand Down Expand Up @@ -559,8 +639,12 @@ private static boolean hasAddressTargetWithPredecessor(AwkProgram tuples, Opcode
}

/**
 * Counts the tuples of a compiled program that carry the given opcode.
 * Delegates to the overload that walks the value returned by
 * {@code rawTuples(tuples)}.
 *
 * @param tuples compiled program to inspect
 * @param opcode opcode to look for
 * @return number of matching tuples
 */
private static int countOpcode(AwkProgram tuples, Opcode opcode) {
return countOpcode(rawTuples(tuples), opcode);
}

private static int countOpcode(AwkTuples tuples, Opcode opcode) {
int count = 0;
PositionTracker tracker = rawTuples(tuples).top();
PositionTracker tracker = tuples.top();
while (!tracker.isEOF()) {
if (tracker.opcode() == opcode) {
count++;
Expand All @@ -570,6 +654,20 @@ private static int countOpcode(AwkProgram tuples, Opcode opcode) {
return count;
}

/**
 * Counts the tuples of a compiled program that carry the given opcode, are
 * {@code Tuple.CountTuple} instances, and hold the expected count.
 *
 * @param tuples compiled program to inspect
 * @param opcode opcode to look for
 * @param expectedCount count value a tuple must hold to be included
 * @return number of matching tuples
 */
private static int countOpcodeWithCount(AwkProgram tuples, Opcode opcode, long expectedCount) {
    int matches = 0;
    for (PositionTracker cursor = rawTuples(tuples).top(); !cursor.isEOF(); cursor.next()) {
        if (cursor.opcode() != opcode) {
            continue;
        }
        if (!(cursor.current() instanceof Tuple.CountTuple)) {
            continue;
        }
        if (((Tuple.CountTuple) cursor.current()).getCount() == expectedCount) {
            matches++;
        }
    }
    return matches;
}

private static String dumpTuples(AwkProgram tuples) throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
try (PrintStream ps = new PrintStream(out, true, StandardCharsets.UTF_8.name())) {
Expand Down
Loading