From d2a22954cb6584110cfcb0630cedb1f8c3bbc7ac Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 18 Mar 2026 09:12:14 -0400 Subject: [PATCH 1/9] List iteration benchmark --- .../trace/util/ListIterationBenchmark.java | 412 ++++++++++++++++++ 1 file changed, 412 insertions(+) create mode 100644 internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java new file mode 100644 index 00000000000..c32b924864a --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -0,0 +1,412 @@ +package datadog.trace.util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.CompilerControl; +import org.openjdk.jmh.annotations.CompilerControl.Mode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark comparing difference ways to iterate list of different types and sizes -- both with + * simple loop bodies (inline case) and complicated loop bodies (dont inline case). 
+ * + * + * Java 17 - MacBook M1 - 8 threads * Benchmark (listSpec) Mode Cnt Score Error Units * ListIterationBenchmark.cstyleFor_inline COLLECTIONS_EMPTY_LIST thrpt 3 9066154714.207 ± 3993855570.335 ops/s * ListIterationBenchmark.cstyleFor:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op @@ -335,75 +337,74 @@ public enum ListSpec { @Param ListSpec listSpec; - @Benchmark - public void forEach_inline() { - this.listSpec.list.forEach(Element::manipulate_inline); - } - - @Benchmark - public void forEach_dont_inline() { - this.listSpec.list.forEach(Element::manipulate_dont_inline); - } - - @Benchmark - public void enhancedFor_inline() { - // Enhanced for-loop is just syntax sugar for an Iterator - for ( Element e : this.listSpec.list ) { - e.manipulate_inline(); - } - } - - @Benchmark - public void enhancedFor_dont_inline() { - // Enhanced for-loop is just syntax sugar for an Iterator - for ( Element e : this.listSpec.list ) { - e.manipulate_dont_inline(); - } - } - - @Benchmark - public void iterator_inline() { - for ( Iterator iter = this.listSpec.list.iterator(); iter.hasNext(); ) { - iter.next().manipulate_inline(); - } - } - - @Benchmark - public void iterator_dont_inline() { - for ( Iterator iter = this.listSpec.list.iterator(); iter.hasNext(); ) { - iter.next().manipulate_dont_inline(); - } + @Benchmark + public void forEach_inline() { + this.listSpec.list.forEach(Element::manipulate_inline); + } + + @Benchmark + public void forEach_dont_inline() { + this.listSpec.list.forEach(Element::manipulate_dont_inline); + } + + @Benchmark + public void enhancedFor_inline() { + // Enhanced for-loop is just syntax sugar for an Iterator + for (Element e : this.listSpec.list) { + e.manipulate_inline(); } - - - @Benchmark - public void cstyleFor_inline() { - for ( int i = 0; i < this.listSpec.list.size(); ++i ) { - this.listSpec.list.get(i).manipulate_inline(); - } + } + + @Benchmark + public void enhancedFor_dont_inline() { + // Enhanced for-loop is just syntax 
sugar for an Iterator + for (Element e : this.listSpec.list) { + e.manipulate_dont_inline(); } - - @Benchmark - public void cstyleFor_dont_inline() { - for ( int i = 0; i < this.listSpec.list.size(); ++i ) { - this.listSpec.list.get(i).manipulate_dont_inline(); - } + } + + @Benchmark + public void iterator_inline() { + for (Iterator iter = this.listSpec.list.iterator(); iter.hasNext(); ) { + iter.next().manipulate_inline(); } - - @Benchmark - public void streams_inline() { - this.listSpec.list.stream().forEach(Element::manipulate_inline); + } + + @Benchmark + public void iterator_dont_inline() { + for (Iterator iter = this.listSpec.list.iterator(); iter.hasNext(); ) { + iter.next().manipulate_dont_inline(); } - - @Benchmark - public void streams_dont_inline() { - this.listSpec.list.stream().forEach(Element::manipulate_dont_inline); + } + + @Benchmark + public void cstyleFor_inline() { + for (int i = 0; i < this.listSpec.list.size(); ++i) { + this.listSpec.list.get(i).manipulate_inline(); } - - @Benchmark - public void parallelStreams_inline() { - listSpec.list.parallelStream().forEach(Element::manipulate_dont_inline); + } + + @Benchmark + public void cstyleFor_dont_inline() { + for (int i = 0; i < this.listSpec.list.size(); ++i) { + this.listSpec.list.get(i).manipulate_dont_inline(); } + } + + @Benchmark + public void streams_inline() { + this.listSpec.list.stream().forEach(Element::manipulate_inline); + } + + @Benchmark + public void streams_dont_inline() { + this.listSpec.list.stream().forEach(Element::manipulate_dont_inline); + } + + @Benchmark + public void parallelStreams_inline() { + listSpec.list.parallelStream().forEach(Element::manipulate_dont_inline); + } @Benchmark public void parallelStreams_dont_inline() { From 27a33f03d98fdfc4616fa3e9e49ea09878a95c18 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 8 Apr 2026 12:47:38 -0400 Subject: [PATCH 4/9] spotless --- .../src/jmh/java/datadog/trace/util/ListIterationBenchmark.java | 1 + 1 file 
changed, 1 insertion(+) diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java index 2b7130abfa9..99ff929f9ef 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -31,6 +31,7 @@ * working with sets (uncommon in the java agent) *
  • * + * * Java 17 - MacBook M1 - 8 threads * Benchmark (listSpec) Mode Cnt Score Error Units * ListIterationBenchmark.cstyleFor_inline COLLECTIONS_EMPTY_LIST thrpt 3 9066154714.207 ± 3993855570.335 ops/s From 8056b562d3a25aa004826b85f9a60fb8e17bc045 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Mon, 22 Jun 2026 19:29:55 -0400 Subject: [PATCH 5/9] Update internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java Co-authored-by: Sarah Chen --- .../src/jmh/java/datadog/trace/util/ListIterationBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java index 99ff929f9ef..0bdf3242e1d 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -404,7 +404,7 @@ public void streams_dont_inline() { @Benchmark public void parallelStreams_inline() { - listSpec.list.parallelStream().forEach(Element::manipulate_dont_inline); + listSpec.list.parallelStream().forEach(Element::manipulate_inline); } @Benchmark From b816fdeed7ff8d2d4392b54bba6443aadb497c22 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Mon, 22 Jun 2026 19:30:08 -0400 Subject: [PATCH 6/9] Update internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java Co-authored-by: Sarah Chen --- .../src/jmh/java/datadog/trace/util/ListIterationBenchmark.java | 1 + 1 file changed, 1 insertion(+) diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java index 0bdf3242e1d..62bab289740 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -2,6 +2,7 @@ import java.util.ArrayList; import 
java.util.Collections; +import java.util.Iterator; import java.util.List; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.CompilerControl; From 600cc9bbc171448967425401792f3cfa15c3f765 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 23 Jun 2026 13:36:25 -0400 Subject: [PATCH 7/9] Update internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java Co-authored-by: Sarah Chen --- .../src/jmh/java/datadog/trace/util/ListIterationBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java index 62bab289740..b7c527e99b3 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -16,7 +16,7 @@ import org.openjdk.jmh.annotations.Warmup; /** - * Benchmark comparing difference ways to iterate list of different types and sizes -- both with + * Benchmark comparing different ways to iterate list of different types and sizes -- both with * simple loop bodies (inline case) and complicated loop bodies (dont inline case). * *
      From 26c2c0bcd70c40d8a9776221aa793b28a377ad97 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Tue, 23 Jun 2026 13:47:42 -0400 Subject: [PATCH 8/9] Isolate per-thread collections in ListIterationBenchmark Build each thread's list (and its Elements) in a Scope.Thread @Setup so the manipulate_* mutations stay thread-local. Previously the lists lived in enum constants shared across all 8 threads, so the benchmark measured cross-thread contention on Element.num rather than iteration cost. Also bump to @Fork(2) and fix a Javadoc typo. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../trace/util/ListIterationBenchmark.java | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java index b7c527e99b3..e9bc52e12a5 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -4,13 +4,16 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.function.Supplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.CompilerControl; import org.openjdk.jmh.annotations.CompilerControl.Mode; import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; @@ -293,11 +296,11 @@ * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 88.000 ± 0.001 B/op * */ -@Fork(1) +@Fork(2) @Warmup(iterations = 2) @Measurement(iterations = 3) @Threads(8) -@State(Scope.Benchmark) 
+@State(Scope.Thread) public class ListIterationBenchmark { public static final class Element { int num = 0; @@ -321,38 +324,57 @@ static ArrayList newArrayList(int size) { return newList; } + /** + * Describes the list under test as a factory rather than a prebuilt instance. Each benchmark + * thread builds its own list (with its own {@link Element}s) in {@link #setUp()}, so the {@code + * manipulate_*} mutations stay thread-local — otherwise, with {@code @Threads(8)} sharing one + * list held in an enum constant, the benchmark would measure cross-thread contention on {@code + * Element.num} rather than iteration cost. + */ public enum ListSpec { - COLLECTIONS_EMPTY_LIST(Collections.emptyList()), - EMPTY_ARRAY_LIST(new ArrayList<>()), - SINGLETON_LIST(Collections.singletonList(new Element())), - ARRAY_LIST_1(newArrayList(1)), - ARRAY_LIST_5(newArrayList(5)), - ARRAY_LIST_10(newArrayList(10)), - ARRAY_LIST_100(newArrayList(100)); + COLLECTIONS_EMPTY_LIST(Collections::emptyList), + EMPTY_ARRAY_LIST(ArrayList::new), + SINGLETON_LIST(() -> Collections.singletonList(new Element())), + ARRAY_LIST_1(() -> newArrayList(1)), + ARRAY_LIST_5(() -> newArrayList(5)), + ARRAY_LIST_10(() -> newArrayList(10)), + ARRAY_LIST_100(() -> newArrayList(100)); - final List list; + private final Supplier> factory; - ListSpec(List list) { - this.list = list; + ListSpec(Supplier> factory) { + this.factory = factory; + } + + List build() { + return factory.get(); } } @Param ListSpec listSpec; + List list; + + @Setup(Level.Trial) + public void setUp() { + // Built per thread (the class is @State(Scope.Thread)) so each thread owns its own Elements. 
+ this.list = this.listSpec.build(); + } + @Benchmark public void forEach_inline() { - this.listSpec.list.forEach(Element::manipulate_inline); + this.list.forEach(Element::manipulate_inline); } @Benchmark public void forEach_dont_inline() { - this.listSpec.list.forEach(Element::manipulate_dont_inline); + this.list.forEach(Element::manipulate_dont_inline); } @Benchmark public void enhancedFor_inline() { // Enhanced for-loop is just syntax sugar for an Iterator - for (Element e : this.listSpec.list) { + for (Element e : this.list) { e.manipulate_inline(); } } @@ -360,56 +382,56 @@ public void enhancedFor_inline() { @Benchmark public void enhancedFor_dont_inline() { // Enhanced for-loop is just syntax sugar for an Iterator - for (Element e : this.listSpec.list) { + for (Element e : this.list) { e.manipulate_dont_inline(); } } @Benchmark public void iterator_inline() { - for (Iterator iter = this.listSpec.list.iterator(); iter.hasNext(); ) { + for (Iterator iter = this.list.iterator(); iter.hasNext(); ) { iter.next().manipulate_inline(); } } @Benchmark public void iterator_dont_inline() { - for (Iterator iter = this.listSpec.list.iterator(); iter.hasNext(); ) { + for (Iterator iter = this.list.iterator(); iter.hasNext(); ) { iter.next().manipulate_dont_inline(); } } @Benchmark public void cstyleFor_inline() { - for (int i = 0; i < this.listSpec.list.size(); ++i) { - this.listSpec.list.get(i).manipulate_inline(); + for (int i = 0; i < this.list.size(); ++i) { + this.list.get(i).manipulate_inline(); } } @Benchmark public void cstyleFor_dont_inline() { - for (int i = 0; i < this.listSpec.list.size(); ++i) { - this.listSpec.list.get(i).manipulate_dont_inline(); + for (int i = 0; i < this.list.size(); ++i) { + this.list.get(i).manipulate_dont_inline(); } } @Benchmark public void streams_inline() { - this.listSpec.list.stream().forEach(Element::manipulate_inline); + this.list.stream().forEach(Element::manipulate_inline); } @Benchmark public void streams_dont_inline() { - 
this.listSpec.list.stream().forEach(Element::manipulate_dont_inline); + this.list.stream().forEach(Element::manipulate_dont_inline); } @Benchmark public void parallelStreams_inline() { - listSpec.list.parallelStream().forEach(Element::manipulate_inline); + this.list.parallelStream().forEach(Element::manipulate_inline); } @Benchmark public void parallelStreams_dont_inline() { - listSpec.list.parallelStream().forEach(Element::manipulate_dont_inline); + this.list.parallelStream().forEach(Element::manipulate_dont_inline); } } From 8571e72f03709f27c092e872cb544bec5bf5519a Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Wed, 24 Jun 2026 07:17:22 -0400 Subject: [PATCH 9/9] Replace stale results in ListIterationBenchmark with Java 17 numbers Drop the old (pre-per-thread-state) results table; add a condensed Java 17 block. For ArrayList the direct styles (cstyleFor/forEach/enhanced-for/iterator) cluster within ~10%; stream() is ~3.6x slower; parallelStream() is catastrophic for small lists (ForkJoinPool overhead) and erratic. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../trace/util/ListIterationBenchmark.java | 281 ++---------------- 1 file changed, 24 insertions(+), 257 deletions(-) diff --git a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java index e9bc52e12a5..30580f44719 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/ListIterationBenchmark.java @@ -36,265 +36,32 @@ *
    • *
    * - * Java 17 - MacBook M1 - 8 threads - * Benchmark (listSpec) Mode Cnt Score Error Units - * ListIterationBenchmark.cstyleFor_inline COLLECTIONS_EMPTY_LIST thrpt 3 9066154714.207 ± 3993855570.335 ops/s - * ListIterationBenchmark.cstyleFor:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline COLLECTIONS_EMPTY_LIST thrpt 3 9307532101.544 ± 3600114064.312 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.cstyleFor_inline EMPTY_ARRAY_LIST thrpt 3 8553022013.203 ± 4941170671.582 ops/s - * ListIterationBenchmark.cstyleFor_inline :gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline EMPTY_ARRAY_LIST thrpt 3 8096029334.875 ± 3735770834.739 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.cstyleFor_inline SINGLETON_LIST thrpt 3 579968267.534 ± 480993460.419 ops/s - * ListIterationBenchmark.cstyleFor_inline :gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁶ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline SINGLETON_LIST thrpt 3 219512282.514 ± 10114065.364 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.cstyleFor_inline ARRAY_LIST_1 thrpt 3 445550609.183 ± 430016640.001 ops/s - * ListIterationBenchmark.cstyleFor_inline :gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁶ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline ARRAY_LIST_1 thrpt 3 257920434.103 ± 499635383.643 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.cstyleFor_inline ARRAY_LIST_5 thrpt 3 75497912.945 ± 30020599.171 ops/s - * ListIterationBenchmark.cstyleFor_inline :gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 
10⁻⁵ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline ARRAY_LIST_5 thrpt 3 28476601.001 ± 1230275.296 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁴ B/op - * - * - * ListIterationBenchmark.cstyleFor_inline ARRAY_LIST_10 thrpt 3 29817752.733 ± 20822258.640 ops/s - * ListIterationBenchmark.cstyleFor_inline :gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline ARRAY_LIST_10 thrpt 3 10586304.137 ± 694080.794 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.cstyleFor_inline ARRAY_LIST_100 thrpt 3 5189749.889 ± 182890.132 ops/s - * ListIterationBenchmark.cstyleFor_inline :gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.cstyleFor_dont_inline ARRAY_LIST_100 thrpt 3 5574779.347 ± 3138942.124 ops/s - * ListIterationBenchmark.cstyleFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.enhancedFor_inline COLLECTIONS_EMPTY_LIST thrpt 3 9207677799.793 ± 1391109060.707 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline COLLECTIONS_EMPTY_LIST thrpt 3 9223840664.732 ± 3042465993.695 ops/s - * ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.enhancedFor_inline EMPTY_ARRAY_LIST thrpt 3 8395252508.254 ± 3316954375.722 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline EMPTY_ARRAY_LIST thrpt 3 8749632223.603 ± 5103144323.039 ops/s - * ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.enhancedFor_inline 
SINGLETON_LIST thrpt 3 585380967.338 ± 150306592.315 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁶ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline SINGLETON_LIST thrpt 3 401275107.625 ± 1875412135.090 ops/s - * ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.enhancedFor_inline ARRAY_LIST_1 thrpt 3 239663416.496 ± 9797263.026 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline ARRAY_LIST_1 thrpt 3 292347248.552 ± 402874274.980 ops/s - * ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.enhancedFor_inline ARRAY_LIST_5 thrpt 3 114233676.386 ± 17033961.163 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline ARRAY_LIST_5 thrpt 3 17788070.719 ± 185801.986 ops/s - * ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.enhancedFor_inline ARRAY_LIST_10 thrpt 3 36526081.949 ± 5409614.800 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline ARRAY_LIST_10 thrpt 3 9952121.906 ± 541730.002 ops/s - * ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.enhancedFor_inline ARRAY_LIST_100 thrpt 3 5021433.149 ± 189172.874 ops/s - * ListIterationBenchmark.enhancedFor_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.enhancedFor_dont_inline ARRAY_LIST_100 thrpt 3 3787184.732 ± 122019.171 ops/s - * 
ListIterationBenchmark.enhancedFor_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻³ B/op - * - * - * ListIterationBenchmark.forEach_dont_inline COLLECTIONS_EMPTY_LIST thrpt 3 9087818339.363 ± 4682854417.372 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.forEach_inline COLLECTIONS_EMPTY_LIST thrpt 3 9236676927.205 ± 8654546805.544 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.forEach_dont_inline EMPTY_ARRAY_LIST thrpt 3 9067901137.791 ± 658593480.822 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.forEach_inline EMPTY_ARRAY_LIST thrpt 3 8338589922.946 ± 2762463965.925 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.forEach_inline SINGLETON_LIST thrpt 3 273193041.510 ± 232676409.952 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.forEach_dont_inline SINGLETON_LIST thrpt 3 429048764.107 ± 122641686.349 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁶ B/op - * - * ListIterationBenchmark.forEach_inline ARRAY_LIST_1 thrpt 3 191331395.539 ± 21424694.743 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.forEach_dont_inline ARRAY_LIST_1 thrpt 3 131771385.351 ± 5791600.995 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.forEach_inline ARRAY_LIST_5 thrpt 3 61657233.796 ± 22827021.020 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁵ B/op - * - * 
ListIterationBenchmark.forEach_dont_inline ARRAY_LIST_5 thrpt 3 27739644.723 ± 2168701.924 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.forEach_inline ARRAY_LIST_10 thrpt 3 28127208.294 ± 26550020.011 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.forEach_dont_inline ARRAY_LIST_10 thrpt 3 12826780.510 ± 1545440.613 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.forEach_inline ARRAY_LIST_100 thrpt 3 4919956.616 ± 2482616.871 ops/s - * ListIterationBenchmark.forEach_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.forEach_dont_inline ARRAY_LIST_100 thrpt 3 3631999.182 ± 2290995.458 ops/s - * ListIterationBenchmark.forEach_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻³ B/op - * - * - * ListIterationBenchmark.iterator_inline COLLECTIONS_EMPTY_LIST thrpt 3 8782837307.595 ± 9193909313.868 ops/s - * ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.iterator_dont_inline COLLECTIONS_EMPTY_LIST thrpt 3 9077833391.678 ± 9363495032.329 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.iterator_inline EMPTY_ARRAY_LIST thrpt 3 7577428097.018 ± 17869599838.589 ops/s - * ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.iterator_dont_inline EMPTY_ARRAY_LIST thrpt 3 8905180606.486 ± 1278759944.669 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 ≈ 10⁻⁷ B/op - * - * ListIterationBenchmark.iterator_inline SINGLETON_LIST thrpt 3 545492858.104 ± 288175308.591 ops/s - * 
ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁶ B/op - * - * ListIterationBenchmark.iterator_dont_inline SINGLETON_LIST thrpt 3 227010872.669 ± 23119526.801 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.iterator_inline ARRAY_LIST_1 thrpt 3 228450106.295 ± 118964448.603 ops/s - * ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.iterator_dont_inline ARRAY_LIST_1 thrpt 3 137387128.594 ± 25909582.512 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.iterator_inline ARRAY_LIST_5 thrpt 3 76164387.317 ± 8753181.873 ops/s - * ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁵ B/op - * - * ListIterationBenchmark.iterator_dont_inline ARRAY_LIST_5 thrpt 3 23222672.053 ± 14620774.912 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.iterator_inline ARRAY_LIST_10 thrpt 3 32207574.764 ± 8935430.248 ops/s - * ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.iterator_dont_inline ARRAY_LIST_10 thrpt 3 7744943.832 ± 4007932.991 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.iterator_inline ARRAY_LIST_100 thrpt 3 4858523.049 ± 1169051.166 ops/s - * ListIterationBenchmark.iterator_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻⁴ B/op - * - * ListIterationBenchmark.iterator_dont_inline ARRAY_LIST_100 thrpt 3 3573806.058 ± 1033738.003 ops/s - * ListIterationBenchmark.iterator_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 ≈ 10⁻³ B/op - * - * - * ListIterationBenchmark.parallelStreams_inline 
COLLECTIONS_EMPTY_LIST thrpt 3 378041146.558 ± 143697638.943 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 128.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline COLLECTIONS_EMPTY_LIST thrpt 3 350864577.375 ± 117736321.914 ops/s - * ListIterationBenchmark.parallelStreams_dont_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 128.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_inline EMPTY_ARRAY_LIST thrpt 3 324256295.000 ± 62831502.030 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 160.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline EMPTY_ARRAY_LIST thrpt 3 1044022834.772 ± 4619766802.708 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 160.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_inline SINGLETON_LIST thrpt 3 18501339.741 ± 1654479.836 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 152.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline SINGLETON_LIST thrpt 3 21809861.051 ± 350120.124 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 152.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_inline ARRAY_LIST_1 thrpt 3 18012814.959 ± 1696186.799 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 160.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline ARRAY_LIST_1 thrpt 3 111167193.920 ± 22996298.573 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 160.000 ± 0.001 B/op - * - * ListIterationBenchmark.parallelStreams_inline ARRAY_LIST_5 thrpt 3 2012388.452 ± 949228.198 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 
480.299 ± 0.347 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline ARRAY_LIST_5 thrpt 3 2128440.686 ± 511599.227 ops/s - * ListIterationBenchmark.parallelStreams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 480.254 ± 0.357 B/op - * - * ListIterationBenchmark.parallelStreams_inline ARRAY_LIST_10 thrpt 3 1303577.389 ± 929027.756 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 880.359 ± 0.262 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline ARRAY_LIST_10 thrpt 3 1311148.884 ± 126593.404 ops/s - * ListIterationBenchmark.parallelStreams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 880.457 ± 0.052 B/op - * - * ListIterationBenchmark.parallelStreams_inline ARRAY_LIST_100 thrpt 3 581725.185 ± 79053.756 ops/s - * ListIterationBenchmark.parallelStreams_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 5200.204 ± 0.339 B/op - * - * ListIterationBenchmark.parallelStreams_dont_inline ARRAY_LIST_100 thrpt 3 535792.621 ± 171447.687 ops/s - * ListIterationBenchmark.parallelStreams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 5200.636 ± 2.516 B/op - * - * - * ListIterationBenchmark.streams_inline COLLECTIONS_EMPTY_LIST thrpt 3 1908867078.365 ± 457707512.391 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 56.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_dont_inline COLLECTIONS_EMPTY_LIST thrpt 3 1921592196.919 ± 244634653.490 ops/s - * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm COLLECTIONS_EMPTY_LIST thrpt 3 56.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_inline EMPTY_ARRAY_LIST thrpt 3 1214862597.257 ± 135140736.401 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_dont_inline EMPTY_ARRAY_LIST thrpt 3 1224109308.819 ± 128448610.019 ops/s - * 
ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm EMPTY_ARRAY_LIST thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_inline SINGLETON_LIST thrpt 3 38323049.906 ± 12454289.128 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 80.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_dont_inline SINGLETON_LIST thrpt 3 23491667.001 ± 7585146.466 ops/s - * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm SINGLETON_LIST thrpt 3 80.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_inline ARRAY_LIST_1 thrpt 3 196494080.731 ± 111300975.392 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_dont_inline ARRAY_LIST_1 thrpt 3 118268890.253 ± 13653144.114 ops/s - * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_1 thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_inline ARRAY_LIST_5 thrpt 3 69135875.825 ± 3742040.817 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_dont_inline ARRAY_LIST_5 thrpt 3 46099259.535 ± 29749609.625 ops/s - * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_5 thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_inline ARRAY_LIST_10 thrpt 3 14923107.542 ± 5663775.999 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 88.000 ± 0.001 B/op - * - * ListIterationBenchmark.streams_dont_inline ARRAY_LIST_10 thrpt 3 17238302.629 ± 3449711.016 ops/s - * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_10 thrpt 3 88.000 ± 0.001 B/op + *

    Java 17 results (Apple M1, {@code @Fork(2)}, {@code @Threads(8)}; {@code ArrayList}, M ops/s = + * millions, shown at two sizes): + * Iteration style size 10 size 100 + * cstyleFor 1050 165 (fastest) + * forEach 995 163 + * enhancedFor 945 153 + * iterator 935 148 (noisier run-to-run) + * streams 158 45 (~3.6x slower at size 100; allocates) + * parallelStreams ~1 ~0.3 (catastrophic at these sizes) + * * - * ListIterationBenchmark.streams_inline ARRAY_LIST_100 thrpt 3 4974152.515 ± 1040136.146 ops/s - * ListIterationBenchmark.streams_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 88.000 ± 0.001 B/op + * 

    Key findings: * - * ListIterationBenchmark.streams_dont_inline ARRAY_LIST_100 thrpt 3 5781143.104 ± 2907139.961 ops/s - * ListIterationBenchmark.streams_dont_inline:gc.alloc.rate.norm ARRAY_LIST_100 thrpt 3 88.000 ± 0.001 B/op - * + *

      + *
    • For {@code ArrayList}, the direct styles -- {@code cstyleFor}, {@code forEach}, + * enhanced-for, and explicit {@code iterator} -- cluster within ~10% of each other; escape + * analysis eliminates the iterator allocation, so enhanced-for/iterator stay competitive + * while reading cleanest (the RECOMMENDED choice). + *
    • {@code stream()} is ~3.6x (size 100) to ~6x (size 10) slower than direct iteration and + * allocates per call -- avoid on hot paths. + * 
    • {@code parallelStream()} is catastrophic for small collections (hundreds of times slower): + * ForkJoinPool split/coordinate overhead dwarfs the work, and it is run-to-run erratic. Never + * use it for the small lists typical in the agent. + *
    • {@code _inline} vs {@code _dont_inline} loop bodies barely differ at these sizes -- the + * iteration mechanics dominate, not the body. + *
    */ @Fork(2) @Warmup(iterations = 2)