diff --git a/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBenchmark.java new file mode 100644 index 00000000000..de33c957e9a --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/api/TagMapAccessBenchmark.java @@ -0,0 +1,168 @@ +package datadog.trace.api; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Throughput microbenchmark for the core {@link TagMap} access paths — insert (direct, via Ledger, + * and HashMap variants), raw-value read, and Entry read — over a representative HTTP-server-ish tag + * set. + * + *
Threading correctness. Runs at {@code @Threads(8)}. All shared state is + * immutable ({@link #NAMES}/{@link #VALUES}); every bit of mutable state lives in a + * {@code @State(Scope.Thread)} holder so threads never contend on a shared map, index, or reader + * flyweight. Earlier TagMap benchmarks shared a cross-thread counter/index, which turned the result + * into a contention measurement rather than a TagMap measurement — this layout avoids that. Indices + * are plain per-invocation locals. + * + *
Run configuration is baked into annotations rather than relying on {@code -Pjmh.*} flags + * (which the {@code me.champeau.jmh} plugin ignores). + * + *
Key findings (MacBook M1, 8 threads, Java 17): + * + *
+ * MacBook M1 with 8 threads (Java 17)
+ *
+ * Benchmark Mode Cnt Score Error Units
+ * TagMapAccessBenchmark.getEntry thrpt 5 95559437.524 ± 1381678.908 ops/s
+ * TagMapAccessBenchmark.getObject thrpt 5 95980166.452 ± 2217719.560 ops/s
+ * TagMapAccessBenchmark.insert thrpt 5 52523529.023 ± 1816998.150 ops/s
+ * TagMapAccessBenchmark.insert_hashMap thrpt 5 65344306.574 ± 4013136.530 ops/s
+ * TagMapAccessBenchmark.insert_hashMap_builderStyle thrpt 5 28057827.189 ± 1359655.664 ops/s
+ * TagMapAccessBenchmark.insert_via_ledger thrpt 5 41169656.095 ± 773264.754 ops/s
+ *
+ */
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Fork(2)
+@Warmup(iterations = 3)
+@Measurement(iterations = 5)
+@Threads(8)
+@State(Scope.Benchmark)
+public class TagMapAccessBenchmark {
+ // Representative HTTP-server-ish tag names; treated as read-only, so safe to share across threads.
+ static final String[] NAMES = {
+ "http.request.method",
+ "http.response.status_code",
+ "http.route",
+ "url.path",
+ "url.scheme",
+ "server.address",
+ "server.port",
+ "client.address",
+ "network.protocol.version",
+ "user_agent.original",
+ "span.kind",
+ "component",
+ "language",
+ "error",
+ "resource.name",
+ "service.name",
+ "operation.name",
+ "env",
+ }; // index-aligned with VALUES, which is sized from NAMES.length
+
+ static final Object[] VALUES = new Object[NAMES.length]; // VALUES[i] is the value for NAMES[i]
+
+ static {
+ for (int i = 0; i < NAMES.length; ++i) {
+ VALUES[i] = "value-" + i; // one String per tag: "value-0", "value-1", ...
+ }
+ }
+
+ /**
+ * Read-side fixture: a {@link TagMap} filled once per trial with every {@code NAMES} / {@code
+ * VALUES} pair. Scoped {@code Scope.Thread}, so each benchmark thread gets an instance of its own.
+ */
+ @State(Scope.Thread)
+ public static class ReadMap {
+ TagMap map;
+
+ @Setup(Level.Trial)
+ public void build() {
+ map = TagMap.create();
+ for (int idx = 0; idx < NAMES.length; idx++) {
+ map.set(NAMES[idx], VALUES[idx]);
+ }
+ }
+ }
+
+ @Benchmark
+ public TagMap insert() { // creates a fresh TagMap and sets every NAMES/VALUES pair directly on it
+ TagMap map = TagMap.create();
+ for (int i = 0; i < NAMES.length; ++i) {
+ map.set(NAMES[i], VALUES[i]);
+ }
+ return map; // returned so the benchmark harness consumes the populated map
+ }
+
+ @Benchmark
+ public TagMap insert_via_ledger() { // same tag set as insert(), but staged through a TagMap.Ledger
+ TagMap.Ledger ledger = TagMap.ledger();
+ for (int i = 0; i < NAMES.length; ++i) {
+ ledger.set(NAMES[i], VALUES[i]);
+ }
+ return ledger.build(); // build() runs inside the benchmark method, so its cost is measured too
+ }
+
+ @Benchmark
+ public MapBecause nothing mutates after construction, a single shared instance ({@link Scope#Benchmark}) + * read by all {@code @Threads} is realistic and contention-free. This is the read-mostly + * counterpart to the per-thread mutable {@link SingleThreadedMapBenchmark} and the contended {@code + * ConcurrentHashtable} / {@code ThreadSafeMap} suites. + * + *
Compares {@code get} + {@code iterate} across {@link HashMap}, {@link LinkedHashMap}, {@link + * TreeMap}, {@link TagMap}, and {@link java.util.Map#copyOf} (via {@link + * CollectionUtils#tryMakeImmutableMap} — the JDK's compact, array-backed {@code + * ImmutableCollections.MapN}, which is what the agent actually uses for fixed config maps; Java + * 10+, falls back to the input map pre-10). {@code Map.copyOf}/{@code MapN} is the honest + * immutable-map baseline, not {@code HashMap}. + * + *
Lookups use {@code EQUAL_KEYS} (distinct String instances) to exercise {@code equals()};
+ * {@code *_sameKey} variants reuse the original interned key instances to show the identity fast
+ * path — which is the common tracer case, since map keys are typically interned tag-name constants.
+ * (Results pending a fresh multi-JVM run — {@code Map.copyOf} only materializes the compact form on
+ * Java 10+.)
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@State(Scope.Benchmark)
+public class ImmutableMapBenchmark {
+ static final String[] INSERTION_KEYS = {
+ "foo", "bar", "baz", "quux", "foobar", "foobaz", "key0", "key1", "key2", "key3"
+ }; // compile-time string literals, so each key is an interned instance
+
+ // Distinct String instances (not the literals used to build the maps) so lookups exercise
+ // equals(), not identity -- the realistic case for keys arriving from parsing/decoding.
+ static final String[] EQUAL_KEYS = newEqualKeys(); // equal content, same order as INSERTION_KEYS
+
+ static String[] newEqualKeys() { // returns equal-but-distinct copies of INSERTION_KEYS, same order
+ String[] copies = new String[INSERTION_KEYS.length];
+ for (int idx = 0; idx < copies.length; idx++) {
+ copies[idx] = new String(INSERTION_KEYS[idx]); // deliberate copy: equals() holds, == does not
+ }
+ return copies;
+ }
+
+ static void fill(Map State is per-thread ({@link Scope#Thread}) so no map is ever shared — the read-mostly shared
+ * case lives in {@link ImmutableMapBenchmark}, and the contended case in the {@code
+ * ConcurrentHashtable} / {@code ThreadSafeMap} suites. Running at {@code @Threads(8)} keeps
+ * allocation / GC interactions visible without introducing lock contention.
+ *
+ * Comparing different Map types:
+ *
+ * Uncontended synchronization tax. A {@link Collections#synchronizedMap} case is included
+ * to measure what synchronization costs when there is no contention: because each thread
+ * owns its synchronized map, the monitor is only ever locked by one thread. On JVMs with biased
+ * locking (default through Java 14) repeated same-thread locking should be nearly free; on Java 15+
+ * (biased locking disabled by default, JEP 374) it pays the full uncontended CAS. The
+ * unsynchronized {@code hashMap} {@code get}/{@code iterate} methods are the in-harness baseline;
+ * the tax is the delta to the {@code synchronizedHashMap} equivalents. Comparing across JVM
+ * versions at stock flags shows the biased-locking effect. (Results pending a fresh multi-JVM run.)
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@State(Scope.Thread)
+public class SingleThreadedMapBenchmark {
+ static final String[] INSERTION_KEYS = {
+ "foo", "bar", "baz", "quux", "foobar", "foobaz", "key0", "key1", "key2", "key3"
+ }; // compile-time string literals, so each key is an interned instance
+
+ // Distinct String instances so lookups exercise equals(), not identity.
+ static final String[] EQUAL_KEYS = newEqualKeys(); // equal content, same order as INSERTION_KEYS
+
+ static String[] newEqualKeys() { // returns equal-but-distinct copies of INSERTION_KEYS, same order
+ String[] copies = new String[INSERTION_KEYS.length];
+ for (int idx = 0; idx < copies.length; idx++) {
+ copies[idx] = new String(INSERTION_KEYS[idx]); // deliberate copy: equals() holds, == does not
+ }
+ return copies;
+ }
+
+ static void fill(Map TagMap is the preferred way to store tags.
- *
- * TagMap excels at storing primitives, copying between TagMap instances, and builder idioms.
- *
- * Iterator traversal with TagMap is relatively slow, but TagMap#forEach is on par (and slightly)
- * faster than traditional map entry iteration.
- *
- * HashMap & LinkedHashMap perform equally well on get operations.
- *
- * HashMap is 2x faster throughput-wise to create and has less memory overhead because there's no
- * linked list to capture insertion order.
- *
- * TreeMap is useful when a custom Comparator is needed -- see CaseInsensitiveMapBenchmark
- *
- * HashMap & TagMap also perform exceedingly well in cases where the exact same object is used
- * for put & get operations. e.g. when using String literals or Class literals as keys
+ *
+ *
+ *
- * Benchmark comparing different Map-s...
- *
- *
- *
- * MacBook M1 1 thread (Java 21)
- *
- * Benchmark Mode Cnt Score Error Units
- * UnsynchronizedMapBenchmark.clone_hashMap thrpt 6 12482267.775 ± 236852.198 ops/s
- * UnsynchronizedMapBenchmark.clone_linkedHashMap thrpt 6 12414187.888 ± 224418.265 ops/s
- * UnsynchronizedMapBenchmark.clone_tagMap thrpt 6 49638156.234 ± 2972608.986 ops/s
- * UnsynchronizedMapBenchmark.clone_treeMap thrpt 6 16201216.086 ± 619985.352 ops/s
- *
- * UnsynchronizedMapBenchmark.create_hashMap thrpt 6 22534042.260 ± 819970.046 ops/s
- * UnsynchronizedMapBenchmark.create_hashMap_sized thrpt 6 21871270.375 ± 893842.109 ops/s
- * UnsynchronizedMapBenchmark.create_linkedHashMap thrpt 6 12905731.242 ± 8930007.156 ops/s
- * UnsynchronizedMapBenchmark.create_tagMap thrpt 6 15794277.380 ± 6069426.265 ops/s
- * UnsynchronizedMapBenchmark.create_treeMap thrpt 6 4711961.814 ± 48582.934 ops/s
- *
- * UnsynchronizedMapBenchmark.get_hashMap thrpt 6 212201631.841 ± 6223069.782 ops/s
- * UnsynchronizedMapBenchmark.get_hashMap_sameKey thrpt 6 392053406.085 ± 3938305.125 ops/s
- * UnsynchronizedMapBenchmark.get_linkedHashMap thrpt 6 210734968.352 ± 3627805.282 ops/s
- * UnsynchronizedMapBenchmark.get_tagMap thrpt 6 201864656.534 ± 4596147.771 ops/s
- * UnsynchronizedMapBenchmark.get_tagMap_sameKey thrpt 6 256311645.716 ± 13315886.308 ops/s
- * UnsynchronizedMapBenchmark.get_treeMap thrpt 6 94606404.423 ± 806879.890 ops/s
- *
- * MacBook M1 with 8 threads (Java 21)
- *
- * Benchmark Mode Cnt Score Error Units
- * UnsynchronizedMapBenchmark.clone_hashMap thrpt 6 89645484.526 ± 6546683.185 ops/s
- * UnsynchronizedMapBenchmark.clone_linkedHashMap thrpt 6 78233577.417 ± 7204526.742 ops/s
- * UnsynchronizedMapBenchmark.clone_tagMap thrpt 6 315228772.058 ± 20689692.104 ops/s
- * UnsynchronizedMapBenchmark.clone_treeMap thrpt 6 102416350.341 ± 7258040.561 ops/s
- *
- * UnsynchronizedMapBenchmark.create_hashMap thrpt 6 150462966.692 ± 11243713.572 ops/s
- * UnsynchronizedMapBenchmark.create_hashMap_sized thrpt 6 111213025.138 ± 4593366.916 ops/s
- * UnsynchronizedMapBenchmark.create_linkedHashMap thrpt 6 80882399.133 ± 19567359.487 ops/s
- * UnsynchronizedMapBenchmark.create_tagMap thrpt 6 93026443.634 ± 11831456.794 ops/s
- * UnsynchronizedMapBenchmark.create_tagMap_via_ledger thrpt 6 70769351.353 ± 3821543.185 ops/s
- * UnsynchronizedMapBenchmark.create_treeMap thrpt 6 32737595.187 ± 2638992.844 ops/s
- *
- * UnsynchronizedMapBenchmark.get_hashMap thrpt 6 1154522356.093 ± 116525174.735 ops/s
- * UnsynchronizedMapBenchmark.get_hashMap_sameKey thrpt 6 1760800709.734 ± 33551896.166 ops/s
- * UnsynchronizedMapBenchmark.get_linkedHashMap thrpt 6 1191208257.933 ± 49810465.132 ops/s
- * UnsynchronizedMapBenchmark.get_tagMap thrpt 6 933455574.646 ± 154146815.295 ops/s
- * UnsynchronizedMapBenchmark.get_tagMap_sameKey thrpt 6 1138764608.359 ± 88352911.617 ops/s
- * UnsynchronizedMapBenchmark.get_treeMap thrpt 6 490872723.682 ± 87017311.892 ops/s
- *
- * UnsynchronizedMapBenchmark.iterate_hashMap thrpt 6 351222668.708 ± 35242914.752 ops/s
- * UnsynchronizedMapBenchmark.iterate_linkedHashMap thrpt 6 406635839.285 ± 55990655.235 ops/s
- * UnsynchronizedMapBenchmark.iterate_tagMap thrpt 6 185264584.604 ± 15137886.028 ops/s
- * UnsynchronizedMapBenchmark.iterate_tagMap_forEach thrpt 6 422407681.630 ± 19493455.109 ops/s
- * UnsynchronizedMapBenchmark.iterate_treeMap thrpt 6 392884747.896 ± 80190674.417 ops/s
- *
- */
-@Fork(2)
-@Warmup(iterations = 2)
-@Measurement(iterations = 3)
-@Threads(8)
-public class UnsynchronizedMapBenchmark {
- static final String[] INSERTION_KEYS = {
- "foo", "bar", "baz", "quux", "foobar", "foobaz", "key0", "key1", "key2", "key3"
- };
-
- static final String[] EQUAL_KEYS =
- init(
- () -> {
- String[] keys = new String[INSERTION_KEYS.length];
- for (int i = 0; i < INSERTION_KEYS.length; ++i) {
- keys[i] = new String(INSERTION_KEYS[i]);
- }
- return keys;
- });
-
- static int sharedLookupIndex = 0;
-
- static String nextLookupKey() {
- return nextLookupKey(EQUAL_KEYS);
- }
-
- static String nextLookupKey(String[] keys) {
- int localIndex = ++sharedLookupIndex;
- if (localIndex >= keys.length) {
- sharedLookupIndex = localIndex = 0;
- }
- return keys[localIndex];
- }
-
- static